
Storage pool management
- 1. List the storage pools
[root@ceph141 ~]# ceph osd pool ls
.mgr
violet
[root@ceph141 ~]#
- 2. List the storage pools with their IDs
[root@ceph141 ~]# ceph osd lspools
1 .mgr
2 violet
[root@ceph141 ~]#
- 3. Show detailed information about the storage pools
[root@ceph141 ~]# ceph osd pool ls detail
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 21 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr read_balance_score 9.09
pool 2 'violet' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode on last_change 63 lfor 0/0/61 flags hashpspool stripe_width 0 read_balance_score 1.97
[root@ceph141 ~]#
- 4. Create a replicated pool (the default type) and an erasure-coded pool (an erasure-code profile sketch follows this step)
[root@ceph141 ~]# ceph osd pool create xixi replicated # create a replicated pool
pool 'xixi' created
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool create haha erasure # create an erasure-coded pool
pool 'haha' created
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool ls detail
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 21 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr read_balance_score 9.09
pool 2 'violet' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode on last_change 63 lfor 0/0/61 flags hashpspool stripe_width 0 read_balance_score 1.97
pool 3 'xixi' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode on last_change 83 lfor 0/0/81 flags hashpspool stripe_width 0 read_balance_score 2.25
pool 4 'haha' erasure profile default size 4 min_size 3 crush_rule 1 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 88 flags hashpspool stripe_width 8192
[root@ceph141 ~]#
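The erasure-coded pool above picked up the default erasure-code profile, which is why it reports size 4 (k+m). A minimal sketch for inspecting that profile and creating a custom one; the profile name "myprofile", the k/m values, and the pool name "ec-demo" are only illustrative and are not used elsewhere in these notes:
    ceph osd erasure-code-profile get default            # show k, m, plugin, crush-failure-domain
    ceph osd erasure-code-profile set myprofile k=4 m=2  # hypothetical custom profile
    ceph osd pool create ec-demo erasure myprofile       # create an EC pool that uses it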
- 5. Modify a pool's configuration parameters (more examples in the sketch after this step)
[root@ceph141 ~]# ceph osd pool set xixi size 2
set pool 3 size to 2
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool ls detail
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 21 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr read_balance_score 9.09
pool 2 'violet' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode on last_change 63 lfor 0/0/61 flags hashpspool stripe_width 0 read_balance_score 1.97
pool 3 'xixi' replicated size 2 min_size 1 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode on last_change 95 lfor 0/0/81 flags hashpspool stripe_width 0 read_balance_score 2.25
pool 4 'haha' erasure profile default size 4 min_size 3 crush_rule 1 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode on last_change 92 lfor 0/0/90 flags hashpspool stripe_width 8192
[root@ceph141 ~]#
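Other pool parameters are changed the same way. A small sketch, assuming the pool is still named xixi and using made-up values, that sets min_size explicitly (Ceph may also adjust it automatically when size changes, as the output above shows) and caps the pool with quotas:
    ceph osd pool set xixi min_size 1                    # pair 2 replicas with min_size 1
    ceph osd pool set-quota xixi max_objects 100000      # cap the object count
    ceph osd pool set-quota xixi max_bytes 10737418240   # cap the pool at 10 GiB (raw bytes)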
- 6. Rename a storage pool
[root@ceph141 ~]# ceph osd pool rename xixi hehe
pool 'xixi' renamed to 'hehe'
[root@ceph141 ~]#
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool ls detail
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 21 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr read_balance_score 9.09
pool 2 'violet' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode on last_change 63 lfor 0/0/61 flags hashpspool stripe_width 0 read_balance_score 1.97
pool 3 'hehe' replicated size 2 min_size 1 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode on last_change 95 lfor 0/0/81 flags hashpspool stripe_width 0 read_balance_score 2.25
pool 4 'haha' erasure profile default size 4 min_size 3 crush_rule 1 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode on last_change 92 lfor 0/0/90 flags hashpspool stripe_width 8192
[root@ceph141 ~]#
- 7. Show pool utilization statistics (see also the ceph df sketch after this step)
[root@ceph141 ~]# rados df
POOL_NAME USED OBJECTS CLONES COPIES MISSING_ON_PRIMARY UNFOUND DEGRADED RD_OPS RD WR_OPS WR USED COMPR UNDER COMPR
.mgr 1.3 MiB 2 0 6 0 0 0 214 306 KiB 147 1.6 MiB 0 B 0 B
haha 0 B 0 0 0 0 0 0 0 0 B 0 0 B 0 B 0 B
hehe 0 B 0 0 0 0 0 0 0 0 B 0 0 B 0 B 0 B
violet 0 B 0 0 0 0 0 0 1 0 B 1 0 B 0 B 0 B
total_objects 2
total_used 280 MiB
total_avail 5.3 TiB
total_space 5.3 TiB
[root@ceph141 ~]#
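ceph df gives a similar capacity view, grouped into raw storage (by device class) and per-pool usage, and is often easier to read than rados df:
    ceph df          # cluster-wide RAW capacity plus per-pool STORED/USED/%USED
    ceph df detail   # additionally shows quotas and other per-pool columns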
- 8. Get I/O information for a specific pool or for all pools
[root@ceph141 ~]# ceph osd pool stats
pool .mgr id 1
nothing is going on
pool violet id 2
nothing is going on
pool hehe id 3
nothing is going on
pool haha id 4
nothing is going on
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool stats violet
pool violet id 2
nothing is going on
[root@ceph141 ~]#
- 9. Get a specific configuration parameter of a pool (a "get all" sketch follows this step)
[root@ceph141 ~]# ceph osd pool get hehe size
size: 2
[root@ceph141 ~]#
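If you want every readable parameter at once instead of querying keys one by one, the "all" keyword works with the same subcommand (pool name hehe as above):
    ceph osd pool get hehe all       # dump all readable options: size, min_size, pg_num, crush_rule, ...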
- 10. Get the size-related configuration of the pools
[root@ceph141 ~]# ceph osd dump | grep 'replicated size'
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 21 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr read_balance_score 9.09
pool 2 'violet' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode on last_change 63 lfor 0/0/61 flags hashpspool stripe_width 0 read_balance_score 1.97
pool 3 'hehe' replicated size 2 min_size 1 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode on last_change 95 lfor 0/0/81 flags hashpspool stripe_width 0 read_balance_score 2.25
[root@ceph141 ~]#
The two safeguards governing storage pool deletion
- 1. Overview of the pool-deletion safeguards
    Once a storage pool is deleted, all of its data is destroyed and cannot be recovered.
    For safety, Ceph therefore protects pools with two mechanisms: "nodelete" and "mon_allow_pool_delete".
    - nodelete:
        A per-pool flag. Once it is set on a pool, that pool cannot be deleted. The default is false, i.e. deletion is allowed.
    - mon_allow_pool_delete:
        Tells all mon daemons whether pools may be deleted. The default is false, i.e. deletion is not allowed.
    In production, for safety, it is recommended to set each pool's nodelete attribute to "true" and keep mon_allow_pool_delete at false.
    Either 'nodelete' or 'mon_allow_pool_delete' alone can veto a deletion; to actually delete a pool, both must allow it. This is Ceph's pool protection mechanism (a sketch for making the mon setting persistent follows below).
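Note that "ceph tell mon.* injectargs ..." (used in the cases below) only changes the running mon daemons; the value is lost when they restart. A sketch, assuming the cluster uses the centralized config database (Mimic and later), for making the change persistent and reverting it afterwards:
    ceph config set mon mon_allow_pool_delete true    # persists across mon restarts
    ceph config get mon mon_allow_pool_delete         # verify the stored value
    ceph config set mon mon_allow_pool_delete false   # lock deletion again when done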
- 2. Verification case for mon_allow_pool_delete
[root@ceph141 ~]# ceph osd pool ls
.mgr
violet
hehe
haha
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool get hehe nodelete
nodelete: false
[root@ceph141 ~]#
[root@ceph141 ~]# ceph tell mon.* injectargs --mon_allow_pool_delete=true
mon.ceph141: {}
mon.ceph141: mon_allow_pool_delete = ''
mon.ceph142: {}
mon.ceph142: mon_allow_pool_delete = ''
mon.ceph143: {}
mon.ceph143: mon_allow_pool_delete = ''
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool delete hehe hehe --yes-i-really-really-mean-it
pool 'hehe' removed
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool ls
.mgr
violet
haha
[root@ceph141 ~]#
[root@ceph141 ~]# ceph tell mon.* injectargs --mon_allow_pool_delete=false
mon.ceph141: {}
mon.ceph141: mon_allow_pool_delete = ''
mon.ceph142: {}
mon.ceph142: mon_allow_pool_delete = ''
mon.ceph143: {}
mon.ceph143: mon_allow_pool_delete = ''
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool get haha nodelete
nodelete: false
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool delete haha haha --yes-i-really-really-mean-it
Error EPERM: pool deletion is disabled; you must first set the mon_allow_pool_delete config option to true before you can destroy a pool
[root@ceph141 ~]#
- 3. nodelete example
[root@ceph141 ~]# ceph osd pool get haha nodelete
nodelete: false
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool set haha nodelete true
set pool 4 nodelete to true
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool get haha nodelete
nodelete: true
[root@ceph141 ~]#
[root@ceph141 ~]# ceph tell mon.* injectargs --mon_allow_pool_delete=true
mon.ceph141: {}
mon.ceph141: mon_allow_pool_delete = ''
mon.ceph142: {}
mon.ceph142: mon_allow_pool_delete = ''
mon.ceph143: {}
mon.ceph143: mon_allow_pool_delete = ''
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool delete haha haha --yes-i-really-really-mean-it
Error EPERM: pool deletion is disabled; you must unset nodelete flag for the pool first
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool set haha nodelete false
set pool 4 nodelete to false
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool delete haha haha --yes-i-really-really-mean-it
pool 'haha' removed
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool ls
.mgr
violet
[root@ceph141 ~]#
Configuring a pool's pg_num, pgp_num, size, and min_size
Reference links:
https://docs.ceph.com/en/squid/rados/operations/placement-groups/
https://docs.ceph.com/en/squid/rados/operations/placement-groups/#choosing-number-of-placement-groups
- 1. Officially recommended way to size the PG count
         number of OSDs * 100
        ----------------------   --->   PG count
           pool replica size
    Assuming you have 9 disks (i.e. 9 OSDs), the calculation is:
         9 * 100
        ---------   --->   300 PGs
            3
    The raw result is 300, but the official recommendation is a power of two, and the power of two closest to 300 is 256, so a reasonable PG count for this cluster is 256 (a short calculation sketch follows below).
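A small shell sketch of that rule of thumb; the OSD count and replica size are just the example numbers from above:
    osds=9; size=3
    raw=$(( osds * 100 / size ))                               # 300
    lo=1; while (( lo * 2 <= raw )); do lo=$(( lo * 2 )); done
    hi=$(( lo * 2 ))
    # pick whichever power of two is closer to the raw value
    if (( raw - lo <= hi - raw )); then pg=$lo; else pg=$hi; fi
    echo "suggested pg_num: ${pg}"                             # prints 256 for this example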
- 2. When autoscale_mode is on, a manually changed PG count is automatically reverted to the autoscaler's target (see the autoscale-status sketch after this step)
[root@ceph141 ~]# ceph osd pool ls detail
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 21 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr read_balance_score 9.09
pool 2 'violet' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode on last_change 63 lfor 0/0/61 flags hashpspool stripe_width 0 read_balance_score 1.97
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool set violet pg_num 2
set pool 2 pg_num to 2
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool set violet pgp_num 2
set pool 2 pgp_num to 2
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool ls detail
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 21 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr read_balance_score 9.09
pool 2 'violet' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 29 pgp_num 27 pg_num_target 2 pgp_num_target 2 pg_num_pending 28 autoscale_mode on last_change 121 lfor 0/121/121 flags hashpspool stripe_width 0 read_balance_score 2.17
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool ls detail
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 21 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr read_balance_score 9.09
pool 2 'violet' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode on last_change 144 lfor 0/139/142 flags hashpspool stripe_width 0 read_balance_score 1.97
[root@ceph141 ~]#
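While the autoscaler is undoing the manual change, its view of the pool can be inspected directly:
    ceph osd pool autoscale-status                  # per-pool size, rate, current PG_NUM and NEW PG_NUM
    ceph osd pool get violet pg_autoscale_mode      # per-pool autoscale mode: on / off / warn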
- 3. Turn off autoscale_mode, then change pg_num and pgp_num again (a sketch for changing the default for new pools follows this step)
[root@ceph141 ~]# ceph osd pool set violet pg_autoscale_mode off
set pool 2 pg_autoscale_mode to off
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool ls detail
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 21 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr read_balance_score 9.09
pool 2 'violet' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode off last_change 149 lfor 0/139/142 flags hashpspool stripe_width 0 read_balance_score 1.97
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool set violet pgp_num 2
set pool 2 pgp_num to 2
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool set violet pg_num 2
set pool 2 pg_num to 2
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool ls detail
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 21 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr read_balance_score 9.09
pool 2 'violet' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 30 pg_num_target 2 pgp_num_target 2 autoscale_mode off last_change 154 lfor 0/139/142 flags hashpspool stripe_width 0 read_balance_score 2.25
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool ls detail
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 21 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr read_balance_score 9.09
pool 2 'violet' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 16 pgp_num 16 pg_num_target 2 pgp_num_target 2 autoscale_mode off last_change 239 lfor 0/239/237 flags hashpspool stripe_width 0 read_balance_score 2.25
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool ls detail
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 21 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr read_balance_score 9.09
pool 2 'violet' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 2 pgp_num 2 autoscale_mode off last_change 316 lfor 0/316/314 flags hashpspool stripe_width 0 read_balance_score 4.46
[root@ceph141 ~]#
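Turning the autoscaler off per pool, as above, only affects that pool. If new pools should also start with it disabled, a hedged sketch using the global default option (assuming your release exposes osd_pool_default_pg_autoscale_mode):
    ceph config set global osd_pool_default_pg_autoscale_mode off   # new pools default to autoscale off
    ceph config get global osd_pool_default_pg_autoscale_mode       # verify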
- 4. Specify pg_num and pgp_num when creating a pool
[root@ceph141 ~]# ceph osd pool create haha 128 128 --autoscale_mode off
pool 'haha' created
[root@ceph141 ~]#
[root@ceph141 ~]# ceph osd pool ls detail
pool 1 '.mgr' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 autoscale_mode on last_change 21 flags hashpspool stripe_width 0 pg_num_max 32 pg_num_min 1 application mgr read_balance_score 9.09
pool 2 'violet' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 2 pgp_num 2 autoscale_mode off last_change 316 lfor 0/316/314 flags hashpspool stripe_width 0 read_balance_score 4.46
pool 5 'xixi' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 1 pgp_num 1 pg_num_target 32 pgp_num_target 32 autoscale_mode on last_change 324 flags hashpspool stripe_width 0 read_balance_score 9.09
pool 6 'haha' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 128 pgp_num 128 autoscale_mode off last_change 323 flags hashpspool stripe_width 0 read_balance_score 2.11
[root@ceph141 ~]#
- 5. Summary
    pg:
        A pool can have many PGs and data is distributed across them; pg_num and pgp_num should be kept equal.
    size:
        How many copies of the data are stored; for a replicated pool the default, if unspecified, is 3 replicas.
    min_size:
        The minimum number of replicas that must be available for I/O. With 3 replicas, min_size 1 means the pool keeps serving I/O with 2 nodes down, while min_size 2 tolerates only 1 node down.
    Tip: the difference between pg_num and pgp_num:
    - pg_num:
        The number of PGs to create.
        When PG autoscaling is enabled, the cluster can recommend or automatically adjust each pool's PG count based on expected cluster and pool utilization.
        Reference link:
            https://docs.ceph.com/en/latest/rados/operations/placement-groups/#autoscaling-placement-groups
    - pgp_num:
        The total number of PGs used for placement purposes. It should equal pg_num, except transiently while pg_num is being increased or decreased.
        Reference link:
            https://docs.ceph.com/en/latest/rados/operations/pools/#creating-a-pool
Pool configuration parameters worth paying attention to:
    size:
        Number of object replicas in the pool; the default is 3.
    min_size:
        Minimum number of replicas required for I/O.
    pg_num:
        Number of PGs in the pool.
    pgp_num:
        Effective number of PGs used when computing data placement.
    crush_rule:
        The CRUSH rule used to map object placement across the cluster.
    nodelete:
        Controls whether the pool can be deleted.
    nopgchange:
        Controls whether the pool's pg_num and pgp_num can be changed.
    nosizechange:
        Controls whether the pool's size can be changed.
    noscrub and nodeep-scrub:
        Control whether the pool can be scrubbed or deep-scrubbed; useful for riding out temporary high I/O load (a toggle sketch follows this list).
    scrub_min_interval:
        Minimum interval between scrubs while cluster load is low; the default is 0, in which case the "osd_scrub_min_interval" option from the configuration applies.
    scrub_max_interval:
        Maximum interval between scrubs of the pool; the default is 0, in which case the "osd_scrub_max_interval" option applies.
    deep_scrub_interval:
        Interval between deep scrubs of the pool; the default is 0, in which case the "osd_deep_scrub_interval" option applies.
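The boolean flags above (nodelete, nopgchange, nosizechange, noscrub, nodeep-scrub) are all toggled through "ceph osd pool set". A small sketch, reusing the violet pool from earlier, for pausing scrubbing during a load spike and re-enabling it afterwards:
    ceph osd pool set violet noscrub true
    ceph osd pool set violet nodeep-scrub true
    # ... wait for the I/O spike to pass, then re-enable scrubbing:
    ceph osd pool set violet noscrub false
    ceph osd pool set violet nodeep-scrub false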
Commonly used RBD image feature flags (a creation/toggle sketch follows this list):
    - layering:
        Layered cloning: an image's data is organized in layers so that copy-on-write clones can be created from it.
    - striping:
        Whether data is striped across multiple data objects.
    - exclusive-lock:
        Exclusive locking for images that may see multiple writers; only one client at a time is allowed to write to the image.
    - object-map:
        An object bitmap used mainly to speed up operations such as import, export, and used-capacity accounting; depends on the "exclusive-lock" feature.
    - fast-diff:
        Quickly computes data differences between snapshots, which simplifies snapshot management; depends on the "object-map" feature.
    - deep-flatten:
        Removes the dependency between a parent image and its child images and snapshots when flattening.
    - journaling:
        Journals every modification to the image, which enables off-site replication (e.g. with rbd-mirror); depends on the "exclusive-lock" feature.
    - data-pool:
        Stores the image's data objects in a separate (typically erasure-coded) pool, so that the image's metadata and data can live in different pools.
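Features are normally selected when an image is created and can be toggled afterwards with "rbd feature". A minimal sketch; the pool/image names and the size are illustrative and the image is not used elsewhere in these notes:
    rbd create violet/disk01 --size 10G --image-feature layering,exclusive-lock,object-map,fast-diff
    rbd feature enable violet/disk01 journaling         # requires exclusive-lock to be enabled
    rbd feature disable violet/disk01 fast-diff object-map
    rbd info violet/disk01                              # the "features:" line reflects the changes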