If a disk failure leaves an OSD in the down state shown below and it never recovers (a 0 in the reweight column means the OSD has already been marked out of the cluster):
[root@os-node3 ~]# ceph osd tree
# id    weight  type name       up/down reweight
-1      4       root default
-2      1               host os-node5
24      1                       osd.24  down    0
The ID of the down OSD can be pulled out with a one-liner:
osd_id=`ceph osd tree | grep down | grep osd | awk '{print $3}' | awk -F . '{print $2}'`
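Note that if more than one OSD happens to be down, the one-liner above returns several IDs. A minimal sketch of a loop that handles each down OSD in turn (assuming the same old-style column layout shown above):
# iterate over every down OSD instead of capturing a single ID
for osd_id in $(ceph osd tree | grep down | grep osd | awk '{print $3}' | awk -F . '{print $2}'); do
    echo "found down OSD: osd.${osd_id}"
done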
1) Remove the OSD from the cluster
[root@PBS-OS-node155 ~]# ceph osd rm 24
removed osd.24
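If the OSD were still flagged up, ceph osd rm would refuse to remove it; in that case it can first be marked out and down (standard ceph commands, added here as an aside):
[root@PBS-OS-node155 ~]# ceph osd out 24
[root@PBS-OS-node155 ~]# ceph osd down 24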
2) Remove the OSD from the cluster's CRUSH map
[root@PBS-OS-node155 ~]# ceph osd crush rm osd.24
removed item id 24 name 'osd.24' from crush map
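At this point osd.24 should no longer appear in the CRUSH tree, which is easy to confirm (the grep should print nothing):
[root@PBS-OS-node155 ~]# ceph osd tree | grep osd.24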
3) Delete the OSD's authentication key from the cluster
[root@PBS-OS-node155 ~]# ceph auth del osd.24
updated
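As a quick check, fetching the deleted key should now fail with an ENOENT error:
[root@PBS-OS-node155 ~]# ceph auth get osd.24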
4) Unmount the disk the OSD was using
[root@PBS-OS-node155 ~]# umount /var/lib/ceph/osd/ceph-24
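If the dead disk leaves I/O hanging, a plain umount can block; a lazy unmount is a common fallback (my suggestion, not part of the original procedure):
[root@PBS-OS-node155 ~]# umount -l /var/lib/ceph/osd/ceph-24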
The whole removal procedure can be scripted as follows:
#!/bin/bash
# pick out the ID of the down OSD from the ceph osd tree output
osd_id=`ceph osd tree | grep down | grep osd | awk '{print $3}' | awk -F . '{print $2}'`
# remove the OSD from the cluster, the CRUSH map and the auth database
ceph osd rm ${osd_id}
ceph osd crush rm osd.${osd_id}
ceph auth del osd.${osd_id}
# finally unmount the failed disk
umount /var/lib/ceph/osd/ceph-${osd_id}
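The script assumes exactly one OSD is down. A slightly hardened sketch (my addition) bails out when nothing is down, so the removal commands are never run with an empty ID:
#!/bin/bash
set -e
osd_id=`ceph osd tree | grep down | grep osd | awk '{print $3}' | awk -F . '{print $2}'`
# abort rather than call the ceph commands with a blank argument
if [ -z "${osd_id}" ]; then
    echo "no down osd found" >&2
    exit 1
fi
# ...followed by the same ceph osd rm / crush rm / auth del / umount steps as above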
After replacing the disk, add it back into the cluster as an OSD:
#!/bin/bash
# allocate a new OSD ID (ceph reuses the lowest free ID, here 24)
osd_id=`ceph osd create`
# build a fresh filesystem on the replacement disk (assumed here to be /dev/sdf)
mkfs.xfs -f /dev/sdf
mount /dev/sdf /var/lib/ceph/osd/ceph-${osd_id}
# xattrs are on by default with XFS; this remount only matters for ext3/ext4
mount -o remount,user_xattr /var/lib/ceph/osd/ceph-${osd_id}
# initialize the OSD data directory, generate its key and register it
ceph-osd -i ${osd_id} --mkfs --mkkey
ceph auth add osd.${osd_id} osd 'allow *' mon 'allow profile osd' -i /var/lib/ceph/osd/ceph-${osd_id}/keyring
# put the OSD back into the CRUSH map (it was removed above); weight and host taken from this example
ceph osd crush add osd.${osd_id} 1 host=os-node5
# mark the OSD as sysvinit-managed and start it
touch /var/lib/ceph/osd/ceph-${osd_id}/sysvinit
/etc/init.d/ceph start osd.${osd_id}
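Once the OSD starts, backfill kicks in automatically. A final check (not part of the original script): the new osd.24 should show as up in the tree, and ceph -w lets you watch recovery until the cluster reports HEALTH_OK again:
[root@PBS-OS-node155 ~]# ceph osd tree
[root@PBS-OS-node155 ~]# ceph -w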