Hostname | OS | Ceph version | Role | IP
ceph1 | RHEL7.3_64bit | jewel | admin/mon1 | 10.10.64.130
ceph2 | RHEL7.3_64bit | jewel | mon2 | 10.10.64.131
ceph3 | RHEL7.3_64bit | jewel | mon3 | 10.10.64.132
ceph4 | RHEL7.3_64bit | jewel | osd1 | 10.10.64.133
ceph5 | RHEL7.3_64bit | jewel | osd2 | 10.10.64.134
ceph6 | RHEL7.3_64bit | jewel | osd3 | 10.10.64.135
client_ceph | RHEL7.3_64bit | jewel | client_ceph | 10.10.64.136
# OS yum repository
[root@localhost ~]# cat /etc/yum.repos.d/base.repo
[rhel7.3_64bit]
name=Red Hat Enterprise Linux $releasever - $basearch - Source
baseurl=http://10.0.40.34/yum/rhel7.3_64bit
enabled=1
gpgcheck=0
# Ceph yum repository
[root@localhost ~]# cat /etc/yum.repos.d/ceph.repo
[ceph]
name=ceph
baseurl=http://10.10.64.120/mirrors.aliyun.com/ceph/rpm-jewel/el7/x86_64
enabled=1
gpgcheck=0
[ceph_noarch]
name=ceph_noarch
baseurl=http://10.10.64.120/mirrors.aliyun.com/ceph/rpm-jewel/el7/noarch/
enabled=1
gpgcheck=0
Because this is an offline installation, many dependency packages are missing from the two yum repositories above, so I downloaded the key dependencies and installed them manually.
# flask dependencies
[root@localhost ~]# cd flask/
[root@localhost flask]# rpm -ivh *
warning: python-babel-0.9.6-8.el7.noarch.rpm: Header V3 RSA/SHA256 Signature, key ID f4a80eb5: NOKEY
Preparing... ################################# [100%]
Updating / installing...
1:python-werkzeug-0.9.1-2.el7 ################################# [ 17%]
2:python-markupsafe-0.11-10.el7 ################################# [ 33%]
3:python-itsdangerous-0.23-2.el7 ################################# [ 50%]
4:python-babel-0.9.6-8.el7 ################################# [ 67%]
5:python-jinja2-2.7.2-2.el7 ################################# [ 83%]
6:python-flask-1:0.10.1-4.el7 ################################# [100%]
# selinux dependencies
[root@localhost ~]# cd selinux_policy/
[root@localhost selinux_policy]# ls
selinux-policy-3.13.1-166.el7_4.9.noarch.rpm
selinux-policy-targeted-3.13.1-166.el7_4.9.noarch.rpm
[root@localhost selinux_policy]# yum -y install selinux-policy-*
# other dependencies
[root@localhost ~]# rpm -ivh userspace-rcu-0.7.16-1.el7.x86_64.rpm
[root@localhost ~]# rpm -ivh lttng-ust-2.4.1-4.el7.x86_64.rpm
[root@localhost ~]# rpm -ivh leveldb-1.12.0-11.el7.x86_64.rpm
[root@localhost ~]# rpm -ivh libbabeltrace-1.2.4-3.el7.x86_64.rpm
[root@localhost ~]# rpm -ivh fcgi-2.4.0-25.el7.x86_64.rpm
[root@localhost ~]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
10.10.64.130 ceph1
10.10.64.131 ceph2
10.10.64.132 ceph3
10.10.64.133 ceph4
10.10.64.134 ceph5
10.10.64.135 ceph6
10.10.64.136 client_ceph
[root@localhost ~]# hostnamectl set-hostname ceph1
[root@localhost ~]# getenforce
Disabled
[root@localhost ~]# systemctl status iptables
Unit iptables.service could not be found.
[root@localhost ~]# systemctl status firewalld.service
firewalld.service - firewalld - dynamic firewall daemon
Loaded: loaded (/usr/lib/systemd/system/firewalld.service; disabled; vendor preset: enabled)
Active: inactive (dead)
Docs: man:firewalld(1)
[root@localhost ~]# crontab -l
02 01 * * * /usr/sbin/ntpdate -u 10.0.54.54
(remaining output omitted)
Adjust these settings on every node according to the environment description at the top of this article.
[root@ceph1 my-cluster]# yum -y install ceph-deploy
[root@ceph1 ~]# ssh-keygen
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
/root/.ssh/id_rsa already exists.
Overwrite (y/n)? y
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
fa:9b:a0:a9:35:c7:29:90:62:53:ed:b1:92:8a:fc:88 root@ceph1
The key's randomart image is:
+--[ RSA 2048]----+
| |
| . |
| . o |
| ..o o |
|.ooo o S |
|+.o... o |
|.o + * |
|. o. * o . |
|E..oo +. |
+-----------------+
[root@ceph1 ~]# ssh-copy-id root@10.10.64.131
[root@ceph1 ~]# ssh-copy-id root@10.10.64.132
[root@ceph1 ~]# ssh-copy-id root@10.10.64.133
[root@ceph1 ~]# ssh-copy-id root@10.10.64.134
[root@ceph1 ~]# ssh-copy-id root@10.10.64.135
[root@ceph1 ~]# mkdir my-cluster && cd my-cluster/
[root@ceph1 ~]# ceph-deploy new ceph{1..6}
[ceph_deploy.new][DEBUG ] Resolving host ceph6
[ceph_deploy.new][DEBUG ] Monitor ceph6 at 10.10.64.135
[ceph_deploy.new][DEBUG ] Monitor initial members are ['ceph1', 'ceph2', 'ceph3', 'ceph4', 'ceph5', 'ceph6']
[ceph_deploy.new][DEBUG ] Monitor addrs are ['10.10.64.130', '10.10.64.131', '10.10.64.132', '10.10.64.133', '10.10.64.134', '10.10.64.135']
[ceph_deploy.new][DEBUG ] Creating a random mon key...
[ceph_deploy.new][DEBUG ] Writing monitor keyring to ceph.mon.keyring...
[ceph_deploy.new][DEBUG ] Writing initial config to ceph.conf...
# Add the network information
[root@ceph1 my-cluster]# vim ceph.conf
public network = 10.10.64.130/24
# Keep only the following entries and delete the rest
mon_initial_members = ceph1, ceph2, ceph3
mon_host = 10.10.64.130,10.10.64.131,10.10.64.132
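After these edits, the [global] section of ceph.conf should look roughly like the sketch below. This is only for orientation: the fsid is the one ceph-deploy generated (it also shows up later in the ceph -s output), and the auth lines are the defaults that ceph-deploy writes.
[global]
fsid = 59d344b8-5051-43e4-90c6-c29c2a91961f
mon_initial_members = ceph1, ceph2, ceph3
mon_host = 10.10.64.130,10.10.64.131,10.10.64.132
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
public network = 10.10.64.130/24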
[root@ceph1 my-cluster]# ceph-deploy install ceph1 ceph2 ceph3
# Initialize the monitors and gather all the keys
[root@ceph1 my-cluster]# ceph-deploy mon create-initial
keyring auth get-or-create client.bootstrap-mgr mon allow profile bootstrap-mgr
[ceph1][INFO ] Running command: /usr/bin/ceph --connect-timeout=25 --cluster=ceph --name mon. --keyring=/var/lib/ceph/mon/ceph-ceph1/keyring auth get client.bootstrap-osd
[ceph1][INFO ] Running command: /usr/bin/ceph --connect-timeout=25 --cluster=ceph --name mon. --keyring=/var/lib/ceph/mon/ceph-ceph1/keyring auth get client.bootstrap-rgw
[ceph_deploy.gatherkeys][INFO ] Storing ceph.client.admin.keyring
[ceph_deploy.gatherkeys][INFO ] Storing ceph.bootstrap-mds.keyring
[ceph_deploy.gatherkeys][INFO ] Storing ceph.bootstrap-mgr.keyring
[ceph_deploy.gatherkeys][INFO ] keyring 'ceph.mon.keyring' already exists
[ceph_deploy.gatherkeys][INFO ] Storing ceph.bootstrap-osd.keyring
[ceph_deploy.gatherkeys][INFO ] Storing ceph.bootstrap-rgw.keyring
[ceph_deploy.gatherkeys][INFO ] Destroy temp directory /tmp/tmprtj_ZH
# Check whether it succeeded
[root@ceph1 my-cluster]# ceph -s
cluster 59d344b8-5051-43e4-90c6-c29c2a91961f
health HEALTH_ERR
clock skew detected on mon.ceph2, mon.ceph3
64 pgs are stuck inactive for more than 300 seconds
64 pgs stuck inactive
64 pgs stuck unclean
no osds
Monitor clock skew detected
monmap e1: 3 mons at {ceph1=10.10.64.130:6789/0,ceph2=10.10.64.131:6789/0,ceph3=10.10.64.132:6789/0}
election epoch 6, quorum 0,1,2 ceph1,ceph2,ceph3
osdmap e1: 0 osds: 0 up, 0 in
flags sortbitwise,require_jewel_osds
pgmap v2: 64 pgs, 1 pools, 0 bytes data, 0 objects
0 kB used, 0 kB / 0 kB avail
64 creating
# Install the Ceph packages on the OSD nodes
ceph-deploy install ceph4 ceph5 ceph6
Add disks to the virtual machines according to the layout below:
Hostname | IP | OSD data disks | Journal disk | Notes
node3 | 10.10.64.133 | /dev/sdb, /dev/sdc, /dev/sdd | /dev/sde |
node4 | 10.10.64.134 | /dev/sdb, /dev/sdc, /dev/sdd | /dev/sde |
node5 | 10.10.64.135 | /dev/sdb, /dev/sdc, /dev/sdd | /dev/sde |
# All three OSD nodes use this disk layout
[root@ceph4 ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
fd0 2:0 1 4K 0 disk
sda 8:0 0 60G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 59G 0 part
├─rhel-root 253:0 0 35.6G 0 lvm /
├─rhel-swap 253:1 0 6G 0 lvm [SWAP]
└─rhel-home 253:2 0 17.4G 0 lvm /home
sdb 8:16 0 100G 0 disk
sdc 8:32 0 100G 0 disk
sdd 8:48 0 100G 0 disk
sde 8:64 0 50G 0 disk
sr0 11:0 1 66.5M 0 rom
PS:
On each OSD server we need to partition the three SAS data disks and create an xfs filesystem on each of them.
The single disk used for journals is split into three partitions, one per data disk; no filesystem is created on them, since Ceph handles that itself.
The script is as follows:
[root@ceph4 ~]# cat parted.sh
#!/bin/bash
set -e
if [ ! -x "/sbin/parted" ]; then
echo "This script requires /sbin/parted to run!" >&2
exit 1
fi
DISKS="b c d"
for i in ${DISKS}; do
echo "Creating partitions on /dev/sd${i} ..."
parted -a optimal --script /dev/sd${i} -- mktable gpt
parted -a optimal --script /dev/sd${i} -- mkpart primary xfs 0% 100%
sleep 1
#echo "Formatting /dev/sd${i}1 ..."
mkfs.xfs -f /dev/sd${i}1 &
done
SSDS="e"
for i in ${SSDS}; do
parted -s /dev/sd${i} mklabel gpt
parted -s /dev/sd${i} mkpart primary 0% 32%
parted -s /dev/sd${i} mkpart primary 33% 65%
parted -s /dev/sd${i} mkpart primary 66% 98%
done
# Distribute the script to the other OSD nodes
[root@ceph4 ~]# scp parted.sh root@10.10.64.134:/root/
[root@ceph4 ~]# scp parted.sh root@10.10.64.135:/root/
After running the script, every OSD node looks the same; ceph4 is shown here as an example.
[root@ceph4 ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
fd0 2:0 1 4K 0 disk
sda 8:0 0 60G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 59G 0 part
├─rhel-root 253:0 0 35.6G 0 lvm /
├─rhel-swap 253:1 0 6G 0 lvm [SWAP]
└─rhel-home 253:2 0 17.4G 0 lvm /home
sdb 8:16 0 100G 0 disk
└─sdb1 8:17 0 100G 0 part
sdc 8:32 0 100G 0 disk
└─sdc1 8:33 0 100G 0 part
sdd 8:48 0 100G 0 disk
└─sdd1 8:49 0 100G 0 part
sde 8:64 0 50G 0 disk
├─sde1 8:65 0 16G 0 part
├─sde2 8:66 0 16G 0 part
└─sde3 8:67 0 16G 0 part
sr0 11:0 1 66.5M 0 rom
# Check from the admin node
[root@ceph1 my-cluster]# ceph-deploy disk list ceph4
[ceph_deploy.conf][DEBUG ] found configuration file at: /root/.cephdeploy.conf
[ceph_deploy.cli][INFO ] Invoked (1.5.39): /usr/bin/ceph-deploy disk list ceph4
[ceph_deploy.cli][INFO ] ceph-deploy options:
[ceph_deploy.cli][INFO ] username : None
[ceph_deploy.cli][INFO ] verbose : False
[ceph_deploy.cli][INFO ] overwrite_conf : False
[ceph_deploy.cli][INFO ] subcommand : list
[ceph_deploy.cli][INFO ] quiet : False
[ceph_deploy.cli][INFO ] cd_conf : <ceph_deploy.conf.cephdeploy.Conf instance at 0x7fe2b5d3d440>
[ceph_deploy.cli][INFO ] cluster : ceph
[ceph_deploy.cli][INFO ] func : <function disk at 0x7fe2b5d32488>
[ceph_deploy.cli][INFO ] ceph_conf : None
[ceph_deploy.cli][INFO ] default_release : False
[ceph_deploy.cli][INFO ] disk : [('ceph4', None, None)]
[ceph4][DEBUG ] connected to host: ceph4
[ceph4][DEBUG ] detect platform information from remote host
[ceph4][DEBUG ] detect machine type
[ceph4][DEBUG ] find the location of an executable
[ceph_deploy.osd][INFO ] Distro info: Red Hat Enterprise Linux Server 7.3 Maipo
[ceph_deploy.osd][DEBUG ] Listing disks on ceph4...
[ceph4][DEBUG ] find the location of an executable
[ceph4][INFO ] Running command: /usr/sbin/ceph-disk list
[ceph4][DEBUG ] /dev/dm-0 other, xfs, mounted on /
[ceph4][DEBUG ] /dev/dm-1 swap, swap
[ceph4][DEBUG ] /dev/dm-2 other, xfs, mounted on /home
[ceph4][DEBUG ] /dev/sda :
[ceph4][DEBUG ] /dev/sda2 other, LVM2_member
[ceph4][DEBUG ] /dev/sda1 other, xfs, mounted on /boot
[ceph4][DEBUG ] /dev/sdb :
[ceph4][DEBUG ] /dev/sdb1 other, xfs
[ceph4][DEBUG ] /dev/sdc :
[ceph4][DEBUG ] /dev/sdc1 other, xfs
[ceph4][DEBUG ] /dev/sdd :
[ceph4][DEBUG ] /dev/sdd1 other, xfs
[ceph4][DEBUG ] /dev/sde :
[ceph4][DEBUG ] /dev/sde1 other, ebd0a0a2-b9e5-4433-87c0-68b6b72699c7
[ceph4][DEBUG ] /dev/sde2 other, ebd0a0a2-b9e5-4433-87c0-68b6b72699c7
[ceph4][DEBUG ] /dev/sde3 other, ebd0a0a2-b9e5-4433-87c0-68b6b72699c7
[ceph4][DEBUG ] /dev/sr0 other, iso9660
Initialize the Ceph disks and create the OSDs. The argument format is node:data-partition:journal-partition, and each data partition must be paired with its own journal partition.
# Map each OSD data partition to its journal partition
[root@ceph1 my-cluster]# ceph-deploy osd prepare ceph4:/dev/sdb1:/dev/sde1 ceph4:/dev/sdc1:/dev/sde2 ceph4:/dev/sdd1:/dev/sde3
[ceph4][DEBUG ] realtime =none extsz=4096 blocks=0, rtextents=0
[ceph4][WARNIN] command: Running command: /usr/sbin/restorecon /var/lib/ceph/tmp/mnt.7im5ip
[ceph4][WARNIN] populate_data_path: Preparing osd data dir /var/lib/ceph/tmp/mnt.7im5ip
[ceph4][WARNIN] command: Running command: /usr/sbin/restorecon -R /var/lib/ceph/tmp/mnt.7im5ip/ceph_fsid.15797.tmp
[ceph4][WARNIN] command: Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/tmp/mnt.7im5ip/ceph_fsid.15797.tmp
[ceph4][WARNIN] command: Running command: /usr/sbin/restorecon -R /var/lib/ceph/tmp/mnt.7im5ip/fsid.15797.tmp
[ceph4][WARNIN] command: Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/tmp/mnt.7im5ip/fsid.15797.tmp
[ceph4][WARNIN] command: Running command: /usr/sbin/restorecon -R /var/lib/ceph/tmp/mnt.7im5ip/magic.15797.tmp
[ceph4][WARNIN] command: Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/tmp/mnt.7im5ip/magic.15797.tmp
[ceph4][WARNIN] command: Running command: /usr/sbin/restorecon -R /var/lib/ceph/tmp/mnt.7im5ip/journal_uuid.15797.tmp
[ceph4][WARNIN] command: Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/tmp/mnt.7im5ip/journal_uuid.15797.tmp
[ceph4][WARNIN] adjust_symlink: Creating symlink /var/lib/ceph/tmp/mnt.7im5ip/journal -> /dev/sde3
[ceph4][WARNIN] command: Running command: /usr/sbin/restorecon -R /var/lib/ceph/tmp/mnt.7im5ip
[ceph4][WARNIN] command: Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/tmp/mnt.7im5ip
[ceph4][WARNIN] unmount: Unmounting /var/lib/ceph/tmp/mnt.7im5ip
[ceph4][WARNIN] command_check_call: Running command: /bin/umount -- /var/lib/ceph/tmp/mnt.7im5ip
[ceph4][WARNIN] get_dm_uuid: get_dm_uuid /dev/sdd1 uuid path is /sys/dev/block/8:49/dm/uuid
[ceph4][INFO ] checking OSD status...
[ceph4][DEBUG ] find the location of an executable
[ceph4][INFO ] Running command: /bin/ceph --cluster=ceph osd stat --format=json
[root@ceph1 my-cluster]# ceph-deploy osd prepare ceph5:/dev/sdb1:/dev/sde1 ceph5:/dev/sdc1:/dev/sde2 ceph5:/dev/sdd1:/dev/sde3
[root@ceph1 my-cluster]# ceph-deploy osd prepare ceph6:/dev/sdb1:/dev/sde1 ceph6:/dev/sdc1:/dev/sde2 ceph6:/dev/sdd1:/dev/sde3
# Set the disk permissions
[root@ceph1 my-cluster]# ssh ceph4 'chown ceph:ceph /dev/sdb1 /dev/sdc1 /dev/sdd1 /dev/sde1 /dev/sde2 /dev/sde3'
[root@ceph1 my-cluster]# ssh ceph5 'chown ceph:ceph /dev/sdb1 /dev/sdc1 /dev/sdd1 /dev/sde1 /dev/sde2 /dev/sde3'
[root@ceph1 my-cluster]# ssh ceph6 'chown ceph:ceph /dev/sdb1 /dev/sdc1 /dev/sdd1 /dev/sde1 /dev/sde2 /dev/sde3'
# Activate the OSDs
[root@ceph1 my-cluster]# ceph-deploy osd activate ceph4:/dev/sdb1:/dev/sde1 ceph4:/dev/sdc1:/dev/sde2 ceph4:/dev/sdd1:/dev/sde3
[ceph4][WARNIN] command: Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/tmp/mnt.TJBcf5/systemd
[ceph4][WARNIN] activate: Authorizing OSD key...
[ceph4][WARNIN] command_check_call: Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring auth add osd.2 -i /var/lib/ceph/tmp/mnt.TJBcf5/keyring osd allow * mon allow profile osd
[ceph4][WARNIN] added key for osd.2
[ceph4][WARNIN] command: Running command: /usr/sbin/restorecon -R /var/lib/ceph/tmp/mnt.TJBcf5/active.16792.tmp
[ceph4][WARNIN] command: Running command: /usr/bin/chown -R ceph:ceph /var/lib/ceph/tmp/mnt.TJBcf5/active.16792.tmp
[ceph4][WARNIN] activate: ceph osd.2 data dir is ready at /var/lib/ceph/tmp/mnt.TJBcf5
[ceph4][WARNIN] move_mount: Moving mount to final location...
[ceph4][WARNIN] command_check_call: Running command: /bin/mount -o noatime,inode64 -- /dev/sdd1 /var/lib/ceph/osd/ceph-2
[ceph4][WARNIN] command_check_call: Running command: /bin/umount -l -- /var/lib/ceph/tmp/mnt.TJBcf5
[ceph4][WARNIN] start_daemon: Starting ceph osd.2...
[ceph4][WARNIN] command_check_call: Running command: /usr/bin/systemctl disable ceph-osd@2
[ceph4][WARNIN] command_check_call: Running command: /usr/bin/systemctl disable ceph-osd@2 --runtime
[ceph4][WARNIN] command_check_call: Running command: /usr/bin/systemctl enable ceph-osd@2
[ceph4][WARNIN] Created symlink from /etc/systemd/system/ceph-osd.target.wants/ceph-osd@2.service to /usr/lib/systemd/system/ceph-osd@.service.
[ceph4][WARNIN] command_check_call: Running command: /usr/bin/systemctl start ceph-osd@2
[ceph4][INFO ] checking OSD status...
[ceph4][DEBUG ] find the location of an executable
[ceph4][INFO ] Running command: /bin/ceph --cluster=ceph osd stat --format=json
[ceph4][INFO ] Running command: systemctl enable ceph.target
[root@ceph1 my-cluster]# ceph-deploy osd activate ceph5:/dev/sdb1:/dev/sde1 ceph5:/dev/sdc1:/dev/sde2 ceph5:/dev/sdd1:/dev/sde3
[root@ceph1 my-cluster]# ceph-deploy osd activate ceph6:/dev/sdb1:/dev/sde1 ceph6:/dev/sdc1:/dev/sde2 ceph6:/dev/sdd1:/dev/sde3
# Check from the admin node
# There is still a clock-skew problem that I did not bother to fix properly; running ntpdate by hand clears it. It happens because my ntpdate job runs from crontab only once a day.
[root@ceph1 my-cluster]# ceph -s
cluster 59d344b8-5051-43e4-90c6-c29c2a91961f
health HEALTH_WARN
clock skew detected on mon.ceph2, mon.ceph3
too few PGs per OSD (21 < min 30)
Monitor clock skew detected
monmap e1: 3 mons at {ceph1=10.10.64.130:6789/0,ceph2=10.10.64.131:6789/0,ceph3=10.10.64.132:6789/0}
election epoch 10, quorum 0,1,2 ceph1,ceph2,ceph3
osdmap e53: 9 osds: 9 up, 9 in
flags sortbitwise,require_jewel_osds
pgmap v176: 64 pgs, 1 pools, 0 bytes data, 0 objects
307 MB used, 899 GB / 899 GB avail
64 active+clean
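To keep the clock-skew warning from reappearing, one option (a sketch only, not part of the original setup; the NTP server address 10.0.54.54 is taken from the crontab entry shown earlier) is to sync much more often than once a day:
# Hypothetical crontab entry: sync every 10 minutes instead of once a day
*/10 * * * * /usr/sbin/ntpdate -u 10.0.54.54 >/dev/null 2>&1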
# Check on ceph4 (ceph5 and ceph6 look the same)
[root@ceph4 dev]# ps -ef | grep osd
ceph 16287 1 0 10:56 ? 00:00:00 /usr/bin/ceph-osd -f --cluster ceph --id 0 --setuser ceph --setgroup ceph
ceph 16655 1 0 10:57 ? 00:00:00 /usr/bin/ceph-osd -f --cluster ceph --id 1 --setuser ceph --setgroup ceph
ceph 17054 1 0 10:57 ? 00:00:00 /usr/bin/ceph-osd -f --cluster ceph --id 2 --setuser ceph --setgroup ceph
root 17267 2488 0 11:02 pts/0 00:00:00 grep --color=auto osd
Note that the cluster above is in the HEALTH_WARN state.
# Check the replica count
[root@ceph1 my-cluster]# ceph osd dump | grep 'replicated size'
pool 0 'rbd' replicated size 3 min_size 2 crush_ruleset 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 1 flags hashpspool stripe_width 0
# List the existing pools
[root@ceph1 my-cluster]# ceph osd lspools
0 rbd,
# Check the pg_num and pgp_num attributes of the rbd pool
[root@ceph1 my-cluster]# ceph osd pool get rbd pg_num
pg_num: 64
[root@ceph1 my-cluster]# ceph osd pool get rbd pgp_num
pgp_num: 64
# How to calculate healthy pg_num and pgp_num values:
# For the pgmap size: pg_num = osd_num * 100 / replica_num, rounded up to a power of two. For example, with 15 OSDs and 3 replicas, 15*100/3 = 500, giving pg_num = 512. Re-setting this value on a live cluster triggers data migration, so handle it carefully.
To increase the PG count, decide pg_num with the formula Total PGs = (#OSDs * 100) / pool size (pgp_num should be set to the same value), so here 9*100/3 = 300. Ceph officially recommends rounding to the nearest power of two; again, re-setting this value on a live cluster triggers data migration, so be careful.
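As a quick sanity check of the formula, here is a small shell sketch (not part of the original procedure) that computes the raw value and the rounded-up power of two for this cluster's 9 OSDs and 3 replicas:
# Hypothetical helper: pg_num = OSDs * 100 / replicas, then round up to a power of two
OSDS=9; REPLICAS=3
RAW=$(( OSDS * 100 / REPLICAS ))                     # 9*100/3 = 300
PG=1; while [ $PG -lt $RAW ]; do PG=$(( PG * 2 )); done
echo "raw=$RAW, next power of two=$PG"               # prints: raw=300, next power of two=512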
# Set pg_num and pgp_num
[root@ceph1 my-cluster]# ceph osd pool set rbd pg_num 300
set pool 0 pg_num to 300
[root@ceph1 my-cluster]# ceph osd pool set rbd pgp_num 300
set pool 0 pgp_num to 300
# Check the Ceph cluster status again
On ceph1, use the ceph-deploy tool to install the Ceph binaries on client_ceph.
# Install the Ceph packages on client_ceph; here we authenticate with a password instead of a public key
[root@ceph1 my-cluster]# ceph-deploy install client_ceph
The authenticity of host 'client_ceph (10.10.64.136)' can't be established.
ECDSA key fingerprint is e6:d0:0a:8e:7c:ed:26:2f:2b:3b:00:65:7e:c9:a3:7d.
Are you sure you want to continue connecting (yes/no)? yes    # type yes
Warning: Permanently added 'client_ceph,10.10.64.136' (ECDSA) to the list of known hosts.
root@client_ceph's password:    # enter the password
root@client_ceph's password:    # confirm the password
# Push the Ceph configuration file (ceph.conf) to client_ceph
[root@ceph1 my-cluster]# ceph-deploy config push client_ceph
[ceph_deploy.conf][DEBUG ] found configuration file at: /root/.cephdeploy.conf
[ceph_deploy.cli][INFO ] Invoked (1.5.39): /usr/bin/ceph-deploy config push client_ceph
[ceph_deploy.cli][INFO ] ceph-deploy options:
[ceph_deploy.cli][INFO ] username : None
[ceph_deploy.cli][INFO ] verbose : False
[ceph_deploy.cli][INFO ] overwrite_conf : False
[ceph_deploy.cli][INFO ] subcommand : push
[ceph_deploy.cli][INFO ] quiet : False
[ceph_deploy.cli][INFO ] cd_conf : <ceph_deploy.conf.cephdeploy.Conf instance at 0x7f178e10a518>
[ceph_deploy.cli][INFO ] cluster : ceph
[ceph_deploy.cli][INFO ] client : ['client_ceph']
[ceph_deploy.cli][INFO ] func : <function config at 0x7f178e0e6848>
[ceph_deploy.cli][INFO ] ceph_conf : None
[ceph_deploy.cli][INFO ] default_release : False
[ceph_deploy.config][DEBUG ] Pushing config to client_ceph
root@client_ceph's password:
root@client_ceph's password:
[client_ceph][DEBUG ] connected to host: client_ceph
[client_ceph][DEBUG ] detect platform information from remote host
[client_ceph][DEBUG ] detect machine type
[client_ceph][DEBUG ] write cluster configuration to /etc/ceph/{cluster}.conf
The client machine needs Ceph keys to access the cluster. Ceph creates a default user, client.admin, which has full access to the cluster. Sharing client.admin with every client node is not recommended; a better practice is to create a new Ceph user with its own key that can only access a specific pool.
# Here we create a Ceph user, client.rbd, with permission to access the rbd pool
[root@ceph1 my-cluster]# ceph auth get-or-create client.rbd mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=rbd'
[client.rbd]
key = AQBYTeBaNUttDxAALwi67OOuNtaG4OUs+YcaPw==
# Copy the key for the client.rbd user to the client (client_ceph)
[root@ceph1 my-cluster]# ceph auth get-or-create client.rbd | ssh client_ceph 'tee /etc/ceph/ceph.client.rbd.keyring'
root@client_ceph's password:
[client.rbd]
key = AQBYTeBaNUttDxAALwi67OOuNtaG4OUs+YcaPw==
# At this point client_ceph should be ready to act as a Ceph client. Check the cluster status from client_ceph by supplying the user name and key
[root@ceph1 my-cluster]# ssh client_ceph 'cat /etc/ceph/ceph.client.rbd.keyring >> /etc/ceph/keyring'
root@client_ceph's password:
[root@client_ceph ~]# ceph -s --name client.rbd
cluster 59d344b8-5051-43e4-90c6-c29c2a91961f
health HEALTH_WARN
clock skew detected on mon.ceph2, mon.ceph3
Monitor clock skew detected
monmap e1: 3 mons at {ceph1=10.10.64.130:6789/0,ceph2=10.10.64.131:6789/0,ceph3=10.10.64.132:6789/0}
election epoch 10, quorum 0,1,2 ceph1,ceph2,ceph3
osdmap e57: 9 osds: 9 up, 9 in
flags sortbitwise,require_jewel_osds
pgmap v339: 300 pgs, 1 pools, 0 bytes data, 0 objects
322 MB used, 899 GB / 899 GB avail
300 active+clean
# Create a 102400 MB RADOS block device named rbd1
[root@client_ceph ~]# rbd create rbd1 --size 102400 --name client.rbd --image-feature layering
# List the RBD images
[root@client_ceph ~]# rbd ls --name client.rbd
rbd1
# Inspect the RBD image details
[root@client_ceph ~]# rbd --image rbd1 info --name client.rbd
rbd image 'rbd1':
size 102400 MB in 25600 objects
order 22 (4096 kB objects)
block_name_prefix: rbd_data.10ef238e1f29
format: 2
features: layering
flags:
# Map the block device so it can be initialized
[root@client_ceph ~]# rbd map --image rbd1 --name client.rbd
/dev/rbd0
# Check the mapped block devices
[root@client_ceph ~]# rbd showmapped --name client.rbd
id pool image snap device
0 rbd rbd1 - /dev/rbd0
# To use this block device, we need to create and mount a filesystem on it
[root@client_ceph ~]# fdisk -l /dev/rbd0
Disk /dev/rbd0: 107.4 GB, 107374182400 bytes, 209715200 sectors
Units = sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 4194304 bytes / 4194304 bytes
# Create the filesystem
[root@client_ceph ~]# mkfs.xfs /dev/rbd0
meta-data=/dev/rbd0 isize=512 agcount=17, agsize=1637376 blks
= sectsz=512 attr=2, projid32bit=1
= crc=1 finobt=0, sparse=0
data = bsize=4096 blocks=26214400, imaxpct=25
= sunit=1024 swidth=1024 blks
naming =version 2 bsize=4096 ascii-ci=0 ftype=1
log =internal log bsize=4096 blocks=12800, version=2
= sectsz=512 sunit=8 blks, lazy-count=1
realtime =none extsz=4096 blocks=0, rtextents=0
# Mount the device
[root@client_ceph ~]# mkdir /mnt/ceph-disk1 && mount /dev/rbd0 /mnt/ceph-disk1
[root@client_ceph ~]# df -h /mnt/ceph-disk1
Filesystem Size Used Avail Use% Mounted on
/dev/rbd0 100G 33M 100G 1% /mnt/ceph-disk1
# Test the block device
[root@client_ceph ~]# dd if=/dev/zero of=/mnt/ceph-disk1/file1 count=100 bs=1M
100+0 records in
100+0 records out
104857600 bytes (105 MB) copied, 0.12016 s, 873 MB/s
This problem is caused by insufficient permissions on the disk; the fix is as follows:
# On the OSD node, change the ownership of the disk or partition that has the permission problem
chown ceph:ceph /dev/sdd1
Follow-up:
Even after this fix, the disk permissions are reset on reboot and the OSD services again fail to start. This permission issue is a real trap, so I wrote a for loop and added it to rc.local to correct the disk permissions automatically at every boot.
# Adjust the device letters to match your actual disks; this is only an example
for i in a b c d e f g h i j k l; do chown ceph:ceph /dev/sd"$i"*; done
Digging through the Ceph material shows this is actually a known bug that the community has not yet fixed.
Reference:
http://tracker.ceph.com/issues/13833
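A common alternative to the rc.local loop, sketched here only as a suggestion (the rule file name and the device pattern are assumptions, not from the original text), is a udev rule that re-applies the ownership whenever the data and journal partitions appear:
# /etc/udev/rules.d/89-ceph-disk-perms.rules (hypothetical example; match your own devices)
KERNEL=="sd[b-e]?", SUBSYSTEM=="block", OWNER="ceph", GROUP="ceph", MODE="0660"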
Running ceph-deploy osd activate ceph4:/dev/sdb:/dev/sde1 ceph4:/dev/sdc:/dev/sde2 ceph4:/dev/sdd:/dev/sde3 with whole disks fails.
Cause: Ceph has already partitioned the disks, so the data partition on /dev/sdb is /dev/sdb1.
# Fix:
ceph-deploy osd activate ceph4:/dev/sdb1:/dev/sde1 ceph4:/dev/sdc1:/dev/sde2 ceph4:/dev/sdd1:/dev/sde3
If ceph.conf already exists on the nodes and you have updated the configuration file since, you need to push it with --overwrite-conf.
Distribute the modified configuration file to every host in the cluster:
ceph-deploy --overwrite-conf config push node{1..3}
If the ceph.conf conflict error appears while re-running the initial configuration, use the following command:
ceph-deploy --overwrite-conf mon create-initial
A Ceph Monitor runs as a lightweight process and normally consumes very few system resources. In most scenarios an entry-level CPU and a gigabit NIC are enough, but there must be enough disk space to hold the cluster logs. The Ceph Monitor cluster elects its leader with the Paxos distributed consensus algorithm. Because decisions require a quorum (a majority), a cluster with multiple monitors should run an odd number of them, generally at least three in production; with three monitors the cluster tolerates the loss of one.
The Ceph cluster initialized earlier in the test environment has only one MON.
192.168.61.30 c0 - admin-node, deploy-node
192.168.61.31 c1 - mon1
192.168.61.32 c2 - osd.1
192.168.61.33 c3 - osd.2
Next we add two more MONs, turning c2 and c3 into monitor nodes as well.
Edit ceph.conf and add the following to the [global] section:
mon_initial_members = c1,c2,c3
mon_host = 192.168.61.31,192.168.61.32,192.168.61.33
public network = 192.168.61.0/24
Push the configuration file from the admin-node to every node:
ceph-deploy --overwrite-conf config push c1 c2 c3
Add the MON nodes:
ceph-deploy mon create c2 c3
ceph -s
cluster 4873f3c0-2a5b-4868-b30e-f0c6f93b800a
health HEALTH_OK
monmap e11: 3 mons at {c1=192.168.61.31:6789/0,c2=192.168.61.32:6789/0,c3=192.168.61.33:6789/0}
election epoch 26, quorum 0,1,2 c1,c2,c3
osdmap e66: 2 osds: 2 up, 2 in
flags sortbitwise,require_jewel_osds
pgmap v2924: 192 pgs, 2 pools, 76900 kB data, 35 objects
12844 MB used, 182 GB / 195 GB avail
192 active+clean
Remove the MON nodes:
ceph-deploy mon destroy c2 c3
sudo stop ceph-all                                # stop all Ceph daemons
ceph-deploy uninstall [{ceph-node}]               # uninstall the Ceph packages
ceph-deploy purge [{ceph-node} [{ceph-node}]]     # remove the Ceph-related packages
ceph-deploy purgedata {ceph-node} [{ceph-node}]   # remove the Ceph-related configuration files
ceph-deploy forgetkeys                            # delete the keys
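For reference, a full teardown of the ceph1-ceph6 test cluster from this article would look roughly like this (a sketch only; it destroys the cluster and its data):
ceph-deploy purge ceph1 ceph2 ceph3 ceph4 ceph5 ceph6
ceph-deploy purgedata ceph1 ceph2 ceph3 ceph4 ceph5 ceph6
ceph-deploy forgetkeys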
# List the existing pools
[root@ceph01 osd]# ceph osd lspools
1 pool-877be9dced4840e68b3629767389a35c,
# Check the pool's pg_num
[root@ceph01 osd]# ceph osd pool get pool-877be9dced4840e68b3629767389a35c pg_num
pg_num: 9000
# Check the pool's replica count
[root@ceph01 osd]# ceph osd dump | grep pool
pool 1 'pool-877be9dced4840e68b3629767389a35c' replicated size 3 min_size 1 crush_ruleset 1 object_hash rjenkins pg_num 9000 pgp_num 9000 last_change 296 flags hashpspool stripe_width 0 osd_full_ratio 0.9
# Change a pool's replica count
ceph osd pool set POOLNAME size NUM
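For example, to set the pool from this cluster to 3 replicas (substitute your own pool name):
ceph osd pool set pool-877be9dced4840e68b3629767389a35c size 3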
# View the current cluster layout (CRUSH tree)
[root@ceph01 osd]# ceph osd tree
ID WEIGHT TYPE NAME UP/DOWN REWEIGHT PRIMARY-AFFINITY
-2 218.32196 root pool-877be9dced4840e68b3629767389a35c-root
-3 36.38699 host pool-877be9dced4840e68b3629767389a35c-b416f44c4eb848a7aabe755fc0dd7e0f
2 3.63899 osd.2 up 1.00000 1.00000
4 3.63899 osd.4 up 1.00000 1.00000
6 3.63899 osd.6 up 1.00000 1.00000
1 3.63899 osd.1 up 1.00000 1.00000
3 3.63899 osd.3 up 1.00000 1.00000
5 3.63899 osd.5 up 1.00000 1.00000
8 3.63899 osd.8 up 1.00000 1.00000
7 3.63899 osd.7 up 1.00000 1.00000
9 3.63899 osd.9 up 1.00000 1.00000
0 3.63899 osd.0 up 1.00000 1.00000
-4 36.38699 host pool-877be9dced4840e68b3629767389a35c-16b71b05afcf489e87b29a23aa64ff51
32 3.63899 osd.32 up 1.00000 1.00000
34 3.63899 osd.34 up 1.00000 1.00000
30 3.63899 osd.30 up 1.00000 1.00000
31 3.63899 osd.31 up 1.00000 1.00000
33 3.63899 osd.33 up 1.00000 1.00000
39 3.63899 osd.39 up 1.00000 1.00000
37 3.63899 osd.37 up 1.00000 1.00000
36 3.63899 osd.36 up 1.00000 1.00000
38 3.63899 osd.38 up 1.00000 1.00000
35 3.63899 osd.35 up 1.00000 1.00000
-5 36.38699 host pool-877be9dced4840e68b3629767389a35c-7fd856b99b944cd8a53757fff8917940
21 3.63899 osd.21 up 1.00000 1.00000
25 3.63899 osd.25 up 1.00000 1.00000
23 3.63899 osd.23 up 1.00000 1.00000
24 3.63899 osd.24 up 1.00000 1.00000
22 3.63899 osd.22 up 1.00000 1.00000
26 3.63899 osd.26 up 1.00000 1.00000
27 3.63899 osd.27 up 1.00000 1.00000
20 3.63899 osd.20 up 1.00000 1.00000
29 3.63899 osd.29 up 1.00000 1.00000
28 3.63899 osd.28 up 1.00000 1.00000
-6 36.38699 host pool-877be9dced4840e68b3629767389a35c-502b0fed30a9417a92593b6acfd5c80f
54 3.63899 osd.54 up 1.00000 1.00000
50 3.63899 osd.50 up 1.00000 1.00000
55 3.63899 osd.55 up 1.00000 1.00000
59 3.63899 osd.59 up 1.00000 1.00000
52 3.63899 osd.52 up 1.00000 1.00000
57 3.63899 osd.57 up 1.00000 1.00000
53 3.63899 osd.53 up 1.00000 1.00000
56 3.63899 osd.56 up 1.00000 1.00000
58 3.63899 osd.58 up 1.00000 1.00000
51 3.63899 osd.51 up 1.00000 1.00000
-7 36.38699 host pool-877be9dced4840e68b3629767389a35c-9303e065dff34847b9d427b58b3dc419
15 3.63899 osd.15 up 1.00000 1.00000
17 3.63899 osd.17 up 1.00000 1.00000
10 3.63899 osd.10 up 1.00000 1.00000
11 3.63899 osd.11 up 1.00000 1.00000
12 3.63899 osd.12 up 1.00000 1.00000
14 3.63899 osd.14 up 1.00000 1.00000
18 3.63899 osd.18 up 1.00000 1.00000
13 3.63899 osd.13 up 1.00000 1.00000
16 3.63899 osd.16 up 1.00000 1.00000
19 3.63899 osd.19 up 1.00000 1.00000
-8 36.38699 host pool-877be9dced4840e68b3629767389a35c-201902df4cd34666914a75ebbdfa3f07
48 3.63899 osd.48 up 1.00000 1.00000
43 3.63899 osd.43 up 1.00000 1.00000
44 3.63899 osd.44 up 1.00000 1.00000
46 3.63899 osd.46 up 1.00000 1.00000
42 3.63899 osd.42 up 1.00000 1.00000
47 3.63899 osd.47 up 1.00000 1.00000
49 3.63899 osd.49 up 1.00000 1.00000
40 3.63899 osd.40 up 1.00000 1.00000
45 3.63899 osd.45 up 1.00000 1.00000
41 3.63899 osd.41 up 1.00000 1.00000
-1 0 root default
# View the mon map
[root@ceph01 ~]# ceph mon dump
dumped monmap epoch 5
epoch 5
fsid 0dbf4cdc-21cc-46ae-aa98-dc562bcf81e4
last_changed 2017-06-21 11:27:52.969705
created 2017-06-20 17:56:39.789318
0: 10.1.50.1:6789/0 mon.ceph01
1: 10.1.50.2:6789/0 mon.ceph02
2: 10.1.50.3:6789/0 mon.ceph03
# View the osd map
[root@ceph01 ~]# ceph osd dump|more
epoch 732
fsid 0dbf4cdc-21cc-46ae-aa98-dc562bcf81e4
created 2017-06-20 17:56:40.149872
modified 2018-04-14 22:44:37.838302
flags noscrub,nodeep-scrub,sortbitwise,require_jewel_osds
pool 1 'pool-877be9dced4840e68b3629767389a35c' replicated size 3 min_size 1 crush_ruleset 1 object_hash rjenkins pg_num 9000 pgp_num 9000 last_change 296 flags hashpspool stripe_wi
dth 0 osd_full_ratio 0.9
removed_snaps [1~b]
max_osd 60
# View the pg map
[root@ceph01 ~]# ceph pg dump | more
dumped all in format plain
version 27409250
stamp 2018-05-11 14:15:06.522510
last_osdmap_epoch 732
last_pg_scan 732
full_ratio 0.97
nearfull_ratio 0.85
pg_stat objects mip degr misp unf bytes log disklog state state_stamp v reported up up_primary acting acting_primary last_scrub scrub_stamp last_deep_scrub deep_scrub_stamp
1.fff 1050 0 0 0 0 4381890560 3011 3011 active+clean 2017-12-17 15:45:14.606042 732'2070811 732:6409846 [1,57,33] 1 [1,57,33] 1 323'28937 2017-08-24 14:58:39.470020 0'0 2017-06-20 18:40:53.526324
1.ffe 1085 0 0 0 0 4539440128 3045 3045 active+clean 2018-03-15 08:04:06.174409 732'2659595 732:4519618 [45,56,32] 45 [45,56,32] 45 325'87639 2017-08-25 11:47:08.619325 0'0 2017-06-20 18:40:53.526306
1.ffd 1067 0 0 0 0 4459056128 3008 3008 active+clean 2018-01-15 19:50:07.260339 732'936258 732:2563398 [16,26,54] 16 [16,26,54] 16 325'42078 2017-08-24 22:27:26.093606 0'0 2017-06-20 18:40:53.526299
1.ffc 1092 0 0 0 0 4565081088 3036 3036 active+clean 2017-12-11 16:43:52.140190 732'2065836 732:3347814 [48,25,52] 48 [48,25,52] 48 325'167904 2017-08-25 05:56:08.974475 0'0 2017-06-20 18:40:53.526289
1.ffb 1030 0 0 0 0 4285212672 3034 3034 active+clean 2017-09-17 06:47:04.756990 732'2010034 732:3453033 [37,8,49] 37 [37,8,49] 37 323'31693 2017-08-24 18:04:10.904974 0'0 2017-06-20 18:40:53.526281
# Check OSD space usage
[root@ceph01 ceph]# ceph osd df |more
# Watch the events happening in the cluster in real time
ceph -w
# Check the monitor quorum (mon election status)
[root@ceph01 ~]# ceph quorum_status
{"election_epoch":34,"quorum":[0,1,2],"quorum_names":["ceph01","ceph02","ceph03"],"quorum_leader_name":"ceph01","monmap":{"epoch":5,"fsid":"0dbf4cdc-21cc-46ae-aa98-dc562bcf81e4","modified":"2017-06-21 11:27:52.969705","created":"2017-06-20 17:56:39.789318","mons":[{"rank":0,"name":"ceph01","addr":"10.1.50.1:6789\/0"},{"rank":1,"name":"ceph02","addr":"10.1.50.2:6789\/0"},{"rank":2,"name":"ceph03","addr":"10.1.50.3:6789\/0"}]}}
# Show the detailed running configuration of a daemon (here mon.ceph01)
[root@ceph01 ~]# ceph daemon mon.ceph01 config show | more
# Find where the log file is stored
[root@ceph01 ~]# ceph-conf --name mon.ceph01 --show-config-value log_file
/var/log/ceph/ceph-mon.ceph01.log
# Pause the OSDs
[root@admin ~]# ceph osd pause
# Unpause the OSDs
[root@admin ~]# ceph osd unpause
# Query detailed information for a PG
[root@ceph01 ~]# ceph pg 1.ffe query
{
"state": "active+clean",
"snap_trimq": "[]",
"epoch": 732,
"up": [
45,
56,
32
],
"acting": [
45,
56,
32
],
# Create an image
[root@ceph01 ~]# rbd create -p pool-877be9dced4840e68b3629767389a35c --size 10000 lose
# List the images
[root@ceph01 ~]# rbd ls pool-877be9dced4840e68b3629767389a35c
lose
test1
volume-1a3ebb0e95bc440f90bb2df4123f7593
volume-1eab1f13c6c14514bcde91e8810b5b2f
volume-2cf091aa66264429936c037eee9bd92f
volume-4f92f5d5e3394f5d9c26eb9cf6b6aae5
volume-6f2669c5ecf94db88ce6e7266880e149
volume-964ada4c6ae94c289b736f9b70eaeca9
volume-970f97af4c0b4597a0358d6952f8477c
volume-b60ab59db5d4495f85e81966f07edfe6
volume-c0c2210fe3904fd19dd0c81da9f82945
volume-d747877e622c4847ac3f6ea10bf9df7b
volume-e46033b5e0f94ef8a8d2911e439c9cba
volume-e9422388ef90405fb0562bb2a1bbbbad
volume-efb22c8b5dc54522bc678e6f23aff6d0
volume-f1d87c7556344a33b39e4bb994fefac1
# Show the image details
[root@ceph01 ~]# rbd info -p pool-877be9dced4840e68b3629767389a35c lose
rbd image 'lose':
size 10000 MB in 2500 objects
order 22 (4096 kB objects)
block_name_prefix: rbd_data.e580fa6b8b4567
format: 2
features: layering, deep-flatten
flags:
# Resize the image
[root@ceph01 ~]# rbd resize -p pool-877be9dced4840e68b3629767389a35c --size 20000 lose
Resizing image: 100% complete...done.
# Create a snapshot; the format is pool/image_name@snapshot_name
[root@ceph01 ~]# rbd snap create pool-877be9dced4840e68b3629767389a35c/lose@test_snapshot
# List the snapshots of an image
[root@ceph01 ~]# rbd snap ls -p pool-877be9dced4840e68b3629767389a35c lose
SNAPID NAME SIZE
12 test_snapshot 20000 MB
# Delete an image snapshot
[root@ceph01 ~]# rbd snap rm pool-877be9dced4840e68b3629767389a35c/lose@test_snapshot
# If deleting the snapshot fails, it may be write-protected; remove the protection first, then delete it
[root@ceph01 ~]# rbd snap unprotect pool-877be9dced4840e68b3629767389a35c/lose@test_snapshot
# Delete all snapshots of an image
[root@ceph01 ~]# rbd snap purge -p pool-877be9dced4840e68b3629767389a35c lose
# Delete the image
[root@ceph01 ~]# rbd rm -p pool-877be9dced4840e68b3629767389a35c lose
Removing image: 100% complete...done.