High-Availability Cluster Framework
Diagram reproduced from http://www.178linux.com/16656
Lab topology:
Two node servers:
node1    192.168.150.137    node1.com
node2    192.168.150.138    node2.com
nfs      192.168.150.139
ansible  192.168.150.140
1. Pre-cluster preparation
The two nodes need synchronized time, mutual SSH trust, and consistent hostnames and name resolution.
Since there are only two nodes, this is also a good opportunity to drive the setup with Ansible.
Edit the hosts file on both nodes
~]# hostnamectl set-hostname node1.com
~]# uname -n
node1.com
~]# vim /etc/hosts
192.168.150.137 node1 node1.com
192.168.150.138 node2 node2.com
~]# hostnamectl set-hostname node2.com
~]# uname -n
node2.com
~]# vim /etc/hosts
192.168.150.137 node1 node1.com
192.168.150.138 node2 node2.com
Install and configure Ansible on the ansible host
yum -y install ansible    (the EPEL repo must be configured first)
Edit the inventory
~]# cd /etc/ansible/
ansible]# cp hosts{,.bak}
ansible]# vim hosts
[haservers]
192.168.150.137
192.168.150.138
Set up SSH public-key authentication
[root@localhost ~]# ssh-keygen -t rsa -P ''
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Created directory '/root/.ssh'.
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
db:54:47:3f:ab:04:0e:55:be:fc:1f:cb:ef:59:d1:e9 root@localhost.localdomain
The key's randomart image is:
+--[ RSA 2048]----+
| .... |
| . .. . |
| . ......|
| o.o.. =|
| S .. + +.|
| + . + .|
| . . . E.|
| . *|
| ==|
+-----------------+
[root@localhost ~]# ssh-copy-id -i .ssh/id_rsa.pub root@192.168.150.137
The authenticity of host '192.168.150.137 (192.168.150.137)' can't be established.
ECDSA key fingerprint is 1f:41:1e:c2:4f:20:9b:24:65:dc:9e:50:28:46:be:36.
Are you sure you want to continue connecting (yes/no)? yes
/usr/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
/usr/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys
root@192.168.150.137's password:
Number of key(s) added: 1
Now try logging into the machine, with: "ssh 'root@192.168.150.137'"
and check to make sure that only the key(s) you wanted were added.
[root@localhost ~]# ssh-copy-id -i .ssh/id_rsa.pub root@192.168.150.138
The authenticity of host '192.168.150.138 (192.168.150.138)' can't be established.
ECDSA key fingerprint is 1f:41:1e:c2:4f:20:9b:24:65:dc:9e:50:28:46:be:36.
Are you sure you want to continue connecting (yes/no)? yes
/usr/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
/usr/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys
root@192.168.150.138's password:
Permission denied, please try again.
root@192.168.150.138's password:
Number of key(s) added: 1
Now try logging into the machine, with: "ssh 'root@192.168.150.138'"
and check to make sure that only the key(s) you wanted were added.
[root@localhost ~]# ssh 192.168.150.137
Last login: Tue Jan 17 18:50:53 2017 from 192.168.150.1
[root@node1 ~]# exit
logout
Connection to 192.168.150.137 closed.
[root@localhost ~]# ssh 192.168.150.138
Last failed login: Tue Jan 17 19:26:55 CST 2017 from 192.168.150.140 on ssh:notty
There was 1 failed login attempt since the last successful login.
Last login: Tue Jan 17 18:51:06 2017 from 192.168.150.1
[root@node2 ~]# exit
logout
Connection to 192.168.150.138 closed.
Test the connection
~]# ansible all -m ping
192.168.150.137 | SUCCESS => {
"changed": false,
"ping": "pong"
}
192.168.150.138 | SUCCESS => {
"changed": false,
"ping": "pong"
}
Install ntpdate
~]# ansible all -m yum -a "name=ntpdate state=present"
Set up a cron job to keep time in sync
~]# ansible all -m cron -a "minute=*/5 job='/sbin/ntpdate 1.cn.pool.ntp.org &>/dev/null' name=Synctime"
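As a quick sanity check that the prerequisites are in place on both nodes, a couple of illustrative ad-hoc commands can be run from the ansible host (nothing is assumed beyond the inventory above):
~]# ansible all -m shell -a 'date; hostname; grep node /etc/hosts'
~]# ansible all -m shell -a 'ntpdate -q 1.cn.pool.ntp.org'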
2. Install corosync, pacemaker, and crmsh
First install corosync and pacemaker.
Because corosync is a dependency of pacemaker, it is pulled in automatically when pacemaker is installed.
Install with Ansible
~]# ansible all -m yum -a "name=pacemaker state=present"
Verify on the nodes
~]# rpm -qa pacemaker
pacemaker-1.1.15-11.el7_3.2.x86_64
~]# rpm -qa corosync
corosync-2.4.0-4.el7.x86_64
Install crmsh
crmsh is not available in the base yum repos or EPEL, so download it from:
http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/noarch/
Download the packages to the ansible host, copy them to the node hosts, and install:
~]# ls crmsh/
asciidoc-8.6.9-32.1.noarch.rpm crmsh-scripts-2.3.2-1.1.noarch.rpm
asciidoc-examples-8.6.9-32.1.noarch.rpm crmsh-test-2.3.2-1.1.noarch.rpm
crmsh-2.3.2-1.1.noarch.rpm python-parallax-1.0.1-28.1.noarch.rpm
~]# ansible all -m shell -a 'mkdir /root/crmsh'
192.168.150.137 | SUCCESS | rc=0 >>
192.168.150.138 | SUCCESS | rc=0 >>
[root@localhost ~]# ansible all -m copy -a "src=/root/crmsh/ dest=/root/crmsh/"
192.168.150.137 | SUCCESS => {
"changed": true,
"dest": "/root/crmsh/",
"src": "/root/crmsh"
}
192.168.150.138 | SUCCESS => {
"changed": true,
"dest": "/root/crmsh/",
"src": "/root/crmsh"
}
~]# ansible all -m shell -a 'yum -y install /root/crmsh/*.rpm'
Verify on a node
~]# crm
crm(live)#
3. Configure corosync and pacemaker and start the services
Edit the corosync configuration file on the ansible host, then deploy it to the nodes.
~]# yum -y install pacemaker
~]# cd /etc/corosync/
corosync]# ls
corosync.conf.example corosync.conf.example.udpu corosync.xml.example uidgid.d
corosync]# cp corosync.conf.example corosync.conf
corosync]# vim corosync.conf
corosync]# grep -v "^[[:space:]]*#" corosync.conf | grep -v "^$"
totem {
version: 2
cluster_name: mycluster
crypto_cipher: aes128    # encrypt intra-cluster communication
crypto_hash: sha1
interface {
ringnumber: 0
bindnetaddr: 192.168.150.0
mcastaddr: 239.255.1.1    # multicast address
mcastport: 5405
ttl: 1    # prevent multicast loops
}
}
logging {
fileline: off
to_stderr: no
to_logfile: yes
logfile: /var/log/cluster/corosync.log
to_syslog: no
debug: off
timestamp: on
logger_subsys {
subsys: QUORUM
debug: off
}
}
quorum {
provider: corosync_votequorum    # use corosync's built-in vote quorum
}
nodelist {    # define the cluster nodes
node {
ring0_addr: 192.168.150.137
nodeid: 1
}
node {
ring0_addr: 192.168.150.138
nodeid: 2
}
}
Generate the authentication key
[root@localhost corosync]# corosync-keygen -l
Corosync Cluster Engine Authentication key generator.
Gathering 1024 bits for key from /dev/urandom.
Writing corosync key to /etc/corosync/authkey.
[root@localhost corosync]# ls -lh
total 20K
-r-------- 1 root root  128 Jan 17 20:27 authkey
-rw-r--r-- 1 root root 3.0K Jan 17 20:22 corosync.conf
-rw-r--r-- 1 root root 2.9K Nov  7 18:09 corosync.conf.example
-rw-r--r-- 1 root root  767 Nov  7 18:09 corosync.conf.example.udpu
-rw-r--r-- 1 root root 3.3K Nov  7 18:09 corosync.xml.example
drwxr-xr-x 2 root root    6 Nov  7 18:09 uidgid.d
Use Ansible to copy the configuration file and authkey to both node servers; take care to preserve authkey's permissions.
corosync]# ansible all -m copy -a "src=/etc/corosync/authkey mode=400 dest=/etc/corosync/authkey"
corosync]# ansible all -m copy -a "src=/etc/corosync/corosync.conf dest=/etc/corosync/corosync.conf"
Verify on the node hosts
~]# ls -l /etc/corosync/
total 20
-r-------- 1 root root  128 Jan 17 14:45 authkey
-rw-r--r-- 1 root root 3027 Jan 17 14:45 corosync.conf
-rw-r--r-- 1 root root 2881 Nov  7 18:09 corosync.conf.example
-rw-r--r-- 1 root root  767 Nov  7 18:09 corosync.conf.example.udpu
-rw-r--r-- 1 root root 3278 Nov  7 18:09 corosync.xml.example
drwxr-xr-x 2 root root    6 Nov  7 18:09 uidgid.d
Start the corosync and pacemaker services
corosync]# ansible all -m service -a "name=corosync state=started"
corosync]# ansible all -m service -a "name=pacemaker state=started"
Check the cluster status on a node
~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 14:51:41 2017          Last change: Tue Jan 17 14:51:11 2017 by hacluster via crmd on node1.com
2 nodes and 0 resources configured
Online: [ node1.com node2.com ]    (both nodes are online)
No resources
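Besides crm status, corosync's own tools can confirm membership and ring health. An illustrative check on either node (the exact cmap key names vary slightly between corosync versions):
[root@node1 ~]# corosync-cfgtool -s
[root@node1 ~]# corosync-cmapctl | grep members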
4. Configure the cluster and cluster resources with crmsh
crmsh:
Getting help: ls, help
help COMMAND
COMMAND --help
View cluster status:
status [<option> ...]
option :: bynode | inactive | ops | timing | failcounts
Set up and manage the cluster:
cluster
Configure the CIB:
configure/ CIB configuration
acl_target Define target access rights
_test Help for command _test
clone Define a clone
colocation Colocate resources
commit Commit the changes to the CIB
default-timeouts Set timeouts for operations to minimums from the meta-data
delete Delete CIB objects
edit Edit CIB objects
erase Erase the CIB
fencing_topology Node fencing order
filter Filter CIB objects
graph Generate a directed graph
group Define a group
load Import the CIB from a file
location A location preference
modgroup Modify group
monitor Add monitor operation to a primitive
ms Define a master-slave resource
node Define a cluster node
op_defaults Set resource operations defaults
order Order resources
primitive Define a resource
property Set a cluster property
ptest Show cluster actions if changes were committed
refresh Refresh from CIB
_regtest Help for command _regtest
rename Rename a CIB object
role Define role access rights
rsc_defaults Set resource defaults
rsc_template Define a resource template
rsc_ticket Resources ticket dependency
rsctest Test resources as currently configured
save Save the CIB to a file
schema Set or display current CIB RNG schema
show Display CIB objects
_objects Help for command _objects
tag Define resource tags
upgrade Upgrade the CIB to version 1.0
user Define user access rights
verify Verify the CIB with crm_verify
xml Raw xml
cib CIB shadow management
cibstatus CIB status management and editing
template Edit and import a configuration from a template
Manage RAs:
ra/ Resource Agents (RA) lists and documentation
classes List classes and providers
info Show meta data for a RA
list List RA for a class (and provider)
providers Show providers for a RA and a class
Node management:
node/ Nodes management
attribute Manage attributes
clearstate Clear node state
delete Delete node
fence Fence node
maintenance Put node into maintenance mode
online Set node online
ready Put node into ready mode
show Show node
standby Put node into standby
status Show nodes' status as XML
status-attr Manage status attributes
utilization Manage utilization attributes
Resource management:
resource/ Resource management
cleanup Cleanup resource status
demote Demote a master-slave resource
failcount Manage failcounts
maintenance Enable/disable per-resource maintenance mode
manage Put a resource into managed mode
meta Manage a meta attribute
migrate Migrate a resource to another node
param Manage a parameter of a resource
promote Promote a master-slave resource
refresh Refresh CIB from the LRM status
reprobe Probe for resources not started by the CRM
restart Restart a resource
scores Display resource scores
secret Manage sensitive parameters
start Start a resource
status Show status of resources
stop Stop a resource
trace Start RA tracing
unmanage Put a resource into unmanaged mode
unmigrate Unmigrate a resource to another node
untrace Stop RA tracing
utilization Manage a utilization attribute
Configuring the cluster:
Cluster properties: property
Resource defaults: rsc_defaults
Cluster resources:
primitive
group
clone
ms/master
Constraints:
location
colocation
order
Example: a highly available httpd service
Component resources: vip, httpd, [filesystem]
vip: IPaddr, IPaddr2
httpd: systemd httpd unit file
filesystem: Filesystem
Constraints:
colocation, group
order
Shared storage:
Centralized storage:
NAS: Network Attached Storage (file-level)
File servers: NFS, CIFS
SAN: Storage Area Network (block-level)
FC SAN
IP SAN
...
Mounting a SAN LUN on multiple nodes requires a cluster file system (DLM-based):
GFS2: Global File System
OCFS2: Oracle Cluster File System
Distributed storage:
GlusterFS, Ceph, MogileFS, MooseFS, HDFS
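Putting the outline above together, the web-service cluster built step by step in the following sections ends up looking roughly like this (a sketch collected from the commands used later; each piece is explained in its own section):
crm(live)configure# primitive webip ocf:heartbeat:IPaddr params ip=192.168.150.80
crm(live)configure# primitive webstore ocf:heartbeat:Filesystem params device="192.168.150.139:/www/html" directory="/var/www/html" fstype=nfs op start timeout=60 op stop timeout=60
crm(live)configure# primitive webserver systemd:httpd op start timeout=100 op stop timeout=100
crm(live)configure# group webservice webip webstore webserver
crm(live)configure# verify
crm(live)configure# commit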
crm can be used either non-interactively from the command line or interactively. Check the current status from the command line:
~]# crm_mon
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 14:53:34 2017          Last change: Tue Jan 17 14:51:11 2017 by hacluster via crmd on node1.com
2 nodes and 0 resources configured
Online: [ node1.com node2.com ]
No active resources
Interactive view
~]# crm
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 14:54:40 2017          Last change: Tue Jan 17 14:51:11 2017 by hacluster via crmd on node1.com
2 nodes and 0 resources configured
Online: [ node1.com node2.com ]
No resources
crm(live)# ra classes    (list the RA classes)
lsb
ocf / .isolation heartbeat openstack pacemaker
service
systemd
crm(live)ra# list lsb    (use list to show the agents available in a class)
netconsole network
crm(live)ra# list systemd
NetworkManager NetworkManager-wait-online
auditd brandbot
corosync cpupower
crond dbus
display-manager dm-event
dracut-shutdown emergency
exim getty@tty1
ip6tables iptables
irqbalance kdump
kmod-static-nodes ldconfig
lvm2-activation lvm2-lvmetad
lvm2-lvmpolld lvm2-monitor
lvm2-pvscan@8:2 microcode
network pacemaker
plymouth-quit plymouth-quit-wait
plymouth-read-write plymouth-start
polkit postfix
rc-local rescue
rhel-autorelabel rhel-autorelabel-mark
rhel-configure rhel-dmesg
rhel-import-state rhel-loadmodules
rhel-readonly rsyslog
sendmail sshd
sshd-keygen syslog
systemd-ask-password-console systemd-ask-password-plymouth
systemd-ask-password-wall systemd-binfmt
systemd-firstboot systemd-fsck-root
systemd-hwdb-update systemd-initctl
systemd-journal-catalog-update systemd-journal-flush
systemd-journald systemd-logind
systemd-machine-id-commit systemd-modules-load
systemd-random-seed systemd-random-seed-load
systemd-readahead-collect systemd-readahead-done
systemd-readahead-replay systemd-reboot
systemd-remount-fs systemd-rfkill@rfkill2
systemd-shutdownd systemd-sysctl
systemd-sysusers systemd-tmpfiles-clean
systemd-tmpfiles-setup systemd-tmpfiles-setup-dev
systemd-udev-trigger systemd-udevd
systemd-update-done systemd-update-utmp
systemd-update-utmp-runlevel systemd-user-sessions
systemd-vconsole-setup tuned
wpa_supplicant
crm(live)ra# list ocf
CTDB ClusterMon Delay
Dummy Filesystem HealthCPU
HealthSMART IPaddr IPaddr2
IPsrcaddr LVM MailTo
NovaEvacuate Route SendArp
Squid Stateful SysInfo
SystemHealth VirtualDomain Xinetd
apache clvm conntrackd
controld db2 dhcpd
docker ethmonitor exportfs
galera garbd iSCSILogicalUnit
iSCSITarget iface-vlan mysql
nagios named nfsnotify
nfsserver nginx nova-compute-wait
oracle oralsnr pgsql
ping pingd portblock
postfix rabbitmq-cluster redis
remote rsyncd slapd
symlink tomcat
crm(live)ra# list ocf heartbeat
CTDB Delay Dummy
Filesystem IPaddr IPaddr2
IPsrcaddr LVM MailTo
Route SendArp Squid
VirtualDomain Xinetd apache
clvm conntrackd db2
dhcpd docker ethmonitor
exportfs galera garbd
iSCSILogicalUnit iSCSITarget iface-vlan
mysql nagios named
nfsnotify nfsserver nginx
oracle oralsnr pgsql
portblock postfix rabbitmq-cluster
redis rsyncd slapd
symlink tomcat
crm(live)ra# list ocf pacemaker
ClusterMon Dummy HealthCPU HealthSMART
Stateful SysInfo SystemHealth controld
ping pingd remote
crm(live)ra# list ocf openstack
NovaEvacuate nova-compute-wait
crm(live)ra# info ocf:heartbeat:IPaddr    (use info to view an agent's detailed usage)
crm(live)node# ls    (node mode: control node actions such as standby and online)
.. help fence
show attribute back
cd ready status-attr
quit end utilization
exit ls maintenance
online bye ?
status clearstate standby
list up server
delete
crm(live)node# standby    (with no argument, the current node is put into standby)
crm(live)node# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:07:44 2017          Last change: Tue Jan 17 15:07:40 2017 by root via crm_attribute on node1.com
2 nodes and 0 resources configured
Node node1.com: standby    (node1 is now in standby)
Online: [ node2.com ]
No resources
crm(live)# node
crm(live)node# online    (bring the node back online)
crm(live)node# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:08:36 2017          Last change: Tue Jan 17 15:08:33 2017 by root via crm_attribute on node1.com
2 nodes and 0 resources configured
Online: [ node1.com node2.com ]
No resources
Node commands can also be run directly from the top level
crm(live)# node online node2.com
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last change: 15 09:48:17 2017 by root via crm_attribute on node1.com
2 nodes and 0 resources configured
Online: [ node1.com node2.com ]
No resources
Enter configuration mode with configure
crm(live)# configure
crm(live)configure# ls
.. get_property cibstatus
primitive set validate_all
help rsc_template ptest
back cd default-timeouts
erase validate-all rsctest
rename op_defaults modgroup
xml quit upgrade
group graph load
master location template
save collocation rm
bye clone ?
ls node default_timeouts
exit acl_target colocation
fencing_topology assist alert
ra schema user
simulate rsc_ticket end
role rsc_defaults monitor
cib property resource
edit show up
refresh order filter
get-property tag ms
verify commit history
delete
location defines a resource's preference for a particular node
property defines global cluster properties
crm(live)# configure
crm(live)configure# property    (press Tab to list the available properties and how to use them)
batch-limit= node-health-strategy=
cluster-delay= node-health-yellow=
cluster-recheck-interval= notification-agent=
concurrent-fencing= notification-recipient=
crmd-transition-delay= pe-error-series-max=
dc-deadtime= pe-input-series-max=
default-action-timeout= pe-warn-series-max=
default-resource-stickiness= placement-strategy=
election-timeout= remove-after-stop=
enable-acl= shutdown-escalation=
enable-startup-probes= start-failure-is-fatal=
have-watchdog= startup-fencing=
is-managed-default= stonith-action=
load-threshold= stonith-enabled=
maintenance-mode= stonith-timeout=
migration-limit= stonith-watchdog-timeout=
no-quorum-policy= stop-all-resources=
node-action-limit= stop-orphan-actions=
node-health-green= stop-orphan-resources=
node-health-red= symmetric-cluster=
crm(live)configure# property no-quorum-policy=
no-quorum-policy (enum, [stop]): What to do when the cluster does not have quorum
        Allowed values: stop, freeze, ignore, suicide
crm(live)configure# property no-quorum-policy=stop
crm(live)configure# show    (view the current settings)
node 1: node1.com
node 2: node2.com \
attributes standby=off
property cib-bootstrap-options: \
have-watchdog=false \
dc-version=1.1.15-11.el7_3.2-e174ec8 \
cluster-infrastructure=corosync \
cluster-name=mycluster \
no-quorum-policy=stop
Disable STONITH (this lab has no fencing devices)
crm(live)configure# property stonith-enabled=false
Define the cluster IP
crm(live)configure# primitive webip ocf:heartbeat:IPaddr params ip=192.168.150.80
Validate the configuration with verify
crm(live)configure# verify
Commit the configuration with commit so it takes effect
crm(live)configure# commit
crm(live)configure# show    (view the configuration)
node 1: node1.com \
attributes standby=off
node 2: node2.com
primitive webip IPaddr \
params ip=192.168.150.80
property cib-bootstrap-options: \
have-watchdog=false \
dc-version=1.1.15-11.el7_3.2-e174ec8 \
cluster-infrastructure=corosync \
cluster-name=mycluster \
no-quorum-policy=stop \
stonith-enabled=false
Check the status
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:23:58 2017          Last change: Tue Jan 17 15:23:55 2017 by root via cibadmin on node1.com
2 nodes and 1 resource configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node1.com    (the webip cluster resource is now on node1)
Verify on node1 with ip addr
node1 ~]# ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
    link/ether 00:0c:29:98:ad:a4 brd ff:ff:ff:ff:ff:ff
inet 192.168.150.137/24 brd 192.168.150.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet 192.168.150.80/24 brd 192.168.150.255 scope global secondary eno16777736
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fe98:ada4/64 scope link
valid_lft forever preferred_lft forever
Put node1 into standby and check whether the resource migrates to node2
[root@node1 ~]# crm node standby
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:26:43 2017          Last change: Tue Jan 17 15:26:40 2017 by root via crm_attribute on node1.com
2 nodes and 1 resource configured
Node node1.com: standby
Online: [ node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com    (the webip resource has migrated to node2)
Verify
node2 ~]# ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
    link/ether 00:0c:29:f3:13:56 brd ff:ff:ff:ff:ff:ff
inet 192.168.150.138/24 brd 192.168.150.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet 192.168.150.80/24 brd 192.168.150.255 scope global secondary eno16777736
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fef3:1356/64 scope link
valid_lft forever preferred_lft forever
Bring node1 back online; since no stickiness or location preference has been defined, the resource stays on node2
[root@node1 ~]# crm node online
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:30:05 2017          Last change: Tue Jan 17 15:30:02 2017 by root via crm_attribute on node1.com
2 nodes and 1 resource configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
Resources can also be moved with migrate
crm(live)resource# migrate webip node1.com
INFO: Move constraint created for webip to node2.com
crm(live)resource# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:37:00 2017          Last change: Tue Jan 17 15:36:49 2017 by root via crm_resource on node1.com
2 nodes and 1 resource configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node1.com
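Note that migrate works by inserting a cli-prefer location constraint that pins webip to the chosen node (it shows up later in configure show). Once the move is done, that constraint can be cleared with the unmigrate command from the resource command list above, for example:
crm(live)resource# unmigrate webip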
In resource mode you can also stop, start, and delete resources
crm(live)resource# stop webip
crm(live)resource# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:38:52 2017          Last change: Tue Jan 17 15:38:50 2017 by root via cibadmin on node1.com
2 nodes and 1 resource configured: 1 resource DISABLED and 0 BLOCKED from being started due to failures
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Stopped (disabled)
crm(live)# resource
crm(live)resource# start webip
crm(live)resource# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:39:03 2017          Last change: Tue Jan 17 15:39:00 2017 by root via cibadmin on node1.com
2 nodes and 1 resource configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node1.com
5. Configure the clustered httpd service
Install httpd on each node with Ansible
~]# ansible all -m yum -a "name=httpd state=present"
Create a test page on each node
vim /var/www/html/index.html
node1
node2
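To create the two distinct test pages from the ansible host instead of logging in to each node, an illustrative one-liner (this assumes /var/www/html is still a local directory at this point):
~]# ansible all -m shell -a 'echo "<h1>$(hostname)</h1>" > /var/www/html/index.html'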
Enable the httpd unit in systemd on each node
~]# ansible all -m shell -a 'systemctl enable httpd.service'
192.168.150.138 | SUCCESS | rc=0 >>
Created symlink from /etc/systemd/system/multi-user.target.wants/httpd.service to /usr/lib/systemd/system/httpd.service.
192.168.150.137 | SUCCESS | rc=0 >>
Created symlink from /etc/systemd/system/multi-user.target.wants/httpd.service to /usr/lib/systemd/system/httpd.service.
Add the httpd resource with crmsh
[root@node1 ~]# crm
crm(live)# ra
crm(live)ra# list systemd    (check that httpd appears among the systemd services)
View the advisory default operation values for the httpd resource
crm(live)ra# info systemd:httpd
systemd unit file for httpd (systemd:httpd)
The Apache HTTP Server
Operations' defaults (advisory minimum):
start timeout=100
stop timeout=100
status timeout=100
monitor timeout=100 interval=60
Configure the resource
crm(live)# configure
crm(live)configure# primitive webserver systemd:httpd op start timeout=100 op stop timeout=100
crm(live)configure# verify
crm(live)configure# commit
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:54:45 2017          Last change: Tue Jan 17 15:54:32 2017 by root via cibadmin on node1.com
2 nodes and 2 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node1.com
webserver (systemd:httpd): Started node2.com
At this point the two resources have been started on different nodes, which clearly will not work for an httpd cluster,
so we define a resource group to bind the two resources together.
crm(live)configure# group webservice webip webserver    (the order of resources in the group is also the order in which they start)
INFO: modified location:cli-prefer-webip from webip to webservice
crm(live)configure# show
node 1: node1.com \
attributes standby=off
node 2: node2.com \
attributes standby=off
primitive webip IPaddr \
params ip=192.168.150.80 \
meta target-role=Started
primitive webserver systemd:httpd \
op start timeout=100 interval=0 \
op stop timeout=100 interval=0
group webservice webip webserver
location cli-prefer-webip webservice role=Started inf: node1.com
property cib-bootstrap-options: \
have-watchdog=false \
dc-version=1.1.15-11.el7_3.2-e174ec8 \
cluster-infrastructure=corosync \
cluster-name=mycluster \
no-quorum-policy=stop \
stonith-enabled=false
crm(live)configure# verify
crm(live)configure# commit
crm(live)configure# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:58:30 2017          Last change: Tue Jan 17 15:58:24 2017 by root via cibadmin on node1.com
2 nodes and 2 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
Verify cluster failover:
Put node1 into standby
[root@node1 ~]# crm node standby
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:59:18 2017          Last change: Tue Jan 17 15:59:15 2017 by root via crm_attribute on node1.com
2 nodes and 2 resources configured
Node node1.com: standby
Online: [ node2.com ]
Full list of resources:
Resource Group: webservice
webip (ocf::heartbeat:IPaddr): Started node2.com
webserver (systemd:httpd): Stopped
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:59:21 2017 Last change: Tue Jan 17 15:59:15 2017 by r
oot via crm_attribute on node1.com
2 nodes and 2 resources configured
Node node1.com: standby
Online: [ node2.com ]
Full list of resources:
Resource Group: webservice
webip (ocf::heartbeat:IPaddr): Started node2.com
webserver (systemd:httpd): Started node2.com
6. Add a shared-storage resource
Use NFS as the shared storage to simulate a shared-storage cluster.
Configure NFS on the shared-storage host:
yum -y install nfs-utils
[root@localhost ~]# mkdir /www/html -pv
mkdir: created directory '/www'
mkdir: created directory '/www/html'
[root@localhost ~]# vim /etc/exports
[root@localhost ~]# cat /etc/exports
/www/html 192.168.150.0/24(rw,no_root_squash)
[root@localhost ~]# systemctl start nfs.service
[root@localhost ~]# ss -tnl
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 64 *:39439 *:*
LISTEN 0 128 *:111 *:*
LISTEN 0 128 *:20048 *:*
LISTEN 0 128 *:33073 *:*
LISTEN 0 128 *:22 *:*
LISTEN 0 100 127.0.0.1:25 *:*
LISTEN 0 64 *:2049 *:*
LISTEN 0 128 :::111 :::*
LISTEN 0 128 :::20048 :::*
LISTEN 0 128 :::58611 :::*
LISTEN 0 128 :::22 :::*
LISTEN 0 100 ::1:25 :::*
LISTEN 0 64 :::2049 :::*
LISTEN 0 64 :::59877 :::*
Test the NFS mount on each node with Ansible
~]# ansible all -m yum -a "name=nfs-utils state=present"
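With nfs-utils installed, it is worth confirming from the nodes that the export is actually visible before mounting (an illustrative check):
~]# ansible all -m shell -a 'showmount -e 192.168.150.139'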
~]# ansible all -m shell -a 'mount -t nfs 192.168.150.139:/www/html /var/www/html'
192.168.150.137 | SUCCESS | rc=0 >>
192.168.150.138 | SUCCESS | rc=0 >>
Verify on a node
~]# df -h
Filesystem                 Size  Used Avail Use% Mounted on
/dev/mapper/centos-root 28G 8.5G 20G 31% /
devtmpfs 479M 0 479M 0% /dev
tmpfs 489M 54M 436M 11% /dev/shm
tmpfs 489M 6.8M 483M 2% /run
tmpfs 489M 0 489M 0% /sys/fs/cgroup
/dev/sda1 497M 125M 373M 25% /boot
tmpfs 98M 0 98M 0% /run/user/0
192.168.150.139:/www/html 28G 8.4G 20G 31% /var/www/html
Unmount
~]# ansible all -m shell -a 'umount /var/www/html'
192.168.150.138 | SUCCESS | rc=0 >>
192.168.150.137 | SUCCESS | rc=0 >>
Configure the storage resource
[root@node1 ~]# crm
crm(live)# configure
crm(live)configure# primitive webstore ocf:heartbeat:Filesystem params device="192.168.150.139:/www/html" directory="/var/www/html" fstype=nfs op start timeout=60 op stop timeout=60
crm(live)configure# verify
crm(live)configure# cd
There are changes pending. Do you want to commit them (y/n)? y
crm(live)# resource
crm(live)resource# stop webservice
Do you want to override target-role for child resource webip (y/n)? y
crm(live)resource# cd
crm(live)# configure
crm(live)configure# delete webservice
INFO: modified location:cli-prefer-webip from webservice to webip
crm(live)configure# show
node 1: node1.com \
attributes standby=off
node 2: node2.com \
attributes standby=off
primitive webip IPaddr \
params ip=192.168.150.80
primitive webserver systemd:httpd \
op start timeout=100 interval=0 \
op stop timeout=100 interval=0
primitive webstore Filesystem \
params device="192.168.150.139:/www/html" directory="/var/www/html" fstype=nfs \
op start timeout=60 interval=0 \
op stop timeout=60 interval=0
location cli-prefer-webip webip role=Started inf: node1.com
property cib-bootstrap-options: \
have-watchdog=false \
dc-version=1.1.15-11.el7_3.2-e174ec8 \
cluster-infrastructure=corosync \
cluster-name=mycluster \
no-quorum-policy=stop \
stonith-enabled=false
crm(live)configure# group webservice webip webstore webserver
INFO: modified location:cli-prefer-webip from webip to webservice
crm(live)configure# verify
crm(live)configure# commit
Check the status
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 16:22:12 2017          Last change: Tue Jan 17 16:21:44 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
Resource Group: webservice
webip (ocf::heartbeat:IPaddr): Started node1.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node1.com
Verify the cluster
[root@node1 ~]# vim /var/www/html/index.html
[root@node1 ~]# cat /var/www/html/index.html
<h1>nfs server</h1>
[root@node1 ~]# curl http://192.168.150.80
<h1>nfs server</h1>
Put node1 into standby
[root@node1 ~]# crm node standby
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 16:24:47 2017          Last change: Tue Jan 17 16:24:44 2017 by root via crm_attribute on node1.com
2 nodes and 3 resources configured
Node node1.com: standby
Online: [ node2.com ]
Full list of resources:
Resource Group: webservice
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node2.com
webserver (systemd:httpd): Stopped
[root@node1 ~]# curl http://192.168.150.80    (still reachable; the cluster is working)
<h1>nfs server</h1>
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 16:24:59 2017          Last change: Tue Jan 17 16:24:44 2017 by root via crm_attribute on node1.com
2 nodes and 3 resources configured
Node node1.com: standby
Online: [ node2.com ]
Full list of resources:
Resource Group: webservice
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node2.com
webserver (systemd:httpd): Started node2.com
Bring node1 back online
crm node online
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 16:26:16 2017          Last change: Tue Jan 17 16:26:11 2017 by root via crm_attribute on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
Resource Group: webservice
webip (ocf::heartbeat:IPaddr): Started node1.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node1.com
7. Configure and test location constraints
Delete the previously defined group and set a location preference for each resource
crm(live)# resource
crm(live)resource# stop webservice
crm(live)# configure
crm(live)configure# delete webservice
INFO: modified location:cli-prefer-webip from webservice to webip
crm(live)configure# commit
Set a location constraint
crm(live)configure# location webip_pre_node1 webip 50: node1.com
crm(live)configure# show
node 1: node1.com \
attributes standby=off
node 2: node2.com \
attributes standby=off
primitive webip IPaddr \
params ip=192.168.150.80
primitive webserver systemd:httpd \
op start timeout=100 interval=0 \
op stop timeout=100 interval=0
primitive webstore Filesystem \
params device="192.168.150.139:/www/html" directory="/var/www/html" fstype=nfs \
op start timeout=60 interval=0 \
op stop timeout=60 interval=0
location cli-prefer-webip webip role=Started inf: node1.com
location webip_pre_node1 webip 50: node1.com
property cib-bootstrap-options: \
have-watchdog=false \
dc-version=1.1.15-11.el7_3.2-e174ec8 \
cluster-infrastructure=corosync \
cluster-name=mycluster \
no-quorum-policy=stop \
stonith-enabled=false
crm(live)configure# verify
crm(live)configure# commit
Check the status
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 16:32:58 2017          Last change: Tue Jan 17 16:31:44 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node1.com
webstore (ocf::heartbeat:Filesystem): Started node2.com
webserver (systemd:httpd): Started node1.com
Check the default resource stickiness; it defaults to 0
crm(live)# configure
crm(live)configure# property
batch-limit= node-health-strategy=
cluster-delay= node-health-yellow=
cluster-recheck-interval= notification-agent=
concurrent-fencing= notification-recipient=
crmd-transition-delay= pe-error-series-max=
dc-deadtime= pe-input-series-max=
default-action-timeout= pe-warn-series-max=
default-resource-stickiness= placement-strategy=
election-timeout= remove-after-stop=
enable-acl= shutdown-escalation=
enable-startup-probes= start-failure-is-fatal=
have-watchdog= startup-fencing=
is-managed-default= stonith-action=
load-threshold= stonith-enabled=
maintenance-mode= stonith-timeout=
migration-limit= stonith-watchdog-timeout=
no-quorum-policy= stop-all-resources=
node-action-limit= stop-orphan-actions=
node-health-green= stop-orphan-resources=
node-health-red= symmetric-cluster=
crm(live)configure# property default-resource-stickiness=
default-resource-stickiness (integer, [0]):
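If you want resources to prefer staying where they are once started (so that a recovering node does not immediately pull them back), the stickiness can be raised, either through rsc_defaults or through the cluster property listed above; a sketch, not applied in this lab:
crm(live)configure# property default-resource-stickiness=100
crm(live)configure# verify
crm(live)configure# commit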
One thing to watch out for here:
an existing cli-prefer constraint already pins webip to node1 with an infinite score, so delete it first before testing:
location cli-prefer-webip webip role=Started inf: node1.com    (inf means infinity)
In configure mode you can run edit, which opens the configuration in a vim-like editor so it can be changed by hand.
crm(live)configure# verify
crm(live)configure# commit
Now define a location constraint for node2 with a higher score than node1's and test:
location webip_pre_node2 webip 100: node2.com
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 21:12:40 2017          Last change: Tue Jan 17 21:11:25 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node1.com
The webip resource has now migrated to node2.
8. Configure and test colocation constraints
crm(live)# configure
crm(live)configure# colocation webserver_with_webip inf: webserver webip    (defines an affinity between the two resources: they must run on the same node)
crm(live)configure# verify
crm(live)configure# commit
crm(live)configure# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 21:16:11 2017          Last change: Tue Jan 17 21:16:09 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node1.com
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 21:16:50 2017          Last change: Tue Jan 17 21:16:09 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node2.com
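Note that webstore is still on node1 here, so httpd on node2 is serving its local document root rather than the NFS share. If the storage should follow the VIP and web server as well, the same technique applies; a sketch, not applied in this lab:
crm(live)configure# colocation webstore_with_webip inf: webstore webip
crm(live)configure# verify
crm(live)configure# commit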
9. Ordering constraints (order)
crm(live)configure# order webip_bef_webstore_bef_webserver mandatory: webip webstore webserver    (mandatory start order for the resources)
crm(live)configure# verify
crm(live)configure# commit
crm(live)configure# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 22:08:26 2017          Last change: Tue Jan 17 22:08:24 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node2.com
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 22:08:39 2017          Last change: Tue Jan 17 22:08:24 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node2.com
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 22:08:40 2017 Last change: Tue Jan 17 22:08:24 2017 by r
oot via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node2.com
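A quick way to review every constraint defined so far from the shell (illustrative):
[root@node1 ~]# crm configure show | grep -E "^(location|colocation|order)"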
10. Define resources with monitoring
In a two-node cluster, losing quorum can leave resources unable to fail over. There are several ways to deal with this:
Add a ping node
Add a quorum disk
Keep an odd number of nodes in the cluster
Simply ignore the loss of quorum; if you take this approach, the resources must be monitored
crm(live)configure# property no-quorum-policy=
no-quorum-policy (enum, [stop]): What to do when the cluster does not have quorum
        What to do when the cluster does not have quorum. Allowed values: stop, freeze, ignore, suicide
crm(live)configure# property no-quorum-policy=ignore
crm(live)configure# verify
crm(live)configure# commit
Define resource monitoring
crm(live)configure# primitive webserver systemd:httpd op start timeout=100 op stop timeout=100 op monitor interval=60 timeout=100
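If webserver is already defined from the earlier sections, an alternative to redefining the primitive is to attach the monitor operation to the existing resource with the configure monitor command listed earlier; roughly:
crm(live)configure# monitor webserver 60s:100s
crm(live)configure# verify
crm(live)configure# commit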
Manually kill the httpd service to trigger the monitor
[root@node1 ~]# killall httpd
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 22:26:31 2017 Last change: Tue Jan 17 22:23:51 2017 by root via cibadmin on node2.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node1.com
root@node1 ~]# ss -tnl
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 *:111 *:*
LISTEN 0 128 *:22 *:*
LISTEN 0 100 127.0.0.1:25 *:*
LISTEN 0 64 *:43550 *:*
LISTEN 0 128 :::111 :::*
LISTEN 0 128 :::22 :::*
LISTEN 0 100 ::1:25 :::*
LISTEN 0 64 :::36414 :::*
After about 60 seconds it is started again automatically
root@node1 ~]# ss -tnl
State      Recv-Q Send-Q    Local Address:Port     Peer Address:Port
LISTEN     0      128                   *:111                 *:*
LISTEN     0      128                   *:22                  *:*
LISTEN     0      100           127.0.0.1:25                  *:*
LISTEN     0      64                    *:43550               *:*
LISTEN     0      128                  :::111                :::*
LISTEN     0      128                  :::80                 :::*
LISTEN     0      128                  :::22                 :::*
LISTEN     0      100                 ::1:25                 :::*
LISTEN     0      64                   :::36414              :::*
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 22:30:24 2017          Last change: Tue Jan 17 22:23:51 2017 by root via cibadmin on node2.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node1.com
Failed Actions:    (with monitoring enabled, this failure record appears)
* webserver_monitor_60000 on node1.com 'not running' (7): call=66, status=complete, exitreason='none',
    last-rc-change='Tue Jan 17 22:26:53 2017', queued=0ms, exec=0ms
Failure records can be cleared with cleanup
[root@node1 ~]# crm
crm(live)# resource
crm(live)resource# cleanup webserver
Cleaning up webserver on node1.com, removing fail-count-webserver
Cleaning up webserver on node2.com, removing fail-count-webserver
Waiting for 2 replies from the CRMd.. OK
crm(live)resource# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 22:33:56 2017          Last change: Tue Jan 17 22:33:52 2017 by hacluster via crmd on node2.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node1.com
Now break node1's httpd configuration file so that httpd cannot start, and see whether the resource migrates to node2
~]# mv /etc/httpd/conf/httpd.conf /etc/httpd/conf/httpd.conf.bak
[root@node1 ~]# killall httpd
[root@node1 ~]# ss -tnl
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 *:111 *:*
LISTEN 0 64 *:47028 *:*
LISTEN 0 128 *:22 *:*
LISTEN 0 100 127.0.0.1:25 *:*
LISTEN 0 128 :::111 :::*
LISTEN 0 128 :::22 :::*
LISTEN 0 100 ::1:25 :::*
LISTEN 0 64 :::60901 :::*
Because the resource cannot start on node1, it is started on node2 instead
[root@node2 ~]# ss -tnl
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 *:111 *:*
LISTEN 0 128 *:22 *:*
LISTEN 0 100 127.0.0.1:25 *:*
LISTEN 0 128 :::111 :::*
LISTEN 0 128 :::80 :::*
LISTEN 0 128 :::22 :::*
LISTEN 0 100 ::1:25 :::*
]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Wed Jan 18 11:03:15 2017          Last change: Wed Jan 18 10:56:07 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node2.com
Failed Actions:
* webserver_start_0 on node1.com 'not running' (7): call=86, status=complete, exitreason='none',
    last-rc-change='Wed Jan 18 10:59:01 2017', queued=0ms, exec=2106ms
After restoring the httpd service, remember to clear the resource's failure record, otherwise the resource will not be started on that node again.
[root@node1 ~]# crm
crm(live)# resource
crm(live)resource# cleanup webserver
11. A highly available LVS director server
This is implemented with the help of ldirectord.
Deploy ldirectord to both nodes from the ansible host
~]# ansible all -m copy -a "src=/root/ldirectord-3.9.6-0rc1.1.1.x86_64.rpm dest=/root/ldirectord-3.9.6-0rc1.1.1.x86_64.rpm"
~]# ansible all -m shell -a 'yum -y install ldirectord-3.9.6-0rc1.1.1.x86_64.rpm'
Confirm on the node hosts that the installation succeeded
[root@node1 ~]# rpm -qa ldirectord
ldirectord-3.9.6-0rc1.1.1.x86_64
Edit the configuration file on the ansible host and deploy it to the node hosts
yum -y install ldirectord-3.9.6-0rc1.1.1.x86_64.rpm    (on the ansible host as well, so the sample config is available)
~]# cp /usr/share/doc/ldirectord-3.9.6/ldirectord.cf /etc/ha.d/
~]# cd /etc/ha.d/
ha.d]# vim ldirectord.cf
ha.d]# grep -v "^#" ldirectord.cf | grep -v "^$"
checktimeout=3
checkinterval=1
autoreload=yes
quiescent=no
virtual=192.168.150.81:80                  # the VIP
        real=192.168.150.7:80 gate         # real server addresses
        real=192.168.150.8:80 gate
        real=192.168.6.6:80 gate
        fallback=127.0.0.1:80 gate         # sorry server (local host)
        service=http                       # service to check
        scheduler=rr                       # scheduling algorithm
        #persistent=600
        #netmask=255.255.255.255
        protocol=tcp
        checktype=negotiate
        checkport=80
        request="index.html"
        receive="Test Page"
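The last three lines are what ldirectord's health check actually does: it requests index.html from every real server and expects the response body to contain "Test Page"; any real server failing the check is removed from the ipvs table and the fallback takes over. For the check to pass, each real server would need a matching page, for example (hypothetical, since the real servers are not actually built in this lab):
# on each real server (192.168.150.7 / 192.168.150.8):
~]# echo "Test Page" > /var/www/html/index.html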
ha.d]# ansible all -m copy -a "src=/etc/ha.d/ldirectord.cf dest=/etc/ha.d/ldirectord.cf"
Enable the service in systemd
ha.d]# ansible all -m shell -a 'systemctl enable ldirectord.service'
192.168.150.137 | SUCCESS | rc=0 >>
Created symlink from /etc/systemd/system/multi-user.target.wants/ldirectord.service to /usr/lib/systemd/system/ldirectord.service.
192.168.150.138 | SUCCESS | rc=0 >>
Created symlink from /etc/systemd/system/multi-user.target.wants/ldirectord.service to /usr/lib/systemd/system/ldirectord.service.
Start the service on a node and test
~]# systemctl start ldirectord.service
~]# systemctl status ldirectord.service
● ldirectord.service - Monitor and administer real servers in a LVS cluster of load balanced virtual servers
Loaded: loaded (/usr/lib/systemd/system/ldirectord.service; enabled; vendor preset: disabled)
Active: active (running) since Wed 2017-01-18 11:31:21 CST; 9s ago
Process: 17474 ExecStartPost=/usr/bin/touch /var/lock/subsys/ldirectord (code=exited, status=0/SUCCESS)
Process: 17472 ExecStart=/usr/sbin/ldirectord start (code=exited, status=0/SUCCESS)
Main PID: 17476 (ldirectord)
CGroup: /system.slice/ldirectord.service
└─17476 /usr/bin/perl -w /usr/sbin/ldirectord start
~]# ipvsadm -Ln    (the director starts correctly)
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 192.168.150.81:80 rr
-> 127.0.0.1:80 Route 1 0 0
Before testing, clear out all of the configuration from the earlier tests:
stop and cleanup the resources in resource mode, then use edit in configure mode to remove their definitions.
Define a new VIP resource and an ldirectord cluster resource, and put both into a drservice group.
crm(live)configure# primitive vip ocf:heartbeat:IPaddr2 params ip=192.168.150.81
crm(live)configure# primitive director systemd:ldirectord op start timeout=100 op stop timeout=100
crm(live)configure# group drservice vip director
crm(live)configure# verify
crm(live)configure# commit
~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Wed Jan 18 11:42:38 2017          Last change: Wed Jan 18 11:42:09 2017 by root via cibadmin on node1.com
2 nodes and 2 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
Resource Group: drservice
vip (ocf::heartbeat:IPaddr2): Started node1.com
director (systemd:ldirectord): Started node1.com
LVS state
~]# ipvsadm -Ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 192.168.150.81:80 rr
-> 127.0.0.1:80 Route 1 0 0
The VIP is up on node1
[root@node1 ~]# ip addr show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
    link/ether 00:0c:29:98:ad:a4 brd ff:ff:ff:ff:ff:ff
inet 192.168.150.137/24 brd 192.168.150.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet 192.168.150.81/24 brd 192.168.150.255 scope global secondary eno16777736
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fe98:ada4/64 scope link
valid_lft forever preferred_lft forever
Put node1 into standby to test
[root@node1 ~]# crm node standby
[root@node1 ~]# crm status    (all resources have migrated to node2)
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Wed Jan 18 11:45:08 2017          Last change: Wed Jan 18 11:44:57 2017 by root via crm_attribute on node1.com
2 nodes and 2 resources configured
Node node1.com: standby
Online: [ node2.com ]
Full list of resources:
Resource Group: drservice
vip (ocf::heartbeat:IPaddr2): Started node2.com
director (systemd:ldirectord): Started node2.com
Now check the resource group state on node2.
The LVS director is now on node2; because the real servers configured above are not reachable, the sorry server is the one actually in service.
[root@node2 ~]# ipvsadm -Ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 192.168.150.81:80 rr
-> 127.0.0.1:80 Route 1 0 0
[root@node2 ~]# ip addr    (the VIP has come over as well)
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
    link/ether 00:0c:29:f3:13:56 brd ff:ff:ff:ff:ff:ff
inet 192.168.150.138/24 brd 192.168.150.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet 192.168.150.81/24 brd 192.168.150.255 scope global secondary eno16777736
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fef3:1356/64 scope link
valid_lft forever preferred_lft forever