MySQL High Availability: MaxScale HA with Corosync + Pacemaker

The previous article set up MySQL master-slave replication and MHA high availability. This article implements read/write splitting with MaxScale, plus HA for MaxScale itself. MaxScale HA can be built with Keepalived or Heartbeat, but the official recommendation is Corosync + Pacemaker. Anyone familiar with high availability will know that Corosync + Pacemaker is more powerful and more flexible to configure: Corosync allows different primary nodes to be set for different resource groups, handles configuration-file synchronization between nodes by itself, supports clusters of more than two nodes, and supports grouping resources so they can be managed, assigned a primary node, and started and stopped as a unit. Corosync does carry a certain amount of complexity, so a little patience is needed during configuration. On balance, the usual choice is Corosync for heartbeat detection, paired with Pacemaker's resource management layer, to build a highly available system.

#Initialize the systems

ntpdate 120.25.108.11
/root/init_system_centos7.sh

#Configure the hosts file (maxscale61, maxscale62)

cat >> /etc/hosts << EOF
192.168.5.61 maxscale61.blufly.com
192.168.5.62 maxscale62.blufly.com
192.168.5.51 db51.blufly.com
192.168.5.52 db52.blufly.com
192.168.5.53 db53.blufly.com
EOF

#Set up mutual SSH trust between the two nodes

[root@maxscale61 ~]# ssh-keygen -t rsa
[root@maxscale61 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 root@192.168.5.62
[root@maxscale62 ~]# ssh-keygen -t rsa
[root@maxscale62 ~]# ssh-copy-id -i ~/.ssh/id_rsa.pub -p 65535 root@192.168.5.61

#####---------------- 1. Install MaxScale -----------------#####

#Create the monitoring and routing accounts on the MySQL master node (after the earlier failover, db52 is now the master)

CREATE USER maxscale@'%' IDENTIFIED BY "balala369";
GRANT replication slave, replication client ON *.* TO maxscale@'%';
GRANT SELECT ON mysql.* TO maxscale@'%';
GRANT ALL ON maxscale_schema.* TO maxscale@'%';
GRANT SHOW DATABASES ON *.* TO maxscale@'%';
flush privileges;

#Install MaxScale (maxscale61, maxscale62)

[root@maxscale61 opt]# yum -y install libcurl libaio openssl
[root@maxscale61 opt]# cd /opt
[root@maxscale61 opt]# wget downloads.mariadb.com/MaxScale/la…
[root@maxscale61 opt]# yum -y localinstall maxscale-2.2.13-1.centos.7.x86_64.rpm
[root@maxscale61 opt]# maxkeys
[root@maxscale61 opt]# maxpasswd balala369
47794130FFBA029760829CD50C10ABAC
[root@maxscale61 opt]# chown -R maxscale:maxscale /var/lib/maxscale/

#MaxScale configuration file (maxscale61, maxscale62)

cat /etc/maxscale.cnf
[maxscale]
# Number of worker threads; the default is 1, and "auto" matches the CPU core count
threads=auto
# Millisecond precision for log timestamps
ms_timestamp=1
# Also write logs to syslog
syslog=1
# Write logs to MaxScale's own log file
maxlog=1
# Do not write logs to shared memory; enabling it can speed up logging in debug mode
log_to_shm=0
# Log warnings
log_warning=1
# Log notices
log_notice=1
# Log info messages
log_info=1
# Leave debug logging off
log_debug=0
# Augment log messages
log_augmentation=1

[server1]
type=server
address=192.168.5.51
port=9106
protocol=MariaDBBackend
serv_weight=3

[server2]
type=server
address=192.168.5.52
port=9106
protocol=MariaDBBackend
serv_weight=1

[server3]
type=server
address=192.168.5.53
port=9106
protocol=MariaDBBackend
serv_weight=3

[MariaDB-Monitor]
type=monitor
module=mariadbmon
servers=server1,server2,server3
user=maxscale
passwd=47794130FFBA029760829CD50C10ABAC
monitor_interval=2000
detect_stale_master=true

[Read-Only-Service]
type=service
router=readconnroute
servers=server1,server2,server3
user=maxscale
passwd=47794130FFBA029760829CD50C10ABAC
router_options=slave
enable_root_user=1
weightby=serv_weight

[Read-Write-Service]
type=service
router=readwritesplit
servers=server1,server2,server3
user=maxscale
passwd=47794130FFBA029760829CD50C10ABAC
enable_root_user=1

[MaxAdmin-Service]
type=service
router=cli

[Read-Only-Listener]
type=listener
service=Read-Only-Service
protocol=MariaDBClient
port=4008

[Read-Write-Listener]
type=listener
service=Read-Write-Service
protocol=MariaDBClient
port=4006

[MaxAdmin-Listener]
type=listener
service=MaxAdmin-Service
protocol=maxscaled
socket=default
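Before wiring MaxScale into systemd, the file can be sanity-checked without actually starting the proxy. A minimal sketch, assuming this MaxScale 2.2 build supports the --config-check flag:

# Parse /etc/maxscale.cnf and exit; a zero exit code means the configuration is valid
maxscale --config-check --user=maxscale -f /etc/maxscale.cnf && echo "config OK"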

#Configure MaxScale to start via systemctl

vi /usr/lib/systemd/system/maxscale.service

[Unit]
Description=MariaDB MaxScale Database Proxy
After=network.target

[Service]
Type=forking
Restart=on-abort
PIDFile=/var/run/maxscale/maxscale.pid
ExecStartPre=/usr/bin/install -d /var/run/maxscale -o maxscale -g maxscale
ExecStart=/usr/bin/maxscale --user=maxscale -f /etc/maxscale.cnf
TimeoutStartSec=120
LimitNOFILE=65535

[Install]
WantedBy=multi-user.target

#Test starting and stopping MaxScale

systemctl start maxscale.service
systemctl status maxscale.service
systemctl stop maxscale.service
systemctl status maxscale.service

#Enable start on boot

systemctl enable maxscale.service

#Start MaxScale

[root@maxscale61 opt]# maxscale --user=maxscale -f /etc/maxscale.cnf
[root@maxscale61 opt]# netstat -tnlup|grep maxscale
tcp        0      0 127.0.0.1:8989          0.0.0.0:*               LISTEN      31708/maxscale
tcp6       0      0 :::4008                 :::*                    LISTEN      31708/maxscale
tcp6       0      0 :::4006                 :::*                    LISTEN      31708/maxscale

#Log in to the MaxScale admin interface and check the backend database status

[root@maxscale61 ~]# maxadmin -S /tmp/maxadmin.sock
MaxScale> list servers
Servers.
-------------------+-----------------+-------+-------------+--------------------
Server             | Address         | Port  | Connections | Status
-------------------+-----------------+-------+-------------+--------------------
server1            | 192.168.5.51    |  9106 |           0 | Slave, Running
server2            | 192.168.5.52    |  9106 |           0 | Master, Running
server3            | 192.168.5.53    |  9106 |           0 | Slave, Running
-------------------+-----------------+-------+-------------+--------------------
MaxScale> list services
Services.
--------------------------+----------------+--------+----------------+-------------------
Service Name              | Router Module  | #Users | Total Sessions | Backend databases
--------------------------+----------------+--------+----------------+-------------------
Read-Only-Service         | readconnroute  |      1 |              1 | server1, server2, server3
Read-Write-Service        | readwritesplit |      1 |              1 | server1, server2, server3
MaxAdmin-Service          | cli            |      2 |              2 |
--------------------------+----------------+--------+----------------+-------------------
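maxadmin also accepts commands as arguments, so these checks are easy to script. A small health-check sketch (the threshold of 3 backends is an assumption matching this setup):

#!/bin/bash
# Count backends that maxadmin reports as Running and warn when any of the 3 are down
RUNNING=$(maxadmin -S /tmp/maxadmin.sock list servers | grep -c "Running")
if [ "$RUNNING" -lt 3 ]; then
    echo "WARNING: only ${RUNNING}/3 backends are running"
fi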

###Verify MaxScale's monitor plugin: stop the database service on db51

[root@db51 ~]# /etc/init.d/mysqld stop
Stopping mysqld (via systemctl):                           [  OK  ]
[root@maxscale61 opt]# maxadmin -S /tmp/maxadmin.sock
MaxScale> list servers
Servers.
-------------------+-----------------+-------+-------------+--------------------
Server             | Address         | Port  | Connections | Status
-------------------+-----------------+-------+-------------+--------------------
server1            | 192.168.5.51    |  9106 |           0 | Down
server2            | 192.168.5.52    |  9106 |           0 | Master, Running
server3            | 192.168.5.53    |  9106 |           0 | Slave, Running
-------------------+-----------------+-------+-------------+--------------------

#Start the database service on db51 again

[root@db51 ~]# /etc/init.d/mysqld start
Starting mysqld (via systemctl):                           [  OK  ]
MaxScale> list servers
Servers.
-------------------+-----------------+-------+-------------+--------------------
Server             | Address         | Port  | Connections | Status
-------------------+-----------------+-------+-------------+--------------------
server1            | 192.168.5.51    |  9106 |           0 | Slave, Running
server2            | 192.168.5.52    |  9106 |           0 | Master, Running
server3            | 192.168.5.53    |  9106 |           0 | Slave, Running
-------------------+-----------------+-------+-------------+--------------------

###Verify read/write splitting (run from db51 — maxscale61 has no MySQL installed, so the mysql command is unavailable there)

[root@db51 ~]# mysql -ublufly -p852741 -h192.168.5.61 -P4006
#Note: the login here uses an ordinary MySQL user, not the maxscale user

MySQL [(none)]> select @@hostname;
+-----------------+
| @@hostname      |
+-----------------+
| db51.blufly.com |
+-----------------+
1 row in set (0.001 sec)

MySQL [mysql]> use test;
Database changed

#Create a table

MySQL [test]> CREATE TABLE bf_staff(
    -> staff_id INT NOT NULL AUTO_INCREMENT,
    -> staff_name VARCHAR(40) NOT NULL,
    -> staff_title VARCHAR(100) NOT NULL,
    -> entry_date DATE,
    -> PRIMARY KEY ( staff_id )
    -> )ENGINE=InnoDB DEFAULT CHARSET=utf8;
Query OK, 0 rows affected (0.167 sec)

MySQL [test]> show tables;
+----------------+
| Tables_in_test |
+----------------+
| bf_staff       |
+----------------+
1 row in set (0.001 sec)

#Insert data

MySQL [test]> insert into bf_staff (staff_name,staff_title,entry_date) values('張森','軟件工程師','1988-10-11'),('王梅','人事專員','1993-3-20');
Query OK, 2 rows affected (0.012 sec)
Records: 2  Duplicates: 0  Warnings: 0

MySQL [test]> select * from bf_staff;
+----------+------------+-----------------+------------+
| staff_id | staff_name | staff_title     | entry_date |
+----------+------------+-----------------+------------+
|        1 | 張森       | 軟件工程師      | 1988-10-11 |
|        2 | 王梅       | 人事專員        | 1993-03-20 |
+----------+------------+-----------------+------------+
2 rows in set (0.001 sec)

MySQL [test]> insert into bf_staff (staff_name,staff_title,entry_date) values('李自在','產品經理','1979-11-19'),('王衡','測試工程師','1995-6-2');
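readwritesplit also routes reads differently depending on transaction context: a plain SELECT goes to a slave, while a SELECT inside an explicit transaction stays on the master to preserve consistency. A quick way to observe this from db51 — a sketch reusing the same test login:

# Plain SELECT — expect a slave's hostname back
mysql -ublufly -p852741 -h192.168.5.61 -P4006 -e "select @@hostname;"

# SELECT inside an explicit transaction — expect the master (db52) to answer
mysql -ublufly -p852741 -h192.168.5.61 -P4006 -e "start transaction; select @@hostname; commit;"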

#Watch the read/write split in action on maxscale61

[root@maxscale61 ~]# cat /var/log/maxscale/maxscale.log

#The selects were routed to db51
2018-09-12 16:51:46.262   info   : (5) [readwritesplit] (log_transaction_status): > Autocommit: [enabled], trx is [not open], cmd: (0x03) COM_QUERY, plen: 16, type: QUERY_TYPE_SHOW_TABLES, stmt: show tables
2018-09-12 16:51:46.262   info   : (5) [readwritesplit] (handle_got_target): Route query to slave [192.168.5.51]:9106 <
2018-09-12 16:51:46.262   info   : (5) [readwritesplit] (clientReply): Reply complete, last reply from server1
2018-09-12 16:51:58.842   info   : (5) [readwritesplit] (log_transaction_status): > Autocommit: [enabled], trx is [not open], cmd: (0x03) COM_QUERY, plen: 27, type: QUERY_TYPE_READ, stmt: select * from bf_staff
2018-09-12 16:51:58.842   info   : (5) [readwritesplit] (handle_got_target): Route query to slave [192.168.5.51]:9106 <
2018-09-12 16:51:58.843   info   : (5) [readwritesplit] (clientReply): Reply complete, last reply from server1

#The insert was routed to db52
2018-09-12 16:59:52.066   info   : (5) [readwritesplit] (log_transaction_status): > Autocommit: [enabled], trx is [not open], cmd: (0x03) COM_QUERY, plen: 149, type: QUERY_TYPE_WRITE, stmt: insert into bf_staff (staff_name,staff_title,entry_date) values('李自在','產品經理','1979-11-19'),('王衡','測試工程師','1995-6-2')
2018-09-12 16:59:52.066   info   : (5) [readwritesplit] (handle_got_target): Route query to master [192.168.5.52]:9106 <
2018-09-12 16:59:52.071   info   : (5) [readwritesplit] (clientReply): Reply complete, last reply from server2

##------- MaxScale caveats --------##

#Full list of limitations: mariadb.com/kb/en/maria…

#The key points worth highlighting:

1) The compressed protocol is not supported when creating connections

2) Routing cannot dynamically track migration of the master node

3) LONGBLOB fields are not supported

4) The following are routed to the master node (to guarantee transaction consistency):

   explicitly started transactions;

   prepared statements;

   statements that invoke stored procedures or user-defined functions;

   multi-statement commands: INSERT INTO ... ; SELECT LAST_INSERT_ID();

5) Some statements are, by default, sent to all backend servers. This can be controlled with:

use_sql_variables_in=[master|all] (default: all)

When set to master, these statements are routed to the master only — but autocommit changes and prepared statements are still sent to every backend server (see the config sketch after this list).

The statements in question are:

COM_INIT_DB (USE creates this)
COM_CHANGE_USER
COM_STMT_CLOSE
COM_STMT_SEND_LONG_DATA
COM_STMT_RESET
COM_STMT_PREPARE
COM_QUIT (no response, session is closed)
COM_REFRESH
COM_DEBUG
COM_PING
SQLCOM_CHANGE_DB (USE ... statements)
SQLCOM_DEALLOCATE_PREPARE
SQLCOM_PREPARE
SQLCOM_SET_OPTION
SELECT ..INTO variable|OUTFILE|DUMPFILE
SET autocommit=1|0

6) MaxScale does not support hostname-based authentication, only IP-address host matching. So when adding users, remember to use a suitable address pattern.

7) Cross-database queries are not supported; the query is treated as targeting the first database

8) Changing session variables via SELECT statements is not supported
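For point 5, the knob lives in the readwritesplit service definition. A sketch based on the maxscale.cnf shown earlier — the only new line is the last one:

[Read-Write-Service]
type=service
router=readwritesplit
servers=server1,server2,server3
user=maxscale
passwd=47794130FFBA029760829CD50C10ABAC
enable_root_user=1
# Route session-variable statements to the master only (default: all)
use_sql_variables_in=master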

#####------------ 2. Install and configure pacemaker + corosync --------------#####

#The official recommendation for MaxScale high availability is pacemaker + corosync

yum install pcs pacemaker corosync fence-agents-all -y

#Start the pcsd service and enable it on boot (maxscale61, maxscale62)

systemctl start pcsd.service
systemctl enable pcsd.service

#Set a password for hacluster. The hacluster user created during installation is used by pcs to talk to the local daemons, so it needs a password — the same one on every node (maxscale61, maxscale62)

echo "balala369" | passwd --stdin hacluster

#Authenticate the cluster nodes to each other

[root@maxscale61 ~]# pcs cluster auth 192.168.5.61 192.168.5.62
Username: hacluster
Password:
192.168.5.62: Authorized
192.168.5.61: Authorized

#Create the maxscalecluster cluster

[root@maxscale61 ~]# pcs cluster setup --name maxscalecluster 192.168.5.61 192.168.5.62
Destroying cluster on nodes: 192.168.5.61, 192.168.5.62...
192.168.5.62: Stopping Cluster (pacemaker)...
192.168.5.61: Stopping Cluster (pacemaker)...
192.168.5.62: Successfully destroyed cluster
192.168.5.61: Successfully destroyed cluster
Sending 'pacemaker_remote authkey' to '192.168.5.61', '192.168.5.62'
192.168.5.61: successful distribution of the file 'pacemaker_remote authkey'
192.168.5.62: successful distribution of the file 'pacemaker_remote authkey'
Sending cluster config files to the nodes...
192.168.5.61: Succeeded
192.168.5.62: Succeeded
Synchronizing pcsd certificates on nodes 192.168.5.61, 192.168.5.62...
192.168.5.62: Success
192.168.5.61: Success
Restarting pcsd on the nodes in order to reload the certificates...
192.168.5.62: Success
192.168.5.61: Success

#Inspect the corosync configuration file

cat /etc/corosync/corosync.conf
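For reference, on a two-node cluster like this one the file generated by pcs usually looks roughly like the sketch below — field names and values can differ slightly between pcs versions:

totem {
    version: 2
    cluster_name: maxscalecluster
    secauth: off
    transport: udpu
}

nodelist {
    node {
        ring0_addr: 192.168.5.61
        nodeid: 1
    }
    node {
        ring0_addr: 192.168.5.62
        nodeid: 2
    }
}

quorum {
    provider: corosync_votequorum
    two_node: 1
}

logging {
    to_logfile: yes
    logfile: /var/log/cluster/corosync.log
    to_syslog: yes
}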

#Set the cluster to start automatically

[root@maxscale61 ~]# pcs cluster enable --all
192.168.5.61: Cluster Enabled
192.168.5.62: Cluster Enabled

#Check the cluster status

[root@maxscale61 ~]# pcs cluster status
Error: cluster is not currently running on this node

#In the back-end, the "pcs cluster start" command triggers the following commands on each cluster node
[root@maxscale61 ~]# systemctl start corosync.service
[root@maxscale61 ~]# systemctl start pacemaker.service
[root@maxscale61 ~]# systemctl enable corosync
[root@maxscale61 ~]# systemctl enable pacemaker
[root@maxscale62 ~]# systemctl start corosync.service
[root@maxscale62 ~]# systemctl start pacemaker.service
[root@maxscale62 ~]# systemctl enable corosync
[root@maxscale62 ~]# systemctl enable pacemaker
[root@maxscale61 ~]# pcs cluster status
Cluster Status:
 Stack: corosync
 Current DC: maxscale61.blufly.com (version 1.1.18-11.el7_5.3-2b07d5c5a9) - partition with quorum
 Last updated: Tue Sep 18 16:05:30 2018
 Last change: Tue Sep 18 15:47:57 2018 by hacluster via crmd on maxscale61.blufly.com
 2 nodes configured
 0 resources configured

PCSD Status:
  maxscale62.blufly.com (192.168.5.62): Online
  maxscale61.blufly.com (192.168.5.61): Online

#Check the ring status on each node

[root@maxscale61 ~]# corosync-cfgtool -s
Printing ring status.
Local node ID 1
RING ID 0
        id      = 192.168.5.61
        status  = ring 0 active with no faults
[root@maxscale62 ~]# corosync-cfgtool -s
Printing ring status.
Local node ID 2
RING ID 0
        id      = 192.168.5.62
        status  = ring 0 active with no faults

#Check the pacemaker processes

[root@maxscale61 ~]# ps axf |grep pacemaker
17859 pts/0    S+     0:00      \_ grep --color=auto pacemaker
17699 ?        Ss     0:00 /usr/sbin/pacemakerd -f
17700 ?        Ss     0:00  \_ /usr/libexec/pacemaker/cib
17701 ?        Ss     0:00  \_ /usr/libexec/pacemaker/stonithd
17702 ?        Ss     0:00  \_ /usr/libexec/pacemaker/lrmd
17703 ?        Ss     0:00  \_ /usr/libexec/pacemaker/attrd
17704 ?        Ss     0:02  \_ /usr/libexec/pacemaker/pengine
17705 ?        Ss     0:00  \_ /usr/libexec/pacemaker/crmd

#Check cluster membership information

[root@maxscale61 ~]# corosync-cmapctl | grep members
runtime.totem.pg.mrp.srp.members.1.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.1.ip (str) = r(0) ip(192.168.5.61)
runtime.totem.pg.mrp.srp.members.1.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.1.status (str) = joined
runtime.totem.pg.mrp.srp.members.2.config_version (u64) = 0
runtime.totem.pg.mrp.srp.members.2.ip (str) = r(0) ip(192.168.5.62)
runtime.totem.pg.mrp.srp.members.2.join_count (u32) = 1
runtime.totem.pg.mrp.srp.members.2.status (str) = joined

#Disable STONITH

pcs property set stonith-enabled=false

#When quorum cannot be reached, ignore it (sensible for a two-node cluster)

pcs property set no-quorum-policy=ignore

#Check that the configuration is valid

crm_verify -L -V

#Add cluster resources with crm

[root@maxscale61 ~]# crm
-bash: crm: command not found
[root@maxscale61 ~]# rpm -qa pacemaker
pacemaker-1.1.18-11.el7_5.3.x86_64

#Since pacemaker 1.1.8, crm has been split out into an independent project, crmsh. In other words, installing pacemaker no longer provides the crm command; to manage cluster resources with it, crmsh must be installed separately, and it depends on several packages such as pssh

[root@maxscale61 ~]# wget -O /etc/yum.repos.d/network:ha-clustering:Stable.repo download.opensuse.org/repositorie…
[root@maxscale61 ~]# yum -y install crmsh
[root@maxscale62 ~]# wget -O /etc/yum.repos.d/network:ha-clustering:Stable.repo download.opensuse.org/repositorie…
[root@maxscale62 ~]# yum -y install crmsh

#If the yum install fails, download the rpm packages and install them locally (maxscale61, maxscale62)

cd /opt
wget download.opensuse.org/repositorie…
wget download.opensuse.org/repositorie…
wget download.opensuse.org/repositorie…
wget mirror.yandex.ru/opensuse/re…
wget download.opensuse.org/repositorie…
yum -y install crmsh-3.0.0-6.2.noarch.rpm crmsh-scripts-3.0.0-6.2.noarch.rpm pssh-2.3.1-7.3.noarch.rpm python-parallax-1.0.1-29.1.noarch.rpm python-pssh-2.3.1-7.3.noarch.rpm

#Configure the VIP and the monitored service (only on maxscale61)

crm
crm(live)# status
#List the systemd services crm can manage as resources — maxscale is among them
crm(live)ra# list systemd
crm(live)# configure
crm(live)configure# primitive maxscalevip ocf:IPaddr params ip=192.168.5.60 op monitor timeout=30s interval=60s
#Here 192.168.5.60 is the floating IP, named maxscalevip; the cluster monitors it every 60 seconds with a 30-second timeout
#Configure the monitored service (maxscale.service)
crm(live)configure# primitive maxscaleserver systemd:maxscale op monitor timeout=30s interval=60s
#Put the VIP (maxscalevip) and the monitored service (maxscaleserver) into the same group
crm(live)configure# group maxscalegroup maxscalevip maxscaleserver
#Verify the configuration, then commit it
crm(live)configure# verify
crm(live)configure# commit
crm(live)configure# show
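If you would rather not install crmsh, the same resources can be created with the pcs tooling that is already present — roughly equivalent commands (note that pcs favors the IPaddr2 agent):

pcs resource create maxscalevip ocf:heartbeat:IPaddr2 ip=192.168.5.60 op monitor interval=60s timeout=30s
pcs resource create maxscaleserver systemd:maxscale op monitor interval=60s timeout=30s
pcs resource group add maxscalegroup maxscalevip maxscaleserver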

#Check the resource status

crm(live)# status
Stack: corosync
Current DC: maxscale61.blufly.com (version 1.1.18-11.el7_5.3-2b07d5c5a9) - partition with quorum
Last updated: Tue Sep 18 16:50:13 2018
Last change: Tue Sep 18 16:48:12 2018 by root via cibadmin on maxscale61.blufly.com

2 nodes configured
2 resources configured

Online: [ maxscale61.blufly.com maxscale62.blufly.com ]

Full list of resources:

 Resource Group: maxscalegroup
     maxscalevip        (ocf::heartbeat:IPaddr):        Started maxscale61.blufly.com
     maxscaleserver     (systemd:maxscale):             Started maxscale61.blufly.com

crm(live)# quit

#Check the running resources

[root@maxscale61 opt]# ip addr | grep 192.168.5.60
    inet 192.168.5.60/24 brd 192.168.5.255 scope global secondary eno16777984
[root@maxscale61 opt]# ps -ef | grep maxscale
maxscale 22159     1  0 16:48 ?        00:00:01 /usr/bin/maxscale
root     22529 13940  0 16:51 pts/0    00:00:00 grep --color=auto maxscale
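Before pulling the plug, a controlled migration is a gentler way to exercise the same failover path — a sketch using crmsh's migrate/unmigrate:

# Move the whole group to maxscale62, check it started, then drop the constraint
crm resource migrate maxscalegroup maxscale62.blufly.com
crm status
# migrate leaves a location constraint behind; clear it so resources can move freely again
crm resource unmigrate maxscalegroup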

#Failover tests

#Stop the maxscale service on maxscale61

[root@maxscale61 opt]# systemctl stop maxscale.service

#When triggering a failover with systemctl stop maxscale.service, the VIP does not drift immediately: the cluster first tries to restart the maxscale service on the local node (maxscale61: 192.168.5.61), and only after several failed attempts does it move the VIP and the service.

#Credit where due: this resource management behavior is very sensible — far more so than MHA's. On a heavily loaded database, you should not fail over the instant the service drops; it is better to try restarting it on the original server first and only then fail over, because if the crash was caused by load, the restarted service first needs to warm the hot data back into the database.

#Now simulate a full outage of maxscale61 and check whether the maxscale service and the VIP fail over to maxscale62 (192.168.5.62).

[root@maxscale61 opt]# shutdown -h now

#After maxscale61 is powered off, the VIP switches to maxscale62 immediately. Ping shows no packet loss — a seamless switchover.

#Bring maxscale61 back up, then shut down maxscale62, and watch the VIP and the maxscale service switch over.

[root@maxscale62 opt]# shutdown -h now

#Check the status of every component in the cluster

pcs status

#Kernel parameter tuning

#Enable IP forwarding

net.ipv4.ip_forward = 1

#Allow binding to non-local IPs (needed so services can bind the VIP before it lands on the node)

net.ipv4.ip_nonlocal_bind = 1
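To make these two settings persistent and apply them immediately, a standard sysctl sketch:

# Persist both settings and load them right away
cat >> /etc/sysctl.conf << EOF
net.ipv4.ip_forward = 1
net.ipv4.ip_nonlocal_bind = 1
EOF
sysctl -p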

With that, a complete MySQL high availability stack is in place!
