vim /etc/hosts # 添加對應主機 192.168.28.128 mha1 192.168.28.131 mha2 192.168.28.132 mha3
# 關閉防火牆 systemctl stop firewalld
systemctl disable firewalld # 關閉自啓動 # 修改selinux vim /etc/sysconfig/selinux SELINUX=disabled # 設置爲disabled
建立主從能夠參考 MySQL主從搭建
node
注意必須有以下參數mysql
server-id=1 # 每一個節點不能相同 log-bin=/data/mysql3306/logs/mysql-bin relay-log=/data/mysql3306/logs/relay-log skip-name-resolve # 建議加上 非必須項 #read_only = ON # 從庫開啓,主庫關閉只讀 relay_log_purge = 0 # 關閉自動清理中繼日誌 log_slave_updates = 1 # 從庫經過binlog更新的數據寫進從庫二進制日誌中,必加,不然切換後可能丟失數據
建立mha管理帳號linux
# 特別注意: mha的密碼不要出現特殊字符,不然後面沒法切換主庫
create user mha@'192.168.28.%' identified by 'MHAadmin123'; create user mha@'localhost' identified by 'MHAadmin123'; grant all on *.* to mha@'192.168.28.%'; grant all on *.* to mha@'localhost';
MHA管理節點上執行(但建議每臺主機均執行,便於切換管理節點及集羣間維護,但注意主機安全),包含本機到本機的互信sql
ssh-keygen ssh-copy-id -i ~/.ssh/id_rsa.pub root@192.168.28.128 ssh-copy-id -i ~/.ssh/id_rsa.pub root@192.168.28.131 ssh-copy-id -i ~/.ssh/id_rsa.pub root@192.168.28.132
配置完成後記得測試一下是否配置成功(必須測試)數據庫
ssh root@192.168.28.128 ssh root@192.168.28.131 ssh root@192.168.28.132 ssh root@mha1 ssh root@mha2 ssh root@mha3
yum install perl-DBI perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-Time-HiRes perl-Params-Validate perl-DateTime -y yum install perl-ExtUtils-Embed -y yum install cpan -y yum install perl-ExtUtils-CBuilder perl-ExtUtils-MakeMaker -y
注意: MySQL數據庫安裝時不建議用rpm包方式安裝,不然此處部分包可能有衝突vim
# 全部節點均需安裝 rpm -ivh mha4mysql-node-0.58-0.el7.centos.noarch.rpm #管理節點需安裝(其餘節點也能夠安裝) mha4mysql-manager-0.58-0.el7.centos.noarch.rpm
若是以上安裝包未安裝全,則會出現相似下面的錯誤,如出現能夠調整yum源或找下載好的同窗獲取centos
[root@mha3 local]# rpm -ivh mha4mysql-manager-0.58-0.el7.centos.noarch.rpm error: Failed dependencies: perl(Log::Dispatch) is needed by mha4mysql-manager-0.58-0.el7.centos.noarch perl(Log::Dispatch::File) is needed by mha4mysql-manager-0.58-0.el7.centos.noarch perl(Log::Dispatch::Screen) is needed by mha4mysql-manager-0.58-0.el7.centos.noarch perl(Parallel::ForkManager) is needed by mha4mysql-manager-0.58-0.el7.centos.noarch
建立配置文件路徑、日誌文件路徑安全
mkdir -p /etc/masterha mkdir -p /var/log/masterha/app1
建立mha配置文件服務器
vim /etc/masterha/app1.conf user=mha [server default] manager_workdir=/var/log/masterha/app1 manager_log=/var/log/masterha/app1/app1.log master_ip_failover_script=/usr/bin/master_ip_failover master_ip_online_change_script=/usr/bin/master_ip_online_change ##mysql用戶名和密碼 user=mha password=MHAadmin123 ssh_user=root repl_user=repl repl_password=repl ping_interval=3 remote_workdir=/tmp report_script=/usr/bin/send_report
# secondary_check_script 能夠不加
# secondary_check_script=/usr/bin/masterha_secondary_check -s mha2 -s mha3 --user=mha --master_host=mha1 --master_ip=192.168.28.128 --master_port=3306 --password=MHAadmin123 shutdown_script="" report_script="" [server1] hostname=192.168.28.128 master_binlog_dir=/data/mysql3306/logs candidate_master=1 [server2] hostname=192.168.28.131 master_binlog_dir=/data/mysql3306/logs candidate_master=1 check_repl_delay=0 [server3] hostname=192.168.28.132 master_binlog_dir=/data/mysql3306/logs no_master=1
配置兩個重要的腳本 master_ip_failover 、 master_ip_online_changeapp
/usr/bin/master_ip_failover
vim /usr/bin/master_ip_failover #!/usr/bin/env perl use strict; use warnings FATAL => 'all'; use Getopt::Long; my ( $command, $ssh_user, $orig_master_host, $orig_master_ip, $orig_master_port, $new_master_host, $new_master_ip, $new_master_port ); my $vip = '192.168.28.199/24'; my $if = 'ens33'; my $ssh_start_vip = "/sbin/ip addr add $vip dev $if"; my $ssh_stop_vip = "/sbin/ip addr del $vip dev $if"; GetOptions( 'command=s' => \$command, 'ssh_user=s' => \$ssh_user, 'orig_master_host=s' => \$orig_master_host, 'orig_master_ip=s' => \$orig_master_ip, 'orig_master_port=i' => \$orig_master_port, 'new_master_host=s' => \$new_master_host, 'new_master_ip=s' => \$new_master_ip, 'new_master_port=i' => \$new_master_port, ); exit &main(); sub main { print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n"; if ( $command eq "stop" || $command eq "stopssh" ) { my $exit_code = 1; eval { print "Disabling the VIP on old master: $orig_master_host \n"; &stop_vip(); $exit_code = 0; }; if ($@) { warn "Got Error: $@\n"; exit $exit_code; } exit $exit_code; } elsif ( $command eq "start" ) { my $exit_code = 10; eval { print "Enabling the VIP - $vip on the new master - $new_master_host \n"; &start_vip(); $exit_code = 0; }; if ($@) { warn $@; exit $exit_code; } exit $exit_code; } elsif ( $command eq "status" ) { print "Checking the Status of the script.. OK \n"; exit 0; } else { &usage(); exit 1; } } sub start_vip() { `ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`; } sub stop_vip() { return 0 unless ($ssh_user); `ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`; } sub usage { print "Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n"; }
/usr/bin/master_ip_online_change
vim /usr/bin/master_ip_online_change #!/usr/bin/env perl use strict; use warnings FATAL => 'all'; use Getopt::Long; #my ( # $command, $ssh_user, $orig_master_host, $orig_master_ip, # $orig_master_port, $new_master_host, $new_master_ip, $new_master_port #); my ( $command, $orig_master_is_new_slave, $orig_master_host, $orig_master_ip, $orig_master_port, $orig_master_user, $orig_master_password, $orig_master_ssh_user, $new_master_host, $new_master_ip, $new_master_port, $new_master_user, $new_master_password, $new_master_ssh_user, ); my $vip = '192.168.28.199/24'; my $if = 'ens33'; my $ssh_start_vip = "/sbin/ip addr add $vip dev $if"; my $ssh_stop_vip = "/sbin/ip addr del $vip dev $if"; my $ssh_user = "root"; GetOptions( 'command=s' => \$command, #'ssh_user=s' => \$ssh_user, #'orig_master_host=s' => \$orig_master_host, #'orig_master_ip=s' => \$orig_master_ip, #'orig_master_port=i' => \$orig_master_port, #'new_master_host=s' => \$new_master_host, #'new_master_ip=s' => \$new_master_ip, #'new_master_port=i' => \$new_master_port, 'orig_master_is_new_slave' => \$orig_master_is_new_slave, 'orig_master_host=s' => \$orig_master_host, 'orig_master_ip=s' => \$orig_master_ip, 'orig_master_port=i' => \$orig_master_port, 'orig_master_user=s' => \$orig_master_user, 'orig_master_password=s' => \$orig_master_password, 'orig_master_ssh_user=s' => \$orig_master_ssh_user, 'new_master_host=s' => \$new_master_host, 'new_master_ip=s' => \$new_master_ip, 'new_master_port=i' => \$new_master_port, 'new_master_user=s' => \$new_master_user, 'new_master_password=s' => \$new_master_password, 'new_master_ssh_user=s' => \$new_master_ssh_user, ); exit &main(); sub main { print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n"; if ( $command eq "stop" || $command eq "stopssh" ) { my $exit_code = 1; eval { print "Disabling the VIP on old master: $orig_master_host \n"; &stop_vip(); $exit_code = 0; }; if ($@) { warn "Got Error: $@\n"; exit $exit_code; } exit $exit_code; } elsif ( $command eq "start" ) { my $exit_code = 10; eval { print "Enabling the VIP - $vip on the new master - $new_master_host \n"; &start_vip(); $exit_code = 0; }; if ($@) { warn $@; exit $exit_code; } exit $exit_code; } elsif ( $command eq "status" ) { print "Checking the Status of the script.. OK \n"; exit 0; } else { &usage(); exit 1; } } sub start_vip() { `ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`; } sub stop_vip() { return 0 unless ($ssh_user); `ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`; } sub usage { print "Usage: master_ip_failover --command=start|stop|stopssh|status --ssh-user=user --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n"; }
檢測互信
檢查各節點互信是否正常,相似於以前的檢查,此處有腳本實現檢查
[root@mha3 app1]# masterha_check_ssh --conf=/etc/masterha/app1.conf Sun May 24 17:33:08 2020 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. Sun May 24 17:33:08 2020 - [info] Reading application default configuration from /etc/masterha/app1.conf.. Sun May 24 17:33:08 2020 - [info] Reading server configuration from /etc/masterha/app1.conf.. Sun May 24 17:33:08 2020 - [info] Starting SSH connection tests.. Sun May 24 17:33:12 2020 - [debug] Sun May 24 17:33:08 2020 - [debug] Connecting via SSH from root@192.168.28.131(192.168.28.131:22) to root@192.168.28.128(192.168.28.128:22).. Sun May 24 17:33:10 2020 - [debug] ok. Sun May 24 17:33:10 2020 - [debug] Connecting via SSH from root@192.168.28.131(192.168.28.131:22) to root@192.168.28.132(192.168.28.132:22).. Sun May 24 17:33:12 2020 - [debug] ok. Sun May 24 17:33:12 2020 - [debug] Sun May 24 17:33:08 2020 - [debug] Connecting via SSH from root@192.168.28.128(192.168.28.128:22) to root@192.168.28.131(192.168.28.131:22).. Sun May 24 17:33:09 2020 - [debug] ok. Sun May 24 17:33:09 2020 - [debug] Connecting via SSH from root@192.168.28.128(192.168.28.128:22) to root@192.168.28.132(192.168.28.132:22).. Sun May 24 17:33:12 2020 - [debug] ok. Sun May 24 17:33:13 2020 - [debug] Sun May 24 17:33:09 2020 - [debug] Connecting via SSH from root@192.168.28.132(192.168.28.132:22) to root@192.168.28.128(192.168.28.128:22).. Sun May 24 17:33:11 2020 - [debug] ok. Sun May 24 17:33:11 2020 - [debug] Connecting via SSH from root@192.168.28.132(192.168.28.132:22) to root@192.168.28.131(192.168.28.131:22).. Sun May 24 17:33:13 2020 - [debug] ok. Sun May 24 17:33:13 2020 - [info] All SSH connection tests passed successfully.
檢查複製集羣是否正常
如按照我以前的步驟配置,則此處會有以下異常
masterha_check_repl --conf=/etc/masterha/app1.conf Sun May 24 17:34:02 2020 - [info] Connecting to root@192.168.28.131(192.168.28.131:22).. Can't exec "mysqlbinlog": No such file or directory at /usr/share/perl5/vendor_perl/MHA/BinlogManager.pm line 106. mysqlbinlog version command failed with rc 1:0, please verify PATH, LD_LIBRARY_PATH, and client options at /usr/bin/apply_diff_relay_logs line 532.
報錯信息很明確,找不到mysqlbinlog命令,處理方式比較簡單,作個軟鏈接便可
ln -s /usr/local/mysql5.7/bin/mysql /usr/bin/ ln -s /usr/local/mysql5.7/bin/mysqlbinlog /usr/bin/
再進行檢測
[root@mha3 app1]# masterha_check_repl --conf=/etc/masterha/app1.conf Sun May 24 17:34:41 2020 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. Sun May 24 17:34:41 2020 - [info] Reading application default configuration from /etc/masterha/app1.conf.. Sun May 24 17:34:41 2020 - [info] Reading server configuration from /etc/masterha/app1.conf.. Sun May 24 17:34:41 2020 - [info] MHA::MasterMonitor version 0.58. Sun May 24 17:34:42 2020 - [info] GTID failover mode = 0 Sun May 24 17:34:42 2020 - [info] Dead Servers: Sun May 24 17:34:42 2020 - [info] Alive Servers: Sun May 24 17:34:42 2020 - [info] 192.168.28.128(192.168.28.128:3306) Sun May 24 17:34:42 2020 - [info] 192.168.28.131(192.168.28.131:3306) Sun May 24 17:34:42 2020 - [info] 192.168.28.132(192.168.28.132:3306) Sun May 24 17:34:42 2020 - [info] Alive Slaves: Sun May 24 17:34:42 2020 - [info] 192.168.28.131(192.168.28.131:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 17:34:42 2020 - [info] Replicating from 192.168.28.128(192.168.28.128:3306) Sun May 24 17:34:42 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sun May 24 17:34:42 2020 - [info] 192.168.28.132(192.168.28.132:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 17:34:42 2020 - [info] Replicating from 192.168.28.128(192.168.28.128:3306) Sun May 24 17:34:42 2020 - [info] Not candidate for the new Master (no_master is set) Sun May 24 17:34:42 2020 - [info] Current Alive Master: 192.168.28.128(192.168.28.128:3306) Sun May 24 17:34:42 2020 - [info] Checking slave configurations.. Sun May 24 17:34:42 2020 - [info] Checking replication filtering settings.. Sun May 24 17:34:42 2020 - [info] binlog_do_db= , binlog_ignore_db= Sun May 24 17:34:42 2020 - [info] Replication filtering check ok. Sun May 24 17:34:42 2020 - [info] GTID (with auto-pos) is not supported Sun May 24 17:34:42 2020 - [info] Starting SSH connection tests.. Sun May 24 17:34:48 2020 - [info] All SSH connection tests passed successfully. Sun May 24 17:34:48 2020 - [info] Checking MHA Node version.. Sun May 24 17:34:49 2020 - [info] Version check ok. Sun May 24 17:34:49 2020 - [info] Checking SSH publickey authentication settings on the current master.. Sun May 24 17:34:50 2020 - [info] HealthCheck: SSH to 192.168.28.128 is reachable. Sun May 24 17:34:51 2020 - [info] Master MHA Node version is 0.58. Sun May 24 17:34:51 2020 - [info] Checking recovery script configurations on 192.168.28.128(192.168.28.128:3306).. Sun May 24 17:34:51 2020 - [info] Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql3306/data --output_file=/tmp/save_binary_logs_test --manager_version=0.58 --start_file=mysql-bin.000012 Sun May 24 17:34:51 2020 - [info] Connecting to root@192.168.28.128(192.168.28.128:22).. Creating /tmp if not exists.. ok. Checking output directory is accessible or not.. ok. Binlog found at /data/mysql3306/data, up to mysql-bin.000012 Sun May 24 17:34:52 2020 - [info] Binlog setting check done. Sun May 24 17:34:52 2020 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers.. Sun May 24 17:34:52 2020 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user='mha' --slave_host=192.168.28.131 --slave_ip=192.168.28.131 --slave_port=3306 --workdir=/tmp --target_version=5.7.25-28-log --manager_version=0.58 --relay_log_info=/data/mysql3306/data/relay-log.info --relay_dir=/data/mysql3306/data/ --slave_pass=xxx Sun May 24 17:34:52 2020 - [info] Connecting to root@192.168.28.131(192.168.28.131:22).. Checking slave recovery environment settings.. Opening /data/mysql3306/data/relay-log.info ... ok. Relay log found at /data/mysql3306/data, up to relay-log.000003 Temporary relay log file is /data/mysql3306/data/relay-log.000003 Checking if super_read_only is defined and turned on.. not present or turned off, ignoring. Testing mysql connection and privileges.. mysql: [Warning] Using a password on the command line interface can be insecure. done. Testing mysqlbinlog output.. done. Cleaning up test file(s).. done. Sun May 24 17:34:53 2020 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user='mha' --slave_host=192.168.28.132 --slave_ip=192.168.28.132 --slave_port=3306 --workdir=/tmp --target_version=5.7.25-28-log --manager_version=0.58 --relay_log_info=/data/mysql3306/data/relay-log.info --relay_dir=/data/mysql3306/data/ --slave_pass=xxx Sun May 24 17:34:53 2020 - [info] Connecting to root@192.168.28.132(192.168.28.132:22).. Checking slave recovery environment settings.. Opening /data/mysql3306/data/relay-log.info ... ok. Relay log found at /data/mysql3306/data, up to relay-log.000003 Temporary relay log file is /data/mysql3306/data/relay-log.000003 Checking if super_read_only is defined and turned on.. not present or turned off, ignoring. Testing mysql connection and privileges.. mysql: [Warning] Using a password on the command line interface can be insecure. done. Testing mysqlbinlog output.. done. Cleaning up test file(s).. done. Sun May 24 17:34:54 2020 - [info] Slaves settings check done. Sun May 24 17:34:54 2020 - [info] 192.168.28.128(192.168.28.128:3306) (current master) +--192.168.28.131(192.168.28.131:3306) +--192.168.28.132(192.168.28.132:3306) Sun May 24 17:34:54 2020 - [info] Checking replication health on 192.168.28.131.. Sun May 24 17:34:54 2020 - [info] ok. Sun May 24 17:34:54 2020 - [info] Checking replication health on 192.168.28.132.. Sun May 24 17:34:54 2020 - [info] ok. Sun May 24 17:34:54 2020 - [info] Checking master_ip_failover_script status: Sun May 24 17:34:54 2020 - [info] /usr/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=192.168.28.128 --orig_master_ip=192.168.28.128 --orig_master_port=3306 IN SCRIPT TEST====/sbin/ip addr del 192.168.28.199/24 dev ens33==/sbin/ip addr add 192.168.28.199/24 dev ens33=== Checking the Status of the script.. OK Sun May 24 17:34:54 2020 - [info] OK. Sun May 24 17:34:54 2020 - [warning] shutdown_script is not defined. Sun May 24 17:34:54 2020 - [info] Got exit code 0 (Not master dead). MySQL Replication Health is OK.
看到 "MySQL Replication Health is OK." 表明檢測經過。
開啓MHA服務的腳本以下,也能夠寫成腳本或服務
nohup masterha_manager --conf=/etc/masterha/app1.conf < /dev/null > /var/log/masterha/app1/manager.log 2>&1 &
開啓服務後,日誌以下,與集羣檢測相似
Sun May 24 18:31:54 2020 - [info] MHA::MasterMonitor version 0.58. Sun May 24 18:31:55 2020 - [info] GTID failover mode = 0 Sun May 24 18:31:55 2020 - [info] Dead Servers: Sun May 24 18:31:55 2020 - [info] Alive Servers: Sun May 24 18:31:55 2020 - [info] 192.168.28.128(192.168.28.128:3306) Sun May 24 18:31:55 2020 - [info] 192.168.28.131(192.168.28.131:3306) Sun May 24 18:31:55 2020 - [info] 192.168.28.132(192.168.28.132:3306) Sun May 24 18:31:55 2020 - [info] Alive Slaves: Sun May 24 18:31:55 2020 - [info] 192.168.28.131(192.168.28.131:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 18:31:55 2020 - [info] Replicating from 192.168.28.128(192.168.28.128:3306) Sun May 24 18:31:55 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sun May 24 18:31:55 2020 - [info] 192.168.28.132(192.168.28.132:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 18:31:55 2020 - [info] Replicating from 192.168.28.128(192.168.28.128:3306) Sun May 24 18:31:55 2020 - [info] Not candidate for the new Master (no_master is set) Sun May 24 18:31:55 2020 - [info] Current Alive Master: 192.168.28.128(192.168.28.128:3306) Sun May 24 18:31:55 2020 - [info] Checking slave configurations.. Sun May 24 18:31:55 2020 - [info] Checking replication filtering settings.. Sun May 24 18:31:55 2020 - [info] binlog_do_db= , binlog_ignore_db= Sun May 24 18:31:55 2020 - [info] Replication filtering check ok. Sun May 24 18:31:55 2020 - [info] GTID (with auto-pos) is not supported Sun May 24 18:31:55 2020 - [info] Starting SSH connection tests.. Sun May 24 18:32:01 2020 - [info] All SSH connection tests passed successfully. Sun May 24 18:32:01 2020 - [info] Checking MHA Node version.. Sun May 24 18:32:03 2020 - [info] Version check ok. Sun May 24 18:32:03 2020 - [info] Checking SSH publickey authentication settings on the current master.. Sun May 24 18:32:03 2020 - [info] HealthCheck: SSH to 192.168.28.128 is reachable. Sun May 24 18:32:04 2020 - [info] Master MHA Node version is 0.58. Sun May 24 18:32:04 2020 - [info] Checking recovery script configurations on 192.168.28.128(192.168.28.128:3306).. Sun May 24 18:32:04 2020 - [info] Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql3306/data --output_file=/tmp/save_binary_logs_test --manager_version=0.58 --start_file=mysql-bin.000013 Sun May 24 18:32:04 2020 - [info] Connecting to root@192.168.28.128(192.168.28.128:22).. Creating /tmp if not exists.. ok. Checking output directory is accessible or not.. ok. Binlog found at /data/mysql3306/data, up to mysql-bin.000013 Sun May 24 18:32:05 2020 - [info] Binlog setting check done. Sun May 24 18:32:05 2020 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers.. Sun May 24 18:32:05 2020 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user='mha' --slave_host=192.168.28.131 --slave_ip=192.168.28.131 --slave_port=3306 --workdir=/tmp --target_version=5.7.25-28-log --manager_version=0.58 --relay_log_info=/data/mysql3306/data/relay-log.info --relay_dir=/data/mysql3306/data/ --slave_pass=xxx Sun May 24 18:32:05 2020 - [info] Connecting to root@192.168.28.131(192.168.28.131:22).. Checking slave recovery environment settings.. Opening /data/mysql3306/data/relay-log.info ... ok. Relay log found at /data/mysql3306/data, up to relay-log.000005 Temporary relay log file is /data/mysql3306/data/relay-log.000005 Checking if super_read_only is defined and turned on.. not present or turned off, ignoring. Testing mysql connection and privileges.. mysql: [Warning] Using a password on the command line interface can be insecure. done. Testing mysqlbinlog output.. done. Cleaning up test file(s).. done. Sun May 24 18:32:06 2020 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user='mha' --slave_host=192.168.28.132 --slave_ip=192.168.28.132 --slave_port=3306 --workdir=/tmp --target_version=5.7.25-28-log --manager_version=0.58 --relay_log_info=/data/mysql3306/data/relay-log.info --relay_dir=/data/mysql3306/data/ --slave_pass=xxx Sun May 24 18:32:06 2020 - [info] Connecting to root@192.168.28.132(192.168.28.132:22).. Checking slave recovery environment settings.. Opening /data/mysql3306/data/relay-log.info ... ok. Relay log found at /data/mysql3306/data, up to relay-log.000005 Temporary relay log file is /data/mysql3306/data/relay-log.000005 Checking if super_read_only is defined and turned on.. not present or turned off, ignoring. Testing mysql connection and privileges. mysql: [Warning] Using a password on the command line interface can be insecure. done. Testing mysqlbinlog output.. done. Cleaning up test file(s).. done. Sun May 24 18:32:07 2020 - [info] Slaves settings check done. Sun May 24 18:32:07 2020 - [info] 192.168.28.128(192.168.28.128:3306) (current master) +--192.168.28.131(192.168.28.131:3306) +--192.168.28.132(192.168.28.132:3306) Sun May 24 18:32:07 2020 - [info] Checking master_ip_failover_script status: Sun May 24 18:32:07 2020 - [info] /usr/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=192.168.28.128 --orig_master_ip=192.168.28.128 --orig_master_port=3306 IN SCRIPT TEST====/sbin/ip addr del 192.168.28.199/24 dev ens33==/sbin/ip addr add 192.168.28.199/24 dev ens33=== Checking the Status of the script.. OK Sun May 24 18:32:08 2020 - [info] OK. Sun May 24 18:32:08 2020 - [warning] shutdown_script is not defined. Sun May 24 18:32:08 2020 - [info] Set master ping interval 3 seconds. Sun May 24 18:32:08 2020 - [warning] secondary_check_script is not defined. It is highly recommended setting it to check master reachability from two or more routes. Sun May 24 18:32:08 2020 - [info] Starting ping health check on 192.168.28.128(192.168.28.128:3306).. Sun May 24 18:32:08 2020 - [info] Ping(SELECT) succeeded, waiting until MySQL doesn't respond..
模擬主庫數據庫down
主庫執行shutdown
mysql> shutdown;
觀察日誌:
日誌中大體的流程是檢測到主庫(192.168.28.128:3306)不可用-->連續試探3次(次數可自定義)-->檢測進羣中剩餘存活的節點-->從備選主節點中選擇一個節點爲主節點-->漂移VIP至新的主節點(若是原主節點系統正常則將VIP在原主機上刪除)-->拷貝原主節點的binlog日誌-->新主節點判斷是否須要補充日誌-->其餘節點所有改成重新主節點複製數據(組成新的集羣)
Sun May 24 18:35:56 2020 - [warning] Got error on MySQL select ping: 2006 (MySQL server has gone away) Sun May 24 18:35:56 2020 - [info] Executing SSH check script: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql3306/data --output_file=/tmp/save_binary_logs_test --manager_version=0.58 --binlog_prefix=mysql-bin Sun May 24 18:35:56 2020 - [info] HealthCheck: SSH to 192.168.28.128 is reachable. Sun May 24 18:35:59 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '192.168.28.128' (111)) Sun May 24 18:35:59 2020 - [warning] Connection failed 2 time(s).. Sun May 24 18:36:02 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '192.168.28.128' (111)) Sun May 24 18:36:02 2020 - [warning] Connection failed 3 time(s).. Sun May 24 18:36:05 2020 - [warning] Got error on MySQL connect: 2003 (Can't connect to MySQL server on '192.168.28.128' (111)) Sun May 24 18:36:05 2020 - [warning] Connection failed 4 time(s).. Sun May 24 18:36:05 2020 - [warning] Master is not reachable from health checker! Sun May 24 18:36:05 2020 - [warning] Master 192.168.28.128(192.168.28.128:3306) is not reachable! Sun May 24 18:36:05 2020 - [warning] SSH is reachable. Sun May 24 18:36:05 2020 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha_default.cnf and /etc/masterha/app1.conf again, and trying to connect to all servers to check server status.. Sun May 24 18:36:05 2020 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. Sun May 24 18:36:05 2020 - [info] Reading application default configuration from /etc/masterha/app1.conf.. Sun May 24 18:36:05 2020 - [info] Reading server configuration from /etc/masterha/app1.conf.. Sun May 24 18:36:06 2020 - [info] GTID failover mode = 0 Sun May 24 18:36:06 2020 - [info] Dead Servers: Sun May 24 18:36:06 2020 - [info] 192.168.28.128(192.168.28.128:3306) Sun May 24 18:36:06 2020 - [info] Alive Servers: Sun May 24 18:36:06 2020 - [info] 192.168.28.131(192.168.28.131:3306) Sun May 24 18:36:06 2020 - [info] 192.168.28.132(192.168.28.132:3306) Sun May 24 18:36:06 2020 - [info] Alive Slaves: Sun May 24 18:36:06 2020 - [info] 192.168.28.131(192.168.28.131:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 18:36:06 2020 - [info] Replicating from 192.168.28.128(192.168.28.128:3306) Sun May 24 18:36:06 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sun May 24 18:36:06 2020 - [info] 192.168.28.132(192.168.28.132:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 18:36:06 2020 - [info] Replicating from 192.168.28.128(192.168.28.128:3306) Sun May 24 18:36:06 2020 - [info] Not candidate for the new Master (no_master is set) Sun May 24 18:36:06 2020 - [info] Checking slave configurations.. Sun May 24 18:36:06 2020 - [info] Checking replication filtering settings.. Sun May 24 18:36:06 2020 - [info] Replication filtering check ok. Sun May 24 18:36:06 2020 - [info] Master is down! Sun May 24 18:36:06 2020 - [info] Terminating monitoring script. Sun May 24 18:36:06 2020 - [info] Got exit code 20 (Master dead). Sun May 24 18:36:06 2020 - [info] MHA::MasterFailover version 0.58. Sun May 24 18:36:06 2020 - [info] Starting master failover. Sun May 24 18:36:06 2020 - [info] Sun May 24 18:36:06 2020 - [info] * Phase 1: Configuration Check Phase.. Sun May 24 18:36:06 2020 - [info] Sun May 24 18:36:07 2020 - [info] GTID failover mode = 0 Sun May 24 18:36:07 2020 - [info] Dead Servers: Sun May 24 18:36:07 2020 - [info] 192.168.28.128(192.168.28.128:3306) Sun May 24 18:36:07 2020 - [info] Checking master reachability via MySQL(double check)... Sun May 24 18:36:07 2020 - [info] ok. Sun May 24 18:36:07 2020 - [info] Alive Servers: Sun May 24 18:36:07 2020 - [info] 192.168.28.131(192.168.28.131:3306) Sun May 24 18:36:07 2020 - [info] 192.168.28.132(192.168.28.132:3306) Sun May 24 18:36:07 2020 - [info] Alive Slaves: Sun May 24 18:36:07 2020 - [info] 192.168.28.131(192.168.28.131:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 18:36:07 2020 - [info] Replicating from 192.168.28.128(192.168.28.128:3306) Sun May 24 18:36:07 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sun May 24 18:36:07 2020 - [info] 192.168.28.132(192.168.28.132:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 18:36:07 2020 - [info] Replicating from 192.168.28.128(192.168.28.128:3306) Sun May 24 18:36:07 2020 - [info] Not candidate for the new Master (no_master is set) Sun May 24 18:36:07 2020 - [info] Starting Non-GTID based failover. Sun May 24 18:36:07 2020 - [info] Sun May 24 18:36:07 2020 - [info] ** Phase 1: Configuration Check Phase completed. Sun May 24 18:36:07 2020 - [info] Sun May 24 18:36:07 2020 - [info] * Phase 2: Dead Master Shutdown Phase.. Sun May 24 18:36:07 2020 - [info] Sun May 24 18:36:07 2020 - [info] * Phase 2: Dead Master Shutdown Phase.. Sun May 24 18:36:07 2020 - [info] Sun May 24 18:36:07 2020 - [info] Forcing shutdown so that applications never connect to the current master.. Sun May 24 18:36:07 2020 - [info] Executing master IP deactivation script: Sun May 24 18:36:07 2020 - [info] /usr/bin/master_ip_failover --orig_master_host=192.168.28.128 --orig_master_ip=192.168.28.128 --orig_master_port=3306 --command=stopssh --ssh_user=root IN SCRIPT TEST====/sbin/ip addr del 192.168.28.199/24 dev ens33==/sbin/ip addr add 192.168.28.199/24 dev ens33=== Disabling the VIP on old master: 192.168.28.128 Sun May 24 18:36:08 2020 - [info] done. Sun May 24 18:36:08 2020 - [warning] shutdown_script is not set. Skipping explicit shutting down of the dead master. Sun May 24 18:36:08 2020 - [info] * Phase 2: Dead Master Shutdown Phase completed. Sun May 24 18:36:08 2020 - [info] Sun May 24 18:36:08 2020 - [info] * Phase 3: Master Recovery Phase.. Sun May 24 18:36:08 2020 - [info] Sun May 24 18:36:08 2020 - [info] * Phase 3.1: Getting Latest Slaves Phase.. Sun May 24 18:36:08 2020 - [info] Sun May 24 18:36:08 2020 - [info] The latest binary log file/position on all slaves is mysql-bin.000013:154 Sun May 24 18:36:08 2020 - [info] Latest slaves (Slaves that received relay log files to the latest): Sun May 24 18:36:08 2020 - [info] 192.168.28.131(192.168.28.131:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 18:36:08 2020 - [info] Replicating from 192.168.28.128(192.168.28.128:3306) Sun May 24 18:36:08 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sun May 24 18:36:08 2020 - [info] 192.168.28.132(192.168.28.132:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 18:36:08 2020 - [info] Replicating from 192.168.28.128(192.168.28.128:3306) Sun May 24 18:36:08 2020 - [info] Not candidate for the new Master (no_master is set) Sun May 24 18:36:08 2020 - [info] The oldest binary log file/position on all slaves is mysql-bin.000013:154 Sun May 24 18:36:08 2020 - [info] Oldest slaves: Sun May 24 18:36:08 2020 - [info] 192.168.28.131(192.168.28.131:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 18:36:08 2020 - [info] Replicating from 192.168.28.128(192.168.28.128:3306) Sun May 24 18:36:08 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sun May 24 18:36:08 2020 - [info] 192.168.28.132(192.168.28.132:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 18:36:08 2020 - [info] Replicating from 192.168.28.128(192.168.28.128:3306) Sun May 24 18:36:08 2020 - [info] Not candidate for the new Master (no_master is set) Sun May 24 18:36:08 2020 - [info] Sun May 24 18:36:08 2020 - [info] * Phase 3.2: Saving Dead Master's Binlog Phase.. Sun May 24 18:36:08 2020 - [info] Sun May 24 18:36:09 2020 - [info] Fetching dead master's binary logs.. Sun May 24 18:36:09 2020 - [info] Executing command on the dead master 192.168.28.128(192.168.28.128:3306): save_binary_logs --command=save --start_file=mysql-bin.000013 --start_pos=154 --binlog_dir=/data/mysql3306/data --output_file=/tmp/saved_master_binlog_from_192.168.28.128_3306_20200524183606.binlog --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.58 Creating /tmp if not exists.. ok. Concat binary/relay logs from mysql-bin.000013 pos 154 to mysql-bin.000013 EOF into /tmp/saved_master_binlog_from_192.168.28.128_3306_20200524183606.binlog .. Binlog Checksum enabled Dumping binlog format description event, from position 0 to 154.. ok. Dumping effective binlog data from /data/mysql3306/data/mysql-bin.000013 position 154 to tail(177).. ok. Binlog Checksum enabled Concat succeeded. Sun May 24 18:36:11 2020 - [info] scp from root@192.168.28.128:/tmp/saved_master_binlog_from_192.168.28.128_3306_20200524183606.binlog to local:/var/log/masterha/app1/saved_master_binlog_from_192.168.28.128_3306_20200524183606.binlog succeeded. Sun May 24 18:36:12 2020 - [info] HealthCheck: SSH to 192.168.28.131 is reachable. Sun May 24 18:36:13 2020 - [info] HealthCheck: SSH to 192.168.28.132 is reachable. Sun May 24 18:36:14 2020 - [info] Sun May 24 18:36:14 2020 - [info] * Phase 3.3: Determining New Master Phase.. Sun May 24 18:36:14 2020 - [info] Sun May 24 18:36:14 2020 - [info] Finding the latest slave that has all relay logs for recovering other slaves.. Sun May 24 18:36:14 2020 - [info] All slaves received relay logs to the same position. No need to resync each other. Sun May 24 18:36:14 2020 - [info] Searching new master from slaves.. Sun May 24 18:36:14 2020 - [info] Candidate masters from the configuration file: Sun May 24 18:36:14 2020 - [info] 192.168.28.131(192.168.28.131:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 18:36:14 2020 - [info] Replicating from 192.168.28.128(192.168.28.128:3306) Sun May 24 18:36:14 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sun May 24 18:36:14 2020 - [info] Non-candidate masters: Sun May 24 18:36:14 2020 - [info] 192.168.28.132(192.168.28.132:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 18:36:14 2020 - [info] Replicating from 192.168.28.128(192.168.28.128:3306) Sun May 24 18:36:14 2020 - [info] Not candidate for the new Master (no_master is set) Sun May 24 18:36:14 2020 - [info] New master is 192.168.28.131(192.168.28.131:3306) Sun May 24 18:36:14 2020 - [info] Starting master failover.. Sun May 24 18:36:14 2020 - [info] From: 192.168.28.128(192.168.28.128:3306) (current master) +--192.168.28.131(192.168.28.131:3306) +--192.168.28.132(192.168.28.132:3306) To: 192.168.28.131(192.168.28.131:3306) (new master) +--192.168.28.132(192.168.28.132:3306) Sun May 24 18:36:14 2020 - [info] Sun May 24 18:36:14 2020 - [info] * Phase 3.4: New Master Diff Log Generation Phase.. Sun May 24 18:36:14 2020 - [info] Sun May 24 18:36:14 2020 - [info] This server has all relay logs. No need to generate diff files from the latest slave. Sun May 24 18:36:14 2020 - [info] Sending binlog.. Sun May 24 18:36:15 2020 - [info] scp from local:/var/log/masterha/app1/saved_master_binlog_from_192.168.28.128_3306_20200524183606.binlog to root@192.168.28.131:/tmp/saved_master_binlog_from_192.168.28.128_3306_20200524183606.binlog succeeded. Sun May 24 18:36:15 2020 - [info] Sun May 24 18:36:15 2020 - [info] * Phase 3.5: Master Log Apply Phase.. Sun May 24 18:36:15 2020 - [info] Sun May 24 18:36:15 2020 - [info] *NOTICE: If any error happens from this phase, manual recovery is needed. Sun May 24 18:36:15 2020 - [info] Starting recovery on 192.168.28.131(192.168.28.131:3306).. Sun May 24 18:36:15 2020 - [info] Generating diffs succeeded. Sun May 24 18:36:15 2020 - [info] Waiting until all relay logs are applied. Sun May 24 18:36:15 2020 - [info] done. Sun May 24 18:36:15 2020 - [info] Getting slave status.. Sun May 24 18:36:15 2020 - [info] This slave(192.168.28.131)'s Exec_Master_Log_Pos equals to Read_Master_Log_Pos(mysql-bin.000013:154). No need to recover from Exec_Master_Log_Pos. Sun May 24 18:36:15 2020 - [info] Connecting to the target slave host 192.168.28.131, running recover script.. Sun May 24 18:36:15 2020 - [info] Executing command: apply_diff_relay_logs --command=apply --slave_user='mha' --slave_host=192.168.28.131 --slave_ip=192.168.28.131 --slave_port=3306 --apply_files=/tmp/saved_master_binlog_from_192.168.28.128_3306_20200524183606.binlog --workdir=/tmp --target_version=5.7.25-28-log --timestamp=20200524183606 --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.58 --slave_pass=xxx Sun May 24 18:36:16 2020 - [info] MySQL client version is 5.7.25. Using --binary-mode. Applying differential binary/relay log files /tmp/saved_master_binlog_from_192.168.28.128_3306_20200524183606.binlog on 192.168.28.131:3306. This may take long time... Applying log files succeeded. Sun May 24 18:36:16 2020 - [info] All relay logs were successfully applied. Sun May 24 18:36:16 2020 - [info] Getting new master's binlog name and position.. Sun May 24 18:36:16 2020 - [info] mysql-bin.000008:154 Sun May 24 18:36:16 2020 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.28.131', MASTER_PORT=3306, MASTER_LOG_FILE='mysql-bin.000008', MASTER_LOG_POS=154, MASTER_USER='repl', MASTER_PASSWORD='xxx'; Sun May 24 18:36:16 2020 - [info] Executing master IP activate script: Sun May 24 18:36:16 2020 - [info] /usr/bin/master_ip_failover --command=start --ssh_user=root --orig_master_host=192.168.28.128 --orig_master_ip=192.168.28.128 --orig_master_port=3306 --new_master_host=192.168.28.131 --new_master_ip=192.168.28.131 --new_master_port=3306 --new_master_user='mha' --new_master_password=xxx Unknown option: new_master_user Unknown option: new_master_password IN SCRIPT TEST====/sbin/ip addr del 192.168.28.199/24 dev ens33==/sbin/ip addr add 192.168.28.199/24 dev ens33=== Enabling the VIP - 192.168.28.199/24 on the new master - 192.168.28.131 Sun May 24 18:36:17 2020 - [info] OK. Sun May 24 18:36:17 2020 - [info] Setting read_only=0 on 192.168.28.131(192.168.28.131:3306).. Sun May 24 18:36:17 2020 - [info] ok. Sun May 24 18:36:17 2020 - [info] ** Finished master recovery successfully. Sun May 24 18:36:17 2020 - [info] * Phase 3: Master Recovery Phase completed. Sun May 24 18:36:17 2020 - [info] Sun May 24 18:36:17 2020 - [info] * Phase 4: Slaves Recovery Phase.. Sun May 24 18:36:17 2020 - [info] Sun May 24 18:36:17 2020 - [info] * Phase 4.1: Starting Parallel Slave Diff Log Generation Phase.. Sun May 24 18:36:17 2020 - [info] Sun May 24 18:36:17 2020 - [info] -- Slave diff file generation on host 192.168.28.132(192.168.28.132:3306) started, pid: 48890. Check tmp log /var/log/masterha/app1/192.168.28.132_3306_20200524183606.log if it takes time.. Sun May 24 18:36:18 2020 - [info] Sun May 24 18:36:18 2020 - [info] Sun May 24 18:36:18 2020 - [info] Log messages from 192.168.28.132 ... Sun May 24 18:36:18 2020 - [info] Sun May 24 18:36:17 2020 - [info] This server has all relay logs. No need to generate diff files from the latest slave. Sun May 24 18:36:18 2020 - [info] End of log messages from 192.168.28.132. Sun May 24 18:36:18 2020 - [info] -- 192.168.28.132(192.168.28.132:3306) has the latest relay log events. Sun May 24 18:36:18 2020 - [info] Generating relay diff files from the latest slave succeeded. Sun May 24 18:36:18 2020 - [info] Sun May 24 18:36:18 2020 - [info] * Phase 4.2: Starting Parallel Slave Log Apply Phase.. Sun May 24 18:36:18 2020 - [info] Sun May 24 18:36:18 2020 - [info] -- Slave recovery on host 192.168.28.132(192.168.28.132:3306) started, pid: 48892. Check tmp log /var/log/masterha/app1/192.168.28.132_3306_20200524183606.log if it takes time.. Sun May 24 18:36:21 2020 - [info] Sun May 24 18:36:21 2020 - [info] Log messages from 192.168.28.132 ... Sun May 24 18:36:21 2020 - [info] Sun May 24 18:36:18 2020 - [info] Sending binlog.. Sun May 24 18:36:19 2020 - [info] scp from local:/var/log/masterha/app1/saved_master_binlog_from_192.168.28.128_3306_20200524183606.binlog to root@192.168.28.132:/tmp/saved_master_binlog_from_192.168.28.128_3306_20200524183606.binlog succeeded. Sun May 24 18:36:19 2020 - [info] Starting recovery on 192.168.28.132(192.168.28.132:3306).. Sun May 24 18:36:19 2020 - [info] Generating diffs succeeded. Sun May 24 18:36:19 2020 - [info] Waiting until all relay logs are applied. Sun May 24 18:36:19 2020 - [info] done. Sun May 24 18:36:19 2020 - [info] Getting slave status.. Sun May 24 18:36:19 2020 - [info] This slave(192.168.28.132)'s Exec_Master_Log_Pos equals to Read_Master_Log_Pos(mysql-bin.000013:154). No need to recover from Exec_Master_Log_Pos. Sun May 24 18:36:19 2020 - [info] Connecting to the target slave host 192.168.28.132, running recover script.. Sun May 24 18:36:19 2020 - [info] Executing command: apply_diff_relay_logs --command=apply --slave_user='mha' --slave_host=192.168.28.132 --slave_ip=192.168.28.132 --slave_port=3306 --apply_files=/tmp/saved_master_binlog_from_192.168.28.128_3306_20200524183606.binlog --workdir=/tmp --target_version=5.7.25-28-log --timestamp=20200524183606 --handle_raw_binlog=1 --disable_log_bin=0 --manager_version=0.58 --slave_pass=xxx Sun May 24 18:36:20 2020 - [info] MySQL client version is 5.7.25. Using --binary-mode. Applying differential binary/relay log files /tmp/saved_master_binlog_from_192.168.28.128_3306_20200524183606.binlog on 192.168.28.132:3306. This may take long time... Applying log files succeeded. Sun May 24 18:36:20 2020 - [info] All relay logs were successfully applied. Sun May 24 18:36:20 2020 - [info] Resetting slave 192.168.28.132(192.168.28.132:3306) and starting replication from the new master 192.168.28.131(192.168.28.131:3306).. Sun May 24 18:36:20 2020 - [info] Executed CHANGE MASTER. Sun May 24 18:36:20 2020 - [info] Slave started. Sun May 24 18:36:21 2020 - [info] End of log messages from 192.168.28.132. Sun May 24 18:36:21 2020 - [info] -- Slave recovery on host 192.168.28.132(192.168.28.132:3306) succeeded. Sun May 24 18:36:21 2020 - [info] All new slave servers recovered successfully. Sun May 24 18:36:21 2020 - [info] Sun May 24 18:36:21 2020 - [info] * Phase 5: New master cleanup phase.. Sun May 24 18:36:21 2020 - [info] Sun May 24 18:36:21 2020 - [info] Resetting slave info on the new master.. Sun May 24 18:36:21 2020 - [info] 192.168.28.131: Resetting slave info succeeded. Sun May 24 18:36:21 2020 - [info] Master failover to 192.168.28.131(192.168.28.131:3306) completed successfully. Sun May 24 18:36:21 2020 - [info] ----- Failover Report ----- app1: MySQL Master failover 192.168.28.128(192.168.28.128:3306) to 192.168.28.131(192.168.28.131:3306) succeeded Master 192.168.28.128(192.168.28.128:3306) is down! Check MHA Manager logs at mha3:/var/log/masterha/app1/app1.log for details. Started automated(non-interactive) failover. Invalidated master IP address on 192.168.28.128(192.168.28.128:3306) The latest slave 192.168.28.131(192.168.28.131:3306) has all relay logs for recovery. Selected 192.168.28.131(192.168.28.131:3306) as a new master. 192.168.28.131(192.168.28.131:3306): OK: Applying all logs succeeded. 192.168.28.131(192.168.28.131:3306): OK: Activated master IP address. 192.168.28.132(192.168.28.132:3306): This host has the latest relay log events. Generating relay diff files from the latest slave succeeded. 192.168.28.132(192.168.28.132:3306): OK: Applying all logs succeeded. Slave started, replicating from 192.168.28.131(192.168.28.131:3306) 192.168.28.131(192.168.28.131:3306): Resetting slave info succeeded. Master failover to 192.168.28.131(192.168.28.131:3306) completed successfully.
此時的VIP在 192.168.28.131機器上了
原主節點已刪掉該VIP
將原主節點恢復並加入集羣,保證集羣3個節點在線
[root@mha1 masterha]# /usr/local/mysql5.7/bin/mysqld_safe --defaults-file=/data/mysql3306/etc/my.cnf & [root@mha1 masterha]# mysql -uroot -p'123456' --socket=/data/mysql3306/tmp/mysql.sock change master to master_host='192.168.28.131',master_user='repl', master_password='repl',master_log_file='mysql-bin.000008',master_log_pos=154; /*生產環境的恢復建議備份主庫再配置同步*/
此時再檢測集羣狀態
[root@mha3 app1]# masterha_check_repl --conf=/etc/masterha/app1.conf
手動切換主庫
不少時候須要主動進行主從切換,此時就能夠用MHA的手動切換腳原本進行,例如將主庫再切回192.168.28.128:3306上(此時MHA若是是啓動狀態則必須關閉)
masterha_master_switch --conf=/etc/masterha/app1.conf --master_state=alive --orig_master_is_new_slave --new_master_host=192.168.28.128 --new_master_port=3306
切換過程以下:
[root@mha3 app1]# masterha_master_switch --conf=/etc/masterha/app1.conf --master_state=alive --orig_master_is_new_slave --new_master_host=192.168.28.128 --new_master_port=3306 Sun May 24 19:10:29 2020 - [info] MHA::MasterRotate version 0.58. Sun May 24 19:10:29 2020 - [info] Starting online master switch.. Sun May 24 19:10:29 2020 - [info] Sun May 24 19:10:29 2020 - [info] * Phase 1: Configuration Check Phase.. Sun May 24 19:10:29 2020 - [info] Sun May 24 19:10:29 2020 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping. Sun May 24 19:10:29 2020 - [info] Reading application default configuration from /etc/masterha/app1.conf.. Sun May 24 19:10:29 2020 - [info] Reading server configuration from /etc/masterha/app1.conf.. Sun May 24 19:10:30 2020 - [info] GTID failover mode = 0 Sun May 24 19:10:30 2020 - [info] Current Alive Master: 192.168.28.131(192.168.28.131:3306) Sun May 24 19:10:30 2020 - [info] Alive Slaves: Sun May 24 19:10:30 2020 - [info] 192.168.28.128(192.168.28.128:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 19:10:30 2020 - [info] Replicating from 192.168.28.131(192.168.28.131:3306) Sun May 24 19:10:30 2020 - [info] Primary candidate for the new Master (candidate_master is set) Sun May 24 19:10:30 2020 - [info] 192.168.28.132(192.168.28.132:3306) Version=5.7.25-28-log (oldest major version between slaves) log-bin:enabled Sun May 24 19:10:30 2020 - [info] Replicating from 192.168.28.131(192.168.28.131:3306) Sun May 24 19:10:30 2020 - [info] Not candidate for the new Master (no_master is set) It is better to execute FLUSH NO_WRITE_TO_BINLOG TABLES on the master before switching. Is it ok to execute on 192.168.28.131(192.168.28.131:3306)? (YES/no): yes Sun May 24 19:10:32 2020 - [info] Executing FLUSH NO_WRITE_TO_BINLOG TABLES. This may take long time.. Sun May 24 19:10:32 2020 - [info] ok. Sun May 24 19:10:32 2020 - [info] Checking MHA is not monitoring or doing failover.. Sun May 24 19:10:32 2020 - [info] Checking replication health on 192.168.28.128.. Sun May 24 19:10:32 2020 - [info] ok. Sun May 24 19:10:32 2020 - [info] Checking replication health on 192.168.28.132.. Sun May 24 19:10:32 2020 - [info] ok. Sun May 24 19:10:32 2020 - [info] 192.168.28.128 can be new master. Sun May 24 19:10:32 2020 - [info] From: 192.168.28.131(192.168.28.131:3306) (current master) +--192.168.28.128(192.168.28.128:3306) +--192.168.28.132(192.168.28.132:3306) To: 192.168.28.128(192.168.28.128:3306) (new master) +--192.168.28.132(192.168.28.132:3306) +--192.168.28.131(192.168.28.131:3306) Starting master switch from 192.168.28.131(192.168.28.131:3306) to 192.168.28.128(192.168.28.128:3306)? (yes/NO): yes Sun May 24 19:10:33 2020 - [info] Checking whether 192.168.28.128(192.168.28.128:3306) is ok for the new master.. Sun May 24 19:10:33 2020 - [info] ok. Sun May 24 19:10:33 2020 - [info] 192.168.28.131(192.168.28.131:3306): SHOW SLAVE STATUS returned empty result. To check replication filtering rules, temporarily executing CHANGE MASTER to a dummy host. Sun May 24 19:10:33 2020 - [info] 192.168.28.131(192.168.28.131:3306): Resetting slave pointing to the dummy host. Sun May 24 19:10:33 2020 - [info] ** Phase 1: Configuration Check Phase completed. Sun May 24 19:10:33 2020 - [info] Sun May 24 19:10:33 2020 - [info] * Phase 2: Rejecting updates Phase.. Sun May 24 19:10:33 2020 - [info] Sun May 24 19:10:33 2020 - [info] Executing master ip online change script to disable write on the current master: Sun May 24 19:10:33 2020 - [info] /usr/bin/master_ip_online_change --command=stop --orig_master_host=192.168.28.131 --orig_master_ip=192.168.28.131 --orig_master_port=3306 --orig_master_user='mha' --new_master_host=192.168.28.128 --new_master_ip=192.168.28.128 --new_master_port=3306 --new_master_user='mha' --orig_master_ssh_user=root --new_master_ssh_user=root --orig_master_is_new_slave --orig_master_password=xxx --new_master_password=xxx IN SCRIPT TEST====/sbin/ip addr del 192.168.28.199/24 dev ens33==/sbin/ip addr add 192.168.28.199/24 dev ens33=== Disabling the VIP on old master: 192.168.28.131 Sun May 24 19:10:33 2020 - [info] ok. Sun May 24 19:10:33 2020 - [info] Locking all tables on the orig master to reject updates from everybody (including root): Sun May 24 19:10:33 2020 - [info] Executing FLUSH TABLES WITH READ LOCK.. Sun May 24 19:10:33 2020 - [info] ok. Sun May 24 19:10:33 2020 - [info] Orig master binlog:pos is mysql-bin.000008:154. Sun May 24 19:10:33 2020 - [info] Waiting to execute all relay logs on 192.168.28.128(192.168.28.128:3306).. Sun May 24 19:10:33 2020 - [info] master_pos_wait(mysql-bin.000008:154) completed on 192.168.28.128(192.168.28.128:3306). Executed 0 events. Sun May 24 19:10:33 2020 - [info] done. Sun May 24 19:10:33 2020 - [info] Getting new master's binlog name and position.. Sun May 24 19:10:33 2020 - [info] mysql-bin.000014:154 Sun May 24 19:10:33 2020 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.28.128', MASTER_PORT=3306, MASTER_LOG_FILE='mysql-bin.000014', MASTER_LOG_POS=154, MASTER_USER='repl', MASTER_PASSWORD='xxx'; Sun May 24 19:10:33 2020 - [info] Executing master ip online change script to allow write on the new master: Sun May 24 19:10:33 2020 - [info] /usr/bin/master_ip_online_change --command=start --orig_master_host=192.168.28.131 --orig_master_ip=192.168.28.131 --orig_master_port=3306 --orig_master_user='mha' --new_master_host=192.168.28.128 --new_master_ip=192.168.28.128 --new_master_port=3306 --new_master_user='mha' --orig_master_ssh_user=root --new_master_ssh_user=root --orig_master_is_new_slave --orig_master_password=xxx --new_master_password=xxx IN SCRIPT TEST====/sbin/ip addr del 192.168.28.199/24 dev ens33==/sbin/ip addr add 192.168.28.199/24 dev ens33=== Enabling the VIP - 192.168.28.199/24 on the new master - 192.168.28.128 Sun May 24 19:10:34 2020 - [info] ok. Sun May 24 19:10:34 2020 - [info] Setting read_only=0 on 192.168.28.128(192.168.28.128:3306).. Sun May 24 19:10:34 2020 - [info] ok. Sun May 24 19:10:34 2020 - [info] Sun May 24 19:10:34 2020 - [info] * Switching slaves in parallel.. Sun May 24 19:10:34 2020 - [info] Sun May 24 19:10:34 2020 - [info] -- Slave switch on host 192.168.28.132(192.168.28.132:3306) started, pid: 49178 Sun May 24 19:10:34 2020 - [info] Sun May 24 19:10:35 2020 - [info] Log messages from 192.168.28.132 ... Sun May 24 19:10:35 2020 - [info] Sun May 24 19:10:34 2020 - [info] Waiting to execute all relay logs on 192.168.28.132(192.168.28.132:3306).. Sun May 24 19:10:34 2020 - [info] master_pos_wait(mysql-bin.000008:154) completed on 192.168.28.132(192.168.28.132:3306). Executed 0 events. Sun May 24 19:10:34 2020 - [info] done. Sun May 24 19:10:34 2020 - [info] Resetting slave 192.168.28.132(192.168.28.132:3306) and starting replication from the new master 192.168.28.128(192.168.28.128:3306).. Sun May 24 19:10:34 2020 - [info] Executed CHANGE MASTER. Sun May 24 19:10:34 2020 - [info] Slave started. Sun May 24 19:10:35 2020 - [info] End of log messages from 192.168.28.132 ... Sun May 24 19:10:35 2020 - [info] Sun May 24 19:10:35 2020 - [info] -- Slave switch on host 192.168.28.132(192.168.28.132:3306) succeeded. Sun May 24 19:10:35 2020 - [info] Unlocking all tables on the orig master: Sun May 24 19:10:35 2020 - [info] Executing UNLOCK TABLES.. Sun May 24 19:10:35 2020 - [info] ok. Sun May 24 19:10:35 2020 - [info] Starting orig master as a new slave.. Sun May 24 19:10:35 2020 - [info] Resetting slave 192.168.28.131(192.168.28.131:3306) and starting replication from the new master 192.168.28.128(192.168.28.128:3306).. Sun May 24 19:10:35 2020 - [info] Executed CHANGE MASTER. Sun May 24 19:10:35 2020 - [info] Slave started. Sun May 24 19:10:35 2020 - [info] All new slave servers switched successfully. Sun May 24 19:10:35 2020 - [info] Sun May 24 19:10:35 2020 - [info] * Phase 5: New master cleanup phase.. Sun May 24 19:10:35 2020 - [info] Sun May 24 19:10:35 2020 - [info] 192.168.28.128: Resetting slave info succeeded. Sun May 24 19:10:35 2020 - [info] Switching master to 192.168.28.128(192.168.28.128:3306) completed successfully.
此時查看,主庫已切回192.168.28.128:3306節點上了。
配置2個定時任務,分別用於清理relay-log及服務器時鐘同步,每臺機器上均配置
清理relay-log
因MHA集羣建議關閉relay-log 因此relay-log須要手動清理,所以能夠配置一個定時任務進行清理
00 01 * * 0 /usr/bin/purge_relay_logs --user=mha --password='MHAadmin123' --host=192.168.28.131' --disable_relay_log_purge >> /var/log/masterha/app1/purge_relay_logs.log 2>&1
配置時鐘同步
能夠配置公網的時鐘服務器,也能夠本身搭建(生產環境需有自建的時鐘服務器,能夠參考 時鐘服務器搭建)
*/15 * * * * /usr/sbin/ntpdate ntp1.aliyun.com; /sbin/hwclock -w
MHA的搭建過程當中最大的困難點在於常常依賴包安裝不全以及相關腳本與版本不對應致使一直沒法部署,還有一個問題是集羣複製檢查、手動切換主庫均正常,可是主庫異常宕機時沒法切換(切換腳本問題)。小夥伴們在實踐過程當中遇到問題能夠多多與我溝通,相互學習提升排坑技能。如需相關安裝軟件及依賴包也能夠關注公衆號 數據庫乾貨鋪 聯繫我獲取或回覆「加羣」進入技術交流羣與更多小夥伴交流。