MHA 一主两从搭建-脚本VIP-自动切换

时间：2017-10-31 21:33:15 阅读：428 评论：0 收藏：0 [点我收藏+]

标签：auth ble logs other 打开 backup etc event .gz

环境介绍：
主机名 IP            MHA角色 MySQL角色
canal1 10.30.13.51   Node     MySQL Master
canal2 10.30.13.52   Node     MySQL Slave（备用主库 / candidate master）
canal3 10.30.13.53   Node     MySQL slave
node3   10.30.13.107 Manager None

所有主机 /etc/hosts 添加信息
10.30.13.107 node3
10.30.13.51 canal1
10.30.13.52 canal2
10.30.13.53 canal3

关闭selinux
永久生效：
/etc/selinux/config 文件修改如下参数
SELINUX=disabled

临时生效：
setenforce 0

关闭防火墙
chkconfig iptables off
chkconfig ip6tables off

/etc/init.d/iptables stop

安装MySQL：

解压软件
mkdir /opt/mysql
mv mysql-5.6.34-linux-glibc2.5-x86_64.tar.gz /opt/mysql
cd /opt/mysql
tar -zxvf mysql-5.6.34-linux-glibc2.5-x86_64.tar.gz

创建软连接
ln -s /opt/mysql/mysql-5.6.34-linux-glibc2.5-x86_64 /usr/local/mysql

创建运行用户
groupadd mysql
useradd -g mysql -d /usr/local/mysql -s /sbin/nologin -M -n mysql

创建所需的目录
mkdir -p /data/mysql/3307/{data,logs,tmp}
chown -R mysql:mysql /data/mysql/3307/
chown -R mysql:mysql /usr/local/mysql

配置文件内容
#my.cnf
[client]
port = 3307
socket = /data/mysql/3307/tmp/3307.sock

[mysql]
#pager="less -i -n -S"
#tee=/opt/mysql/query.log
no-auto-rehash

[mysqld]
#misc
user = mysql
basedir = /usr/local/mysql
datadir = /data/mysql/3307/data
port = 3307
socket = /data/mysql/3307/tmp/3307.sock
event_scheduler = 0

tmpdir = /data/mysql/3307/tmp
#timeout
interactive_timeout = 300
wait_timeout = 300

#character set
character-set-server = utf8

open_files_limit = 65535
max_connections = 100
max_connect_errors = 100000
lower_case_table_names =1

#semi-sync replication (every setting in this section is commented out)

#rpl_semi_sync_master_enabled=1
#rpl_semi_sync_master_timeout=1000 # 1 second
#rpl_semi_sync_slave_enabled=1

#logs
log-output=file
slow_query_log = 1
slow_query_log_file = slow.log
log-error = error.log
log_warnings = 2
pid-file = mysql.pid
long_query_time = 1
#log-slow-admin-statements = 1
#log-queries-not-using-indexes = 1
log-slow-slave-statements = 1

#binlog
#binlog_format = STATEMENT
binlog_format = row
# NOTE(review): every server in a replication topology needs its own
# server-id -- confirm this value is changed on each host that reuses this file.
server-id = 330728
log-bin = /data/mysql/3307/logs/mysql-bin
binlog_cache_size = 4M
max_binlog_size = 256M
# NOTE(review): this cap (1M) is lower than binlog_cache_size (4M) above.
# Presumably a transaction whose binlog events exceed the cap is rejected,
# which is easy to hit with row-format logging -- confirm the intended value.
max_binlog_cache_size = 1M
sync_binlog = 0
expire_logs_days = 10
#procedure
log_bin_trust_function_creators=1

#
#gtid-mode = on
#enforce-gtid-consistency=1

#relay log
# Replication is not started automatically at server start.
skip_slave_start = 1
max_relay_log_size = 128M
# NOTE(review): the masterha_check_repl output shown later in this document
# prints "[warning] relay_log_purge=0 is not set" for both slaves because of
# this value -- confirm whether automatic relay log purging should stay enabled.
relay_log_purge = 1
relay_log_recovery = 1
relay-log=relay-bin
relay-log-index=relay-bin.index
log_slave_updates
#slave-skip-errors=1032,1053,1062
#skip-grant-tables

#buffers & cache
# table_open_cache was listed twice with the same value; one entry is enough.
table_open_cache = 2048
table_definition_cache = 2048
max_heap_table_size = 96M
sort_buffer_size = 128K
join_buffer_size = 128K
thread_cache_size = 200
# Query cache is switched off (size 0, type 0); the two limits below only
# matter if it is enabled again.
query_cache_size = 0
query_cache_type = 0
query_cache_limit = 256K
query_cache_min_res_unit = 512
thread_stack = 192K
tmp_table_size = 96M
key_buffer_size = 8M
read_buffer_size = 2M
read_rnd_buffer_size = 16M
bulk_insert_buffer_size = 32M

#myisam
myisam_sort_buffer_size = 128M
myisam_max_sort_file_size = 10G
myisam_repair_threads = 1

#innodb
innodb_buffer_pool_size = 100M
innodb_buffer_pool_instances = 1
innodb_data_file_path = ibdata1:100M:autoextend
innodb_flush_log_at_trx_commit = 2
innodb_log_buffer_size = 8M
innodb_log_file_size = 100M
innodb_log_files_in_group = 3
innodb_max_dirty_pages_pct = 50
innodb_file_per_table = 1
innodb_rollback_on_timeout
innodb_status_file = 1
innodb_io_capacity = 100
transaction_isolation = READ-COMMITTED
innodb_flush_method = O_DIRECT

修改 /etc/my.cnf 权限
chown mysql:mysql /etc/my.cnf

初始化数据库
/usr/local/mysql/scripts/mysql_install_db --defaults-file=/etc/my.cnf --basedir=/usr/local/mysql --datadir=/data/mysql/3307/data --user=mysql

设置环境变量
# Single quotes keep $PATH literal in /etc/profile, so it is expanded at every
# login instead of being frozen to the value it has while this command runs.
echo 'export PATH=$PATH:/usr/local/mysql/bin' >> /etc/profile
source /etc/profile

cp /usr/local/mysql/support-files/mysql.server /etc/init.d/mysqld

启动MySQL
/etc/init.d/mysqld start

账号处理
-- Let root connect from any host, then remove every account that has an
-- empty password. String literals use plain ASCII single quotes.
grant all privileges on *.* to 'root'@'%' identified by 'chengce243' with grant option;
delete from mysql.user where password ='';
flush privileges;

搭建从库
主库创建复制账号
-- Replication account, usable from the 10.30.13.x subnet only.
create user 'repl'@'10.30.13.%' identified by 'chengce243';
grant replication slave on *.* to 'repl'@'10.30.13.%';
flush privileges;

两个从库分别执行如下语句
MASTER_LOG_FILE 和 MASTER_LOG_POS 值，从主库 show master status;查看

-- Replace MASTER_LOG_FILE / MASTER_LOG_POS with the values reported by
-- "show master status" on the master.
CHANGE MASTER TO MASTER_HOST='10.30.13.51',MASTER_USER='repl',MASTER_PASSWORD='chengce243',MASTER_PORT=3307,MASTER_LOG_FILE='mysql-bin.000003',MASTER_LOG_POS=2676;

两个从库分别开启复制
start slave;

主库上创建
-- Management account used by MHA (user/password in mha_total.cnf).
create user 'mhauser'@'10.30.13.%' identified by 'chengce243';
grant all privileges on *.* to 'mhauser'@'10.30.13.%';
flush privileges;

配置root用户互信

在 node3 节点生成互信文件，输入如下命令，一路回车就行
ssh-keygen

cd ~/.ssh

cat id_rsa.pub > authorized_keys
chmod 600 *
cd ~/
scp -r .ssh 10.30.13.51:~/
scp -r .ssh 10.30.13.52:~/
scp -r .ssh 10.30.13.53:~/

最后每个节点都要验证
ssh node3 date
ssh canal1 date
ssh canal2 date
ssh canal3 date

每个节点都要安装相关依赖包
yum install -y perl-devel
yum install -y perl-CPAN
yum install -y perl-Time-HiRes
yum install -y perl-DBD-MySQL
yum install -y perl-Params-Validate

yum install -y perl-Config-Tiny
yum install -y perl-Log-Dispatch
yum install -y perl-Parallel-ForkManager

MHA Manager 节点，即 node3 安装（Manager 包依赖 Node 包，两个都要安装）
rpm -ivh mha4mysql-manager-0.56-0.el6.noarch.rpm
rpm -ivh mha4mysql-node-0.56-0.el6.noarch.rpm

MHA node节点，即 canal1/canal2/canal3安装
rpm -ivh mha4mysql-node-0.56-0.el6.noarch.rpm

为MHA的相关配置信息的存放规划目录结构（四台服务器均要操作）：
mkdir -p /masterha_work/{conf,manager_workdir,rmt_mysql_binlog_workdir,script_dir,log}

自定义规则：
conf，存放MHA的配置文件
log，存放与MHA有关的日志信息
script_dir，除了默认的存放脚本的位置外，另一个存放自定义的或者官方提供的脚本的位置
manager_workdir，Manager的工作目录
rmt_mysql_binlog_workdir，当发生切换的时候，MySQL的binlog的临时存放路径

修改配置目录权限：
chown -R mysql:mysql /masterha_work

[root@node3 conf]# cat /masterha_work/conf/mha_total.cnf

[server default]
manager_workdir=/masterha_work/manager_workdir
manager_log=/masterha_work/log/manager.log

master_binlog_dir=/data/mysql/3307/logs
user=mhauser
password=chengce243

#master_ip_failover_script=/usr/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=canal1 --orig_master_ip=10.30.13.51 --orig_master_port=3307
master_ip_online_change_script=/usr/bin/master_ip_online_change

ping_interval=1

remote_workdir=/masterha_work/rmt_mysql_binlog_workdir

repl_user=repl
repl_password=chengce243
port=3307

report_script=/usr/bin/send_report

# Double-check the master through the two other MySQL hosts. The master itself
# (canal1) must not be listed as a monitoring host: when its host goes down the
# check reports an unreachable monitoring server rather than a dead master.
# The manager appends --user, --master_host, --master_ip and --master_port
# itself, so they are not hard-coded here.
secondary_check_script=/usr/local/bin/masterha_secondary_check -s canal2 -s canal3
shutdown_script=""

ssh_user=root

[server1]
hostname=10.30.13.51
port=3307

[server2]
hostname=10.30.13.52
candidate_master=1
check_repl_delay=0
port=3307

[server3]
hostname=10.30.13.53
port=3307
no_master=1

测试SSH连通性：

[root@node3 ~]# masterha_check_ssh --conf=/masterha_work/conf/mha_total.cnf
Tue Oct 31 15:12:21 2017 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Tue Oct 31 15:12:21 2017 - [info] Reading application default configuration from /masterha_work/conf/mha_total.cnf..
Tue Oct 31 15:12:21 2017 - [info] Reading server configuration from /masterha_work/conf/mha_total.cnf..
Tue Oct 31 15:12:21 2017 - [info] Starting SSH connection tests..
Tue Oct 31 15:12:22 2017 - [debug]
Tue Oct 31 15:12:21 2017 - [debug] Connecting via SSH from root@10.30.13.51(10.30.13.51:22) to root@10.30.13.52(10.30.13.52:22)..
Tue Oct 31 15:12:21 2017 - [debug]   ok.
Tue Oct 31 15:12:21 2017 - [debug] Connecting via SSH from root@10.30.13.51(10.30.13.51:22) to root@10.30.13.53(10.30.13.53:22)..
Tue Oct 31 15:12:22 2017 - [debug]   ok.
Tue Oct 31 15:12:22 2017 - [debug]
Tue Oct 31 15:12:22 2017 - [debug] Connecting via SSH from root@10.30.13.52(10.30.13.52:22) to root@10.30.13.51(10.30.13.51:22)..
Tue Oct 31 15:12:22 2017 - [debug]   ok.
Tue Oct 31 15:12:22 2017 - [debug] Connecting via SSH from root@10.30.13.52(10.30.13.52:22) to root@10.30.13.53(10.30.13.53:22)..
Tue Oct 31 15:12:22 2017 - [debug]   ok.
Tue Oct 31 15:12:23 2017 - [debug]
Tue Oct 31 15:12:22 2017 - [debug] Connecting via SSH from root@10.30.13.53(10.30.13.53:22) to root@10.30.13.51(10.30.13.51:22)..
Tue Oct 31 15:12:22 2017 - [debug]   ok.
Tue Oct 31 15:12:22 2017 - [debug] Connecting via SSH from root@10.30.13.53(10.30.13.53:22) to root@10.30.13.52(10.30.13.52:22)..
Tue Oct 31 15:12:23 2017 - [debug]   ok.
Tue Oct 31 15:12:23 2017 - [info] All SSH connection tests passed successfully.

测试MySQL复制的情况：

[root@node3 ~]# masterha_check_repl --conf=/masterha_work/conf/mha_total.cnf
Tue Oct 31 15:17:09 2017 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Tue Oct 31 15:17:09 2017 - [info] Reading application default configuration from /masterha_work/conf/mha_total.cnf..
Tue Oct 31 15:17:09 2017 - [info] Reading server configuration from /masterha_work/conf/mha_total.cnf..
Tue Oct 31 15:17:09 2017 - [info] MHA::MasterMonitor version 0.56.
Tue Oct 31 15:17:09 2017 - [info] GTID failover mode = 0
Tue Oct 31 15:17:09 2017 - [info] Dead Servers:
Tue Oct 31 15:17:09 2017 - [info] Alive Servers:
Tue Oct 31 15:17:09 2017 - [info]   10.30.13.51(10.30.13.51:3307)
Tue Oct 31 15:17:09 2017 - [info]   10.30.13.52(10.30.13.52:3307)
Tue Oct 31 15:17:09 2017 - [info]   10.30.13.53(10.30.13.53:3307)
Tue Oct 31 15:17:09 2017 - [info] Alive Slaves:
Tue Oct 31 15:17:09 2017 - [info]   10.30.13.52(10.30.13.52:3307) Version=5.6.34-log (oldest major version between slaves) log-bin:enabled
Tue Oct 31 15:17:09 2017 - [info]     Replicating from 10.30.13.51(10.30.13.51:3307)
Tue Oct 31 15:17:09 2017 - [info]     Primary candidate for the new Master (candidate_master is set)
Tue Oct 31 15:17:09 2017 - [info]   10.30.13.53(10.30.13.53:3307) Version=5.6.34-log (oldest major version between slaves) log-bin:enabled
Tue Oct 31 15:17:09 2017 - [info]     Replicating from 10.30.13.51(10.30.13.51:3307)
Tue Oct 31 15:17:09 2017 - [info]     Not candidate for the new Master (no_master is set)
Tue Oct 31 15:17:09 2017 - [info] Current Alive Master: 10.30.13.51(10.30.13.51:3307)
Tue Oct 31 15:17:09 2017 - [info] Checking slave configurations..
Tue Oct 31 15:17:09 2017 - [info] read_only=1 is not set on slave 10.30.13.52(10.30.13.52:3307).
Tue Oct 31 15:17:09 2017 - [warning] relay_log_purge=0 is not set on slave 10.30.13.52(10.30.13.52:3307).
Tue Oct 31 15:17:09 2017 - [info] read_only=1 is not set on slave 10.30.13.53(10.30.13.53:3307).
Tue Oct 31 15:17:09 2017 - [warning] relay_log_purge=0 is not set on slave 10.30.13.53(10.30.13.53:3307).
Tue Oct 31 15:17:09 2017 - [info] Checking replication filtering settings..
Tue Oct 31 15:17:09 2017 - [info] binlog_do_db= , binlog_ignore_db=
Tue Oct 31 15:17:09 2017 - [info] Replication filtering check ok.
Tue Oct 31 15:17:09 2017 - [info] GTID (with auto-pos) is not supported
Tue Oct 31 15:17:09 2017 - [info] Starting SSH connection tests..
Tue Oct 31 15:17:10 2017 - [info] All SSH connection tests passed successfully.
Tue Oct 31 15:17:10 2017 - [info] Checking MHA Node version..
Tue Oct 31 15:17:10 2017 - [info] Version check ok.
Tue Oct 31 15:17:10 2017 - [info] Checking SSH publickey authentication settings on the current master..
Tue Oct 31 15:17:11 2017 - [info] HealthCheck: SSH to 10.30.13.51 is reachable.
Tue Oct 31 15:17:11 2017 - [info] Master MHA Node version is 0.56.
Tue Oct 31 15:17:11 2017 - [info] Checking recovery script configurations on 10.30.13.51(10.30.13.51:3307)..
Tue Oct 31 15:17:11 2017 - [info]   Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql/3307/logs --output_file=/masterha_work/rmt_mysql_binlog_workdir/save_binary_logs_test --manager_version=0.56 --start_file=mysql-bin.000003
Tue Oct 31 15:17:11 2017 - [info]   Connecting to root@10.30.13.51(10.30.13.51:22)..
Creating /masterha_work/rmt_mysql_binlog_workdir if not exists..    ok.
Checking output directory is accessible or not..
   ok.
Binlog found at /data/mysql/3307/logs, up to mysql-bin.000003
Tue Oct 31 15:17:11 2017 - [info] Binlog setting check done.
Tue Oct 31 15:17:11 2017 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers..
Tue Oct 31 15:17:11 2017 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user=‘mhauser‘ --slave_host=10.30.13.52 --slave_ip=10.30.13.52 --slave_port=3307 --workdir=/masterha_work/rmt_mysql_binlog_workdir --target_version=5.6.34-log --manager_version=0.56 --relay_log_info=/data/mysql/3307/data/relay-log.info --relay_dir=/data/mysql/3307/data/ --slave_pass=xxx
Tue Oct 31 15:17:11 2017 - [info]   Connecting to root@10.30.13.52(10.30.13.52:22)..
Checking slave recovery environment settings..
    Opening /data/mysql/3307/data/relay-log.info ... ok.
    Relay log found at /data/mysql/3307/data, up to relay-bin.000002
    Temporary relay log file is /data/mysql/3307/data/relay-bin.000002
    Testing mysql connection and privileges..Warning: Using a password on the command line interface can be insecure.
done.
    Testing mysqlbinlog output.. done.
    Cleaning up test file(s).. done.
Tue Oct 31 15:17:11 2017 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user=‘mhauser‘ --slave_host=10.30.13.53 --slave_ip=10.30.13.53 --slave_port=3307 --workdir=/masterha_work/rmt_mysql_binlog_workdir --target_version=5.6.34-log --manager_version=0.56 --relay_log_info=/data/mysql/3307/data/relay-log.info --relay_dir=/data/mysql/3307/data/ --slave_pass=xxx
Tue Oct 31 15:17:11 2017 - [info]   Connecting to root@10.30.13.53(10.30.13.53:22)..
Checking slave recovery environment settings..
    Opening /data/mysql/3307/data/relay-log.info ... ok.
    Relay log found at /data/mysql/3307/data, up to relay-bin.000002
    Temporary relay log file is /data/mysql/3307/data/relay-bin.000002
    Testing mysql connection and privileges..Warning: Using a password on the command line interface can be insecure.
done.
    Testing mysqlbinlog output.. done.
    Cleaning up test file(s).. done.
Tue Oct 31 15:17:11 2017 - [info] Slaves settings check done.
Tue Oct 31 15:17:11 2017 - [info]
10.30.13.51(10.30.13.51:3307) (current master)
+--10.30.13.52(10.30.13.52:3307)
+--10.30.13.53(10.30.13.53:3307)

Tue Oct 31 15:17:11 2017 - [info] Checking replication health on 10.30.13.52..
Tue Oct 31 15:17:11 2017 - [info] ok.
Tue Oct 31 15:17:11 2017 - [info] Checking replication health on 10.30.13.53..
Tue Oct 31 15:17:11 2017 - [info] ok.
Tue Oct 31 15:17:11 2017 - [warning] master_ip_failover_script is not defined.
Tue Oct 31 15:17:11 2017 - [warning] shutdown_script is not defined.
Tue Oct 31 15:17:11 2017 - [info] Got exit code 0 (Not master dead).

MySQL Replication Health is OK.

配置VIP

str_vip="10.30.13.19"
str_mask="255.255.255.0"
str_nic_name="eth0:0"

ifconfig $str_nic_name $str_vip netmask $str_mask up
ifconfig $str_nic_name $str_vip netmask $str_mask down

master_ip_failover脚本修改

注释掉 FIXME，

原：
## Creating an app user on the new master
print "Creating app user on the new master..\n";
FIXME_xxx_create_user( $new_master_handler->{dbh} );
$new_master_handler->enable_log_bin_local();
$new_master_handler->disconnect();
## Update master ip on the catalog database, etc
FIXME_xxx;

修改如下：

## Creating an app user on the new master
print "Creating app user on the new master..\n";
#FIXME_xxx_create_user( $new_master_handler->{dbh} );
$new_master_handler->enable_log_bin_local();
$new_master_handler->disconnect();
## Update master ip on the catalog database, etc
#FIXME_xxx;

在 FIXME_xxx 后面增加对do_vip_crontab.sh 脚本的调用：

`/usr/bin/ssh -t root\@${orig_master_ip} "sh /data/script/do_vip_crontab.sh 1 0"`;
`/usr/bin/ssh -t root\@${new_master_ip} "sh /data/script/do_vip_crontab.sh 0 1"`;
`sh /script/shell/do_mha.sh > /work_dir/mha_manager/test_me/do_mha.log`;

注意，一定要用【;】结尾，否则脚本执行会出错。

修改 new_master_handler 中连接所用的 mhauser 用户与密码：

# args: hostname, port, user, password, raise_error_or_not
# The original call (kept commented out) used the credentials passed on the
# command line; the replacement hard-codes the MHA account. String literals
# use plain ASCII single quotes.
#$new_master_handler->connect( $new_master_ip, $new_master_port,
# $new_master_user, $new_master_password, 1 );
$new_master_handler->connect( $new_master_ip, $new_master_port,
'mhauser', 'chengce243', 1 );

修改后 master_ip_failover 脚本如下：

[root@node3 ~]# cat /usr/bin/master_ip_failover
#!/usr/bin/env perl

# Copyright (C) 2011 DeNA Co.,Ltd.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
# Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

## Note: This is a sample script and is not complete. Modify the script based on your environment.

use strict;
use warnings FATAL => 'all';

use Getopt::Long;
use MHA::DBHelper;

# Command-line options. Each --name value parsed by GetOptions below is stored
# in the variable of the same name; options that are not supplied stay undef.
my (
  $command,        $ssh_user,         $orig_master_host,
  $orig_master_ip, $orig_master_port, $new_master_host,
  $new_master_ip,  $new_master_port,  $new_master_user,
  $new_master_password
);

# Option specs use plain ASCII quotes; "=s" takes a string, "=i" an integer.
GetOptions(
  'command=s'             => \$command,
  'ssh_user=s'            => \$ssh_user,
  'orig_master_host=s'    => \$orig_master_host,
  'orig_master_ip=s'      => \$orig_master_ip,
  'orig_master_port=i'    => \$orig_master_port,
  'new_master_host=s'     => \$new_master_host,
  'new_master_ip=s'       => \$new_master_ip,
  'new_master_port=i'     => \$new_master_port,
  'new_master_user=s'     => \$new_master_user,
  'new_master_password=s' => \$new_master_password,
);

# main() always terminates the process itself via exit.
exit &main();

# Runs one shell command line on a best-effort basis.
# The command's stdout is captured and discarded. A non-zero wait status is
# reported on STDERR but never aborts the caller.
# Returns 1 when the command exited with status 0, otherwise 0.
sub _run_failover_hook {
  my ($shell_cmd) = @_;
  my $captured = `$shell_cmd`;
  return 1 if $? == 0;
  warn "Command failed (wait status $?): $shell_cmd\n";
  return 0;
}

# Entry point: dispatches on --command and always ends the process with exit.
#   stop / stopssh : nothing to do in this setup; exits 0.
#   start          : clears read_only on the new master, then moves the VIP by
#                    running do_vip_crontab.sh on the old and the new master.
#                    Exits 0 on success, 10 if anything inside the eval died.
#   status         : exits 0.
#   anything else  : prints the usage text and exits 1.
sub main {

  # A missing --command is handled like an unknown one (usage + exit 1)
  # instead of dying on an undefined value under fatal warnings.
  my $action = defined $command ? $command : "";

  if ( $action eq "stop" || $action eq "stopssh" ) {

    # $orig_master_host, $orig_master_ip, $orig_master_port are passed.
    # If you manage master ip address at global catalog database,
    # invalidate orig_master_ip here.
    my $exit_code = 1;
    eval {

      # updating global catalog, etc
      $exit_code = 0;
    };
    if ($@) {
      warn "Got Error: $@\n";
      exit $exit_code;
    }
    exit $exit_code;
  }
  elsif ( $action eq "start" ) {

    # all arguments are passed.
    # If you manage master ip address at global catalog database,
    # activate new_master_ip here.
    # You can also grant write access (create user, set read_only=0, etc) here.
    my $exit_code = 10;
    eval {
      my $new_master_handler = MHA::DBHelper->new();

      # args: hostname, port, user, password, raise_error_or_not
      $new_master_handler->connect( $new_master_ip, $new_master_port,
        "mhauser", "chengce243", 1 );

      ## Set read_only=0 on the new master
      $new_master_handler->disable_log_bin_local();
      print "Set read_only=0 on the new master.\n";
      $new_master_handler->disable_read_only();

      ## Creating an app user on the new master
      ## (the create-user call itself is disabled in this setup)
      print "Creating app user on the new master..\n";
      $new_master_handler->enable_log_bin_local();
      $new_master_handler->disconnect();

      ## Move the VIP: release it on the old master, claim it on the new one.
      _run_failover_hook(
        qq{echo "script begin running..." > /work_dir/mha_manager/test_me/master_ip_failover_run_shell}
      );

      # The old master may be down during a failover; the connect timeout keeps
      # an unreachable host from delaying the VIP takeover on the new master.
      _run_failover_hook(
        qq{/usr/bin/ssh -t -o ConnectTimeout=5 root\@${orig_master_ip} "sh /data/script/do_vip_crontab.sh 1 0"}
      );
      _run_failover_hook(
        qq{/usr/bin/ssh -t -o ConnectTimeout=5 root\@${new_master_ip} "sh /data/script/do_vip_crontab.sh 0 1"}
      );
      _run_failover_hook(
        qq{sh /script/shell/do_mha.sh > /work_dir/mha_manager/test_me/do_mha.log}
      );

      $exit_code = 0;
    };
    if ($@) {
      warn $@;

      # If you want to continue failover, exit 10.
      exit $exit_code;
    }
    exit $exit_code;
  }
  elsif ( $action eq "status" ) {

    # do nothing
    exit 0;
  }
  else {
    &usage();
    exit 1;
  }
}

# Prints the one-line command synopsis to the currently selected output handle.
sub usage {
  my @synopsis = (
    "Usage: master_ip_failover",
    "--command=start|stop|stopssh|status",
    "--orig_master_host=host",
    "--orig_master_ip=ip",
    "--orig_master_port=port",
    "--new_master_host=host",
    "--new_master_ip=ip",
    "--new_master_port=port",
  );
  print join( " ", @synopsis ) . "\n";
}

最后修改脚本权限：
chmod 755 /usr/bin/master_ip_failover

master_ip_online_change 脚本修改：

注释 FIXME_xxx_drop_app_user
原来：
## Drop application user so that nobody can connect. Disabling per-session binlog beforehand
$orig_master_handler->disable_log_bin_local();
print current_time_us() . " Drpping app user on the orig master..\n";
FIXME_xxx_drop_app_user($orig_master_handler);

修改如下：
## Drop application user so that nobody can connect. Disabling per-session binlog beforehand
$orig_master_handler->disable_log_bin_local();
print current_time_us() . " Drpping app user on the orig master..\n";
#FIXME_xxx_drop_app_user($orig_master_handler);

注释 FIXME_xxx_create_app_user
原来：
## Creating an app user on the new master
print current_time_us() . " Creating app user on the new master..\n";
FIXME_xxx_create_app_user($new_master_handler);
$new_master_handler->enable_log_bin_local();
$new_master_handler->disconnect();

修改如下：
## Creating an app user on the new master
print current_time_us() . " Creating app user on the new master..\n";
#FIXME_xxx_create_app_user($new_master_handler);
$new_master_handler->enable_log_bin_local();
$new_master_handler->disconnect();

修改 mhauser 用户的位置：
位置一：
# args: hostname, port, user, password, raise_error(die_on_error)_or_not
$new_master_handler->connect( $new_master_ip, $new_master_port,
"mhauser", "chengce243", 1 );

位置二：
# args: hostname, port, user, password, raise_error_or_not
$new_master_handler->connect( $new_master_ip, $new_master_port,
"mhauser", "chengce243", 1 );

增加do_vip_crontab.sh 脚本的调用：
`/usr/bin/ssh -t root\@${orig_master_ip} "sh /data/script/do_vip_crontab.sh 1 0"`;
`/usr/bin/ssh -t root\@${new_master_ip} "sh /data/script/do_vip_crontab.sh 0 1"`;

修改后的脚本如下：
[root@node3 ~]# cat /usr/bin/master_ip_online_change
#!/usr/bin/env perl

# Copyright (C) 2011 DeNA Co.,Ltd.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
# Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

## Note: This is a sample script and is not complete. Modify the script based on your environment.

use strict;
use warnings FATAL => 'all';

use Getopt::Long;
use MHA::DBHelper;
use MHA::NodeUtil;
use Time::HiRes qw( sleep gettimeofday tv_interval );
use Data::Dumper;

# Start time of the current wait step (a [gettimeofday] pair) and the length
# of one wait step in seconds; both are read by sleep_until().
my $_tstart;
my $_running_interval = 0.1;

# Command-line options. Each --name value parsed by GetOptions below is stored
# in the variable of the same name; options that are not supplied stay undef.
my (
  $command,              $orig_master_is_new_slave, $orig_master_host,
  $orig_master_ip,       $orig_master_port,         $orig_master_user,
  $orig_master_password, $orig_master_ssh_user,     $new_master_host,
  $new_master_ip,        $new_master_port,          $new_master_user,
  $new_master_password,  $new_master_ssh_user,
);

# Option specs use plain ASCII quotes; "=s" takes a string, "=i" an integer,
# and a spec without a suffix is a boolean flag.
GetOptions(
  'command=s'                => \$command,
  'orig_master_is_new_slave' => \$orig_master_is_new_slave,
  'orig_master_host=s'       => \$orig_master_host,
  'orig_master_ip=s'         => \$orig_master_ip,
  'orig_master_port=i'       => \$orig_master_port,
  'orig_master_user=s'       => \$orig_master_user,
  'orig_master_password=s'   => \$orig_master_password,
  'orig_master_ssh_user=s'   => \$orig_master_ssh_user,
  'new_master_host=s'        => \$new_master_host,
  'new_master_ip=s'          => \$new_master_ip,
  'new_master_port=i'        => \$new_master_port,
  'new_master_user=s'        => \$new_master_user,
  'new_master_password=s'    => \$new_master_password,
  'new_master_ssh_user=s'    => \$new_master_ssh_user,
);

# main() always terminates the process itself via exit.
exit &main();

# Returns the current local time as "<localtime string> <microseconds>",
# with the microsecond part zero-padded to six digits.
sub current_time_us {
  my @now = gettimeofday();
  return sprintf( "%s %06d", scalar localtime( $now[0] ), $now[1] );
}

# Sleeps for whatever is left of the current $_running_interval, measured from
# $_tstart; returns immediately when the interval has already elapsed.
sub sleep_until {
  my $remaining = $_running_interval - tv_interval($_tstart);
  sleep($remaining) if $remaining > 0;
}

# Returns the SHOW PROCESSLIST rows (hash refs) that count as active threads.
# Arguments:
#   $dbh                    - handle providing prepare/execute/fetchrow_hashref
#   $my_connection_id       - Id of the caller's own connection (always skipped)
#   $running_time_threshold - rows whose Time is below this are skipped
#                             (defaults to 0)
#   $type                   - 0 (default): base filters only;
#                             >= 1: also skip Sleep and Connect commands;
#                             >= 2: also skip statements starting with
#                                   select / show
sub get_threads_util {
  my ( $dbh, $my_connection_id, $running_time_threshold, $type ) = @_;
  $running_time_threshold ||= 0;
  $type                   ||= 0;

  my @active;
  my $query = $dbh->prepare("SHOW PROCESSLIST");
  $query->execute();

ROW:
  while ( my $row = $query->fetchrow_hashref() ) {
    my ( $id, $user, $cmd, $secs, $stmt ) =
      @{$row}{qw( Id User Command Time Info )};

    # Trim a local copy of the statement text; the row itself is left as is.
    $stmt =~ s/^\s*(.*?)\s*$/$1/ if defined($stmt);

    # Base filters: own connection, short-running rows, replication threads
    # and connections that have been idle for at least one second.
    next ROW if ( $my_connection_id == $id );
    if ( defined($secs) ) {
      next ROW if ( $secs < $running_time_threshold );
    }
    if ( defined($cmd) ) {
      next ROW if ( $cmd eq "Binlog Dump" );
    }
    if ( defined($user) ) {
      next ROW if ( $user eq "system user" );
    }
    if ( defined($cmd) && defined($secs) ) {
      next ROW if ( $cmd eq "Sleep" && $secs >= 1 );
    }

    if ( $type >= 1 && defined($cmd) ) {
      next ROW if ( $cmd eq "Sleep" || $cmd eq "Connect" );
    }

    if ( $type >= 2 && defined($stmt) ) {
      next ROW if ( $stmt =~ m/^select/i || $stmt =~ m/^show/i );
    }

    push @active, $row;
  }
  return @active;
}

# Runs one shell command line on a best-effort basis.
# The command's stdout is captured and discarded. A non-zero wait status is
# reported on STDERR but never aborts the caller.
# Returns 1 when the command exited with status 0, otherwise 0.
sub _run_switch_hook {
  my ($shell_cmd) = @_;
  my $captured = `$shell_cmd`;
  return 1 if $? == 0;
  warn "Command failed (wait status $?): $shell_cmd\n";
  return 0;
}

# Entry point: dispatches on --command and always ends the process with exit.
#   stop   : makes the new master read-only, waits for / kills application
#            threads on the original master and makes it read-only.
#            Exits 0 on success, 1 if anything inside the eval died.
#   start  : clears read_only on the new master, then moves the VIP by running
#            do_vip_crontab.sh on the old and the new master.
#            Exits 0 on success, 10 if anything inside the eval died.
#   status : exits 0.
#   anything else : prints the usage text (usage() dies).
sub main {

  # A missing --command is handled like an unknown one (usage) instead of
  # dying on an undefined value under fatal warnings.
  my $action = defined $command ? $command : "";

  if ( $action eq "stop" ) {
    ## Gracefully killing connections on the current master
    # 1. Set read_only= 1 on the new master
    # 2. DROP USER so that no app user can establish new connections
    #    (the drop-user call itself is disabled in this setup)
    # 3. Set read_only= 1 on the current master
    # 4. Kill current queries
    # * Any database access failure will result in script die.
    my $exit_code = 1;
    eval {
      ## Setting read_only=1 on the new master (to avoid accident)
      my $new_master_handler = MHA::DBHelper->new();

      # args: hostname, port, user, password, raise_error(die_on_error)_or_not
      $new_master_handler->connect( $new_master_ip, $new_master_port,
        "mhauser", "chengce243", 1 );
      print current_time_us() . " Set read_only on the new master.. ";
      $new_master_handler->enable_read_only();
      if ( $new_master_handler->is_read_only() ) {
        print "ok.\n";
      }
      else {
        die "Failed!\n";
      }
      $new_master_handler->disconnect();

      # Connecting to the orig master, die if any database error happens
      my $orig_master_handler = MHA::DBHelper->new();
      $orig_master_handler->connect( $orig_master_ip, $orig_master_port,
        $orig_master_user, $orig_master_password, 1 );

      ## Drop application user so that nobody can connect. Disabling per-session binlog beforehand
      $orig_master_handler->disable_log_bin_local();
      print current_time_us() . " Drpping app user on the orig master..\n";

      ## Waiting for N * 100 milliseconds so that current connections can exit
      my $time_until_read_only = 15;
      $_tstart = [gettimeofday];
      my @threads = get_threads_util( $orig_master_handler->{dbh},
        $orig_master_handler->{connection_id} );
      while ( $time_until_read_only > 0 && $#threads >= 0 ) {
        if ( $time_until_read_only % 5 == 0 ) {
          printf
            "%s Waiting all running %d threads are disconnected.. (max %d milliseconds)\n",
            current_time_us(), $#threads + 1, $time_until_read_only * 100;
          if ( $#threads < 5 ) {
            print Data::Dumper->new( [$_] )->Indent(0)->Terse(1)->Dump . "\n"
              foreach (@threads);
          }
        }
        sleep_until();
        $_tstart = [gettimeofday];
        $time_until_read_only--;
        @threads = get_threads_util( $orig_master_handler->{dbh},
          $orig_master_handler->{connection_id} );
      }

      ## Setting read_only=1 on the current master so that nobody(except SUPER) can write
      print current_time_us() . " Set read_only=1 on the orig master.. ";
      $orig_master_handler->enable_read_only();
      if ( $orig_master_handler->is_read_only() ) {
        print "ok.\n";
      }
      else {
        die "Failed!\n";
      }

      ## Waiting for M * 100 milliseconds so that current update queries can complete
      my $time_until_kill_threads = 5;
      @threads = get_threads_util( $orig_master_handler->{dbh},
        $orig_master_handler->{connection_id} );
      while ( $time_until_kill_threads > 0 && $#threads >= 0 ) {
        if ( $time_until_kill_threads % 5 == 0 ) {
          printf
            "%s Waiting all running %d queries are disconnected.. (max %d milliseconds)\n",
            current_time_us(), $#threads + 1, $time_until_kill_threads * 100;
          if ( $#threads < 5 ) {
            print Data::Dumper->new( [$_] )->Indent(0)->Terse(1)->Dump . "\n"
              foreach (@threads);
          }
        }
        sleep_until();
        $_tstart = [gettimeofday];
        $time_until_kill_threads--;
        @threads = get_threads_util( $orig_master_handler->{dbh},
          $orig_master_handler->{connection_id} );
      }

      ## Terminating all threads
      print current_time_us() . " Killing all application threads..\n";
      $orig_master_handler->kill_threads(@threads) if ( $#threads >= 0 );
      print current_time_us() . " done.\n";
      $orig_master_handler->enable_log_bin_local();
      $orig_master_handler->disconnect();

      ## After finishing the script, MHA executes FLUSH TABLES WITH READ LOCK
      $exit_code = 0;
    };
    if ($@) {
      warn "Got Error: $@\n";
      exit $exit_code;
    }
    exit $exit_code;
  }
  elsif ( $action eq "start" ) {
    ## Activating master ip on the new master
    # 1. Create app user with write privileges
    #    (the create-user call itself is disabled in this setup)
    # 2. Moving backup script if needed
    # 3. Register new master's ip to the catalog database

    # We don't return error even though activating updatable accounts/ip failed
    # so that we don't interrupt slaves' recovery.
    # If exit code is 0 or 10, MHA does not abort
    my $exit_code = 10;
    eval {
      my $new_master_handler = MHA::DBHelper->new();

      # args: hostname, port, user, password, raise_error_or_not
      $new_master_handler->connect( $new_master_ip, $new_master_port,
        "mhauser", "chengce243", 1 );

      ## Set read_only=0 on the new master
      $new_master_handler->disable_log_bin_local();
      print current_time_us() . " Set read_only=0 on the new master.\n";
      $new_master_handler->disable_read_only();

      ## Creating an app user on the new master
      print current_time_us() . " Creating app user on the new master..\n";
      $new_master_handler->enable_log_bin_local();
      $new_master_handler->disconnect();

      ## Move the VIP: release it on the old master, claim it on the new one.
      ## A failing command is reported on STDERR but does not change the exit
      ## code, in line with the note above about not interrupting recovery.
      _run_switch_hook(
        qq{/usr/bin/ssh -t root\@${orig_master_ip} "sh /data/script/do_vip_crontab.sh 1 0"}
      );
      _run_switch_hook(
        qq{/usr/bin/ssh -t root\@${new_master_ip} "sh /data/script/do_vip_crontab.sh 0 1"}
      );

      $exit_code = 0;
    };
    if ($@) {
      warn "Got Error: $@\n";
      exit $exit_code;
    }
    exit $exit_code;
  }
  elsif ( $action eq "status" ) {

    # do nothing
    exit 0;
  }
  else {
    &usage();
    exit 1;
  }
}

# Prints the one-line command synopsis to the currently selected output handle
# and then dies, so the caller never continues past it.
sub usage {
  my @synopsis = (
    "Usage: master_ip_online_change",
    "--command=start|stop|status",
    "--orig_master_host=host",
    "--orig_master_ip=ip",
    "--orig_master_port=port",
    "--new_master_host=host",
    "--new_master_ip=ip",
    "--new_master_port=port",
  );
  print join( " ", @synopsis ) . "\n";
  die;
}

修改文件的权限：
chmod 755 /usr/bin/master_ip_online_change

在 MHA 配置文件中启用 failover 脚本（master_ip_failover_script）和在线切换脚本（master_ip_online_change_script）：

[root@node3 ~]# cat /masterha_work/conf/mha_total.cnf

[server default]
manager_workdir=/masterha_work/manager_workdir
manager_log=/masterha_work/log/manager.log

master_binlog_dir=/data/mysql/3307/logs
user=mhauser
password=chengce243

master_ip_failover_script=/usr/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=canal1 --orig_master_ip=10.30.13.51 --orig_master_port=3307
master_ip_online_change_script=/usr/bin/master_ip_online_change

ping_interval=1

remote_workdir=/masterha_work/rmt_mysql_binlog_workdir

repl_user=repl
repl_password=chengce243
port=3307

report_script=/usr/bin/send_report

secondary_check_script=/usr/local/bin/masterha_secondary_check -s canal2 -s canal1 --user=mhame --master_host=canal2 --master_ip=10.30.13.52 --master_port=3307
shutdown_script=""

ssh_user=root

[server1]
hostname=10.30.13.51
port=3307

[server2]
hostname=10.30.13.52
candidate_master=1
check_repl_delay=0
port=3307

[server3]
hostname=10.30.13.53
port=3307
no_master=1

测试一下：

[root@node3 ~]# masterha_check_repl --conf=/masterha_work/conf/mha_total.cnf
Tue Oct 31 16:43:47 2017 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Tue Oct 31 16:43:47 2017 - [info] Reading application default configuration from /masterha_work/conf/mha_total.cnf..
Tue Oct 31 16:43:47 2017 - [info] Reading server configuration from /masterha_work/conf/mha_total.cnf..
Tue Oct 31 16:43:47 2017 - [info] MHA::MasterMonitor version 0.56.
Tue Oct 31 16:43:47 2017 - [info] GTID failover mode = 0
Tue Oct 31 16:43:47 2017 - [info] Dead Servers:
Tue Oct 31 16:43:47 2017 - [info] Alive Servers:
Tue Oct 31 16:43:47 2017 - [info]   10.30.13.51(10.30.13.51:3307)
Tue Oct 31 16:43:47 2017 - [info]   10.30.13.52(10.30.13.52:3307)
Tue Oct 31 16:43:47 2017 - [info]   10.30.13.53(10.30.13.53:3307)
Tue Oct 31 16:43:47 2017 - [info] Alive Slaves:
Tue Oct 31 16:43:47 2017 - [info]   10.30.13.52(10.30.13.52:3307) Version=5.6.34-log (oldest major version between slaves) log-bin:enabled
Tue Oct 31 16:43:47 2017 - [info]     Replicating from 10.30.13.51(10.30.13.51:3307)
Tue Oct 31 16:43:47 2017 - [info]     Primary candidate for the new Master (candidate_master is set)
Tue Oct 31 16:43:47 2017 - [info]   10.30.13.53(10.30.13.53:3307) Version=5.6.34-log (oldest major version between slaves) log-bin:enabled
Tue Oct 31 16:43:47 2017 - [info]     Replicating from 10.30.13.51(10.30.13.51:3307)
Tue Oct 31 16:43:47 2017 - [info]     Not candidate for the new Master (no_master is set)
Tue Oct 31 16:43:47 2017 - [info] Current Alive Master: 10.30.13.51(10.30.13.51:3307)
Tue Oct 31 16:43:47 2017 - [info] Checking slave configurations..
Tue Oct 31 16:43:47 2017 - [info] read_only=1 is not set on slave 10.30.13.52(10.30.13.52:3307).
Tue Oct 31 16:43:47 2017 - [warning] relay_log_purge=0 is not set on slave 10.30.13.52(10.30.13.52:3307).
Tue Oct 31 16:43:47 2017 - [info] read_only=1 is not set on slave 10.30.13.53(10.30.13.53:3307).
Tue Oct 31 16:43:47 2017 - [warning] relay_log_purge=0 is not set on slave 10.30.13.53(10.30.13.53:3307).
Tue Oct 31 16:43:47 2017 - [info] Checking replication filtering settings..
Tue Oct 31 16:43:47 2017 - [info] binlog_do_db= , binlog_ignore_db=
Tue Oct 31 16:43:47 2017 - [info] Replication filtering check ok.
Tue Oct 31 16:43:47 2017 - [info] GTID (with auto-pos) is not supported
Tue Oct 31 16:43:47 2017 - [info] Starting SSH connection tests..
Tue Oct 31 16:43:49 2017 - [info] All SSH connection tests passed successfully.
Tue Oct 31 16:43:49 2017 - [info] Checking MHA Node version..
Tue Oct 31 16:43:49 2017 - [info] Version check ok.
Tue Oct 31 16:43:49 2017 - [info] Checking SSH publickey authentication settings on the current master..
Tue Oct 31 16:43:49 2017 - [info] HealthCheck: SSH to 10.30.13.51 is reachable.
Tue Oct 31 16:43:49 2017 - [info] Master MHA Node version is 0.56.
Tue Oct 31 16:43:49 2017 - [info] Checking recovery script configurations on 10.30.13.51(10.30.13.51:3307)..
Tue Oct 31 16:43:49 2017 - [info]   Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql/3307/logs --output_file=/masterha_work/rmt_mysql_binlog_workdir/save_binary_logs_test --manager_version=0.56 --start_file=mysql-bin.000003
Tue Oct 31 16:43:49 2017 - [info]   Connecting to root@10.30.13.51(10.30.13.51:22)..
Creating /masterha_work/rmt_mysql_binlog_workdir if not exists..    ok.
Checking output directory is accessible or not..
   ok.
Binlog found at /data/mysql/3307/logs, up to mysql-bin.000003
Tue Oct 31 16:43:49 2017 - [info] Binlog setting check done.
Tue Oct 31 16:43:49 2017 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers..
Tue Oct 31 16:43:49 2017 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user=‘mhauser‘ --slave_host=10.30.13.52 --slave_ip=10.30.13.52 --slave_port=3307 --workdir=/masterha_work/rmt_mysql_binlog_workdir --target_version=5.6.34-log --manager_version=0.56 --relay_log_info=/data/mysql/3307/data/relay-log.info --relay_dir=/data/mysql/3307/data/ --slave_pass=xxx
Tue Oct 31 16:43:49 2017 - [info]   Connecting to root@10.30.13.52(10.30.13.52:22)..
Checking slave recovery environment settings..
    Opening /data/mysql/3307/data/relay-log.info ... ok.
    Relay log found at /data/mysql/3307/data, up to relay-bin.000002
    Temporary relay log file is /data/mysql/3307/data/relay-bin.000002
    Testing mysql connection and privileges..Warning: Using a password on the command line interface can be insecure.
done.
    Testing mysqlbinlog output.. done.
    Cleaning up test file(s).. done.
Tue Oct 31 16:43:50 2017 - [info]   Executing command : apply_diff_relay_logs --command=test --slave_user=‘mhauser‘ --slave_host=10.30.13.53 --slave_ip=10.30.13.53 --slave_port=3307 --workdir=/masterha_work/rmt_mysql_binlog_workdir --target_version=5.6.34-log --manager_version=0.56 --relay_log_info=/data/mysql/3307/data/relay-log.info --relay_dir=/data/mysql/3307/data/ --slave_pass=xxx
Tue Oct 31 16:43:50 2017 - [info]   Connecting to root@10.30.13.53(10.30.13.53:22)..
Checking slave recovery environment settings..
    Opening /data/mysql/3307/data/relay-log.info ... ok.
    Relay log found at /data/mysql/3307/data, up to relay-bin.000002
    Temporary relay log file is /data/mysql/3307/data/relay-bin.000002
    Testing mysql connection and privileges..Warning: Using a password on the command line interface can be insecure.
done.
    Testing mysqlbinlog output.. done.
    Cleaning up test file(s).. done.
Tue Oct 31 16:43:50 2017 - [info] Slaves settings check done.
Tue Oct 31 16:43:50 2017 - [info]
10.30.13.51(10.30.13.51:3307) (current master)
+--10.30.13.52(10.30.13.52:3307)
+--10.30.13.53(10.30.13.53:3307)

Tue Oct 31 16:43:50 2017 - [info] Checking replication health on 10.30.13.52..
Tue Oct 31 16:43:50 2017 - [info] ok.
Tue Oct 31 16:43:50 2017 - [info] Checking replication health on 10.30.13.53..
Tue Oct 31 16:43:50 2017 - [info] ok.
Tue Oct 31 16:43:50 2017 - [info] Checking master_ip_failover_script status:
Tue Oct 31 16:43:50 2017 - [info]   /usr/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=canal1 --orig_master_ip=10.30.13.51 --orig_master_port=3307 --command=status --ssh_user=root --orig_master_host=10.30.13.51 --orig_master_ip=10.30.13.51 --orig_master_port=3307
Tue Oct 31 16:43:50 2017 - [info] OK.
Tue Oct 31 16:43:50 2017 - [warning] shutdown_script is not defined.
Tue Oct 31 16:43:50 2017 - [info] Got exit code 0 (Not master dead).

MySQL Replication Health is OK.

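手动执行在线切换（注意：以下输出取自另一套测试环境，主机名为 node1~node4，网段为 192.168.56.0/24，与前文 canal1~canal3 / 10.30.13.x 的环境不同；其中 192.168.56.206 为 VIP）：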

node1的IP：
[root@node1 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 08:00:27:68:5f:50 brd ff:ff:ff:ff:ff:ff
inet 192.168.56.26/24 brd 192.168.56.255 scope global eth0
inet 192.168.56.206/24 scope global secondary eth0
inet6 fe80::a00:27ff:fe68:5f50/64 scope link
valid_lft forever preferred_lft forever

node3的从库状态：
[root@node3 ~]# mysql -uroot -pchengce243 -P3307
Warning: Using a password on the command line interface can be insecure.
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 21
Server version: 5.6.34-log MySQL Community Server (GPL)

Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.

Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.

Type ‘help;‘ or ‘\h‘ for help. Type ‘\c‘ to clear the current input statement.

mysql> show slave status \G
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 192.168.56.26
Master_User: repl
Master_Port: 3307
Connect_Retry: 60
Master_Log_File: mysql-bin.000003
Read_Master_Log_Pos: 2759
Relay_Log_File: relay-bin.000002
Relay_Log_Pos: 1324
Relay_Master_Log_File: mysql-bin.000003
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
Replicate_Do_DB:
Replicate_Ignore_DB:
Replicate_Do_Table:
Replicate_Ignore_Table:
Replicate_Wild_Do_Table:
Replicate_Wild_Ignore_Table:
Last_Errno: 0
Last_Error:
Skip_Counter: 0
Exec_Master_Log_Pos: 2759
Relay_Log_Space: 1491
Until_Condition: None
Until_Log_File:
Until_Log_Pos: 0
Master_SSL_Allowed: No
Master_SSL_CA_File:
Master_SSL_CA_Path:
Master_SSL_Cert:
Master_SSL_Cipher:
Master_SSL_Key:
Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
Last_IO_Errno: 0
Last_IO_Error:
Last_SQL_Errno: 0
Last_SQL_Error:
Replicate_Ignore_Server_Ids:
Master_Server_Id: 330726
Master_UUID: 336d78f5-a9d1-11e7-ab8a-080027685f50
Master_Info_File: /data/mysql/3307/data/master.info
SQL_Delay: 0
SQL_Remaining_Delay: NULL
Slave_SQL_Running_State: Slave has read all relay log; waiting for the slave I/O thread to update it
Master_Retry_Count: 86400
Master_Bind:
Last_IO_Error_Timestamp:
Last_SQL_Error_Timestamp:
Master_SSL_Crl:
Master_SSL_Crlpath:
Retrieved_Gtid_Set:
Executed_Gtid_Set:
Auto_Position: 0
1 row in set (0.00 sec)

执行切换命令：
首先应该停止 MHA Manager 的监控进程（在线切换过程中会检查 MHA 是否正在监控或执行 failover）：
[root@node4 ~]# masterha_stop --conf=/masterha_work/conf/mha_total.cnf

[root@node4 ~]# masterha_master_switch --master_state=alive --conf=/masterha_work/conf/mha_total.cnf
Fri Oct 6 16:09:39 2017 - [info] MHA::MasterRotate version 0.56.
Fri Oct 6 16:09:39 2017 - [info] Starting online master switch..
Fri Oct 6 16:09:39 2017 - [info]
Fri Oct 6 16:09:39 2017 - [info] * Phase 1: Configuration Check Phase..
Fri Oct 6 16:09:39 2017 - [info]
Fri Oct 6 16:09:39 2017 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Fri Oct 6 16:09:39 2017 - [info] Reading application default configuration from /masterha_work/conf/mha_total.cnf..
Fri Oct 6 16:09:39 2017 - [info] Reading server configuration from /masterha_work/conf/mha_total.cnf..
Fri Oct 6 16:09:39 2017 - [info] GTID failover mode = 0
Fri Oct 6 16:09:39 2017 - [info] Current Alive Master: 192.168.56.26(192.168.56.26:3307)
Fri Oct 6 16:09:39 2017 - [info] Alive Slaves:
Fri Oct 6 16:09:39 2017 - [info] 192.168.56.27(192.168.56.27:3307) Version=5.6.34-log (oldest major version between slaves) log-bin:enabled
Fri Oct 6 16:09:39 2017 - [info] Replicating from 192.168.56.26(192.168.56.26:3307)
Fri Oct 6 16:09:39 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Fri Oct 6 16:09:39 2017 - [info] 192.168.56.28(192.168.56.28:3307) Version=5.6.34-log (oldest major version between slaves) log-bin:enabled
Fri Oct 6 16:09:39 2017 - [info] Replicating from 192.168.56.26(192.168.56.26:3307)
Fri Oct 6 16:09:39 2017 - [info] Not candidate for the new Master (no_master is set)

It is better to execute FLUSH NO_WRITE_TO_BINLOG TABLES on the master before switching. Is it ok to execute on 192.168.56.26(192.168.56.26:3307)? (YES/no): yes
Fri Oct 6 16:10:08 2017 - [info] Executing FLUSH NO_WRITE_TO_BINLOG TABLES. This may take long time..
Fri Oct 6 16:10:08 2017 - [info] ok.
Fri Oct 6 16:10:08 2017 - [info] Checking MHA is not monitoring or doing failover..
Fri Oct 6 16:10:09 2017 - [info] Checking replication health on 192.168.56.27..
Fri Oct 6 16:10:09 2017 - [info] ok.
Fri Oct 6 16:10:09 2017 - [info] Checking replication health on 192.168.56.28..
Fri Oct 6 16:10:09 2017 - [info] ok.
Fri Oct 6 16:10:09 2017 - [info] Searching new master from slaves..
Fri Oct 6 16:10:09 2017 - [info] Candidate masters from the configuration file:
Fri Oct 6 16:10:09 2017 - [info] 192.168.56.27(192.168.56.27:3307) Version=5.6.34-log (oldest major version between slaves) log-bin:enabled
Fri Oct 6 16:10:09 2017 - [info] Replicating from 192.168.56.26(192.168.56.26:3307)
Fri Oct 6 16:10:09 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Fri Oct 6 16:10:09 2017 - [info] Non-candidate masters:
Fri Oct 6 16:10:09 2017 - [info] 192.168.56.28(192.168.56.28:3307) Version=5.6.34-log (oldest major version between slaves) log-bin:enabled
Fri Oct 6 16:10:09 2017 - [info] Replicating from 192.168.56.26(192.168.56.26:3307)
Fri Oct 6 16:10:09 2017 - [info] Not candidate for the new Master (no_master is set)
Fri Oct 6 16:10:09 2017 - [info] Searching from candidate_master slaves which have received the latest relay log events..
Fri Oct 6 16:10:09 2017 - [info]
From:
192.168.56.26(192.168.56.26:3307) (current master)
+--192.168.56.27(192.168.56.27:3307)
+--192.168.56.28(192.168.56.28:3307)

To:
192.168.56.27(192.168.56.27:3307) (new master)
+--192.168.56.28(192.168.56.28:3307)

Starting master switch from 192.168.56.26(192.168.56.26:3307) to 192.168.56.27(192.168.56.27:3307)? (yes/NO): yes
Fri Oct 6 16:10:10 2017 - [info] Checking whether 192.168.56.27(192.168.56.27:3307) is ok for the new master..
Fri Oct 6 16:10:10 2017 - [info] ok.
Fri Oct 6 16:10:10 2017 - [info] ** Phase 1: Configuration Check Phase completed.
Fri Oct 6 16:10:10 2017 - [info]
Fri Oct 6 16:10:10 2017 - [info] * Phase 2: Rejecting updates Phase..
Fri Oct 6 16:10:10 2017 - [info]
Fri Oct 6 16:10:10 2017 - [info] Executing master ip online change script to disable write on the current master:
Fri Oct 6 16:10:10 2017 - [info] /usr/bin/master_ip_online_change --command=stop --orig_master_host=192.168.56.26 --orig_master_ip=192.168.56.26 --orig_master_port=3307 --orig_master_user=‘mhauser‘ --orig_master_password=‘chengce243‘ --new_master_host=192.168.56.27 --new_master_ip=192.168.56.27 --new_master_port=3307 --new_master_user=‘mhauser‘ --new_master_password=‘chengce243‘ --orig_master_ssh_user=root --new_master_ssh_user=root
Fri Oct 6 16:10:10 2017 856828 Set read_only on the new master.. ok.
Fri Oct 6 16:10:10 2017 898583 Drpping app user on the orig master..
Fri Oct 6 16:10:10 2017 900126 Set read_only=1 on the orig master.. ok.
Fri Oct 6 16:10:10 2017 923958 Killing all application threads..
Fri Oct 6 16:10:10 2017 924002 done.
Fri Oct 6 16:10:10 2017 - [info] ok.
Fri Oct 6 16:10:10 2017 - [info] Locking all tables on the orig master to reject updates from everybody (including root):
Fri Oct 6 16:10:10 2017 - [info] Executing FLUSH TABLES WITH READ LOCK..
Fri Oct 6 16:10:10 2017 - [info] ok.
Fri Oct 6 16:10:10 2017 - [info] Orig master binlog:pos is mysql-bin.000003:2759.
Fri Oct 6 16:10:10 2017 - [info] Waiting to execute all relay logs on 192.168.56.27(192.168.56.27:3307)..
Fri Oct 6 16:10:10 2017 - [info] master_pos_wait(mysql-bin.000003:2759) completed on 192.168.56.27(192.168.56.27:3307). Executed 0 events.
Fri Oct 6 16:10:10 2017 - [info] done.
Fri Oct 6 16:10:11 2017 - [info] Getting new master‘s binlog name and position..
Fri Oct 6 16:10:11 2017 - [info] mysql-bin.000003:2743
Fri Oct 6 16:10:11 2017 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST=‘192.168.56.27‘, MASTER_PORT=3307, MASTER_LOG_FILE=‘mysql-bin.000003‘, MASTER_LOG_POS=2743, MASTER_USER=‘repl‘, MASTER_PASSWORD=‘xxx‘;
Fri Oct 6 16:10:11 2017 - [info] Executing master ip online change script to allow write on the new master:
Fri Oct 6 16:10:11 2017 - [info] /usr/bin/master_ip_online_change --command=start --orig_master_host=192.168.56.26 --orig_master_ip=192.168.56.26 --orig_master_port=3307 --orig_master_user=‘mhauser‘ --orig_master_password=‘chengce243‘ --new_master_host=192.168.56.27 --new_master_ip=192.168.56.27 --new_master_port=3307 --new_master_user=‘mhauser‘ --new_master_password=‘chengce243‘ --orig_master_ssh_user=root --new_master_ssh_user=root
Fri Oct 6 16:10:11 2017 117572 Set read_only=0 on the new master.
Fri Oct 6 16:10:11 2017 121387 Creating app user on the new master..
Connection to 192.168.56.26 closed.
Fri Oct 6 16:10:11 2017 - [info] ok.
Fri Oct 6 16:10:11 2017 - [info]
Fri Oct 6 16:10:11 2017 - [info] * Switching slaves in parallel..
Fri Oct 6 16:10:11 2017 - [info]
Fri Oct 6 16:10:11 2017 - [info] -- Slave switch on host 192.168.56.28(192.168.56.28:3307) started, pid: 1871
Fri Oct 6 16:10:11 2017 - [info]
Fri Oct 6 16:10:11 2017 - [info] Log messages from 192.168.56.28 ...
Fri Oct 6 16:10:11 2017 - [info]
Fri Oct 6 16:10:11 2017 - [info] Waiting to execute all relay logs on 192.168.56.28(192.168.56.28:3307)..
Fri Oct 6 16:10:11 2017 - [info] master_pos_wait(mysql-bin.000003:2759) completed on 192.168.56.28(192.168.56.28:3307). Executed 0 events.
Fri Oct 6 16:10:11 2017 - [info] done.
Fri Oct 6 16:10:11 2017 - [info] Resetting slave 192.168.56.28(192.168.56.28:3307) and starting replication from the new master 192.168.56.27(192.168.56.27:3307)..
Fri Oct 6 16:10:11 2017 - [info] Executed CHANGE MASTER.
Fri Oct 6 16:10:11 2017 - [info] Slave started.
Fri Oct 6 16:10:11 2017 - [info] End of log messages from 192.168.56.28 ...
Fri Oct 6 16:10:11 2017 - [info]
Fri Oct 6 16:10:11 2017 - [info] -- Slave switch on host 192.168.56.28(192.168.56.28:3307) succeeded.
Fri Oct 6 16:10:11 2017 - [info] Unlocking all tables on the orig master:
Fri Oct 6 16:10:11 2017 - [info] Executing UNLOCK TABLES..
Fri Oct 6 16:10:11 2017 - [info] ok.
Fri Oct 6 16:10:11 2017 - [info] All new slave servers switched successfully.
Fri Oct 6 16:10:11 2017 - [info]
Fri Oct 6 16:10:11 2017 - [info] * Phase 5: New master cleanup phase..
Fri Oct 6 16:10:11 2017 - [info]
Fri Oct 6 16:10:12 2017 - [info] 192.168.56.27: Resetting slave info succeeded.
Fri Oct 6 16:10:12 2017 - [info] Switching master to 192.168.56.27(192.168.56.27:3307) completed successfully.

node1的IP：
[root@node1 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 08:00:27:68:5f:50 brd ff:ff:ff:ff:ff:ff
inet 192.168.56.26/24 brd 192.168.56.255 scope global eth0
inet6 fe80::a00:27ff:fe68:5f50/64 scope link
valid_lft forever preferred_lft forever

node1的状态：
mysql> show master status ;
+------------------+----------+--------------+------------------+-------------------+
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
+------------------+----------+--------------+------------------+-------------------+
| mysql-bin.000003 | 2759 | | | |
+------------------+----------+--------------+------------------+-------------------+
1 row in set (0.00 sec)

mysql> show slave status \G
Empty set (0.00 sec)

可以看到 node1（原主库）上没有任何从库配置，VIP 192.168.56.206 也已经从 node1 上摘除。本次在线切换完成后，原主库并没有自动成为新主库的从库，相当于被暂时移出了复制拓扑，需要手动将其重新加入。

node2的IP：
[root@node2 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 08:00:27:7c:8a:78 brd ff:ff:ff:ff:ff:ff
inet 192.168.56.27/24 brd 192.168.56.255 scope global eth0
inet 192.168.56.206/24 scope global secondary eth0
inet6 fe80::a00:27ff:fe7c:8a78/64 scope link
valid_lft forever preferred_lft forever

node3的从库状态：
mysql> show slave status \G
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 192.168.56.27
Master_User: repl
Master_Port: 3307
Connect_Retry: 60
Master_Log_File: mysql-bin.000003
Read_Master_Log_Pos: 2743
Relay_Log_File: relay-bin.000002
Relay_Log_Pos: 283
Relay_Master_Log_File: mysql-bin.000003
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
Replicate_Do_DB:
Replicate_Ignore_DB:
Replicate_Do_Table:
Replicate_Ignore_Table:
Replicate_Wild_Do_Table:
Replicate_Wild_Ignore_Table:
Last_Errno: 0
Last_Error:
Skip_Counter: 0
Exec_Master_Log_Pos: 2743
Relay_Log_Space: 450
Until_Condition: None
Until_Log_File:
Until_Log_Pos: 0
Master_SSL_Allowed: No
Master_SSL_CA_File:
Master_SSL_CA_Path:
Master_SSL_Cert:
Master_SSL_Cipher:
Master_SSL_Key:
Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
Last_IO_Errno: 0
Last_IO_Error:
Last_SQL_Errno: 0
Last_SQL_Error:
Replicate_Ignore_Server_Ids:
Master_Server_Id: 330727
Master_UUID: 8a12a46e-a9e0-11e7-abee-0800277c8a78
Master_Info_File: /data/mysql/3307/data/master.info
SQL_Delay: 0
SQL_Remaining_Delay: NULL
Slave_SQL_Running_State: Slave has read all relay log; waiting for the slave I/O thread to update it
Master_Retry_Count: 86400
Master_Bind:
Last_IO_Error_Timestamp:
Last_SQL_Error_Timestamp:
Master_SSL_Crl:
Master_SSL_Crlpath:
Retrieved_Gtid_Set:
Executed_Gtid_Set:
Auto_Position: 0
1 row in set (0.00 sec)

可以看到 node3 的 Master_Host 已经指向 192.168.56.27，即现在的主库已经是 node2 了。
至此，切换成功。

根据上面切换日志中输出的 CHANGE MASTER 语句（日志中密码被显示为 xxx，需要替换为复制用户 repl 的实际密码），在 node1 上执行：
CHANGE MASTER TO MASTER_HOST='192.168.56.27', MASTER_PORT=3307, MASTER_LOG_FILE='mysql-bin.000003', MASTER_LOG_POS=2743, MASTER_USER='repl', MASTER_PASSWORD='chengce243';

mysql> start slave ;
Query OK, 0 rows affected (0.06 sec)

mysql> show slave status \G
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 192.168.56.27
Master_User: repl
Master_Port: 3307
Connect_Retry: 60
Master_Log_File: mysql-bin.000003
Read_Master_Log_Pos: 2743
Relay_Log_File: relay-bin.000002
Relay_Log_Pos: 283
Relay_Master_Log_File: mysql-bin.000003
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
Replicate_Do_DB:
Replicate_Ignore_DB:
Replicate_Do_Table:
Replicate_Ignore_Table:
Replicate_Wild_Do_Table:
Replicate_Wild_Ignore_Table:
Last_Errno: 0
Last_Error:
Skip_Counter: 0
Exec_Master_Log_Pos: 2743
Relay_Log_Space: 450
Until_Condition: None
Until_Log_File:
Until_Log_Pos: 0
Master_SSL_Allowed: No
Master_SSL_CA_File:
Master_SSL_CA_Path:
Master_SSL_Cert:
Master_SSL_Cipher:
Master_SSL_Key:
Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
Last_IO_Errno: 0
Last_IO_Error:
Last_SQL_Errno: 0
Last_SQL_Error:
Replicate_Ignore_Server_Ids:
Master_Server_Id: 330727
Master_UUID: 8a12a46e-a9e0-11e7-abee-0800277c8a78
Master_Info_File: /data/mysql/3307/data/master.info
SQL_Delay: 0
SQL_Remaining_Delay: NULL
Slave_SQL_Running_State: Slave has read all relay log; waiting for the slave I/O thread to update it
Master_Retry_Count: 86400
Master_Bind:
Last_IO_Error_Timestamp:
Last_SQL_Error_Timestamp:
Master_SSL_Crl:
Master_SSL_Crlpath:
Retrieved_Gtid_Set:
Executed_Gtid_Set:
Auto_Position: 0
1 row in set (0.00 sec)

至此，node1 已经成为 node2 的从库。

标签：auth ble logs other 打开 backup etc event .gz

原文地址：http://www.cnblogs.com/liang545621/p/7763560.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行