$ cat /etc/redhat-release CentOS Linux release 7.0.1406 (Core) $ uname -a Linux zhaopin-2-201 3.10.0-123.el7.x86_64 #1 SMP Mon Jun 30 12:09:22 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux $ mongo MongoDB shell version: 3.0.6 connecting to: test rs0:PRIMARY> rs.status(); { "set" : "rs0", "date" : ISODate("2015-09-28T07:00:05.507Z"), "myState" : 1, "members" : [ { "_id" : 0, "name" : "172.30.2.201:27017", "health" : 1, "state" : 1, "stateStr" : "PRIMARY", "uptime" : 83, "optime" : Timestamp(1443423600, 1), "optimeDate" : ISODate("2015-09-28T07:00:00Z"), "electionTime" : Timestamp(1443423535, 2), "electionDate" : ISODate("2015-09-28T06:58:55Z"), "configVersion" : 3, "self" : true }, { "_id" : 1, "name" : "172.30.2.203:27017", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 44, "optime" : Timestamp(1443423600, 1), "optimeDate" : ISODate("2015-09-28T07:00:00Z"), "lastHeartbeat" : ISODate("2015-09-28T07:00:04.918Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:00:05.042Z"), "pingMs" : 0, "syncingTo" : "172.30.2.201:27017", "configVersion" : 3 }, { "_id" : 2, "name" : "172.30.2.202:27017", "health" : 1, "state" : 5, "stateStr" : "STARTUP2", "uptime" : 4, "optime" : Timestamp(0, 0), "optimeDate" : ISODate("1970-01-01T00:00:00Z"), "lastHeartbeat" : ISODate("2015-09-28T07:00:04.918Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:00:04.926Z"), "pingMs" : 0, "configVersion" : 3 } ], "ok" : 1 }
1)關閉primary節點的mongodb
rs0:PRIMARY> use admin; switched to db admin rs0:PRIMARY> db.shutdownServer(); 2015-09-28T15:00:51.828+0800 I NETWORK DBClientCursor::init call() failed server should be down... 2015-09-28T15:00:51.830+0800 I NETWORK trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed 2015-09-28T15:00:51.831+0800 I NETWORK reconnect 127.0.0.1:27017 (127.0.0.1) ok 2015-09-28T15:00:51.831+0800 I NETWORK DBClientCursor::init call() failed > bye
2)查看集羣狀態
$ mongo MongoDB shell version: 3.0.6 connecting to: test rs0:SECONDARY> rs.status(); { "set" : "rs0", "date" : ISODate("2015-09-28T07:01:28.818Z"), "myState" : 2, "members" : [ { "_id" : 0, "name" : "172.30.2.201:27017", "health" : 0, "state" : 8, "stateStr" : "(not reachable/healthy)", "uptime" : 0, "optime" : Timestamp(0, 0), "optimeDate" : ISODate("1970-01-01T00:00:00Z"), "lastHeartbeat" : ISODate("2015-09-28T07:01:27.006Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:00:50.935Z"), "pingMs" : 0, "lastHeartbeatMessage" : "Failed attempt to connect to 172.30.2.201:27017; couldn't connect to server 172.30.2.201:27017 (172.30.2.201), connection attempt failed", "configVersion" : -1 }, { "_id" : 1, "name" : "172.30.2.203:27017", "health" : 1, "state" : 1, "stateStr" : "PRIMARY", "uptime" : 87, "optime" : Timestamp(1443423600, 1), "optimeDate" : ISODate("2015-09-28T07:00:00Z"), "lastHeartbeat" : ISODate("2015-09-28T07:01:26.963Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:01:27.078Z"), "pingMs" : 0, "electionTime" : Timestamp(1443423653, 1), "electionDate" : ISODate("2015-09-28T07:00:53Z"), "configVersion" : 3 }, { "_id" : 2, "name" : "172.30.2.202:27017", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 90, "optime" : Timestamp(1443423600, 1), "optimeDate" : ISODate("2015-09-28T07:00:00Z"), "configVersion" : 3, "self" : true } ], "ok" : 1 }
發現集羣進行了自動切換,把172.30.2.203:27017變爲了primary
3)啓動原來的primary
$ sudo /opt/mongodb/bin/mongod --config /data/mongodb/conf/db0/mongodb.conf about to fork child process, waiting until server is ready for connections. forked process: 25738 child process started successfully, parent exiting $ mongo MongoDB shell version: 3.0.6 connecting to: test rs0:PRIMARY> rs.status(); { "set" : "rs0", "date" : ISODate("2015-09-28T07:02:24.312Z"), "myState" : 1, "members" : [ { "_id" : 0, "name" : "172.30.2.201:27017", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 13, "optime" : Timestamp(1443423600, 1), "optimeDate" : ISODate("2015-09-28T07:00:00Z"), "lastHeartbeat" : ISODate("2015-09-28T07:02:23.189Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:02:22.873Z"), "pingMs" : 0, "configVersion" : 3 }, { "_id" : 1, "name" : "172.30.2.203:27017", "health" : 1, "state" : 1, "stateStr" : "PRIMARY", "uptime" : 185, "optime" : Timestamp(1443423600, 1), "optimeDate" : ISODate("2015-09-28T07:00:00Z"), "electionTime" : Timestamp(1443423653, 1), "electionDate" : ISODate("2015-09-28T07:00:53Z"), "configVersion" : 3, "self" : true }, { "_id" : 2, "name" : "172.30.2.202:27017", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 143, "optime" : Timestamp(1443423600, 1), "optimeDate" : ISODate("2015-09-28T07:00:00Z"), "lastHeartbeat" : ISODate("2015-09-28T07:02:23.103Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:02:22.990Z"), "pingMs" : 0, "configVersion" : 3 } ], "ok" : 1 }
發現原來的primary自動切爲了secondary
1)關閉secondary節點
$ mongo MongoDB shell version: 3.0.6 connecting to: test rs0:SECONDARY> use admin; switched to db admin rs0:SECONDARY> db.shutdownServer(); 2015-09-28T15:04:39.064+0800 I NETWORK DBClientCursor::init call() failed server should be down... 2015-09-28T15:04:39.066+0800 I NETWORK trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed 2015-09-28T15:04:39.067+0800 W NETWORK Failed to connect to 127.0.0.1:27017, reason: errno:111 Connection refused 2015-09-28T15:04:39.067+0800 I NETWORK reconnect 127.0.0.1:27017 (127.0.0.1) failed failed couldn't connect to server 127.0.0.1:27017 (127.0.0.1), connection attempt failed 2015-09-28T15:04:39.070+0800 I NETWORK trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed 2015-09-28T15:04:39.070+0800 W NETWORK Failed to connect to 127.0.0.1:27017, reason: errno:111 Connection refused 2015-09-28T15:04:39.070+0800 I NETWORK reconnect 127.0.0.1:27017 (127.0.0.1) failed failed couldn't connect to server 127.0.0.1:27017 (127.0.0.1), connection attempt failed > bye
2)查看集羣狀態
$ mongo MongoDB shell version: 3.0.6 connecting to: test rs0:PRIMARY> rs.status(); { "set" : "rs0", "date" : ISODate("2015-09-28T07:05:12.140Z"), "myState" : 1, "members" : [ { "_id" : 0, "name" : "172.30.2.201:27017", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 180, "optime" : Timestamp(1443423600, 1), "optimeDate" : ISODate("2015-09-28T07:00:00Z"), "lastHeartbeat" : ISODate("2015-09-28T07:05:11.265Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:05:10.951Z"), "pingMs" : 0, "configVersion" : 3 }, { "_id" : 1, "name" : "172.30.2.203:27017", "health" : 1, "state" : 1, "stateStr" : "PRIMARY", "uptime" : 353, "optime" : Timestamp(1443423600, 1), "optimeDate" : ISODate("2015-09-28T07:00:00Z"), "electionTime" : Timestamp(1443423653, 1), "electionDate" : ISODate("2015-09-28T07:00:53Z"), "configVersion" : 3, "self" : true }, { "_id" : 2, "name" : "172.30.2.202:27017", "health" : 0, "state" : 8, "stateStr" : "(not reachable/healthy)", "uptime" : 0, "optime" : Timestamp(0, 0), "optimeDate" : ISODate("1970-01-01T00:00:00Z"), "lastHeartbeat" : ISODate("2015-09-28T07:05:11.226Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:04:37.055Z"), "pingMs" : 0, "lastHeartbeatMessage" : "Failed attempt to connect to 172.30.2.202:27017; couldn't connect to server 172.30.2.202:27017 (172.30.2.202), connection attempt failed", "configVersion" : -1 } ], "ok" : 1 }
可見單個secondary節點故障對集羣沒有影響
3)再啓動secondary
$ sudo /opt/mongodb/bin/mongod --config /data/mongodb/conf/db0/mongodb.conf about to fork child process, waiting until server is ready for connections. forked process: 49507 child process started successfully, parent exiting $ mongo MongoDB shell version: 3.0.6 connecting to: test rs0:SECONDARY> rs.status(); { "set" : "rs0", "date" : ISODate("2015-09-28T07:06:41.733Z"), "myState" : 2, "members" : [ { "_id" : 0, "name" : "172.30.2.201:27017", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 12, "optime" : Timestamp(1443423600, 1), "optimeDate" : ISODate("2015-09-28T07:00:00Z"), "lastHeartbeat" : ISODate("2015-09-28T07:06:40.999Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:06:41.233Z"), "pingMs" : 0, "lastHeartbeatMessage" : "could not find member to sync from", "configVersion" : 3 }, { "_id" : 1, "name" : "172.30.2.203:27017", "health" : 1, "state" : 1, "stateStr" : "PRIMARY", "uptime" : 12, "optime" : Timestamp(1443423600, 1), "optimeDate" : ISODate("2015-09-28T07:00:00Z"), "lastHeartbeat" : ISODate("2015-09-28T07:06:40.999Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:06:41.360Z"), "pingMs" : 0, "electionTime" : Timestamp(1443423653, 1), "electionDate" : ISODate("2015-09-28T07:00:53Z"), "configVersion" : 3 }, { "_id" : 2, "name" : "172.30.2.202:27017", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 13, "optime" : Timestamp(1443423600, 1), "optimeDate" : ISODate("2015-09-28T07:00:00Z"), "configVersion" : 3, "self" : true } ], "ok" : 1 }
重新啓動後又重新連上了集羣
1)停掉一個secondary節點
$ mongo MongoDB shell version: 3.0.6 connecting to: test rs0:SECONDARY> use admin; switched to db admin rs0:SECONDARY> db.shutdownServer(); 2015-09-28T15:10:43.049+0800 I NETWORK DBClientCursor::init call() failed server should be down... 2015-09-28T15:10:43.051+0800 I NETWORK trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed 2015-09-28T15:10:43.052+0800 W NETWORK Failed to connect to 127.0.0.1:27017, reason: errno:111 Connection refused 2015-09-28T15:10:43.052+0800 I NETWORK reconnect 127.0.0.1:27017 (127.0.0.1) failed failed couldn't connect to server 127.0.0.1:27017 (127.0.0.1), connection attempt failed 2015-09-28T15:10:43.055+0800 I NETWORK trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed 2015-09-28T15:10:43.055+0800 W NETWORK Failed to connect to 127.0.0.1:27017, reason: errno:111 Connection refused 2015-09-28T15:10:43.055+0800 I NETWORK reconnect 127.0.0.1:27017 (127.0.0.1) failed failed couldn't connect to server 127.0.0.1:27017 (127.0.0.1), connection attempt failed
2)停掉primary節點
$ mongo
MongoDB shell version: 3.0.6
connecting to: test
rs0:PRIMARY> use admin;
switched to db admin
rs0:PRIMARY> db.shutdownServer();
2015-09-28T15:10:53.069+0800 I NETWORK DBClientCursor::init call() failed
server should be down...
2015-09-28T15:10:53.072+0800 I NETWORK trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed
2015-09-28T15:10:53.073+0800 I NETWORK reconnect 127.0.0.1:27017 (127.0.0.1) ok
2015-09-28T15:10:53.073+0800 I NETWORK DBClientCursor::init call() failed
2015-09-28T15:10:53.076+0800 I NETWORK trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed
2015-09-28T15:10:53.076+0800 I NETWORK reconnect 127.0.0.1:27017 (127.0.0.1) ok
2015-09-28T15:10:53.888+0800 I NETWORK Socket recv() errno:104 Connection reset by peer 127.0.0.1:27017
2015-09-28T15:10:53.888+0800 I NETWORK SocketException: remote: 127.0.0.1:27017 error: 9001 socket exception [RECV_ERROR] server [127.0.0.1:27017]
2015-09-28T15:10:53.888+0800 I NETWORK DBClientCursor::init call() failed
3)查看集羣狀態
$ mongo MongoDB shell version: 3.0.6 connecting to: test rs0:SECONDARY> rs.status(); { "set" : "rs0", "date" : ISODate("2015-09-28T07:12:10.946Z"), "myState" : 2, "members" : [ { "_id" : 0, "name" : "172.30.2.201:27017", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 600, "optime" : Timestamp(1443423600, 1), "optimeDate" : ISODate("2015-09-28T07:00:00Z"), "configVersion" : 3, "self" : true }, { "_id" : 1, "name" : "172.30.2.203:27017", "health" : 0, "state" : 8, "stateStr" : "(not reachable/healthy)", "uptime" : 0, "optime" : Timestamp(0, 0), "optimeDate" : ISODate("1970-01-01T00:00:00Z"), "lastHeartbeat" : ISODate("2015-09-28T07:12:10.008Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:10:51.422Z"), "pingMs" : 0, "lastHeartbeatMessage" : "Failed attempt to connect to 172.30.2.203:27017; couldn't connect to server 172.30.2.203:27017 (172.30.2.203), connection attempt failed", "configVersion" : -1 }, { "_id" : 2, "name" : "172.30.2.202:27017", "health" : 0, "state" : 8, "stateStr" : "(not reachable/healthy)", "uptime" : 0, "optime" : Timestamp(0, 0), "optimeDate" : ISODate("1970-01-01T00:00:00Z"), "lastHeartbeat" : ISODate("2015-09-28T07:12:09.477Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:10:41.112Z"), "pingMs" : 0, "lastHeartbeatMessage" : "Failed attempt to connect to 172.30.2.202:27017; couldn't connect to server 172.30.2.202:27017 (172.30.2.202), connection attempt failed", "configVersion" : -1 } ], "ok" : 1 }
只剩下一個secondary節點,集羣變得不可用了
4)解決方案
重新配置:
rs0:SECONDARY> cfg={_id:"rs0", members:[ {_id:0,host:"172.30.2.201:27017"}] } { "_id" : "rs0", "members" : [ { "_id" : 0, "host" : "172.30.2.201:27017" } ] } rs0:SECONDARY> rs.reconfig(cfg, {force:true}); { "ok" : 1 } rs0:PRIMARY> rs.status(); { "set" : "rs0", "date" : ISODate("2015-09-28T07:14:09.350Z"), "myState" : 1, "members" : [ { "_id" : 0, "name" : "172.30.2.201:27017", "health" : 1, "state" : 1, "stateStr" : "PRIMARY", "uptime" : 719, "optime" : Timestamp(1443423600, 1), "optimeDate" : ISODate("2015-09-28T07:00:00Z"), "electionTime" : Timestamp(1443424428, 1), "electionDate" : ISODate("2015-09-28T07:13:48Z"), "configVersion" : 71840, "self" : true } ], "ok" : 1 }
此時就變成了單primary節點,可以提供讀寫服務,然後再製作secondary節點
1)故障前狀態
rs0:PRIMARY> rs.status(); { "set" : "rs0", "date" : ISODate("2015-09-28T07:16:06.571Z"), "myState" : 1, "members" : [ { "_id" : 0, "name" : "172.30.2.201:27017", "health" : 1, "state" : 1, "stateStr" : "PRIMARY", "uptime" : 836, "optime" : Timestamp(1443424538, 1), "optimeDate" : ISODate("2015-09-28T07:15:38Z"), "electionTime" : Timestamp(1443424534, 1), "electionDate" : ISODate("2015-09-28T07:15:34Z"), "configVersion" : 71842, "self" : true }, { "_id" : 1, "name" : "172.30.2.202:27017", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 31, "optime" : Timestamp(1443424538, 1), "optimeDate" : ISODate("2015-09-28T07:15:38Z"), "lastHeartbeat" : ISODate("2015-09-28T07:16:06.230Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:16:06.089Z"), "pingMs" : 0, "configVersion" : 71842 }, { "_id" : 2, "name" : "172.30.2.203:27017", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 26, "optime" : Timestamp(1443424538, 1), "optimeDate" : ISODate("2015-09-28T07:15:38Z"), "lastHeartbeat" : ISODate("2015-09-28T07:16:06.229Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:16:06.233Z"), "pingMs" : 0, "configVersion" : 71842 } ], "ok" : 1 }
2)停掉兩個secondary節點
在兩個secondary節點分別執行:
$ mongo MongoDB shell version: 3.0.6 connecting to: test rs0:SECONDARY> use admin; switched to db admin rs0:SECONDARY> db.shutdownServer(); 2015-09-28T15:18:11.114+0800 I NETWORK DBClientCursor::init call() failed server should be down... 2015-09-28T15:18:11.117+0800 I NETWORK trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed 2015-09-28T15:18:11.118+0800 W NETWORK Failed to connect to 127.0.0.1:27017, reason: errno:111 Connection refused 2015-09-28T15:18:11.118+0800 I NETWORK reconnect 127.0.0.1:27017 (127.0.0.1) failed failed couldn't connect to server 127.0.0.1:27017 (127.0.0.1), connection attempt failed 2015-09-28T15:18:11.121+0800 I NETWORK trying reconnect to 127.0.0.1:27017 (127.0.0.1) failed 2015-09-28T15:18:11.121+0800 W NETWORK Failed to connect to 127.0.0.1:27017, reason: errno:111 Connection refused 2015-09-28T15:18:11.121+0800 I NETWORK reconnect 127.0.0.1:27017 (127.0.0.1) failed failed couldn't connect to server 127.0.0.1:27017 (127.0.0.1), connection attempt failed > bye
3)查看集羣狀態
rs0:SECONDARY> rs.status(); { "set" : "rs0", "date" : ISODate("2015-09-28T07:19:09.196Z"), "myState" : 2, "members" : [ { "_id" : 0, "name" : "172.30.2.201:27017", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 1019, "optime" : Timestamp(1443424538, 1), "optimeDate" : ISODate("2015-09-28T07:15:38Z"), "configVersion" : 71842, "self" : true }, { "_id" : 1, "name" : "172.30.2.202:27017", "health" : 0, "state" : 8, "stateStr" : "(not reachable/healthy)", "uptime" : 0, "optime" : Timestamp(0, 0), "optimeDate" : ISODate("1970-01-01T00:00:00Z"), "lastHeartbeat" : ISODate("2015-09-28T07:19:08.371Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:18:10.147Z"), "pingMs" : 0, "lastHeartbeatMessage" : "Failed attempt to connect to 172.30.2.202:27017; couldn't connect to server 172.30.2.202:27017 (172.30.2.202), connection attempt failed", "configVersion" : -1 }, { "_id" : 2, "name" : "172.30.2.203:27017", "health" : 0, "state" : 8, "stateStr" : "(not reachable/healthy)", "uptime" : 0, "optime" : Timestamp(0, 0), "optimeDate" : ISODate("1970-01-01T00:00:00Z"), "lastHeartbeat" : ISODate("2015-09-28T07:19:08.350Z"), "lastHeartbeatRecv" : ISODate("2015-09-28T07:18:34.298Z"), "pingMs" : 0, "lastHeartbeatMessage" : "Failed attempt to connect to 172.30.2.203:27017; couldn't connect to server 172.30.2.203:27017 (172.30.2.203), connection attempt failed", "configVersion" : -1 } ], "ok" : 1 }
可見剩下的primary節點由於無法聯繫到多數成員,自動降爲了secondary節點,集羣變得不可用了(無法接受寫入)
4)解決方案
rs0:SECONDARY> cfg={_id:"rs0", members:[ {_id:0,host:"172.30.2.201:27017"}] } { "_id" : "rs0", "members" : [ { "_id" : 0, "host" : "172.30.2.201:27017" } ] } rs0:SECONDARY> rs.reconfig(cfg, {force:true}); { "ok" : 1 } rs0:PRIMARY> rs.status(); { "set" : "rs0", "date" : ISODate("2015-09-28T07:20:08.099Z"), "myState" : 1, "members" : [ { "_id" : 0, "name" : "172.30.2.201:27017", "health" : 1, "state" : 1, "stateStr" : "PRIMARY", "uptime" : 1078, "optime" : Timestamp(1443424538, 1), "optimeDate" : ISODate("2015-09-28T07:15:38Z"), "electionTime" : Timestamp(1443424795, 1), "electionDate" : ISODate("2015-09-28T07:19:55Z"), "configVersion" : 127342, "self" : true } ], "ok" : 1 }
處理方法和上面的相同,也是強制將剩下的secondary節點配置爲單primary節點