$ cat /etc/redhat-release CentOS Linux release 7.0.1406 (Core) $ uname -a Linux zhaopin-2-201 3.10.0-123.el7.x86_64 #1 SMP Mon Jun 30 12:09:22 UTC 2014 x86_64 x86_64 x86_64 GNU/Linux $ mongo --port 30000 MongoDB shell version: 3.0.6 connecting to: 127.0.0.1:30000/test mongos> sh.status(); --- Sharding Status --- sharding version: { "_id" : 1, "minCompatibleVersion" : 5, "currentVersion" : 6, "clusterId" : ObjectId("561728b4030ea038bcb57fa0") } shards: { "_id" : "sh0", "host" : "sh0/172.30.2.201:27010" } { "_id" : "sh1", "host" : "sh1/172.30.2.201:27011" } balancer: Currently enabled: yes Currently running: no Failed balancer rounds in last 5 attempts: 0 Migration Results for the last 24 hours: No recent migrations databases: { "_id" : "admin", "partitioned" : false, "primary" : "config" } { "_id" : "mydb", "partitioned" : true, "primary" : "sh0" }
插入測試數據
$ mongo --port 30000 MongoDB shell version: 3.0.6 connecting to: 127.0.0.1:30000/test mongos> use mydb switched to db mydb mongos> sh.shardCollection("mydb.test",{"Uid":1}); { "collectionsharded" : "mydb.test", "ok" : 1 } mongos> for (var i = 1; i <= 100000; i++) db.test.save({"Uid":i,"Name":"zhanjindong","Age":13,"Date":new Date()}); WriteResult({ "nInserted" : 1 }) mongos> use config; switched to db config mongos> db.chunks.find().limit(3); { "_id" : "mydb.test-Uid_MinKey", "lastmod" : Timestamp(10, 1), "lastmodEpoch" : ObjectId("56178dad030ea038bcb592c2"), "ns" : "mydb.test", "min" : { "Uid" : { "$minKey" : 1 } }, "max" : { "Uid" : 2 }, "shard" : "sh0" } { "_id" : "mydb.test-Uid_2.0", "lastmod" : Timestamp(1, 2), "lastmodEpoch" : ObjectId("56178dad030ea038bcb592c2"), "ns" : "mydb.test", "min" : { "Uid" : 2 }, "max" : { "Uid" : 10 }, "shard" : "sh0" } { "_id" : "mydb.test-Uid_10.0", "lastmod" : Timestamp(11, 1), "lastmodEpoch" : ObjectId("56178dad030ea038bcb592c2"), "ns" : "mydb.test", "min" : { "Uid" : 10 }, "max" : { "Uid" : 4691 }, "shard" : "sh1" }
參考《MongoDB副本集故障測試和解決方案》,Query Routers會自動更新副本集的配置,只需要在副本集裏面解決故障
分片故障是指分片中的節點全部不可用了(如果分片中配置了副本集的話,基本是不會出現這種狀況的)
1)將sh0停機
$ ps aux | grep mongo wenhang+ 1493 0.0 0.0 112640 976 pts/0 S+ 10:42 0:00 grep --color=auto mongo root 41492 0.5 0.1 1045696 118336 ? Sl Oct09 8:12 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh0/mongodb.conf root 41509 0.5 0.1 1028196 108720 ? Sl Oct09 7:30 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh1/mongodb.conf root 41855 0.6 0.0 566248 55208 ? Sl Oct09 9:14 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf0/config.conf root 41870 0.5 0.0 565220 56180 ? Sl Oct09 8:29 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf1/config.conf root 42170 0.5 0.0 565216 56872 ? Sl Oct09 8:25 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf2/config.conf root 42233 0.3 0.0 260180 13200 ? Sl Oct09 5:40 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms0/mongos.conf root 42286 0.3 0.0 259148 11564 ? Sl Oct09 4:31 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms1/mongos.conf $ ps aux | grep mongo | grep -v grep root 41492 0.5 0.1 1045696 118336 ? Sl Oct09 8:12 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh0/mongodb.conf root 41509 0.5 0.1 1028196 108720 ? Sl Oct09 7:31 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh1/mongodb.conf root 41855 0.6 0.0 566248 54988 ? Sl Oct09 9:14 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf0/config.conf root 41870 0.5 0.1 565220 72392 ? Sl Oct09 8:29 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf1/config.conf root 42170 0.5 0.0 565216 56776 ? Sl Oct09 8:25 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf2/config.conf root 42233 0.3 0.0 260180 13200 ? Sl Oct09 5:40 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms0/mongos.conf root 42286 0.3 0.0 259148 11564 ? Sl Oct09 4:31 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms1/mongos.conf $ sudo kill 41492 $ ps aux | grep mongo | grep -v grep root 41509 0.5 0.1 1028196 108736 ? Sl Oct09 7:31 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh1/mongodb.conf root 41855 0.6 0.1 566248 87832 ? 
Sl Oct09 9:14 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf0/config.conf root 41870 0.5 0.1 565220 89116 ? Sl Oct09 8:29 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf1/config.conf root 42170 0.5 0.0 565216 56852 ? Sl Oct09 8:26 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf2/config.conf root 42233 0.3 0.0 260180 13200 ? Sl Oct09 5:40 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms0/mongos.conf root 42286 0.3 0.0 259148 11564 ? Sl Oct09 4:31 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms1/mongos.conf
2)查看集羣狀態
$ mongo --port 30000 MongoDB shell version: 3.0.6 connecting to: 127.0.0.1:30000/test mongos> sh.status(); --- Sharding Status --- sharding version: { "_id" : 1, "minCompatibleVersion" : 5, "currentVersion" : 6, "clusterId" : ObjectId("561728b4030ea038bcb57fa0") } shards: { "_id" : "sh0", "host" : "sh0/172.30.2.201:27010" } { "_id" : "sh1", "host" : "sh1/172.30.2.201:27011" } balancer: Currently enabled: yes Currently running: no Failed balancer rounds in last 5 attempts: 5 Last reported error: socket exception [CONNECT_ERROR] for sh0/172.30.2.201:27010 Time of Reported error: Sat Oct 10 2015 10:44:56 GMT+0800 (CST) Migration Results for the last 24 hours: 19 : Success 2 : Failed with error 'migration already in progress', from sh0 to sh1 databases: { "_id" : "admin", "partitioned" : false, "primary" : "config" } { "_id" : "mydb", "partitioned" : true, "primary" : "sh0" } mydb.test shard key: { "Uid" : 1 } chunks: sh0 11 sh1 10 too many chunks to print, use verbose if you want to force print
3)測試可用性
mongos> db.test.find({Uid:12}); { "_id" : ObjectId("56178e473af16d338338d3bf"), "Uid" : 12, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-09T09:52:07.205Z") } mongos> db.test.find({Uid:8}); Error: error: { "$err" : "socket exception [CONNECT_ERROR] for sh0/172.30.2.201:27010", "code" : 11002, "shard" : "sh0" } mongos> db.test.insert({"Uid":12,"Name":"zhanjindong","Age":13,"Date":new Date()}); WriteResult({ "nInserted" : 1 }) mongos> db.test.insert({"Uid":8,"Name":"zhanjindong","Age":13,"Date":new Date()}); WriteResult({ "nInserted" : 0, "writeError" : { "code" : 7, "errmsg" : "could not contact primary for replica set sh0" } }) mongos> bye
涉及到sh0的操作都會報錯,其他分片的可以正常執行
重新啓動sh0
$ sudo /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh0/mongodb.conf about to fork child process, waiting until server is ready for connections. forked process: 2534 child process started successfully, parent exiting $ ps aux | grep mongo | grep -v grep root 2534 1.1 0.0 971800 64060 ? Sl 10:53 0:00 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh0/mongodb.conf root 41509 0.5 0.1 1028196 108804 ? Sl Oct09 7:33 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh1/mongodb.conf root 41855 0.6 0.0 566248 55572 ? Sl Oct09 9:17 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf0/config.conf root 41870 0.5 0.0 565220 56480 ? Sl Oct09 8:33 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf1/config.conf root 42170 0.5 0.0 565216 57240 ? Sl Oct09 8:29 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf2/config.conf root 42233 0.3 0.0 260180 13388 ? Sl Oct09 5:42 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms0/mongos.conf root 42286 0.3 0.0 259148 11572 ? Sl Oct09 4:32 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms1/mongos.conf $ mongo --port 30000 MongoDB shell version: 3.0.6 connecting to: 127.0.0.1:30000/test mongos> use mydb; switched to db mydb mongos> db.test.find({Uid:8}); { "_id" : ObjectId("56178e453af16d338338d3bb"), "Uid" : 8, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-09T09:52:05.539Z") } mongos> db.test.find({Uid:12}); { "_id" : ObjectId("56178e473af16d338338d3bf"), "Uid" : 12, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-09T09:52:07.205Z") } { "_id" : ObjectId("56187d6743c2e9badb0ca53b"), "Uid" : 12, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-10T02:52:23.497Z") } mongos> db.test.insert({"Uid":8,"Name":"zhanjindong","Age":13,"Date":new Date()}); WriteResult({ "nInserted" : 1 })
兩個分片都可讀寫
1)停掉cf0
$ ps axu | grep mongo | grep -v grep root 2534 0.4 0.1 982052 67608 ? Sl 10:53 0:03 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh0/mongodb.conf root 41509 0.5 0.1 1028196 108808 ? Sl Oct09 7:37 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh1/mongodb.conf root 41855 0.6 0.0 566248 55444 ? Sl Oct09 9:23 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf0/config.conf root 41870 0.5 0.0 565220 56456 ? Sl Oct09 8:38 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf1/config.conf root 42170 0.5 0.0 565216 56820 ? Sl Oct09 8:34 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf2/config.conf root 42233 0.3 0.0 261204 13408 ? Sl Oct09 5:45 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms0/mongos.conf root 42286 0.3 0.0 259148 11596 ? Sl Oct09 4:35 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms1/mongos.conf [wenhang.pan@zhaopin-2-201 ~]$ sudo kill 41855 [wenhang.pan@zhaopin-2-201 ~]$ ps axu | grep mongo | grep -v grep root 2534 0.4 0.1 982052 67636 ? Sl 10:53 0:03 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh0/mongodb.conf root 41509 0.5 0.1 1028196 108808 ? Sl Oct09 7:37 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh1/mongodb.conf root 41870 0.5 0.0 565220 56324 ? Sl Oct09 8:38 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf1/config.conf root 42170 0.5 0.0 565216 56600 ? Sl Oct09 8:34 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf2/config.conf root 42233 0.3 0.0 261204 13408 ? Sl Oct09 5:45 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms0/mongos.conf root 42286 0.3 0.0 259148 11596 ? Sl Oct09 4:35 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms1/mongos.conf
2)查看狀態
$ mongo --port 30000 MongoDB shell version: 3.0.6 connecting to: 127.0.0.1:30000/test mongos> sh.status(); --- Sharding Status --- sharding version: { "_id" : 1, "minCompatibleVersion" : 5, "currentVersion" : 6, "clusterId" : ObjectId("561728b4030ea038bcb57fa0") } shards: { "_id" : "sh0", "host" : "sh0/172.30.2.201:27010" } { "_id" : "sh1", "host" : "sh1/172.30.2.201:27011" } balancer: Currently enabled: yes Currently running: yes Balancer lock taken at Sat Oct 10 2015 11:08:14 GMT+0800 (CST) by zhaopin-2-201:30000:1444358323:1804289383:Balancer:1681692777 Failed balancer rounds in last 5 attempts: 0 Migration Results for the last 24 hours: 19 : Success 2 : Failed with error 'migration already in progress', from sh0 to sh1 databases: { "_id" : "admin", "partitioned" : false, "primary" : "config" } { "_id" : "mydb", "partitioned" : true, "primary" : "sh0" } mydb.test shard key: { "Uid" : 1 } chunks: sh0 11 sh1 10 too many chunks to print, use verbose if you want to force print
3)測試可用性
mongos> use mydb; switched to db mydb mongos> db.test.find({Uid:12}); { "_id" : ObjectId("56178e473af16d338338d3bf"), "Uid" : 12, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-09T09:52:07.205Z") } { "_id" : ObjectId("56187d6743c2e9badb0ca53b"), "Uid" : 12, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-10T02:52:23.497Z") } mongos> db.test.find({Uid:8}); { "_id" : ObjectId("56178e453af16d338338d3bb"), "Uid" : 8, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-09T09:52:05.539Z") } { "_id" : ObjectId("56187e117ec37f9bb7244496"), "Uid" : 8, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-10T02:55:13.529Z") } mongos> db.test.insert({"Uid":8,"Name":"zhanjindong","Age":13,"Date":new Date()}); WriteResult({ "nInserted" : 1 }) mongos> db.test.insert({"Uid":12,"Name":"zhanjindong","Age":13,"Date":new Date()}); WriteResult({ "nInserted" : 1 }) mongos> bye
集羣可正常使用
1)停掉cf1
$ ps axu | grep mongo | grep -v grep root 2534 0.4 0.1 982052 68128 ? Sl 10:53 0:05 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh0/mongodb.conf root 41509 0.5 0.1 1028196 108892 ? Sl Oct09 7:39 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh1/mongodb.conf root 41870 0.5 0.1 565220 92328 ? Sl Oct09 8:40 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf1/config.conf root 42170 0.5 0.1 565216 90392 ? Sl Oct09 8:36 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf2/config.conf root 42233 0.3 0.0 261204 13508 ? Sl Oct09 5:46 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms0/mongos.conf root 42286 0.3 0.0 260172 11716 ? Sl Oct09 4:37 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms1/mongos.conf $ sudo kill 41870 $ ps axu | grep mongo | grep -v grep root 2534 0.4 0.1 982052 68128 ? Sl 10:53 0:05 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh0/mongodb.conf root 41509 0.5 0.1 1028196 108892 ? Sl Oct09 7:39 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh1/mongodb.conf root 42170 0.5 0.1 565216 90408 ? Sl Oct09 8:36 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf2/config.conf root 42233 0.3 0.0 261204 13508 ? Sl Oct09 5:46 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms0/mongos.conf root 42286 0.3 0.0 260172 11716 ? Sl Oct09 4:37 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms1/mongos.conf
2)查看集羣狀態
$ mongo --port 30000 MongoDB shell version: 3.0.6 connecting to: 127.0.0.1:30000/test mongos> sh.status(); --- Sharding Status --- sharding version: { "_id" : 1, "minCompatibleVersion" : 5, "currentVersion" : 6, "clusterId" : ObjectId("561728b4030ea038bcb57fa0") } shards: { "_id" : "sh0", "host" : "sh0/172.30.2.201:27010" } { "_id" : "sh1", "host" : "sh1/172.30.2.201:27011" } balancer: Currently enabled: yes Currently running: yes Balancer lock taken at Sat Oct 10 2015 11:08:14 GMT+0800 (CST) by zhaopin-2-201:30000:1444358323:1804289383:Balancer:1681692777 Failed balancer rounds in last 5 attempts: 0 Migration Results for the last 24 hours: 19 : Success 2 : Failed with error 'migration already in progress', from sh0 to sh1 databases: { "_id" : "admin", "partitioned" : false, "primary" : "config" } { "_id" : "mydb", "partitioned" : true, "primary" : "sh0" } mydb.test shard key: { "Uid" : 1 } chunks: sh0 11 sh1 10 too many chunks to print, use verbose if you want to force print
3)測試可用性
mongos> use mydb; switched to db mydb mongos> db.test.find({Uid:8}); { "_id" : ObjectId("56178e453af16d338338d3bb"), "Uid" : 8, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-09T09:52:05.539Z") } { "_id" : ObjectId("56187e117ec37f9bb7244496"), "Uid" : 8, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-10T02:55:13.529Z") } { "_id" : ObjectId("5618824d7bd66d65a90fb01c"), "Uid" : 8, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-10T03:13:17.564Z") } mongos> db.test.find({Uid:12}); { "_id" : ObjectId("56178e473af16d338338d3bf"), "Uid" : 12, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-09T09:52:07.205Z") } { "_id" : ObjectId("56187d6743c2e9badb0ca53b"), "Uid" : 12, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-10T02:52:23.497Z") } { "_id" : ObjectId("561882527bd66d65a90fb01d"), "Uid" : 12, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-10T03:13:22.594Z") } mongos> db.test.insert({"Uid":12,"Name":"zhanjindong","Age":13,"Date":new Date()}); WriteResult({ "nInserted" : 1 }) mongos> db.test.insert({"Uid":8,"Name":"zhanjindong","Age":13,"Date":new Date()}); WriteResult({ "nInserted" : 1 }) mongos> bye
集羣可以正常使用
4)重新啓動cf0和cf1
$ ps axu | grep mongo | grep -v grep root 2534 0.4 0.1 982052 68228 ? Sl 10:53 0:05 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh0/mongodb.conf root 41509 0.5 0.1 1028196 107976 ? Sl Oct09 7:39 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh1/mongodb.conf root 42170 0.5 0.1 565216 93140 ? Sl Oct09 8:37 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf2/config.conf root 42233 0.3 0.0 261204 13540 ? Sl Oct09 5:47 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms0/mongos.conf root 42286 0.3 0.0 260172 11752 ? Sl Oct09 4:37 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms1/mongos.conf $ sudo /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf0/config.conf about to fork child process, waiting until server is ready for connections. forked process: 7309 child process started successfully, parent exiting $ sudo /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf1/config.conf about to fork child process, waiting until server is ready for connections. forked process: 7355 child process started successfully, parent exiting $ ps axu | grep mongo | grep -v grep root 2534 0.4 0.1 982052 68228 ? Sl 10:53 0:06 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh0/mongodb.conf root 7309 0.6 0.0 545696 37504 ? Sl 11:17 0:00 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf0/config.conf root 7355 0.5 0.0 542616 33748 ? Sl 11:17 0:00 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf1/config.conf root 41509 0.5 0.1 1028196 108188 ? Sl Oct09 7:40 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh1/mongodb.conf root 42170 0.5 0.0 565216 57992 ? Sl Oct09 8:37 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf2/config.conf root 42233 0.3 0.0 261204 13540 ? Sl Oct09 5:47 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms0/mongos.conf root 42286 0.3 0.0 260172 11768 ? Sl Oct09 4:38 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms1/mongos.conf
5)解決方案
參考:
http://docs.mongodb.org/manual/tutorial/replace-config-server/
http://docs.mongodb.org/manual/tutorial/migrate-config-servers-with-different-hostnames/
1)將ms0停機
$ ps axu | grep mongo | grep -v grep root 2534 0.4 0.1 982052 68228 ? Sl 10:53 0:06 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh0/mongodb.conf root 7309 0.6 0.0 545696 37504 ? Sl 11:17 0:00 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf0/config.conf root 7355 0.5 0.0 542616 33748 ? Sl 11:17 0:00 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf1/config.conf root 41509 0.5 0.1 1028196 108188 ? Sl Oct09 7:40 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh1/mongodb.conf root 42170 0.5 0.0 565216 57992 ? Sl Oct09 8:37 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf2/config.conf root 42233 0.3 0.0 261204 13540 ? Sl Oct09 5:47 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms0/mongos.conf root 42286 0.3 0.0 260172 11768 ? Sl Oct09 4:38 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms1/mongos.conf $ sudo kill 42233 $ ps axu | grep mongo | grep -v grep root 2534 0.4 0.1 982052 68408 ? Sl 10:53 0:06 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh0/mongodb.conf root 7309 0.6 0.0 547752 39096 ? Sl 11:17 0:01 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf0/config.conf root 7355 0.6 0.0 544672 34256 ? Sl 11:17 0:01 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf1/config.conf root 41509 0.5 0.1 1028196 108164 ? Sl Oct09 7:40 /opt/mongodb/bin/mongod --config /data/mongodb/conf/sh1/mongodb.conf root 42170 0.5 0.0 565216 57796 ? Sl Oct09 8:38 /opt/mongodb/bin/mongod --config /data/mongodb/conf/cf2/config.conf root 42286 0.3 0.0 260172 11768 ? Sl Oct09 4:38 /opt/mongodb/bin/mongos --config /data/mongodb/conf/ms1/mongos.conf
2)查看集羣狀態
$ mongo --port 30000 MongoDB shell version: 3.0.6 connecting to: 127.0.0.1:30000/test 2015-10-10T11:21:04.681+0800 W NETWORK Failed to connect to 127.0.0.1:30000, reason: errno:111 Connection refused 2015-10-10T11:21:04.683+0800 E QUERY Error: couldn't connect to server 127.0.0.1:30000 (127.0.0.1), connection attempt failed at connect (src/mongo/shell/mongo.js:181:14) at (connect):1:6 at src/mongo/shell/mongo.js:181 exception: connect failed $ mongo --port 30001 MongoDB shell version: 3.0.6 connecting to: 127.0.0.1:30001/test mongos> sh.status(); --- Sharding Status --- sharding version: { "_id" : 1, "minCompatibleVersion" : 5, "currentVersion" : 6, "clusterId" : ObjectId("561728b4030ea038bcb57fa0") } shards: { "_id" : "sh0", "host" : "sh0/172.30.2.201:27010" } { "_id" : "sh1", "host" : "sh1/172.30.2.201:27011" } balancer: Currently enabled: yes Currently running: no Failed balancer rounds in last 5 attempts: 0 Migration Results for the last 24 hours: 19 : Success 2 : Failed with error 'migration already in progress', from sh0 to sh1 databases: { "_id" : "admin", "partitioned" : false, "primary" : "config" } { "_id" : "mydb", "partitioned" : true, "primary" : "sh0" } mydb.test shard key: { "Uid" : 1 } chunks: sh0 11 sh1 10 too many chunks to print, use verbose if you want to force print mongos> bye
可見30000端口的mongos已經宕機,不能提供服務了,但30001端口的mongos可以使用
3)測試可用性
mongos> use mydb; switched to db mydb mongos> db.test.find({Uid:12}); { "_id" : ObjectId("56178e473af16d338338d3bf"), "Uid" : 12, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-09T09:52:07.205Z") } { "_id" : ObjectId("56187d6743c2e9badb0ca53b"), "Uid" : 12, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-10T02:52:23.497Z") } { "_id" : ObjectId("561882527bd66d65a90fb01d"), "Uid" : 12, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-10T03:13:22.594Z") } { "_id" : ObjectId("561882f9ff80495f50bec62f"), "Uid" : 12, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-10T03:16:09.930Z") } mongos> db.test.find({Uid:8}); { "_id" : ObjectId("56178e453af16d338338d3bb"), "Uid" : 8, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-09T09:52:05.539Z") } { "_id" : ObjectId("56187e117ec37f9bb7244496"), "Uid" : 8, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-10T02:55:13.529Z") } { "_id" : ObjectId("5618824d7bd66d65a90fb01c"), "Uid" : 8, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-10T03:13:17.564Z") } { "_id" : ObjectId("561882fdff80495f50bec630"), "Uid" : 8, "Name" : "zhanjindong", "Age" : 13, "Date" : ISODate("2015-10-10T03:16:13.788Z") } mongos> db.test.insert({"Uid":8,"Name":"zhanjindong","Age":13,"Date":new Date()}); WriteResult({ "nInserted" : 1 }) mongos> db.test.insert({"Uid":12,"Name":"zhanjindong","Age":13,"Date":new Date()}); WriteResult({ "nInserted" : 1 })
可見集羣可以正常使用
4)解決方案
可以在代碼中配置連接多臺mongos實現failover,Java代碼如下:
List<ServerAddress> mongoHostList = new ArrayList<ServerAddress>(); mongoHostList.add(new ServerAddress("ip",port)); mongoHostList.add(new ServerAddress("ip",port)); mongoHostList.add(new ServerAddress("ip",port)); MongoClient mg = new MongoClient(mongoHostList);
1)分片節點故障,可以按照《MongoDB副本集故障測試和解決方案》中的方法解決,仍可以實現可用性
2)整個分片故障,如果可以重新啓動的話,仍然可以使用,否則只能從備份中恢復,必然會丟數據
只要有可用的配置服務器,整個集羣都可以正常讀寫(但在部分配置服務器不可用期間,集羣元數據會變爲只讀,chunk的拆分和遷移會暫停,直到全部配置服務器恢復)
只要有可用的mongos服務器,整個集羣就可以正常使用;要讓客戶端也實現failover,需要將代碼寫成連接多臺mongos的方式