MongoDB集羣容災方案步驟

MongoDB複製集
優/特色
支持大數據量、高擴展性、高性能、靈活數據模型、高可用性。
同步機制
數據複製的目的是使數據獲得最大的可用性,避免單點故障引發整站不能訪問的狀況發生。MongoDB的副本集在同一時刻只有一臺服務器可以寫入,副本集的主從複製是一個異步同步的過程:secondary端從primary端獲取日誌,然後在自己身上完全按順序執行日誌所記錄的各種操作(該日誌不記錄查詢操作)。這個日誌就是local數據庫中的oplog.rs表,默認在64位機器上這個表比較大,佔可用磁盤空間的5%;oplog.rs的大小可以在啓動參數中設定:--oplogSize 1000,單位是MB。

鑑於雙機房容災意外狀況可能發生,本方案選擇故障時人工介入轉移或恢復,不加入仲裁節點。其中A機房爲主機房,有1個Primary加2個Secondary節點;B機房作爲災備機房,有2個Secondary節點。最壞狀況下當主機房掛掉時,通過權重來啓動B機房某節點爲Primary,繼續提供服務。
環境規劃
A機房 角色 B機房 角色
192.168.70.214 Primary 192.168.71.214 Secondary 3 複製集節點 3
192.168.70.215 Secondary 1 複製集節點 1 192.168.71.215 Secondary 4 複製集節點 4
192.168.70.216 Secondary 2 複製集節點 2

架構示意圖
其中下面是主機房斷電斷網時的故障轉移示意圖。

安裝配置
這裏全部節點目錄建立一致,方便管理維護,從配置文件來判斷各節點的角色。
建立目錄
--爲MongoDB建立軟件、數據、日誌目錄,默認狀況下它將數據存儲在/mgdata
[root@test153 /]# mkdir -p /mgdb/mongodbtest/replset/data
[root@test153 /]# mkdir /mgdata
[root@test153 /]# mkdir /mglog

上傳介質
sftp> cd /mgdb
sftp> put mongodb-linux-x86_64-2.2.3.tgz.tar

解壓
[root@test153 /]# cd /mgdb
$ tar -xvf mongodb-linux-x86_64-2.2.3.tgz.tar
[root@test153 mgdb]# mv mongodb-linux-x86_64-2.2.3 mongodb

服務啓動
每一個節點都要執行
cd /root/mongodb/bin

192.168.70.214
/root/mongodb/bin/mongod --replSet repset --port 27017 --dbpath /root/data27011 --oplogSize 2048 --logpath /root/log27011/log27011.log &
./mongo 192.168.70.214:27017

192.168.70.215
/root/mongodb/bin/mongod --replSet repset --port 27017 --dbpath /root/data27012 --oplogSize 2048 --logpath /root/log27012/log27012.log &
./mongo 192.168.70.215:27017

192.168.70.216
/root/mongodb/bin/mongod --replSet repset --port 27017 --dbpath /root/data27013 --oplogSize 2048 --logpath /root/log27013/log27013.log &
./mongo 192.168.70.216:27017

192.168.71.214
/root/mongodb/bin/mongod --replSet repset --port 27017 --dbpath /root/data27017 --oplogSize 2048 --logpath /root/log27017/log27017.log &
./mongo 192.168.71.214:27017

192.168.71.215
/root/mongodb/bin/mongod --replSet repset --port 27017 --dbpath /root/data27018 --oplogSize 2048 --logpath /root/log27018/log27018.log &
./mongo 192.168.71.215:27017

分別通過 tail -f /root/log27011/log27011.log 來觀察分析各節點運行狀況
複製集配置
在任何一臺mongodb實例上登陸,進入admin庫,執行config命令,配置相應權重
[root@localhost bin]# pwd
/root/mongodb/bin
[root@localhost bin]# ./mongo 192.168.70.214:27017
MongoDB shell version: 2.2.3
connecting to: test
Welcome to the MongoDB shell.
For interactive help, type "help".
For more comprehensive documentation, see
http://docs.mongodb.org/
Questions? Try the support group
http://groups.google.com/group/mongodb-user
> use admin
switched to db admin
> config = { _id:"repset", members:[
... {_id:0,host:"192.168.70.214:27017",priority:10},
... {_id:1,host:"192.168.70.215:27017",priority:7},
... {_id:2,host:"192.168.70.216:27017",priority:6},
... {_id:3,host:"192.168.71.214:27017",priority:9},
... {_id:4,host:"192.168.71.215:27017",priority:8}]
... }
{
"_id" : "repset",
"members" : [
{
"_id" : 0,
"host" : "192.168.70.214:27017",
"priority" : 10
},
{
"_id" : 1,
"host" : "192.168.70.215:27017",
"priority" : 7
},
{
"_id" : 2,
"host" : "192.168.70.216:27017",
"priority" : 6
},
{
"_id" : 3,
"host" : "192.168.71.214:27017",
"priority" : 9
},
{
"_id" : 4,
"host" : "192.168.71.215:27017",
"priority" : 8
}
]
}
--查看
repset:PRIMARY> rs.conf()
{
"_id" : "repset",
"version" : 38349,
"members" : [
{
"_id" : 4,
"host" : "192.168.71.214:27017",
"priority" : 9
},
{
"_id" : 5,
"host" : "192.168.71.215:27017",
"priority" : 8
},
{
"_id" : 6,
"host" : "192.168.70.214:27017",
"priority" : 10
},
{
"_id" : 7,
"host" : "192.168.70.215:27017",
"priority" : 7
},
{
"_id" : 8,
"host" : "192.168.70.216:27017",
"priority" : 6
}
]
}
初始化副本集配置
> rs.initiate(config);
{
"info" : "Config now saved locally. Should come online in about a minute.",
"ok" : 1
}
初始須要一點時間同步
查看集羣節點狀態
repset:PRIMARY> rs.status()
{
"set" : "repset",
"date" : ISODate("2018-11-09T07:55:04Z"),
"myState" : 1,
"members" : [
{
"_id" : 4,
"name" : "192.168.71.214:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 1104,
"optime" : Timestamp(1541749003000, 1),
"optimeDate" : ISODate("2018-11-09T07:36:43Z"),
"lastHeartbeat" : ISODate("2018-11-09T07:55:03Z"),
"pingMs" : 0
},
{
"_id" : 5,
"name" : "192.168.71.215:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 1104,
"optime" : Timestamp(1541749003000, 1),
"optimeDate" : ISODate("2018-11-09T07:36:43Z"),
"lastHeartbeat" : ISODate("2018-11-09T07:55:03Z"),
"pingMs" : 0
},
{
"_id" : 6,
"name" : "192.168.70.214:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 1680,
"optime" : Timestamp(1541749003000, 1),
"optimeDate" : ISODate("2018-11-09T07:36:43Z"),
"self" : true
},
{
"_id" : 7,
"name" : "192.168.70.215:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 1104,
"optime" : Timestamp(1541749003000, 1),
"optimeDate" : ISODate("2018-11-09T07:36:43Z"),
"lastHeartbeat" : ISODate("2018-11-09T07:55:03Z"),
"pingMs" : 0
},
{
"_id" : 8,
"name" : "192.168.70.216:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 1104,
"optime" : Timestamp(1541749003000, 1),
"optimeDate" : ISODate("2018-11-09T07:36:43Z"),
"lastHeartbeat" : ISODate("2018-11-09T07:55:03Z"),
"pingMs" : 0
}
],
"ok" : 1
}
repset:PRIMARY>

查看後臺日誌
[root@oracle_master ~]# tail -f /mgdata/mongodb/log27017/mongod.log
驗證複製集數據一致性
先進去主庫primary的mongodb上,錄入數據

repset:PRIMARY> use dinpay
switched to db dinpay
repset:PRIMARY> db.dinpay.insert({"test1108":"xiawu1"})
repset:PRIMARY> db.getMongo().setSlaveOk();

去另外一個備庫上驗證數據
repset:SECONDARY> db.dinpay.find()
{ "_id" : ObjectId("5bd676e97e238f7b0dddfb0d"), "MongoDB TEST" : "dinpay" }
{ "_id" : ObjectId("5bd823b65b237ec32e664db2"), "mdbtest" : "zgy20181030" }
{ "_id" : ObjectId("5be53e2c60074628c8509830"), "test1108" : "xiawu1" }

斷電斷網模擬
斷電:直接kill mongod進程
斷網:開啓某一機房的防火牆限制機房間通信
B機房斷電斷網
192.168.71.214、192.168.71.215斷電斷網後各節點狀態
repset:PRIMARY> rs.status()
{
"set" : "repset",
"date" : ISODate("2018-11-09T08:02:36Z"),
"myState" : 1,
"members" : [
{
"_id" : 4,
"name" : "192.168.71.214:27017",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : Timestamp(1541750316000, 1),
"optimeDate" : ISODate("2018-11-09T07:58:36Z"),
"lastHeartbeat" : ISODate("2018-11-09T08:01:59Z"),
"pingMs" : 0,
"errmsg" : "socket exception [CONNECT_ERROR] for 192.168.71.214:27017"
},
{
"_id" : 5,
"name" : "192.168.71.215:27017",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : Timestamp(1541750316000, 1),
"optimeDate" : ISODate("2018-11-09T07:58:36Z"),
"lastHeartbeat" : ISODate("2018-11-09T08:01:57Z"),
"pingMs" : 0,
"errmsg" : "socket exception [CONNECT_ERROR] for 192.168.71.215:27017"
},
{
"_id" : 6,
"name" : "192.168.70.214:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 2132,
"optime" : Timestamp(1541750316000, 1),
"optimeDate" : ISODate("2018-11-09T07:58:36Z"),
"self" : true
},
{
"_id" : 7,
"name" : "192.168.70.215:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 1556,
"optime" : Timestamp(1541750316000, 1),
"optimeDate" : ISODate("2018-11-09T07:58:36Z"),
"lastHeartbeat" : ISODate("2018-11-09T08:02:35Z"),
"pingMs" : 0
},
{
"_id" : 8,
"name" : "192.168.70.216:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 1556,
"optime" : Timestamp(1541750316000, 1),
"optimeDate" : ISODate("2018-11-09T07:58:36Z"),
"lastHeartbeat" : ISODate("2018-11-09T08:02:35Z"),
"pingMs" : 0
}
],
"ok" : 1
}
repset:PRIMARY>

結論:A機房運行正常。
A機房斷電斷網
192.168.70.214(PRI)、192.168.70.215、192.168.70.216

 

登陸B機房任一臺節點強制reconfig恢復副本集,僅保留活動着的節點
repset:SECONDARY> use admin
switched to db admin
--查看現有配置,其中70網段3個節點都已死掉了
repset:SECONDARY> cfg=rs.conf()
{
"_id" : "repset",
"version" : 79,
"members" : [
{
"_id" : 4,
"host" : "192.168.71.214:27017",
"priority" : 10
},
{
"_id" : 5,
"host" : "192.168.71.215:27017",
"priority" : 9
},
{
"_id" : 7,
"host" : "192.168.70.214:27017",
"priority" : 11
},
{
"_id" : 8,
"host" : "192.168.70.215:27017",
"priority" : 6
},
{
"_id" : 13,
"host" : "192.168.70.216:27017",
"priority" : 5
}
]
}
--只保留活着的節點
repset:SECONDARY> cfg.members = [cfg.members[0], cfg.members[1]]
[
{
"_id" : 4,
"host" : "192.168.71.214:27017",
"priority" : 10
},
{
"_id" : 5,
"host" : "192.168.71.215:27017",
"priority" : 9
}
]
--強制啓動並新產生一個PRIMARY組成2節點的備份集
repset:SECONDARY> rs.reconfig(cfg, {force :true })
{ "ok" : 1 }
repset:SECONDARY> rs.status()
{
"set" : "repset",
"date" : ISODate("2018-11-09T03:45:29Z"),
"myState" : 1,
"members" : [
{
"_id" : 4,
"name" : "192.168.71.214:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 69133,
"optime" : Timestamp(1541663971000, 1),
"optimeDate" : ISODate("2018-11-08T07:59:31Z"),
"self" : true
},
{
"_id" : 5,
"name" : "192.168.71.215:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 8,
"optime" : Timestamp(1541663971000, 1),
"optimeDate" : ISODate("2018-11-08T07:59:31Z"),
"lastHeartbeat" : ISODate("2018-11-09T03:45:29Z"),
"pingMs" : 0
}
],
"ok" : 1
}
repset:PRIMARY>
--檢查數據,先前數據仍存在
repset:PRIMARY> use test
switched to db test
repset:PRIMARY> show collections
system.indexes
test
Testdb
--狀態查詢
repset:PRIMARY> rs.conf()
{
"_id" : "repset",
"version" : 38342,
"members" : [
{
"_id" : 4,
"host" : "192.168.71.214:27017",
"priority" : 10
},
{
"_id" : 5,
"host" : "192.168.71.215:27017",
"priority" : 9
}
]
}

結論:B機房強制啓動,變成新的備份集
儘管已產生新的備份集,但不能排除斷電斷網瞬間舊PRIMARY上尚有數據未同步至各從節點、從而導致數據丟失的可能性。

恢復初始狀態
Kill並重啓A機房各節點的mongod

192.168.70.214
/root/mongodb/bin/mongod --replSet repset --port 27017 --dbpath /root/data27011 --oplogSize 2048 --logpath /root/log27011/log27011.log &
192.168.70.215
/root/mongodb/bin/mongod --replSet repset --port 27017 --dbpath /root/data27012 --oplogSize 2048 --logpath /root/log27012/log27012.log &
192.168.70.216
/root/mongodb/bin/mongod --replSet repset --port 27017 --dbpath /root/data27013 --oplogSize 2048 --logpath /root/log27013/log27013.log &

將A機房各節點加入新備份集(B機房),並對A機房某節點提權升爲新的PRIMARY,恢復至斷電斷網前的狀態
repset:PRIMARY> use admin
switched to db admin
repset:PRIMARY> cfg=rs.conf()
repset:PRIMARY> cfg.members[XX].priority = 8
8
repset:PRIMARY> rs.reconfig(cfg)


////////////////////////////////////////////////////////////////////////////////
主機房掛了再恢復測試。。。比上面恢復詳細
--A機房(主機房) 192.168.70.214(主)/192.168.70.215/192.168.70.216斷電斷網,B機房強制重啓後成爲了新的集羣,現在將兩個機房重新恢復到初始狀態,首先要確認之前各節點都是什麼角色
--加節點
--設權重
repset:PRIMARY> use admin
switched to db admin
repset:PRIMARY> rs.add("192.168.70.214:27017")
{ "ok" : 1 }
repset:PRIMARY> rs.add("192.168.70.215:27017")
{ "ok" : 1 }
repset:PRIMARY> rs.add("192.168.70.216:27017")
{ "ok" : 1 }
repset:PRIMARY>
repset:PRIMARY> cfg=rs.conf()
{
"_id" : "repset",
"version" : 63044,
"members" : [
{
"_id" : 4,
"host" : "192.168.71.214:27017",
"priority" : 9
},
{
"_id" : 5,
"host" : "192.168.71.215:27017",
"priority" : 8
},
{
"_id" : 6,
"host" : "192.168.70.214:27017"
},
{
"_id" : 7,
"host" : "192.168.70.215:27017"
},
{
"_id" : 8,
"host" : "192.168.70.216:27017"
}
]
}
repset:PRIMARY> cfg.members[2].priority = 11
11
repset:PRIMARY> cfg.members[3].priority = 6
6
repset:PRIMARY> cfg.members[4].priority = 5
5
repset:PRIMARY> rs.reconfig(cfg)
Mon Nov 12 16:04:10 DBClientCursor::init call() failed
Mon Nov 12 16:04:10 query failed : admin.$cmd { replSetReconfig: { _id: "repset", version: 63045, members: [ { _id: 4, host: "192.168.71.214:27017", priority: 9.0 }, { _id: 5, host: "192.168.71.215:27017", priority: 8.0 }, { _id: 6, host: "192.168.70.214:27017", priority: 11.0 }, { _id: 7, host: "192.168.70.215:27017", priority: 6.0 }, { _id: 8, host: "192.168.70.216:27017", priority: 5.0 } ] } } to: 192.168.71.214:27017
Mon Nov 12 16:04:10 trying reconnect to 192.168.71.214:27017
Mon Nov 12 16:04:10 reconnect 192.168.71.214:27017 ok
reconnected to server after rs command (which is normal)

repset:PRIMARY> Mon Nov 12 16:04:29 Socket recv() errno:104 Connection reset by peer 192.168.71.214:27017
Mon Nov 12 16:04:29 SocketException: remote: 192.168.71.214:27017 error: 9001 socket exception [1] server [192.168.71.214:27017]
Mon Nov 12 16:04:29 DBClientCursor::init call() failed
Mon Nov 12 16:04:29 query failed : admin.$cmd { replSetGetStatus: 1.0, forShell: 1.0 } to: 192.168.71.214:27017
> Mon Nov 12 16:04:37 trying reconnect to 192.168.71.214:27017
Mon Nov 12 16:04:37 reconnect 192.168.71.214:27017 ok
repset:SECONDARY>
repset:SECONDARY> rs.conf()
{
"_id" : "repset",
"version" : 63045,
"members" : [
{
"_id" : 4,
"host" : "192.168.71.214:27017",
"priority" : 9
},
{
"_id" : 5,
"host" : "192.168.71.215:27017",
"priority" : 8
},
{
"_id" : 6,
"host" : "192.168.70.214:27017",
"priority" : 11
},
{
"_id" : 7,
"host" : "192.168.70.215:27017",
"priority" : 6
},
{
"_id" : 8,
"host" : "192.168.70.216:27017",
"priority" : 5
}
]
}
repset:SECONDARY>
repset:SECONDARY> rs.status()
{
"set" : "repset",
"date" : ISODate("2018-11-12T08:05:16Z"),
"myState" : 2,
"syncingTo" : "192.168.70.214:27017",
"members" : [
{
"_id" : 4,
"name" : "192.168.71.214:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 19426,
"optime" : Timestamp(1542009850000, 1),
"optimeDate" : ISODate("2018-11-12T08:04:10Z"),
"errmsg" : "syncing to: 192.168.70.214:27017",
"self" : true
},
{
"_id" : 5,
"name" : "192.168.71.215:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 50,
"optime" : Timestamp(1542009850000, 1),
"optimeDate" : ISODate("2018-11-12T08:04:10Z"),
"lastHeartbeat" : ISODate("2018-11-12T08:05:14Z"),
"pingMs" : 0,
"errmsg" : "syncing to: 192.168.70.214:27017"
},
{
"_id" : 6,
"name" : "192.168.70.214:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 64,
"optime" : Timestamp(1542009850000, 1),
"optimeDate" : ISODate("2018-11-12T08:04:10Z"),
"lastHeartbeat" : ISODate("2018-11-12T08:05:14Z"),
"pingMs" : 1
},
{
"_id" : 7,
"name" : "192.168.70.215:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 64,
"optime" : Timestamp(1542009850000, 1),
"optimeDate" : ISODate("2018-11-12T08:04:10Z"),
"lastHeartbeat" : ISODate("2018-11-12T08:05:14Z"),
"pingMs" : 0,
"errmsg" : "syncing to: 192.168.70.214:27017"
},
{
"_id" : 8,
"name" : "192.168.70.216:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 64,
"optime" : Timestamp(1542009850000, 1),
"optimeDate" : ISODate("2018-11-12T08:04:10Z"),
"lastHeartbeat" : ISODate("2018-11-12T08:05:14Z"),
"pingMs" : 1,
"errmsg" : "syncing to: 192.168.70.214:27017"
}
],
"ok" : 1
}
repset:SECONDARY>

相關文章
相關標籤/搜索