一、閱讀raft論文
二、閱讀raft理論與實踐[1]-理論篇
三、閱讀raft理論與實踐[2]-lab2a
四、查看我寫的這篇文章: 模擬RPC遠程過程調用
一、在raft.go的Raft結構體中補充字段。字段應該儘可能與raft論文的Figure 2保持一致。
// Raft holds the state of a single Raft peer. The field set follows
// Figure 2 of the Raft paper, plus the timers and channels used by the
// election and heartbeat goroutines in this file.
type Raft struct {
	mu        sync.Mutex          // Lock to protect shared access to this peer's state
	peers     []*labrpc.ClientEnd // RPC end points of all peers
	persister *Persister          // Object to hold this peer's persisted state
	me        int                 // this peer's index into peers[]
	dead      int32               // set by Kill()

	// Your data here (2A, 2B, 2C).
	// Look at the paper's Figure 2 for a description of what
	// state a Raft server must maintain.
	state             int           // follower, candidate or leader
	resetTimer        chan struct{} // signals electionDaemon to restart the election timer
	electionTimer     *time.Timer   // election timer; electionDaemon starts an election when it fires
	electionTimeout   time.Duration // randomized in Make: 400ms plus a multiple of 4ms below 400ms
	heartbeatInterval time.Duration // pause between heartbeat rounds in heartbeatDaemon; Make sets 40ms

	// Persistent state on all servers (Figure 2).
	CurrentTerm int        // Persisted before responding to RPCs
	VotedFor    int        // Persisted before responding to RPCs; -1 means no vote recorded
	Logs        []LogEntry // Persisted before responding to RPCs; Make puts a placeholder at index 0

	commitCond *sync.Cond // for commitIndex update; Make builds it on top of mu
	//newEntryCond []*sync.Cond // for new log entry

	commitIndex int // Volatile state on all servers
	lastApplied int // Volatile state on all servers

	nextIndex  []int // Leader only, reinitialized after election
	matchIndex []int // Leader only, reinitialized after election

	applyCh    chan ApplyMsg // outgoing channel to service
	shutdownCh chan struct{} // shutdown channel, shut raft instance gracefully
}
func (rf *Raft) GetState() (int, bool) { var term int var isleader bool // Your code here (2A). rf.mu.Lock() defer rf.mu.Unlock() term = rf.CurrentTerm isleader = rf.state == Leader return term, isleader }
// RequestVoteArgs is the argument structure of the RequestVote RPC.
// Field names are exported (capitalized) as the original declaration has them.
type RequestVoteArgs struct {
	// Your data here (2A, 2B).
	Term         int // candidate's term
	CandidateID  int // candidate requesting vote
	LastLogIndex int // index of candidate's last log entry
	LastLogTerm  int // term of candidate's last log entry
}

// RequestVoteReply is the reply structure of the RequestVote RPC.
type RequestVoteReply struct {
	// Your data here (2A).
	CurrentTerm int  // currentTerm, for candidate to update itself
	VoteGranted bool // true means candidate received vote
}
一、獲取當前節點的log個數,以及最後一個log的term,並確定當前節點的term。
二、如果調用節點的term小於當前節點的term,返回當前term,並且不爲其投票。
三、如果調用節點的term大於當前節點的term,將當前節點的term更新爲調用節點的term,當前節點轉爲follower,並清空已投的票。
四、如果當前節點在本任期尚未投票,且調用節點最後一個log的term大於當前節點最後一個log的term,或者兩者相等並且調用節點最後一個log的index大於等於當前節點的,則爲調用節點投票。
五、投票後重置當前節點的選舉超時時間。
func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) { // Your code here (2A, 2B). select { case <-rf.shutdownCh: DPrintf("[%d-%s]: peer %d is shutting down, reject RV rpc request.\n", rf.me, rf, rf.me) return default: } rf.mu.Lock() defer rf.mu.Unlock() lastLogIdx, lastLogTerm := rf.lastLogIndexAndTerm() DPrintf("[%d-%s]: rpc RV, from peer: %d, arg term: %d, my term: %d (last log idx: %d->%d, term: %d->%d)\n", rf.me, rf, args.CandidateID, args.Term, rf.CurrentTerm, args.LastLogIndex, lastLogIdx, args.LastLogTerm, lastLogTerm) if args.Term < rf.CurrentTerm { reply.CurrentTerm = rf.CurrentTerm reply.VoteGranted = false } else { if args.Term > rf.CurrentTerm { // convert to follower rf.CurrentTerm = args.Term rf.state = Follower rf.VotedFor = -1 } // if is null (follower) or itself is a candidate (or stale leader) with same term if rf.VotedFor == -1 { //|| (rf.VotedFor == rf.me && !sameTerm) { //|| rf.votedFor == args.CandidateID { // check whether candidate's log is at-least-as update if (args.LastLogTerm == lastLogTerm && args.LastLogIndex >= lastLogIdx) || args.LastLogTerm > lastLogTerm { rf.resetTimer <- struct{}{} rf.state = Follower rf.VotedFor = args.CandidateID reply.VoteGranted = true DPrintf("[%d-%s]: peer %d vote to peer %d (last log idx: %d->%d, term: %d->%d)\n", rf.me, rf, rf.me, args.CandidateID, args.LastLogIndex, lastLogIdx, args.LastLogTerm, lastLogTerm) } } } }
除了一些基本的初始化過程,還新開了一個goroutine(electionDaemon)。
func Make(peers []*labrpc.ClientEnd, me int, persister *Persister, applyCh chan ApplyMsg) *Raft { rf := &Raft{} rf.peers = peers rf.persister = persister rf.me = me rf.applyCh = applyCh // Your initialization code here (2A, 2B, 2C). rf.state = Follower rf.VotedFor = -1 rf.Logs = make([]LogEntry, 1) // first index is 1 rf.Logs[0] = LogEntry{ // placeholder Term: 0, Command: nil, } rf.nextIndex = make([]int, len(peers)) rf.matchIndex = make([]int, len(peers)) rf.electionTimeout = time.Millisecond * time.Duration(400+rand.Intn(100)*4) rf.electionTimer = time.NewTimer(rf.electionTimeout) rf.resetTimer = make(chan struct{}) rf.shutdownCh = make(chan struct{}) // shutdown raft gracefully rf.commitCond = sync.NewCond(&rf.mu) // commitCh, a distinct goroutine rf.heartbeatInterval = time.Millisecond * 40 // small enough, not too small // initialize from state persisted before a crash rf.readPersist(persister.ReadRaftState()) go rf.electionDaemon() // kick off election return rf }
除了shutdownCh,electionDaemon還監聽兩個通道:一個是electionTimer.C,在選舉超時時觸發。
另一個是resetTimer,用於重置選舉超時。
注意time.Timer的Reset方法是很難正確使用的:重置之前要先Stop,如果Stop返回false,還要把通道裏殘留的值取走。
一旦選舉超時,調用go rf.canvassVotes()
// electionDaemon func (rf *Raft) electionDaemon() { for { select { case <-rf.shutdownCh: DPrintf("[%d-%s]: peer %d is shutting down electionDaemon.\n", rf.me, rf, rf.me) return case <-rf.resetTimer: if !rf.electionTimer.Stop() { <-rf.electionTimer.C } rf.electionTimer.Reset(rf.electionTimeout) case <-rf.electionTimer.C: rf.mu.Lock() DPrintf("[%d-%s]: peer %d election timeout, issue election @ term %d\n", rf.me, rf, rf.me, rf.CurrentTerm) rf.mu.Unlock() go rf.canvassVotes() rf.electionTimer.Reset(rf.electionTimeout) } } }
replyHandler是進行請求返回後的處理。
當前節點爲了成爲leader,會調用每個節點的RequestVote方法。
若是返回過來的term大於當前term,那麼當前節點變爲follower,重置選舉超時時間。
否則,如果收到了超過一半節點的投票(包括自己的一票),那麼當前節點變爲leader,並立即給其餘節點發送心跳。
// canvassVotes issues RequestVote RPC func (rf *Raft) canvassVotes() { var voteArgs RequestVoteArgs rf.fillRequestVoteArgs(&voteArgs) peers := len(rf.peers) var votes = 1 replyHandler := func(reply *RequestVoteReply) { rf.mu.Lock() defer rf.mu.Unlock() if rf.state == Candidate { if reply.CurrentTerm > voteArgs.Term { rf.CurrentTerm = reply.CurrentTerm rf.turnToFollow() //rf.persist() rf.resetTimer <- struct{}{} // reset timer return } if reply.VoteGranted { if votes == peers/2 { rf.state = Leader rf.resetOnElection() // reset leader state go rf.heartbeatDaemon() // new leader, start heartbeat daemon DPrintf("[%d-%s]: peer %d become new leader.\n", rf.me, rf, rf.me) return } votes++ } } } for i := 0; i < peers; i++ { if i != rf.me { go func(n int) { var reply RequestVoteReply if rf.sendRequestVote(n, &voteArgs, &reply) { replyHandler(&reply) } }(i) } } }
一、leader調用每個節點的AppendEntries方法。
二、如果當前節點的term大於調用節點的term,那麼AppendEntries失敗。否則,將當前節點的term更新爲兩者中較大的那個。
三、若是當前節點是leader,始終將其變爲follower(爲了讓leader穩定)
四、將當前節點投票給調用者(對於落後的節點)。
五、重置當前節點的超時時間。
func (rf *Raft) heartbeatDaemon() { for { if _, isLeader := rf.GetState(); !isLeader { return } // reset leader's election timer rf.resetTimer <- struct{}{} select { case <-rf.shutdownCh: return default: for i := 0; i < len(rf.peers); i++ { if i != rf.me { go rf.consistencyCheck(i) // routine heartbeat } } } time.Sleep(rf.heartbeatInterval) } } func (rf *Raft) consistencyCheck(n int) { rf.mu.Lock() defer rf.mu.Unlock() pre := rf.nextIndex[n] - 1 var args = AppendEntriesArgs{ Term: rf.CurrentTerm, LeaderID: rf.me, PrevLogIndex: pre, PrevLogTerm: rf.Logs[pre].Term, Entries: nil, LeaderCommit: rf.commitIndex, } go func() { DPrintf("[%d-%s]: consistency Check to peer %d.\n", rf.me, rf, n) var reply AppendEntriesReply if rf.sendAppendEntries(n, &args, &reply) { rf.consistencyCheckReplyHandler(n, &reply) } }() } func (rf *Raft) AppendEntries(args *AppendEntriesArgs, reply *AppendEntriesReply) { select { case <-rf.shutdownCh: DPrintf("[%d-%s]: peer %d is shutting down, reject AE rpc request.\n", rf.me, rf, rf.me) return default: } DPrintf("[%d-%s]: rpc AE, from peer: %d, term: %d\n", rf.me, rf, args.LeaderID, args.Term) rf.mu.Lock() defer rf.mu.Unlock() if args.Term < rf.CurrentTerm { //DPrintf("[%d-%s]: AE failed from leader %d. (heartbeat: leader's term < follower's term (%d < %d))\n", // rf.me, rf, args.LeaderID, args.Term, rf.currentTerm) reply.CurrentTerm = rf.CurrentTerm reply.Success = false return } if rf.CurrentTerm < args.Term { rf.CurrentTerm = args.Term } // for stale leader if rf.state == Leader { rf.turnToFollow() } // for straggler (follower) if rf.VotedFor != args.LeaderID { rf.VotedFor = args.LeaderID } // valid AE, reset election timer // if the node recieve heartbeat. then it will reset the election timeout rf.resetTimer <- struct{}{} reply.Success = true reply.CurrentTerm = rf.CurrentTerm return }
如果心跳檢測失敗,並且返回的term大於當前節點的term,那麼當前節點變爲follower,並重置選舉超時。
// n: which follower func (rf *Raft) consistencyCheckReplyHandler(n int, reply *AppendEntriesReply) { rf.mu.Lock() defer rf.mu.Unlock() if rf.state != Leader { return } if reply.Success { } else { // found a new leader? turn to follower if rf.state == Leader && reply.CurrentTerm > rf.CurrentTerm { rf.turnToFollow() rf.resetTimer <- struct{}{} DPrintf("[%d-%s]: leader %d found new term (heartbeat resp from peer %d), turn to follower.", rf.me, rf, rf.me, n) return } } }