Syscall 函數的定義如下,傳入 4 個參數,返回 3 個參數。
// syscall is declared here without a body. It takes fn plus three uintptr
// arguments (a1, a2, a3) and returns two uintptr results (r1, r2) and an
// Errno (err).
func syscall(fn, a1, a2, a3 uintptr) (r1, r2 uintptr, err Errno)
Syscall 函數的作用是傳入系統調用的地址和參數,執行完成後返回。主要流程是:系統調用前先執行 entersyscall,設置 g 和 p 的狀態;然後把參數載入寄存器並執行系統調用;執行後寫入返回值,再執行 exitsyscall 設置 g 和 p 的狀態。
entersyscall 和 exitsyscall 在 g 的調用中細講。
// func Syscall(trap int64, a1, a2, a3 uintptr) (r1, r2, err uintptr);
// Trap # in AX, args in DI SI DX R10 R8 R9, return in AX DX
// Note that this differs from "standard" ABI convention, which
// would pass 4th arg in CX, not R10.
//
// Four inputs: trap, a1, a2, a3. Three outputs: r1, r2, err.
// Syscall brackets the SYSCALL instruction with calls to
// runtime·entersyscall and runtime·exitsyscall.
TEXT ·Syscall(SB),NOSPLIT,$0-56
	// Tell the runtime a system call is about to start.
	CALL	runtime·entersyscall(SB)
	// Load the arguments into registers.
	MOVQ	a1+8(FP), DI
	MOVQ	a2+16(FP), SI
	MOVQ	a3+24(FP), DX
	MOVQ	trap+0(FP), AX	// syscall entry
	SYSCALL
	// Unsigned AX <= 0xfffffffffffff001 is treated as success;
	// anything above it is a negated error number.
	CMPQ	AX, $0xfffffffffffff001
	JLS	ok
	// Failure: r1 = -1, r2 = 0, err = -AX.
	MOVQ	$-1, r1+32(FP)
	MOVQ	$0, r2+40(FP)
	NEGQ	AX
	MOVQ	AX, err+48(FP)
	// Tell the runtime the system call has finished.
	CALL	runtime·exitsyscall(SB)
	RET
ok:
	// Success: r1 = AX, r2 = DX, err = 0.
	MOVQ	AX, r1+32(FP)
	MOVQ	DX, r2+40(FP)
	MOVQ	$0, err+48(FP)
	CALL	runtime·exitsyscall(SB)
	RET

// RawSyscall issues the same system call as Syscall but does not call
// runtime·entersyscall / runtime·exitsyscall around it.
//
// Failure is detected the same way as in Syscall above (compare AX against
// 0xfffffffffffff001 and negate it to obtain the error number). The earlier
// listing tested the carry flag (JCC) and stored AX un-negated, which does
// not match the convention Syscall uses in this same listing.
TEXT ·RawSyscall(SB),NOSPLIT,$0-56
	MOVQ	a1+8(FP), DI
	MOVQ	a2+16(FP), SI
	MOVQ	a3+24(FP), DX
	MOVQ	trap+0(FP), AX	// syscall entry
	SYSCALL
	CMPQ	AX, $0xfffffffffffff001
	JLS	ok1
	MOVQ	$-1, r1+32(FP)	// r1
	MOVQ	$0, r2+40(FP)	// r2
	NEGQ	AX
	MOVQ	AX, err+48(FP)	// errno
	RET
ok1:
	MOVQ	AX, r1+32(FP)	// r1
	MOVQ	DX, r2+40(FP)	// r2
	MOVQ	$0, err+48(FP)	// errno
	RET
很明顯,Syscall 比 RawSyscall 多調用了兩個函數:entersyscall 和 exitsyscall。增加這兩個函數的調用,讓調度器有機會對即將進入系統調用的 goroutine 進行調整,方便調度。
// entersyscall is called when a goroutine is about to make a system call.
// It passes the caller's PC and SP to reentersyscall, which performs the
// actual state changes: the G moves to _Gsyscall and the P is detached from
// the M but remembered in m.oldp.
func entersyscall() {
	reentersyscall(getcallerpc(), getcallersp())
}

// Syscall tracing:
// At the start of a syscall we emit traceGoSysCall to capture the stack trace.
// If the syscall does not block, we do not emit any other events.
// If the syscall blocks (that is, the P is retaken), retaker emits traceGoSysBlock;
// when the syscall returns we emit traceGoSysExit, and when the goroutine starts running
// (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart.
// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock,
// we remember the current value of syscalltick in m (_g_.m.syscalltick = _g_.m.p.ptr().syscalltick);
// whoever emits traceGoSysBlock increments p.syscalltick afterwards,
// and we wait for the increment before emitting traceGoSysExit.
// Note that the increment is done even if tracing is not enabled,
// because tracing can be enabled in the middle of a syscall. We don't want the wait to hang.
//
//go:nosplit
func reentersyscall(pc, sp uintptr) {
	_g_ := getg()

	// Disable preemption because during this function g is in Gsyscall status,
	// but g->sched may be inconsistent; do not let GC observe it.
	_g_.m.locks++

	// Entersyscall must not call any function that might split/grow the stack.
	// (See details in comment above.)
	// Catch calls that might, by replacing the stack guard with something that
	// will trip any stack check and leaving a flag to tell newstack to die.
	_g_.stackguard0 = stackPreempt
	_g_.throwsplit = true

	// Leave SP around for GC and traceback.
	save(pc, sp)
	_g_.syscallsp = sp
	_g_.syscallpc = pc
	// Move the G from _Grunning to _Gsyscall.
	casgstatus(_g_, _Grunning, _Gsyscall)
	// Sanity check: the saved syscall SP must lie within the G's stack bounds.
	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
		systemstack(func() {
			print("entersyscall inconsistent ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
			throw("entersyscall")
		})
	}

	if trace.enabled {
		systemstack(traceGoSysCall)
		// systemstack itself clobbers g.sched.{pc,sp} and we might
		// need them later when the G is genuinely blocked in a
		// syscall
		save(pc, sp)
	}

	if atomic.Load(&sched.sysmonwait) != 0 {
		systemstack(entersyscall_sysmon)
		save(pc, sp)
	}

	if _g_.m.p.ptr().runSafePointFn != 0 {
		// runSafePointFn may stack split if run on this stack
		systemstack(runSafePointFn)
		save(pc, sp)
	}

	_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
	_g_.sysblocktraced = true
	// Key step: detach the P from this M. Both p.m and m.p are cleared,
	// but the P is recorded in m.oldp first, so it can still be found
	// through the M after the system call returns.
	pp := _g_.m.p.ptr()
	pp.m = 0
	_g_.m.oldp.set(pp)
	_g_.m.p = 0
	// Mark the P as _Psyscall.
	atomic.Store(&pp.status, _Psyscall)
	if sched.gcwaiting != 0 {
		systemstack(entersyscall_gcwait)
		save(pc, sp)
	}

	_g_.m.locks--
}
該方法主要是在系統調用前做了以下準備工作:禁止搶佔並保存當前的 pc 和 sp;將 g 的狀態由 _Grunning 切換爲 _Gsyscall;解除 m 與 p 的綁定(p 記錄在 m.oldp 中);將 p 的狀態設爲 _Psyscall。
做好這些準備工作便可以真正地執行系統調用了。當線程 m 長時間阻塞在系統調用時,一直在運行的 sysmon 線程會檢測到該 p 的狀態,並將其剝離,驅動其他的 m(新建或獲取)來調度執行該 p 上的任務。這主要是在 retake 方法中實現的;該方法還處理了 goroutine 的搶佔調度,這裏省略,後面介紹搶佔調度時再介紹。
當系統調用返回時,會調用 exitsyscall 方法恢復調度:
// exitsyscall is the counterpart of entersyscall, run after a system call
// returns. It first tries exitsyscallfast to get a P without going through
// the scheduler; on success the G is moved back to _Grunning and the
// function returns. Otherwise it switches to exitsyscall0 via mcall and
// finishes the cleanup once the goroutine is scheduled again.
//
//go:nosplit
//go:nowritebarrierrec
//go:linkname exitsyscall
func exitsyscall() {
	_g_ := getg()

	_g_.m.locks++ // see comment in entersyscall
	if getcallersp() > _g_.syscallsp {
		throw("exitsyscall: syscall frame is no longer valid")
	}

	_g_.waitsince = 0
	// Take the P recorded in m.oldp and clear the field.
	oldp := _g_.m.oldp.ptr()
	_g_.m.oldp = 0
	// Fast path: try to reacquire a P.
	if exitsyscallfast(oldp) {
		if trace.enabled {
			if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
				systemstack(traceGoStart)
			}
		}
		// There's a cpu for us, so we can run.
		_g_.m.p.ptr().syscalltick++
		// We need to cas the status and scan before resuming...
		casgstatus(_g_, _Gsyscall, _Grunning)

		// Garbage collector isn't running (since we are),
		// so okay to clear syscallsp.
		_g_.syscallsp = 0
		_g_.m.locks--
		if _g_.preempt {
			// restore the preemption request in case we've cleared it in newstack
			_g_.stackguard0 = stackPreempt
		} else {
			// otherwise restore the real _StackGuard, we've spoiled it in entersyscall/entersyscallblock
			_g_.stackguard0 = _g_.stack.lo + _StackGuard
		}
		_g_.throwsplit = false

		if sched.disable.user && !schedEnabled(_g_) {
			// Scheduling of this goroutine is disabled.
			Gosched()
		}

		return
	}

	_g_.sysexitticks = 0
	if trace.enabled {
		// Wait till traceGoSysBlock event is emitted.
		// This ensures consistency of the trace (the goroutine is started after it is blocked).
		for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
			osyield()
		}
		// We can't trace syscall exit right now because we don't have a P.
		// Tracing code can invoke write barriers that cannot run without a P.
		// So instead we remember the syscall exit time and emit the event
		// in execute when we have a P.
		_g_.sysexitticks = cputicks()
	}

	_g_.m.locks--

	// Slow path: no P was acquired, so hand off to exitsyscall0 via mcall.
	mcall(exitsyscall0)

	// Scheduler returned, so we're allowed to run now.
	// Delete the syscallsp information that we left for
	// the garbage collector during the system call.
	// Must wait until now because until gosched returns
	// we don't know for sure that the garbage collector
	// is not running.
	_g_.syscallsp = 0
	_g_.m.p.ptr().syscalltick++
	_g_.throwsplit = false
}
exitsyscall 會嘗試重新綁定 p,優先選擇之前 m 綁定的 p(進入系統調用的時候,p 只是單方面解除了和 m 的關係,通過 m.oldp 依舊可以找到 p):
// exitsyscallfast tries to obtain a P for the current M without blocking.
// It returns true if a P was wired to the M (either oldp or one acquired by
// exitsyscallfast_pidle), and false otherwise.
//
//go:nosplit
func exitsyscallfast(oldp *p) bool {
	_g_ := getg()

	// Freezetheworld sets stopwait but does not retake P's.
	// In that state, give up immediately without taking a P.
	if sched.stopwait == freezeStopWait {
		return false
	}

	// Try to re-acquire the last P.
	// This only succeeds if oldp is still in _Psyscall, i.e. the CAS from
	// _Psyscall to _Pidle wins.
	if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) {
		// There's a cpu for us, so we can run.
		wirep(oldp)
		exitsyscallfast_reacquired()
		return true
	}

	// Otherwise, try to get any other idle P.
	if sched.pidle != 0 {
		var ok bool
		systemstack(func() {
			ok = exitsyscallfast_pidle()
			if ok && trace.enabled {
				if oldp != nil {
					// Wait till traceGoSysBlock event is emitted.
					// This ensures consistency of the trace (the goroutine is started after it is blocked).
					for oldp.syscalltick == _g_.m.syscalltick {
						osyield()
					}
				}
				traceGoSysExit(0)
			}
		})
		if ok {
			return true
		}
	}
	return false
}
// exitsyscall0 is the slow path of exitsyscall, entered via mcall when
// exitsyscallfast could not obtain a P. gp is the goroutine that is leaving
// the system call. The function does not return to its caller: every path
// ends in execute or schedule.
func exitsyscall0(gp *g) {
	_g_ := getg()

	// Mark gp runnable (_Gsyscall -> _Grunnable) and drop it via dropg.
	casgstatus(gp, _Gsyscall, _Grunnable)
	dropg()
	lock(&sched.lock)
	var _p_ *p
	// Try to take an idle P.
	if schedEnabled(_g_) {
		_p_ = pidleget()
	}
	if _p_ == nil {
		// No P available: put gp on the global run queue to wait for scheduling.
		globrunqput(gp)
	} else if atomic.Load(&sched.sysmonwait) != 0 {
		// A P was obtained and sysmonwait is set: clear it and wake sysmon's note.
		atomic.Store(&sched.sysmonwait, 0)
		notewakeup(&sched.sysmonnote)
	}
	unlock(&sched.lock)
	// Got a P: acquire it and run gp right away.
	if _p_ != nil {
		acquirep(_p_)
		execute(gp, false) // Never returns.
	}
	// No P, but this M has a locked g: stop the M in stoplockedm, then run gp.
	if _g_.m.lockedg != 0 {
		// Wait until another thread schedules gp and so m again.
		stoplockedm()
		execute(gp, false) // Never returns.
	}
	// No P could be obtained: stop this M, then enter the scheduler once
	// stopm returns.
	stopm()
	schedule() // Never returns.
}
上述便是 golang 系統調用的整個流程,大致如下: