golang調度學習-調度流程 (五) Syscall

時間 2021-04-21

標籤 golang 函數 this atom pwa 線程翻譯指針 rest 欄目 Go 简体版

原文原文鏈接

syscall函數

Syscall函數的定義如下，傳入4個參數，返回3個參數。

// syscall is declared here without a body, so its implementation is supplied
// outside this Go declaration.
// It takes fn plus three uintptr arguments (a1, a2, a3) and returns two
// uintptr results (r1, r2) and an Errno.
// NOTE(review): the assembly listing in this article defines ·Syscall with a
// first parameter named trap, not syscall/fn — confirm which declaration the
// listing actually belongs to.
func syscall(fn, a1, a2, a3 uintptr) (r1, r2 uintptr, err Errno)

syscall函數的作用是傳入系統調用的地址和參數，執行完成後返回。流程主要是：系統調用前執行entersyscall，設置g、p的狀態；然後傳入參數並執行系統調用；執行後寫返回值，然後執行exitsyscall設置g、p的狀態。
entersyscall和exitsyscall將在下文細講。

// func Syscall(trap int64, a1, a2, a3 uintptr) (r1, r2, err uintptr);
// Trap # in AX, args in DI SI DX R10 R8 R9, return in AX DX
// Note that this differs from "standard" ABI convention, which
// would pass 4th arg in CX, not R10.

// Syscall takes four inputs (trap, a1, a2, a3) and stores three results
// (r1, r2, err). The SYSCALL instruction is bracketed by calls to
// runtime·entersyscall and runtime·exitsyscall.
// $0-56 declares a zero-byte local frame and 56 bytes of arguments plus
// results: inputs at 0..24(FP), results at 32..48(FP).
TEXT ·Syscall(SB),NOSPLIT,$0-56
    // Call entersyscall: it checks that the preconditions hold, records
    // scheduling information and switches the states of the G and the P.
    CALL    runtime·entersyscall(SB)
    // Load the three arguments and the trap number into registers.
    MOVQ    a1+8(FP), DI
    MOVQ    a2+16(FP), SI
    MOVQ    a3+24(FP), DX
    MOVQ    trap+0(FP), AX  // syscall entry
    SYSCALL
    // Unsigned compare: AX at or below 0xfffffffffffff001 is treated as
    // success; anything above it is treated as a failure.
    CMPQ    AX, $0xfffffffffffff001
    JLS ok
    // Failure path: r1 = -1, r2 = 0, err = -AX.
    MOVQ    $-1, r1+32(FP)
    MOVQ    $0, r2+40(FP)
    NEGQ    AX
    MOVQ    AX, err+48(FP)
    // Call exitsyscall to record scheduling information.
    CALL    runtime·exitsyscall(SB)
    RET
ok:
    // Success path: r1 = AX, r2 = DX, err = 0.
    MOVQ    AX, r1+32(FP)
    MOVQ    DX, r2+40(FP)
    MOVQ    $0, err+48(FP)
    CALL    runtime·exitsyscall(SB)
    RET 

// RawSyscall issues the same trap as ·Syscall (trap in AX, a1..a3 in
// DI/SI/DX) but does not call runtime·entersyscall or runtime·exitsyscall
// around it. It has the same $0-56 argument/result layout.
// NOTE(review): failure is detected here with the carry flag (JCC) and AX is
// stored into err without negation, whereas ·Syscall in this same listing
// uses CMPQ against 0xfffffffffffff001 followed by NEGQ. The two routines
// appear to follow different kernel error conventions — confirm which
// platform this listing was taken from.
TEXT    ·RawSyscall(SB),NOSPLIT,$0-56
    MOVQ    a1+8(FP), DI
    MOVQ    a2+16(FP), SI
    MOVQ    a3+24(FP), DX
    MOVQ    trap+0(FP), AX    // syscall entry
    SYSCALL
    // Carry clear means success.
    JCC    ok1
    // Failure path: r1 = -1, r2 = 0, err = AX.
    MOVQ    $-1, r1+32(FP)    // r1
    MOVQ    $0, r2+40(FP)    // r2
    MOVQ    AX, err+48(FP)    // errno
    RET
ok1:
    // Success path: r1 = AX, r2 = DX, err = 0.
    MOVQ    AX, r1+32(FP)    // r1
    MOVQ    DX, r2+40(FP)    // r2
    MOVQ    $0, err+48(FP)    // errno
    RET

明顯Syscall比RawSyscall多調用了兩個方法：entersyscall和exitsyscall。增加這兩個函數的調用，讓調度器有機會對即將進入系統調用的goroutine進行調整，方便調度。

entersyscall

// entersyscall is called when a goroutine is about to make a system call.
// It forwards the caller's PC and SP to reentersyscall, which switches the G
// from _Grunning to _Gsyscall until the system call finishes.
// While the M is in the system call the P gives up the M; the M still
// remembers that P (in m.oldp), so it can look for it again when the system
// call returns.
func entersyscall() {
    reentersyscall(getcallerpc(), getcallersp())
}
// reentersyscall prepares the current G, M and P for a system call, using the
// explicitly supplied pc and sp as the saved syscall frame.
//
// Syscall tracing:
// At the start of a syscall we emit traceGoSysCall to capture the stack trace.
// If the syscall does not block, we do not emit any other events.
// If the syscall blocks (that is, the P is retaken), the retaker emits
// traceGoSysBlock; when the syscall returns we emit traceGoSysExit, and when
// the goroutine starts running (possibly immediately, if exitsyscallfast
// returns true) we emit traceGoStart.
// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock,
// we remember the current value of syscalltick in m
// (_g_.m.syscalltick = _g_.m.p.ptr().syscalltick);
// whoever emits traceGoSysBlock increments p.syscalltick afterwards,
// and we wait for that increment before emitting traceGoSysExit.
// Note that the increment is done even if tracing is not enabled,
// because tracing can be enabled in the middle of a syscall. We don't want
// the wait to hang.
//go:nosplit
func reentersyscall(pc, sp uintptr) {
    _g_ := getg()

    // Disable preemption: during this function g is in Gsyscall status but
    // g->sched may be inconsistent, so do not let the GC observe it.
    _g_.m.locks++

    // Entersyscall must not call any function that might split/grow the stack.
    // (See details in comment above.)
    // Catch calls that might, by replacing the stack guard with something
    // that will trip any stack check, and leave a flag to tell newstack to die.
    _g_.stackguard0 = stackPreempt
    _g_.throwsplit = true

    // Leave SP around for GC and traceback.
    save(pc, sp)
    _g_.syscallsp = sp
    _g_.syscallpc = pc
    // Move the G from _Grunning to _Gsyscall; it stays in that state until
    // the system call ends and exitsyscall moves it out again.
    casgstatus(_g_, _Grunning, _Gsyscall)
    // Sanity check: the recorded syscall SP must lie within the G's stack.
    if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
        systemstack(func() {
            print("entersyscall inconsistent ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
            throw("entersyscall")
        })
    }

    if trace.enabled {
        systemstack(traceGoSysCall)
        // systemstack itself clobbers g.sched.{pc,sp} and we might
        // need them later when the G is genuinely blocked in a
        // syscall
        save(pc, sp)
    }

    // If sched.sysmonwait is set, run entersyscall_sysmon on the system
    // stack, then re-save pc/sp for the same reason as above.
    if atomic.Load(&sched.sysmonwait) != 0 {
        systemstack(entersyscall_sysmon)
        save(pc, sp)
    }

    if _g_.m.p.ptr().runSafePointFn != 0 {
        // runSafePointFn may stack split if run on this stack
        systemstack(runSafePointFn)
        save(pc, sp)
    }

    // Remember the P's syscalltick in the M (see "Syscall tracing" above).
    _g_.m.syscalltick = _g_.m.p.ptr().syscalltick
    _g_.sysblocktraced = true
    // Key step: the M is about to be stuck in a system call, so the P gives
    // up this M. Note that the M still remembers the P (stored in m.oldp), so
    // it can find it again after returning from the system call.
    pp := _g_.m.p.ptr()
    pp.m = 0
    _g_.m.oldp.set(pp)
    _g_.m.p = 0
    // The P's status becomes _Psyscall.
    atomic.Store(&pp.status, _Psyscall)
    if sched.gcwaiting != 0 {
        systemstack(entersyscall_gcwait)
        save(pc, sp)
    }
    // Re-enable preemption (pairs with the locks++ at the top).
    _g_.m.locks--
}

該方法主要是在系統調用前做了準備工作：

修改g的狀態爲_Gsyscall
檢查sysmon線程是否在執行，若在睡眠則需要喚醒
p放棄m，但是m依舊持有p的指針，結束調用後優先選擇該p
修改p的狀態爲_Psyscall

做好這些準備工作便可以真正地執行系統調用了。當該線程m長時間阻塞在系統調用的時候，一直在運行的sysmon線程會檢測到該p的狀態，並將其剝離，驅動其他的m（新建或獲取）來調度執行該p上的任務。這主要是在retake方法中實現的；該方法還處理了goroutine搶佔調度，這裏省略，後面介紹搶佔調度時再介紹。

exitsyscall

當系統調用Syscall返回時，會調用exitsyscall方法恢復調度：

// exitsyscall is called when the goroutine returns from a system call and
// arranges for it to run again.
// Fast path: exitsyscallfast obtains a P, the G goes back from _Gsyscall to
// _Grunning and the function returns.
// Slow path: no P could be obtained, so exitsyscall0 is run via mcall; the
// remaining statements execute only after the scheduler runs this G again.
//go:nosplit
//go:nowritebarrierrec
//go:linkname exitsyscall
func exitsyscall() {
    _g_ := getg()

    _g_.m.locks++ // see comment in entersyscall
    // The caller's SP must not be above the SP recorded on syscall entry.
    if getcallersp() > _g_.syscallsp {
        throw("exitsyscall: syscall frame is no longer valid")
    }

    _g_.waitsince = 0
    // Take the P remembered on syscall entry and clear the field.
    oldp := _g_.m.oldp.ptr()
    _g_.m.oldp = 0
    // Try to reacquire a P.
    if exitsyscallfast(oldp) {
        if trace.enabled {
            if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
                systemstack(traceGoStart)
            }
        }
        // There's a cpu for us, so we can run.
        _g_.m.p.ptr().syscalltick++
        // We need to cas the status and scan before resuming...
        casgstatus(_g_, _Gsyscall, _Grunning)

        // Garbage collector isn't running (since we are),
        // so okay to clear syscallsp.
        _g_.syscallsp = 0
        _g_.m.locks--
        if _g_.preempt {
            // restore the preemption request in case we've cleared it in newstack
            _g_.stackguard0 = stackPreempt
        } else {
            // otherwise restore the real _StackGuard, we've spoiled it in entersyscall/entersyscallblock
            _g_.stackguard0 = _g_.stack.lo + _StackGuard
        }
        _g_.throwsplit = false

        if sched.disable.user && !schedEnabled(_g_) {
            // Scheduling of this goroutine is disabled.
            Gosched()
        }

        return
    }

    _g_.sysexitticks = 0
    if trace.enabled {
        // Wait till traceGoSysBlock event is emitted.
        // This ensures consistency of the trace (the goroutine is started after it is blocked).
        for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
            osyield()
        }
        // We can't trace syscall exit right now because we don't have a P.
        // Tracing code can invoke write barriers that cannot run without a P.
        // So instead we remember the syscall exit time and emit the event
        // in execute when we have a P.
        _g_.sysexitticks = cputicks()
    }

    _g_.m.locks--

    // No P was acquired: the only option is to detach the current G and
    // reschedule this M, which exitsyscall0 does.
    mcall(exitsyscall0)

    // Scheduler returned, so we're allowed to run now.
    // Delete the syscallsp information that we left for
    // the garbage collector during the system call.
    // Must wait until now because until gosched returns
    // we don't know for sure that the garbage collector
    // is not running.
    _g_.syscallsp = 0
    _g_.m.p.ptr().syscalltick++
    _g_.throwsplit = false
}

exitsyscallfast

exitsyscall會通過exitsyscallfast嘗試重新綁定p，優先選擇之前m綁定的p（進入系統調用的時候，p只是單方面解綁了和m的關係，通過m依舊能夠找到p）：

// exitsyscallfast tries to obtain a P for the M returning from a system call.
// It first tries oldp (the P the M had on syscall entry) and then any idle P.
// It returns true if a P was obtained and false otherwise.
//go:nosplit
func exitsyscallfast(oldp *p) bool {
    _g_ := getg()

    // Freezetheworld sets stopwait but does not retake P's.
    // During such a stop, do not take a P; report failure right away.
    if sched.stopwait == freezeStopWait {
        return false
    }

    // Try to re-acquire the last P.
    // If the previous P has not been taken by another M (its status is still
    // _Psyscall), claim it with a CAS to _Pidle and bind it to this M.
    if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) {
        // There's a cpu for us, so we can run.
        wirep(oldp)
        exitsyscallfast_reacquired()
        return true
    }
    // Otherwise take one from the idle P list.
    // Try to get any other idle P.
    if sched.pidle != 0 {
        var ok bool
        systemstack(func() {
            ok = exitsyscallfast_pidle()
            if ok && trace.enabled {
                if oldp != nil {
                    // Wait till traceGoSysBlock event is emitted.
                    // This ensures consistency of the trace (the goroutine is started after it is blocked).
                    for oldp.syscalltick == _g_.m.syscalltick {
                        osyield()
                    }
                }
                traceGoSysExit(0)
            }
        })
        if ok {
            return true
        }
    }
    return false
}

exitsyscall0

// exitsyscall0 is the slow path of exitsyscall, used when no P could be
// obtained by exitsyscallfast. gp is the goroutine that is leaving the system
// call. It marks gp runnable, detaches it, and then either runs gp on an idle
// P or queues gp globally and stops this M. Every path ends in a call that is
// documented here as never returning.
func exitsyscall0(gp *g) {
    _g_ := getg()
    // Change gp's status from _Gsyscall to _Grunnable.
    casgstatus(gp, _Gsyscall, _Grunnable)
    dropg()                  // detach gp from this M
    lock(&sched.lock)
    var _p_ *p
    // Try to get an idle P.
    // NOTE(review): the check is made on _g_ (the result of getg()), not on
    // gp — verify that this is the intended goroutine.
    if schedEnabled(_g_) {
        _p_ = pidleget()
    }
    if _p_ == nil {
        // No P was obtained: put gp on the global run queue to wait for scheduling.
        globrunqput(gp)
    } else if atomic.Load(&sched.sysmonwait) != 0 {
        // A P was obtained and sysmonwait is set: clear it and wake sysmonnote.
        atomic.Store(&sched.sysmonwait, 0)
        notewakeup(&sched.sysmonnote)
    }
    unlock(&sched.lock)
    // A P was obtained: bind it to this M and run gp.
    if _p_ != nil {
        acquirep(_p_)
        execute(gp, false) // Never returns.
    }
    // This M has a locked goroutine (lockedg != 0): stop the M until it is
    // woken up again, then run gp.
    if _g_.m.lockedg != 0 {
        // Wait until another thread schedules gp and so m again.
        stoplockedm()
        execute(gp, false) // Never returns.
    }
    // No P could be associated: put the M to sleep and, once woken, schedule again.
    stopm()
    schedule() // Never returns.
}

總結

上述便是golang系統調用的整個流程，大體如下：

業務調用封裝好的系統調用函數，編譯器翻譯到Syscall。
執行entersyscall()方法，修改g、p的狀態，p單方面解綁m，並檢查、喚醒sysmon線程，由其檢測系統調用。
當sysmon線程檢測到系統調用阻塞時間過長的時候，調用retake，重新調度該p，讓p上可執行的g得以執行，不浪費資源。
系統調用返回，進入exitsyscall方法，優先獲取之前的p；如果該p已經被佔有，則重新獲取空閒的p，綁定，然後繼續執行該g。當獲取不到p的時候，調用exitsyscall0，解綁g，休眠，等待下次喚醒調度。

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。