Go Gin源碼學習(四) 路由基數樹

時間 2019-11-10

原文鏈接

基數樹

此次學習的是Gin中的路由。在源碼學習(一)中我們看到，路由是Gin的特色，而底層數據結構使用了基數樹，也爲性能提供了保障。由於路由這部分比較獨立，並且邏輯相對複雜，因此需要單獨學習。
首先我們需要瞭解的是基數樹，可以參考百度百科中的解釋，
其中有一個圖可以讓我們更加直觀地看到數據是如何存儲的。

基數樹，相當於是一種前綴樹。對於基數樹的每一個節點，如果該節點是唯一的子節點的話，就和父節點合併。基數樹可用來構建關聯數組。
在上面的圖裏也可以看到，數據結構會把所有相同的前綴都提取出來，剩餘的部分則作爲子節點。

基數樹在Gin中的應用

從上面可以看到基數樹是一個前綴樹，圖中也可以看到數據結構。那基數樹在Gin中是如何應用的呢？舉一個例子其實就能看得出來：
// Register five GET routes. The prefixes they share ("/", "s",
// "group/user/") decide how the paths are split across tree nodes.
router.GET("/support", handler1)
router.GET("/search", handler2)
router.GET("/contact", handler3)
router.GET("/group/user/", handler4)
router.GET("/group/user/test", handler5)
最終的內存結構爲:

/ (handler = nil, indices = "scg")
    s (handler = nil, indices = "ue")
        upport (handler = handler1, indices = "")
        earch (handler = handler2, indices = "")
    contact (handler = handler3, indices = "")
    group/user/ (handler = handler4, indices = "u")
        test (handler = handler5, indices = "")

可以看到router使用GET方法添加了5個路由，實際存儲結果就是上面顯示的。我特意在後面加上了每一個節點中的handler和indices。indices是有序保存所有子節點的第一個字符所形成的字符串。爲什麼要特地突出這個字段？因爲在查找子節點下面是否包含path的時候不需要循環子節點，只需要循環這個字段就可以知道是否包含。這樣的操作也可以提升一些效率。

源碼查看

先看一下節點對象的定義以及它是如何被調用的，需要注意的是indices這個字段，上面已經提到了它的作用。

// node is one node of the radix tree that stores the routes of a single
// HTTP method.
type node struct {
    // path is the URL fragment held by this node. In the tree shown above,
    // "search" and "support" share a parent whose path is "s", and the two
    // children hold "earch" and "upport".
    path string
    // wildChild is true when the node's single child is a wildcard node
    // (a ':' parameter or a '*' catch-all).
    wildChild bool
    // nType is the kind of node:
    //   root:     the top node of a method tree
    //   param:    a ':' parameter node
    //   catchAll: a '*' catch-all node
    //   static:   any other node, e.g. the "s" parent split out above
    nType nodeType
    // maxParams is the largest number of parameters of any route that
    // passes through this node.
    maxParams uint8
    // indices holds the first byte of each child's path, in the same order
    // as children. For the "s" node above it is "ue": one branch starts
    // with 'u' (upport) and one with 'e' (earch).
    indices string
    // children are the child nodes; children[i] corresponds to indices[i].
    children []*node
    // handlers is the handler chain registered for the route that ends at
    // this node, or nil when no route ends here. The field is named
    // handlers and typed HandlersChain because that is how every method on
    // node reads and writes it.
    handlers HandlersChain
    // priority counts the routes registered through this node.
    priority uint32
}

// GET registers handlers for GET requests on relativePath by delegating to
// group.handle with the "GET" method name.
func (group *RouterGroup) GET(relativePath string, handlers ...HandlerFunc) IRoutes {
    const method = "GET"
    return group.handle(method, relativePath, handlers)
}

// handle registers a route on the engine for httpMethod. The route path is
// the group's absolute path for relativePath, and the handler chain is the
// result of combining the group's handlers with the ones passed in.
func (group *RouterGroup) handle(httpMethod, relativePath string, handlers HandlersChain) IRoutes {
    // Resolve the full path first, then build the final handler chain.
    fullPath := group.calculateAbsolutePath(relativePath)
    chain := group.combineHandlers(handlers)

    // Hand the finished route to the engine's routing tree.
    group.engine.addRoute(httpMethod, fullPath, chain)
    return group.returnObj()
}

接下來我們需要看的是addRoute這個方法了，方法體比較長。其實大多的邏輯都在處理帶參數的節點，真正核心的邏輯其實並不多。我把主要的邏輯都寫上了註釋，應該還是比較容易理解的。如果看不懂，其實一步步debug幾次也能幫助理解。

// addRoute registers handlers for the given HTTP method and absolute path in
// that method's routing tree, creating the tree on first use.
//
// The three preconditions are checked with assert1 (defined elsewhere;
// expected to panic with the given message when the condition is false):
// path must begin with '/', method must be non-empty, and at least one
// handler must be supplied.
func (engine *Engine) addRoute(method, path string, handlers HandlersChain) {
    // Check the length first: indexing path[0] on an empty path would raise
    // an index-out-of-range runtime panic before assert1 could report the
    // intended message.
    assert1(len(path) > 0 && path[0] == '/', "path must begin with '/'")
    assert1(method != "", "HTTP method can not be empty")
    assert1(len(handlers) > 0, "there must be at least one handler")

    debugPrintRoute(method, path, handlers)
    // Each HTTP method (GET, POST, ...) keeps its own tree; fetch its root.
    root := engine.trees.get(method)
    // No tree for this method yet: create an empty root and remember it.
    if root == nil {
        root = new(node)
        engine.trees = append(engine.trees, methodTree{method: method, root: root})
    }
    // Insert the route into the tree.
    root.addRoute(path, handlers)
}

// addRoute inserts path with its handlers into the tree rooted at n,
// splitting an existing node when the new path shares only part of that
// node's path. It panics when the path conflicts with an existing wildcard
// or when handlers are already registered for the same path.
func (n *node) addRoute(path string, handlers HandlersChain) {
    // Keep the original path for use in panic messages.
    fullPath := path
    n.priority++
    // Number of parameters in path. NOTE(review): countParams is defined
    // elsewhere; presumably it counts ':' and '*' and saturates at 255 —
    // confirm.
    numParams := countParams(path)

    // Non-empty tree: walk down from n to find where path belongs.
    if len(n.path) > 0 || len(n.children) > 0 {
    walk:
        for {
            // Update maxParams of the current node.
            if numParams > n.maxParams {
                n.maxParams = numParams
            }

            // Find the length i of the common prefix of path and n.path.
            // At most min(len(path), len(n.path)) bytes are compared.
            i := 0
            max := min(len(path), len(n.path))
            for i < max && path[i] == n.path[i] {
                i++
            }

            // Only part of n.path matches: split this node. The unmatched
            // tail of n.path moves into a new child that takes over n's
            // children and handlers, and n keeps just the common prefix.
            // Example: n.path = "romaned", new path = "romanus", common
            // prefix "roman":
            //   1. create a child with path "ed" carrying n's old state;
            //   2. set n.path to "roman" and n.indices to "e", the first
            //      byte of that child.
            if i < len(n.path) {
                child := node{
                    path:      n.path[i:],
                    wildChild: n.wildChild,
                    indices:   n.indices,
                    children:  n.children,
                    handlers:  n.handlers,
                    priority:  n.priority - 1,
                }

                // Update maxParams (max of all children)
                for i := range child.children {
                    if child.children[i].maxParams > child.maxParams {
                        child.maxParams = child.children[i].maxParams
                    }
                }

                n.children = []*node{&child}
                // []byte for proper unicode char conversion, see #65
                n.indices = string([]byte{n.path[i]})
                n.path = path[:i]
                n.handlers = nil
                n.wildChild = false
            }

            // After a split, n looks like:
            //   roman        (parent)
            //       ed       (children[0])
            // The remainder of the new path now has to be added below n,
            // giving:
            //   roman
            //       ed
            //       us
            // Sometimes the walk has to continue one level down instead.
            // If n already has a child "uie", then "uie" and "us" share the
            // prefix "u", so the loop restarts on that child and splits it:
            //   roman
            //       ed
            //       u
            //           ie
            //           s
            // The loop also restarts when the next node is a wildcard.
            if i < len(path) {
                path = path[i:]

                if n.wildChild {
                    n = n.children[0]
                    n.priority++

                    // Update maxParams of the child node
                    if numParams > n.maxParams {
                        n.maxParams = numParams
                    }
                    numParams--

                    // Check if the wildcard matches
                    if len(path) >= len(n.path) && n.path == path[:len(n.path)] {
                        // check for longer wildcard, e.g. :name and :names
                        if len(n.path) >= len(path) || path[len(n.path)] == '/' {
                            continue walk
                        }
                    }

                    panic("path segment '" + path +
                        "' conflicts with existing wildcard '" + n.path +
                        "' in path '" + fullPath + "'")
                }

                c := path[0]

                // slash after param
                if n.nType == param && c == '/' && len(n.children) == 1 {
                    n = n.children[0]
                    n.priority++
                    continue walk
                }

                // Check if a child with the next path byte exists.
                // NOTE(review): incrementChildPrio is defined elsewhere; its
                // return value is used as the child's index afterwards.
                for i := 0; i < len(n.indices); i++ {
                    if c == n.indices[i] {
                        i = n.incrementChildPrio(i)
                        n = n.children[i]
                        continue walk
                    }
                }

                // Otherwise insert it. A non-wildcard remainder gets a fresh
                // child node; a wildcard remainder is inserted on n itself.
                if c != ':' && c != '*' {
                    // []byte for proper unicode char conversion, see #65
                    n.indices += string([]byte{c})
                    child := &node{
                        maxParams: numParams,
                    }
                    n.children = append(n.children, child)
                    n.incrementChildPrio(len(n.indices) - 1)
                    n = child
                }
                n.insertChild(numParams, path, fullPath, handlers)
                return

            } else if i == len(path) {
                // The new path ends exactly at this node: attach the
                // handlers, refusing to overwrite an existing registration.
                if n.handlers != nil {
                    panic("handlers are already registered for path '" + fullPath + "'")
                }
                n.handlers = handlers
            }
            return
        }
    } else { // Empty tree: insert the route directly and mark n as the root.
        n.insertChild(numParams, path, fullPath, handlers)
        n.nType = root
    }
}

// insertChild stores path and its handlers at n, creating the extra nodes
// needed for every ':' parameter and '*' catch-all found in path.
//
// numParams is the number of wildcards still expected in path and fullPath
// is the complete route, used only in panic messages. It panics when a
// segment holds more than one wildcard, a wildcard is unnamed, a wildcard
// would hide existing children, or a catch-all is not the last element of
// the path or is not directly preceded by '/'.
func (n *node) insertChild(numParams uint8, path string, fullPath string, handlers HandlersChain) {
    var offset int // already handled bytes of the path

    // Scan forward to each wildcard (a byte equal to ':' or '*').
    for i, max := 0, len(path); numParams > 0; i++ {
        c := path[i]
        if c != ':' && c != '*' {
            continue
        }

        // Find the end of the wildcard (the next '/' or the end of path).
        // Another ':' or '*' inside the same segment is an error.
        end := i + 1
        for end < max && path[end] != '/' {
            switch path[end] {
            // the wildcard name must not contain ':' and '*'
            case ':', '*':
                panic("only one wildcard per path segment is allowed, has: '" +
                    path[i:] + "' in path '" + fullPath + "'")
            default:
                end++
            }
        }

        // If this node already has children, inserting a wildcard here
        // would make them unreachable.
        if len(n.children) > 0 {
            panic("wildcard route '" + path[i:end] +
                "' conflicts with existing children in path '" + fullPath + "'")
        }

        // check if the wildcard has a name
        if end-i < 2 {
            panic("wildcards must be named with a non-empty name in path '" + fullPath + "'")
        }

        // ':' parameter.
        if c == ':' {
            // split path at the beginning of the wildcard
            if i > 0 {
                n.path = path[offset:i]
                offset = i
            }

            child := &node{
                nType:     param,
                maxParams: numParams,
            }
            n.children = []*node{child}
            n.wildChild = true
            n = child
            n.priority++
            numParams--

            // The path continues after the parameter: store the parameter
            // on this node and carry on in a new child for the remainder.
            if end < max {
                n.path = path[offset:end]
                offset = end

                child := &node{
                    maxParams: numParams,
                    priority:  1,
                }
                n.children = []*node{child}
                n = child
            }

        } else {
            // '*' catch-all: it must be the last element of the path.
            if end != max || numParams > 1 {
                panic("catch-all routes are only allowed at the end of the path in path '" + fullPath + "'")
            }

            if len(n.path) > 0 && n.path[len(n.path)-1] == '/' {
                panic("catch-all conflicts with existing handle for the path segment root in path '" + fullPath + "'")
            }

            // currently fixed width 1 for '/'
            i--
            // i is -1 when the '*' is the first byte of path (for example
            // adding "/foo*bar" below an existing "/foo"). Report that as a
            // missing '/' instead of indexing out of range.
            if i < 0 || path[i] != '/' {
                panic("no / before catch-all in path '" + fullPath + "'")
            }

            n.path = path[offset:i]

            // first node: catchAll node with empty path
            child := &node{
                wildChild: true,
                nType:     catchAll,
                maxParams: 1,
            }
            n.children = []*node{child}
            n.indices = string(path[i])
            n = child
            n.priority++

            // second node: node holding the variable
            child = &node{
                path:      path[i:],
                nType:     catchAll,
                maxParams: 1,
                handlers:  handlers,
                priority:  1,
            }
            n.children = []*node{child}

            return
        }
    }

    // Store whatever is left of the path together with the handlers. When
    // the path had no wildcard, offset is still 0 and the whole path is
    // stored.
    n.path = path[offset:]
    n.handlers = handlers
}

最後我們要看下根據path獲取路由的方法getValue。這個方法還是比較簡單的，看註釋基本也能明白。

// getValue walks the tree from n looking for the route that matches path.
//
// It returns the handlers registered for the path (nil when none is found),
// the parameters collected from wildcard segments, and tsr, the trailing
// slash recommendation: set when no handlers were found for path itself but
// the checks below indicate a route that differs only by a trailing slash.
// po is the Params slice to append to. When unescape is true, parameter
// values are passed through url.QueryUnescape, falling back to the raw value
// on error.
func (n *node) getValue(path string, po Params, unescape bool) (handlers HandlersChain, p Params, tsr bool) {
    p = po
walk:
    for {
        if len(path) > len(n.path) {
            if path[:len(n.path)] == n.path {
                // n.path is a proper prefix of path: consume it.
                path = path[len(n.path):]
                // This node has no wildcard child: compare the next byte of
                // path against indices to pick the child and keep walking.
                if !n.wildChild {
                    c := path[0]
                    for i := 0; i < len(n.indices); i++ {
                        if c == n.indices[i] {
                            n = n.children[i]
                            continue walk
                        }
                    }

                    // No child matches. Recommend a redirect when only a
                    // trailing "/" is left over and this node has handlers.
                    tsr = path == "/" && n.handlers != nil
                    return
                }

                // handle wildcard child
                n = n.children[0]
                switch n.nType {
                case param:
                    // ':' parameter: the value runs up to the next '/' or
                    // the end of path.
                    end := 0
                    for end < len(path) && path[end] != '/' {
                        end++
                    }

                    // save param value
                    if cap(p) < int(n.maxParams) {
                        p = make(Params, 0, n.maxParams)
                    }
                    i := len(p)
                    p = p[:i+1] // expand slice within preallocated capacity
                    p[i].Key = n.path[1:] // name without the leading ':'
                    val := path[:end]
                    if unescape {
                        var err error
                        if p[i].Value, err = url.QueryUnescape(val); err != nil {
                            p[i].Value = val // fallback, in case of error
                        }
                    } else {
                        p[i].Value = val
                    }

                    // More of the path remains after the parameter: keep
                    // walking if this node has a child.
                    if end < len(path) {
                        if len(n.children) > 0 {
                            path = path[end:]
                            n = n.children[0]
                            continue walk
                        }

                        // ... but we can't
                        tsr = len(path) == end+1
                        return
                    }
                    // The path ends with this parameter: return its handlers
                    // if it has any.
                    if handlers = n.handlers; handlers != nil {
                        return
                    }
                    if len(n.children) == 1 {
                        // No handle found. Check if a handle for this path + a
                        // trailing slash exists for TSR recommendation
                        n = n.children[0]
                        tsr = n.path == "/" && n.handlers != nil
                    }

                    return

                case catchAll:
                    // '*' catch-all: the whole remaining path is the value.
                    if cap(p) < int(n.maxParams) {
                        p = make(Params, 0, n.maxParams)
                    }
                    i := len(p)
                    p = p[:i+1] // expand slice within preallocated capacity
                    p[i].Key = n.path[2:] // name without the leading "/*"
                    if unescape {
                        var err error
                        if p[i].Value, err = url.QueryUnescape(path); err != nil {
                            p[i].Value = path // fallback, in case of error
                        }
                    } else {
                        p[i].Value = path
                    }

                    handlers = n.handlers
                    return

                default:
                    panic("invalid node type")
                }
            }
        } else if path == n.path {
            // We should have reached the node containing the handle.
            // Check if this node has a handle registered.
            if handlers = n.handlers; handlers != nil {
                return
            }

            if path == "/" && n.wildChild && n.nType != root {
                tsr = true
                return
            }

            // No handle found. Check if a handle for this path + a
            // trailing slash exists for trailing slash recommendation
            for i := 0; i < len(n.indices); i++ {
                if n.indices[i] == '/' {
                    n = n.children[i]
                    tsr = (len(n.path) == 1 && n.handlers != nil) ||
                        (n.nType == catchAll && n.children[0].handlers != nil)
                    return
                }
            }

            return
        }

        // Nothing found. We can recommend to redirect to the same URL with an
        // extra trailing slash if a leaf exists for that path
        tsr = (path == "/") ||
            (len(n.path) == len(path)+1 && n.path[len(path)] == '/' &&
                path == n.path[:len(n.path)-1] && n.handlers != nil)
        return
    }
}

總結

Gin的路由是它的特色，其實就是因爲它的存儲結構。基數樹的存儲結構可以很快地查詢到對應路由並且執行到handler，避免了每次請求循環所有路由的邏輯，提升了Gin整體的性能。試想如果一個大型項目中GET路由有100個，如果每次請求都去循環100次查找，性能會很差；如果使用基數樹的存儲方式，可能只需要經過幾次的查詢。

Gin路由代碼很長，其中大部分是處理帶有參數的節點的邏輯。下一次的學習中，還是老規矩，自己模仿着寫一個基數樹存儲結構的路由查找邏輯，去除掉那些參數邏輯，只留下主要核心邏輯。