Understanding the Implementation of Golang's map from the Ground Up

Definition

A map in Go is the familiar hash table. Its underlying implementation is an hmap, which maintains an array of buckets; each bucket normally holds 8 key/value pairs. When a bucket needs to hold more than 8 (hash collisions pile up), it is extended through the overflow field of the extra struct, chaining onward indefinitely, i.e. separate chaining. Let's look at the hmap struct first:

type hmap struct {
    count     int // number of elements in the map
    flags     uint8 // read/write state flags, mainly for race detection to catch concurrent read/write
    B         uint8  // log2 of the bucket count: the map holds 2^B buckets
    noverflow uint16 // approximate number of overflow buckets
    hash0     uint32 // hash seed
    
    buckets    unsafe.Pointer // pointer to the buckets array
    oldbuckets unsafe.Pointer // pointer to the previous buckets array while growing
    nevacuate  uintptr        // number of buckets already evacuated while growing
    
    extra *mapextra
}

type mapextra struct {
	overflow    *[]*bmap
	oldoverflow *[]*bmap

	nextOverflow *bmap
}
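For intuition, B works as a power-of-two exponent. Here is a simplified sketch of the runtime's bucketShift/bucketMask helpers; the low B bits of a key's hash select its bucket, which is exactly the `hash & bucketMask(h.B)` you will see in mapassign below:

// Simplified sketches of runtime.bucketShift / runtime.bucketMask.
// A map with field B has 1<<B buckets; masking a hash with bucketMask(B)
// keeps its low B bits, which pick the bucket.
func bucketShift(B uint8) uintptr { return uintptr(1) << B }
func bucketMask(B uint8) uintptr  { return bucketShift(B) - 1 }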

The bucket struct:

// A bucket for a Go map.
type bmap struct {
    // tophash generally contains the top byte of the hash value
    // for each key in this bucket. If tophash[0] < minTopHash,
    // tophash[0] is a bucket evacuation state instead.
    tophash [bucketCnt]uint8    // records the top 8 bits of each key's hash
    // Followed by bucketCnt keys and then bucketCnt elems.
    // NOTE: packing all the keys together and then all the elems together makes the
    // code a bit more complicated than alternating key/elem/key/elem/... but it allows
    // us to eliminate padding which would be needed for, e.g., map[int64]int8.
    // Followed by an overflow pointer.
}
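The tophash byte recorded above is derived from the top 8 bits of the key's hash. A sketch of the runtime's tophash function, assuming a 64-bit platform (the runtime uses sys.PtrSize rather than the hard-coded 8; minTopHash = 5 marks the start of usable values):

func tophash(hash uintptr) uint8 {
	top := uint8(hash >> (8*8 - 8)) // top byte of the hash on a 64-bit platform
	if top < minTopHash {
		top += minTopHash // skip the reserved sentinel range (empty/evacuated markers)
	}
	return top
}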

The key/value pairs are laid out as key0/key1/key2/.../val0/val1/val2/.... Although this is somewhat more complicated to maintain than alternating key/value pairs, it eliminates the padding that aligned, fixed-length CPU reads would otherwise require, e.g. for map[int64]int8.
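To see the saving, take map[int64]int8 as a concrete example; this tiny program just does the arithmetic (illustrative only, not runtime code):

package main

import "fmt"

func main() {
	// One bucket of 8 entries in a map[int64]int8:
	interleaved := 8 * (8 + 1 + 7) // each value padded by 7 bytes to 8-byte alignment
	grouped := 8*8 + 8*1           // 8 keys together, then 8 values together: no padding
	fmt.Println(interleaved, grouped) // 128 vs 72 bytes per bucket
}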

Initialization && Insertion

package main

func main() {
	a := map[string]int{"one": 1, "two": 2, "three": 3}

	_ = a["one"]
}

Initializing a map with 3 key/value pairs:

TEXT main.main(SB) /Users/such/gomodule/runtime/main.go
=>      main.go:3       0x10565fb*      4881ec70010000          sub rsp, 0x170
        main.go:3       0x1056602       4889ac2468010000        mov qword ptr [rsp+0x168], rbp
        main.go:3       0x105660a       488dac2468010000        lea rbp, ptr [rsp+0x168]
        main.go:4       0x105664b       488b6d00                mov rbp, qword ptr [rbp]
        main.go:4       0x105666d       e8de9cfeff              call $runtime.fastrand
        main.go:4       0x1056672       488b442450              mov rax, qword ptr [rsp+0x50]
        main.go:4       0x1056677       8400                    test byte ptr [rax], al
        main.go:4       0x10566c6       48894c2410              mov qword ptr [rsp+0x10], rcx
        main.go:4       0x10566cb       4889442418              mov qword ptr [rsp+0x18], rax
        main.go:4       0x10566d0       e80b8efbff              call $runtime.mapassign_faststr
        main.go:4       0x1056726       48894c2410              mov qword ptr [rsp+0x10], rcx
        main.go:4       0x105672b       4889442418              mov qword ptr [rsp+0x18], rax
        main.go:4       0x1056730       e8ab8dfbff              call $runtime.mapassign_faststr
        main.go:4       0x1056786       4889442410              mov qword ptr [rsp+0x10], rax
        main.go:4       0x105678b       48894c2418              mov qword ptr [rsp+0x18], rcx
        main.go:4       0x1056790       e84b8dfbff              call $runtime.mapassign_faststr

(Some instructions omitted.) You can see that the declaration makes three consecutive call $runtime.mapassign_faststr calls, one per key/value pair.

func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
	if h == nil {
		panic(plainError("assignment to entry in nil map"))
	}
	if raceenabled {
		callerpc := getcallerpc()
		pc := funcPC(mapassign)
		racewritepc(unsafe.Pointer(h), callerpc, pc)
		raceReadObjectPC(t.key, key, callerpc, pc)
	}
	// As with slice operations we saw earlier, race detection runs here.
	if msanenabled {
		msanread(key, t.key.size)
	}
	
	// This is the spot that throws when the map is written concurrently.
	if h.flags&hashWriting != 0 {
		throw("concurrent map writes")
	}
	// t is the map's type descriptor, so the key type, and hence the hash algorithm, is known at compile time.
	alg := t.key.alg
	hash := alg.hash(key, uintptr(h.hash0))

	// Mark the map as being written.
	h.flags ^= hashWriting

	if h.buckets == nil {
		h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
	}

again:  // recompute which bucket the hash maps to
	bucket := hash & bucketMask(h.B)
	if h.growing() {
		growWork(t, h, bucket)
	}
	b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
	top := tophash(hash)

	var inserti *uint8
	var insertk unsafe.Pointer
	var elem unsafe.Pointer
bucketloop:
    // Walk the bucket and its overflow chain.
	for {
		for i := uintptr(0); i < bucketCnt; i++ {
			if b.tophash[i] != top {
				if isEmpty(b.tophash[i]) && inserti == nil {
					inserti = &b.tophash[i]
					insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
					elem = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
				}
				if b.tophash[i] == emptyRest {
					break bucketloop
				}
				continue
			}
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			if t.indirectkey() {
				k = *((*unsafe.Pointer)(k))
			}
			// The equal function is likewise selected per key type at compile time.
			if !alg.equal(key, k) {
				continue
			}
			// The key already exists in the map, so update its value.
			if t.needkeyupdate() {
				typedmemmove(t.key, k, key)
			}
			elem = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
			goto done
		}
		ovf := b.overflow(t)
		if ovf == nil {
			break
		}
		b = ovf
	}

	// Did not find mapping for key. Allocate new cell & add entry.

	// If we hit the max load factor or we have too many overflow buckets,
	// and we're not already in the middle of growing, start growing.
	if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
		hashGrow(t, h)
		goto again // Growing the table invalidates everything, so try again
	}

	if inserti == nil {
		// No insert slot was found: every bucket in the chain is full,
		// so hang a new overflow bucket off the current one.
		newb := h.newoverflow(t, b)
		inserti = &newb.tophash[0]
		insertk = add(unsafe.Pointer(newb), dataOffset)
		elem = add(insertk, bucketCnt*uintptr(t.keysize))
	}

	// store new key/elem at insert position
	if t.indirectkey() {
		kmem := newobject(t.key)
		*(*unsafe.Pointer)(insertk) = kmem
		insertk = kmem
	}
	if t.indirectelem() {
		vmem := newobject(t.elem)
		*(*unsafe.Pointer)(elem) = vmem
	}
	typedmemmove(t.key, insertk, key)
	*inserti = top
	h.count++

done:
	// Check once more (in the spirit of double-checked locking) for a concurrent write.
	if h.flags&hashWriting == 0 {
		throw("concurrent map writes")
	}
	h.flags &^= hashWriting
	if t.indirectelem() {
		elem = *((*unsafe.Pointer)(elem))
	}
	return elem
}
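Incidentally, the hashWriting check near the top of mapassign is exactly what fires in code like the following minimal repro; the runtime aborts with fatal error: concurrent map writes (this is a hard crash, not a recoverable panic):

package main

// Two goroutines write the same map with no synchronization;
// expect the runtime to abort with "concurrent map writes".
func main() {
	m := map[int]int{}
	go func() {
		for i := 0; ; i++ {
			m[i] = i
		}
	}()
	for i := 0; ; i++ {
		m[i] = i
	}
}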

Lookup

TEXT main.main(SB) /Users/such/gomodule/runtime/main.go
=>      main.go:6       0x10567a9*      488d0550e10000          lea rax, ptr [rip+0xe150]
        main.go:6       0x10567c5       4889442410              mov qword ptr [rsp+0x10], rax
        main.go:6       0x10567ca       48c744241803000000      mov qword ptr [rsp+0x18], 0x3
        main.go:6       0x10567d3       e89885fbff              call $runtime.mapaccess1_faststr

When looking up a key in a map, the runtime calls mapaccess1, which closely mirrors the insertion path.

func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
	if raceenabled && h != nil {
		callerpc := getcallerpc()
		pc := funcPC(mapaccess1)
		racereadpc(unsafe.Pointer(h), callerpc, pc)
		raceReadObjectPC(t.key, key, callerpc, pc)
	}
	if msanenabled && h != nil {
		msanread(key, t.key.size)
	}
	if h == nil || h.count == 0 {
		if t.hashMightPanic() {
			t.key.alg.hash(key, 0) // see issue 23734
		}
		return unsafe.Pointer(&zeroVal[0])
	}
	if h.flags&hashWriting != 0 {
		throw("concurrent map read and map write")
	}
	alg := t.key.alg
	hash := alg.hash(key, uintptr(h.hash0))
	m := bucketMask(h.B)
	b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
	if c := h.oldbuckets; c != nil {
		if !h.sameSizeGrow() {
			// There used to be half as many buckets; mask down one more power of two.
			m >>= 1
		}
		oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize)))
		if !evacuated(oldb) {
			b = oldb
		}
	}
	top := tophash(hash)
bucketloop:
	for ; b != nil; b = b.overflow(t) {
		for i := uintptr(0); i < bucketCnt; i++ {
			if b.tophash[i] != top {
				if b.tophash[i] == emptyRest {
					break bucketloop
				}
				continue
			}
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			if t.indirectkey() {
				k = *((*unsafe.Pointer)(k))
			}
			// If the key matches, return a pointer to its value. The offset from the
			// bucket base is: dataOffset (the size of the bmap header, rounded up for
			// alignment, e.g. 8-byte alignment on amd64) + space for all bucketCnt
			// keys + i (the current index) preceding values.
			if alg.equal(key, k) {
				e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
				if t.indirectelem() {
					e = *((*unsafe.Pointer)(e))
				}
				return e
			}
		}
	}
	// If the key was not found, return a pointer to the shared zero value.
	return unsafe.Pointer(&zeroVal[0])
}

The map code has a similar function, mapaccess2, which is what the two-result form _, ok := a["one"] (the usual way to test whether a key exists) compiles to; you can also tell the difference from its extra boolean return value.
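A quick illustration of the two forms (reusing the map from the earlier example):

package main

import "fmt"

func main() {
	a := map[string]int{"one": 1, "two": 2, "three": 3}

	v := a["four"]      // mapaccess1: a missing key yields the zero value
	v2, ok := a["four"] // mapaccess2: ok reports whether the key was present
	fmt.Println(v, v2, ok) // 0 0 false
}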

Growing

As with slices, when a map keeps growing, in the worst case each bucket accumulates many overflow buckets and degenerates into a linked list, so a rehash is needed. Growth is generally triggered when h.count > loadFactor * 2^B. The load factor is the number of elements divided by the number of buckets; Go uses loadFactorNum / loadFactorDen = 6.5. Why not 1? Unlike a Redis dictEntry, which holds a single key/value pair, a Go bucket normally holds up to 8 pairs. Why 6.5 specifically? The author gives this data:

loadFactor  %overflow  bytes/entry  hitprobe  missprobe
4.00        2.13       20.77        3.00      4.00
4.50        4.05       17.30        3.25      4.50
5.00        6.85       14.77        3.50      5.00
5.50        10.55      12.94        3.75      5.50
6.00        15.27      11.67        4.00      6.00
6.50        20.90      10.79        4.25      6.50
7.00        27.14      10.15        4.50      7.00
7.50        34.03      9.73         4.75      7.50
8.00        41.10      9.40         5.00      8.00

loadFactor: the load factor;
%overflow: the percentage of buckets that have an overflow bucket;
bytes/entry: overhead bytes used per key/value pair;
hitprobe: average number of entries inspected when looking up a present key;
missprobe: average number of entries inspected when looking up an absent key;

Growth (migration) is usually triggered when the load factor exceeds 6.5, i.e. each bucket stores more than 6.5 key/value pairs on average, or when the number of overflow buckets exceeds 2^15. It comes in two flavors:
Case 1: after many inserts and deletes, the key/value pairs are spread unevenly across buckets and memory utilization is poor, so entries are migrated into fresh buckets (note: the bucket count does not increase).
Case 2: a real grow, caused by an excessive load factor; the bucket count doubles.
Either kind of rehash goes through the hashGrow function (the trigger condition is sketched in code after this list):

  1. Save a pointer to the old buckets array from the hmap
  2. Allocate the new buckets array and install it as the hmap's buckets field
  3. Point extra.oldoverflow at the old overflow list, and reset overflow to nil
  4. While growing, migrate incrementally: each growWork call moves the key/value pairs of a bucket
  5. Once everything has been migrated, release the old memory
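The trigger condition checked in mapassign boils down to two predicates. Here is a simplified, self-contained sketch of the runtime's overLoadFactor and tooManyOverflowBuckets (loadFactorNum/loadFactorDen = 13/2 = 6.5; the cap on B is where the 2^15 overflow threshold above comes from):

package main

const (
	bucketCnt     = 8
	loadFactorNum = 13
	loadFactorDen = 2
)

// overLoadFactor: true once count exceeds 6.5 * 2^B (for maps past one bucket).
func overLoadFactor(count int, B uint8) bool {
	return count > bucketCnt && uintptr(count) > loadFactorNum*((uintptr(1)<<B)/loadFactorDen)
}

// tooManyOverflowBuckets: true when overflow buckets are roughly as numerous
// as regular buckets; noverflow is an approximate counter in the runtime.
func tooManyOverflowBuckets(noverflow uint16, B uint8) bool {
	if B > 15 {
		B = 15
	}
	return noverflow >= uint16(1)<<(B&15)
}

func main() {
	println(overLoadFactor(209, 5)) // true: 209 > 6.5 * 32 = 208
}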

Note: like Redis, Go rehashes incrementally. It does not migrate all buckets at once; instead the migration of keys is amortized over subsequent inserts and deletes, and only after every key/value pair in the old buckets has been moved does it release oldbuckets and extra.oldoverflow. (Practical advice: avoid storing huge amounts of data in a map if you can, and declare the capacity up front to avoid repeated growth.)
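The incremental step lives in growWork, which mapassign and mapdelete call before touching a bucket: it evacuates the old bucket backing the one about to be written, plus one more bucket to guarantee forward progress (from the runtime source; evacuate does the actual copying):

func growWork(t *maptype, h *hmap, bucket uintptr) {
	// make sure we evacuate the oldbucket corresponding
	// to the bucket we're about to use
	evacuate(t, h, bucket&h.oldbucketmask())

	// evacuate one more oldbucket to make progress on growing
	if h.growing() {
		evacuate(t, h, h.nevacuate)
	}
}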

Deletion

func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
// ...omitted: write-flag checks, hashing, bucket selection, and top := tophash(hash), as in mapassign
search:
	for ; b != nil; b = b.overflow(t) {
		for i := uintptr(0); i < bucketCnt; i++ {
			if b.tophash[i] != top {
				if b.tophash[i] == emptyRest {
					break search
				}
				continue
			}
			// Locate and compare the key (restored from the runtime source;
			// the excerpt skipped straight to the clearing code below).
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			k2 := k
			if t.indirectkey() {
				k2 = *((*unsafe.Pointer)(k2))
			}
			if !alg.equal(key, k2) {
				continue
			}
			// Only clear the key if it contains pointers.
			if t.indirectkey() {
				*(*unsafe.Pointer)(k) = nil
			} else if t.key.ptrdata != 0 {
				memclrHasPointers(k, t.key.size)
			}
			e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
			if t.indirectelem() {
				*(*unsafe.Pointer)(e) = nil
			} else if t.elem.ptrdata != 0 {
				memclrHasPointers(e, t.elem.size)
			} else {
				memclrNoHeapPointers(e, t.elem.size)
			}
			
			b.tophash[i] = emptyOne
			
			if i == bucketCnt-1 {
				if b.overflow(t) != nil && b.overflow(t).tophash[0] != emptyRest {
					goto notLast
				}
			} else {
				if b.tophash[i+1] != emptyRest {
					goto notLast
				}
			}
			for {
				b.tophash[i] = emptyRest
				if i == 0 {
					if b == bOrig {
						break // beginning of initial bucket, we're done.
					}
					// Find previous bucket, continue at its last entry.
					c := b
					for b = bOrig; b.overflow(t) != c; b = b.overflow(t) {
					}
					i = bucketCnt - 1
				} else {
					i--
				}
				if b.tophash[i] != emptyOne {
					break
				}
			}
		notLast:
			h.count--
			break search
		}
	}
	// ...omitted
}

If a key or value is stored indirectly (as a pointer), the slot is simply set to nil; if it is stored inline and contains pointers, its bytes are cleared starting from its position. Beyond that, deletion only writes an empty marker into the corresponding tophash slot (b.tophash[i] = emptyOne); no memory is actually freed, because frequently allocating and releasing memory is expensive, and truly releasing it is left to the GC. If the bucket ends with a run of emptyOne marks, they are finally upgraded to emptyRest. Both emptyOne and emptyRest mark an empty cell; the difference is that emptyRest additionally guarantees that all higher-index slots and all overflow buckets are empty, presumably so that when the space is later reused, inserts and lookups searching for a slot can stop probing early.
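For reference, these are the tophash sentinel values as defined in the runtime; any tophash byte below minTopHash is a cell-state marker rather than a real hash fragment:

const (
	emptyRest      = 0 // this cell is empty, and so are all higher cells and all overflow buckets
	emptyOne       = 1 // this cell is empty
	evacuatedX     = 2 // key/elem valid; entry evacuated to the first half of the larger table
	evacuatedY     = 3 // key/elem valid; entry evacuated to the second half of the larger table
	evacuatedEmpty = 4 // cell empty; bucket evacuated
	minTopHash     = 5 // minimum tophash for a normally filled cell
)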

Recommendations

Run two experiments. In the first, pre-allocate the map's full capacity and then append k/v pairs; in the second, start from a zero-capacity map and append:

package main

import "testing"
var count int = 100000
func addition(m map[int]int) map[int]int {
	for i := 0; i < count; i++ {
		m[i] = i
	}
	return m
}
func BenchmarkGrows(b *testing.B) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		m := make(map[int]int)
		addition(m)
	}
}
func BenchmarkNoGrows(b *testing.B) {
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		m := make(map[int]int, count)
		addition(m)
	}
}
$ go test -bench=. ./
goos: darwin
goarch: amd64
# benchmark name  -GOMAXPROCS   iterations    avg time per op (ns)
BenchmarkGrows-4             200           8298505 ns/op
BenchmarkNoGrows-4           300           4627118 ns/op
PASS
ok      _/Users/such/gomodule/runtime   4.401s

The pre-allocated case runs about 80% faster on average than the zero-capacity case: the data copying and rehashing during growth are expensive!
Now look at the number of allocations:

$ go test -bench=. -benchmem ./
goos: darwin
goarch: amd64
# benchmark name  -GOMAXPROCS   iterations    avg time (ns/op)   bytes allocated per op   allocations per op
BenchmarkGrows-4             200           9265553 ns/op         5768155 B/op       4010 allocs/op
BenchmarkNoGrows-4           300           4855000 ns/op         2829115 B/op       1678 allocs/op
PASS
ok      _/Users/such/gomodule/runtime   4.704s

Running both versions the same number of times, the growing version also triggers roughly twice as many GC cycles:

func main() {
	for i := 0; i < 5; i++ {
		n := make(map[int]int, count)
		addition(n)
		//m := make(map[int]int)
		//addition(m)
	}
}
// Group 1: pre-allocated capacity
$ go build -o growth && GODEBUG=gctrace=1 ./growth
gc 1 @0.006s 0%: 0.002+0.091+0.015 ms clock, 0.011+0.033/0.011/0.088+0.060 ms cpu, 5->5->2 MB, 6 MB goal, 4 P
gc 2 @0.012s 0%: 0.001+0.041+0.002 ms clock, 0.007+0.032/0.007/0.033+0.009 ms cpu, 5->5->2 MB, 6 MB goal, 4 P
gc 3 @0.017s 0%: 0.002+0.090+0.010 ms clock, 0.008+0.035/0.006/0.084+0.041 ms cpu, 5->5->2 MB, 6 MB goal, 4 P
gc 4 @0.022s 0%: 0.001+0.056+0.008 ms clock, 0.007+0.026/0.003/0.041+0.034 ms cpu, 5->5->2 MB, 6 MB goal, 4 P

// Group 2: no pre-allocation
$ go build -o growth && GODEBUG=gctrace=1 ./growth
gc 1 @0.005s 0%: 0.001+0.10+0.001 ms clock, 0.007+0.076/0.004/0.13+0.007 ms cpu, 5->5->3 MB, 6 MB goal, 4 P
gc 2 @0.012s 0%: 0.002+0.071+0.010 ms clock, 0.008+0.016/0.010/0.075+0.040 ms cpu, 5->5->0 MB, 7 MB goal, 4 P
gc 3 @0.015s 0%: 0.001+0.13+0.009 ms clock, 0.007+0.006/0.037/0.082+0.036 ms cpu, 4->5->3 MB, 5 MB goal, 4 P
gc 4 @0.021s 0%: 0.001+0.13+0.009 ms clock, 0.007+0.040/0.007/0.058+0.038 ms cpu, 6->6->1 MB, 7 MB goal, 4 P
gc 5 @0.024s 0%: 0.001+0.084+0.001 ms clock, 0.005+0.036/0.006/0.052+0.006 ms cpu, 4->4->3 MB, 5 MB goal, 4 P
gc 6 @0.030s 0%: 0.002+0.075+0.001 ms clock, 0.008+0.056/0.004/0.072+0.007 ms cpu, 6->6->1 MB, 7 MB goal, 4 P
gc 7 @0.033s 0%: 0.013+0.11+0.003 ms clock, 0.053+0.047/0.013/0.075+0.012 ms cpu, 4->4->3 MB, 5 MB goal, 4 P
gc 8 @0.041s 0%: 0.002+0.073+0.024 ms clock, 0.008+0.033/0.010/0.067+0.097 ms cpu, 6->6->1 MB, 7 MB goal, 4 P
gc 9 @0.043s 0%: 0.001+0.067+0.001 ms clock, 0.006+0.046/0.003/0.070+0.006 ms cpu, 4->4->3 MB, 5 MB goal, 4 P

Now take a map with 10 million k/v pairs and test under what conditions its memory is actually reclaimed:

package main

import "runtime/debug"
var count = 10000000
var dict = make(map[int]int, count)
func addition() {
	for i := 0; i < count; i++ {
		dict[i] = i
	}
}
func clear() {
	for k := range dict {
		delete(dict, k)
	}
	//dict = nil
}
func main() {
	addition()
	clear()
	debug.FreeOSMemory()
}

$ go build -o clear && GODEBUG=gctrace=1 ./clear
gc 1 @0.007s 0%: 0.006+0.12+0.015 ms clock, 0.025+0.037/0.038/0.12+0.061 ms cpu, 306->306->306 MB, 307 MB goal, 4 P
gc 2 @0.963s 0%: 0.004+1.0+0.025 ms clock, 0.017+0/0.96/0.48+0.10 ms cpu, 307->307->306 MB, 612 MB goal, 4 P
gc 3 @1.381s 0%: 0.004+0.081+0.003 ms clock, 0.018+0/0.051/0.086+0.013 ms cpu, 309->309->306 MB, 612 MB goal, 4 P (forced)
scvg-1: 14 MB released
scvg-1: inuse: 306, idle: 77, sys: 383, released: 77, consumed: 306 (MB)

After deleting every k/v pair, the heap size (goal) does not change:

func clear() {
	for k := range dict {
		delete(dict, k)
	}
	dict = nil
}

$ go build -o clear && GODEBUG=gctrace=1 ./clear
gc 1 @0.006s 0%: 0.004+0.12+0.010 ms clock, 0.019+0.035/0.016/0.17+0.043 ms cpu, 306->306->306 MB, 307 MB goal, 4 P
gc 2 @0.942s 0%: 0.003+1.0+0.010 ms clock, 0.012+0/0.85/0.54+0.043 ms cpu, 307->307->306 MB, 612 MB goal, 4 P
gc 3 @1.321s 0%: 0.003+0.072+0.002 ms clock, 0.013+0/0.050/0.090+0.010 ms cpu, 309->309->0 MB, 612 MB goal, 4 P (forced)
scvg-1: 319 MB released
scvg-1: inuse: 0, idle: 383, sys: 383, released: 383, consumed: 0 (MB)

Only after clearing the map and setting it to nil is the memory actually released. (The runtime itself forces a runtime.GC() every 2 minutes and scavenges, i.e. returns memory to the OS, about every 5 minutes, so there is usually no need to obsess over whether memory has truly been released; keeping it around also enables reuse. But when you do need it released for real, it helps to know exactly what to do.)

Reference

Map: golang.org/src/runtime…
Benchmark: dave.cheney.net/2013/06/30/…
Gctrace: dave.cheney.net/tag/godebug
FreeOsMemory: golang.org/pkg/runtime…
