在使用map的過程中,有兩個問題是常常會遇到的:讀寫衝突和遍歷無序性。爲什麼會這樣呢,底層是怎麼實現的呢?帶着這兩個問題,我簡單地瞭解了一下map的增刪改查及遍歷的實現。
// hmap is the header of a Go map: it records the entry count, state flags,
// hash seed and the current/old bucket arrays.
type hmap struct {
	// Note: the format of the hmap is also encoded in cmd/compile/internal/gc/reflect.go.
	// Make sure this stays in sync with the compiler's definition.
	count     int    // number of live entries: # live cells == size of map. Must be first (used by len() builtin)
	flags     uint8  // state bits of the map (hashWriting, iterator, oldIterator, sameSizeGrow)
	B         uint8  // log_2 of # of buckets, i.e. there are 2^B buckets (can hold up to loadFactor * 2^B items)
	noverflow uint16 // approximate number of overflow buckets; see incrnoverflow for details
	hash0     uint32 // random hash seed

	buckets    unsafe.Pointer // array of 2^B Buckets. may be nil if count==0.
	oldbuckets unsafe.Pointer // previous bucket array; only used while the map is growing
	nevacuate  uintptr        // progress counter for evacuation (buckets less than this have been evacuated)

	extra *mapextra // optional fields: additional overflow-bucket bookkeeping
}
複製代碼
// mapextra holds the optional overflow-bucket bookkeeping that hmap.extra
// points to.
type mapextra struct {
	// If both key and value do not contain pointers and are inline, then we mark bucket
	// type as containing no pointers. This avoids scanning such maps.
	// However, bmap.overflow is a pointer. In order to keep overflow buckets
	// alive, we store pointers to all overflow buckets in hmap.extra.overflow and hmap.extra.oldoverflow.
	// overflow and oldoverflow are only used if key and value do not contain pointers.
	// overflow contains overflow buckets for hmap.buckets.
	// oldoverflow contains overflow buckets for hmap.oldbuckets.
	// The indirection allows to store a pointer to the slice in hiter.
	overflow    *[]*bmap
	oldoverflow *[]*bmap

	// nextOverflow holds a pointer to a free overflow bucket.
	nextOverflow *bmap
}
複製代碼
// bmap is a single bucket of a Go map. Only the tophash array is declared as
// a field; the keys, the values and the overflow pointer follow it in memory
// and are reached with pointer arithmetic.
type bmap struct {
	// tophash generally contains the top byte of the hash value
	// for each key in this bucket. If tophash[0] < minTopHash,
	// tophash[0] is a bucket evacuation state instead.
	tophash [bucketCnt]uint8
	// Followed by bucketCnt keys and then bucketCnt values.
	// NOTE: packing all the keys together and then all the values together makes the
	// code a bit more complicated than alternating key/value/key/value/... but it allows
	// us to eliminate padding which would be needed for, e.g., map[int64]int8.
	// Followed by an overflow pointer.
}
複製代碼
// stringStruct pairs a data pointer with a length.
// NOTE(review): presumably the runtime's in-memory layout of a string value;
// confirm against runtime/string.go.
type stringStruct struct {
	str unsafe.Pointer // pointer to the underlying data
	len int            // length; presumably in bytes — TODO confirm
}
複製代碼
map遍歷時用到的結構,startBucket+offset設定了開始遍歷的位置,保證map遍歷的無序性
// hiter is the iteration state used while ranging over a map.
// startBucket and offset fix the position at which the iteration begins.
type hiter struct {
	// Pointer to the current key.
	key unsafe.Pointer // Must be in first position. Write nil to indicate iteration end (see cmd/internal/gc/range.go).
	// Pointer to the current value.
	value unsafe.Pointer // Must be in second position (see cmd/internal/gc/range.go).
	t     *maptype
	// The map being iterated.
	h *hmap
	// The bucket array as captured when the iterator was initialised.
	buckets unsafe.Pointer // bucket ptr at hash_iter initialization time
	// The bucket currently being walked.
	bptr *bmap // current bucket
	// Copy of hmap.extra.overflow.
	overflow *[]*bmap // keeps overflow buckets of hmap.buckets alive
	// Copy of hmap.extra.oldoverflow.
	oldoverflow *[]*bmap // keeps overflow buckets of hmap.oldbuckets alive
	// Index of the bucket the iteration started at.
	startBucket uintptr // bucket iteration started at
	// Slot offset inside each bucket at which iteration starts.
	offset      uint8 // intra-bucket offset to start from during iteration (should be big enough to hold bucketCnt-1)
	wrapped     bool  // already wrapped around from end of bucket array to beginning
	B           uint8 // snapshot of hmap.B taken by mapiterinit
	i           uint8 // next slot counter within the current bucket (before offset is applied)
	bucket      uintptr // index of the next bucket to visit
	checkBucket uintptr // new-bucket index to filter on while walking an unevacuated old bucket, or noCheck
}
複製代碼
這裏的keys、values和*overflow三個變量在結構體中並沒有體現,但是在源碼中一直有爲它們預留位置,所以在示意圖中就展現出來了;keys和values其實是長度爲8的數組
我們簡單寫個demo,通過go tool
來分析一下底層所對應的函數
func main() {
	m := make(map[interface{}]interface{}, 16) // compiles to runtime.makemap
	m["111"] = 1                               // runtime.mapassign
	m["222"] = 2                               // runtime.mapassign
	m["444"] = 4                               // runtime.mapassign
	_ = m["444"]                               // single-result read: runtime.mapaccess1
	_, _ = m["444"]                            // comma-ok read: runtime.mapaccess2
	delete(m, "444")                           // runtime.mapdelete
	for range m { // runtime.mapiterinit, then runtime.mapiternext per step
	}
}
複製代碼
▶ go tool objdump -s "main.main" main | grep CALL
main.go:4 0x455c74 e8f761fbff CALL runtime.makemap(SB)
main.go:5 0x455ce1 e8da6dfbff CALL runtime.mapassign(SB)
main.go:6 0x455d7b e8406dfbff CALL runtime.mapassign(SB)
main.go:7 0x455e15 e8a66cfbff CALL runtime.mapassign(SB)
main.go:8 0x455e88 e89363fbff CALL runtime.mapaccess1(SB)
main.go:9 0x455ec4 e84766fbff CALL runtime.mapaccess2(SB)
main.go:10 0x455f00 e85b72fbff CALL runtime.mapdelete(SB)
main.go:12 0x455f28 e804a7ffff CALL 0x450631
main.go:12 0x455f53 e8b875fbff CALL runtime.mapiterinit(SB)
main.go:12 0x455f75 e88677fbff CALL runtime.mapiternext(SB)
main.go:7 0x455f8f e81c9cffff CALL runtime.gcWriteBarrier(SB)
main.go:6 0x455f9c e80f9cffff CALL runtime.gcWriteBarrier(SB)
main.go:5 0x455fa9 e8029cffff CALL runtime.gcWriteBarrier(SB)
main.go:3 0x455fb3 e8f87dffff CALL runtime.morestack_noctxt(SB)
複製代碼
makemap建立一個hmap結構體,並賦予這個變量一些初始的屬性
// makemap builds (or initialises) the hmap behind make(map[k]v, hint).
//
// t is the map's type descriptor and hint the requested capacity. If h is
// non-nil it is initialised in place; otherwise a fresh hmap is allocated.
// The initialised header is returned.
func makemap(t *maptype, hint int, h *hmap) *hmap {
	// A negative or implausibly large hint is treated as "no hint".
	if hint < 0 || hint > int(maxSliceCap(t.bucket.size)) {
		hint = 0
	}

	if h == nil {
		h = new(hmap)
	}
	// Every map gets its own random hash seed.
	h.hash0 = fastrand()

	// Pick the smallest B for which hint elements stay within the load factor.
	var shift uint8
	for overLoadFactor(hint, shift) {
		shift++
	}
	h.B = shift

	// With B == 0 the bucket array is allocated lazily later (in mapassign).
	if shift == 0 {
		return h
	}

	// Allocate the initial table now. If hint is large, zeroing this memory
	// could take a while.
	buckets, spare := makeBucketArray(t, shift, nil)
	h.buckets = buckets
	if spare != nil {
		// Remember the preallocated overflow buckets for later use.
		h.extra = &mapextra{nextOverflow: spare}
	}
	return h
}
複製代碼
## makeBucketArray
makeBucketArray初始化了map所需的buckets,最少分配2^b個buckets
// makeBucketArray prepares the backing array for a map with 2^b regular
// buckets.
//
// If dirtyalloc is nil a new array is allocated; otherwise dirtyalloc is
// cleared and reused. For b >= 4 some additional buckets are allocated past
// the regular ones; nextOverflow then points at the first of them, and it is
// nil when no extra buckets were allocated.
func makeBucketArray(t *maptype, b uint8, dirtyalloc unsafe.Pointer) (buckets unsafe.Pointer, nextOverflow *bmap) {
	base := bucketShift(b)
	total := base
	// For larger maps, over-allocate so that spare overflow buckets are
	// ready, and use up whatever the size rounding would otherwise waste.
	if b >= 4 {
		total += bucketShift(b - 4)
		want := t.bucket.size * total
		if rounded := roundupsize(want); rounded != want {
			total = rounded / t.bucket.size
		}
	}

	switch {
	case dirtyalloc == nil:
		// Fresh allocation of the bucket array.
		buckets = newarray(t.bucket, int(total))
	default:
		// dirtyalloc was previously generated by
		// the above newarray(t.bucket, int(nbuckets))
		// but may not be empty, so wipe it before reuse.
		buckets = dirtyalloc
		nbytes := t.bucket.size * total
		if t.bucket.kind&kindNoPointers != 0 {
			memclrNoHeapPointers(buckets, nbytes)
		} else {
			memclrHasPointers(buckets, nbytes)
		}
	}

	// Nothing was over-allocated: there are no spare overflow buckets.
	if total == base {
		return buckets, nil
	}

	// The spare buckets start right after the regular ones. The very last
	// spare gets a non-nil overflow pointer so that newoverflow can tell
	// when it has reached the end of the preallocated run.
	stride := uintptr(t.bucketsize)
	nextOverflow = (*bmap)(add(buckets, base*stride))
	tail := (*bmap)(add(buckets, (total-1)*stride))
	tail.setoverflow(t, (*bmap)(buckets))
	return buckets, nextOverflow
}
複製代碼
初始化的過程到此就結束了,比較簡單,就是根據初始化的大小,確定buckets的數量,並分配內存等
在上面的go tool
分析過程中可以發現,單返回值的讀取和帶ok的雙返回值讀取分別對應
mapaccess1
mapaccess2
兩個函數,它們的邏輯大體相同,我們以mapaccess1
爲例來分析
// mapaccess1 looks up key in h and returns a pointer to its value.
// It never returns nil: when the key is absent (or the map is nil/empty) it
// returns a pointer to the shared zero value zeroVal instead.
// It throws if another goroutine is writing to the map at the same time.
func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
	// A nil or empty map holds nothing: hand back the zero value.
	if h == nil || h.count == 0 {
		return unsafe.Pointer(&zeroVal[0])
	}
	// A write is in progress on this map: concurrent read/write is fatal.
	if h.flags&hashWriting != 0 {
		throw("concurrent map read and map write")
	}
	// Hash the key with the per-map random seed hash0.
	alg := t.key.alg
	hash := alg.hash(key, uintptr(h.hash0))
	m := bucketMask(h.B)
	// The masked low bits of the hash select the bucket.
	b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
	// oldbuckets is non-nil while the map is growing; the target bucket may
	// not have been migrated yet.
	if c := h.oldbuckets; c != nil {
		if !h.sameSizeGrow() {
			// There used to be half as many buckets; mask down one more power of two.
			m >>= 1
		}
		// If the corresponding old bucket has not been evacuated yet, the
		// data still lives there, so search the old bucket instead.
		oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize)))
		if !evacuated(oldb) {
			b = oldb
		}
	}
	// tophash of the key, used as a cheap first-pass comparison.
	top := tophash(hash)
	// Walk the bucket and then its chain of overflow buckets.
	for ; b != nil; b = b.overflow(t) {
		for i := uintptr(0); i < bucketCnt; i++ {
			// Different tophash means a different key: try the next slot.
			if b.tophash[i] != top {
				continue
			}
			// tophash matches; the full key still has to be compared.
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			if t.indirectkey {
				// The slot stores a pointer to the key rather than the key itself.
				k = *((*unsafe.Pointer)(k))
			}
			// Keys are equal: return the matching value slot.
			if alg.equal(key, k) {
				v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
				if t.indirectvalue {
					// The slot stores a pointer to the value rather than the value itself.
					v = *((*unsafe.Pointer)(v))
				}
				return v
			}
		}
	}
	// Not found.
	return unsafe.Pointer(&zeroVal[0])
}
複製代碼
這個函數就是找bmap的overflow的地址,經過結構圖中能夠看出,找到bmap結構體的最後一個指針佔用的內存單元就是overflow指向的下一個bmap的地址了
// overflow returns the overflow bucket chained after b.
// The link is stored in the last pointer-sized word of the bucket, so it is
// read from offset t.bucketsize - sys.PtrSize.
func (b *bmap) overflow(t *maptype) *bmap {
	slot := add(unsafe.Pointer(b), uintptr(t.bucketsize)-sys.PtrSize)
	return *(**bmap)(slot)
}
複製代碼
上面的邏輯比較簡單,可是在這裏有幾個問題須要解決
先放一下buckets和bmap的放大圖
bucket(bmap結構體)是怎麼肯定的
bucket := hash & bucketMask(h.B)
b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
複製代碼
假如B=5,則說明buckets的數量爲2^5 = 32,則取hash的末5位,來計算出目標bucket的索引,圖中計算出索引爲6,所以,在buckets上偏移6個bucket大小的地址,即可找到對應的bucket
tophash是怎麼肯定的
// tophash derives the one-byte tophash stored in a bucket from a full hash.
// It takes the most significant byte of the pointer-sized hash and, when that
// byte is below minTopHash, shifts it up by minTopHash.
func tophash(hash uintptr) uint8 {
	hi := uint8(hash >> (sys.PtrSize*8 - 8))
	if hi >= minTopHash {
		return hi
	}
	// Values below minTopHash are bumped out of that range.
	return hi + minTopHash
}
複製代碼
tophash數組中的每一個元素都是uint8(8個bit),所以這裏直接取hash值的高8位作爲tophash;如果算出來的值小於minTopHash,則再加上minTopHash,以避開保留給bucket搬移狀態標記的數值
key和value的地址爲何是經過偏移來計算的
k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
複製代碼
根據最開始的數據結構分析和上面的bmap圖示,能夠看出bmap中全部的key是放在一塊兒的,全部的value是放在一塊兒的,dataoffset是tophash[8]所佔用的大小,因此,key所在的地址也就是 b的地址+dataOffset的偏移+對應的索引i*key的大小,同理value是排列在key的後面的
// mapassign finds or creates the slot for key in h and returns a pointer to
// the value slot. The function stores the key but not the value, so the
// caller is presumably expected to write the value through the returned
// pointer.
// It panics on a nil map and throws when a concurrent write is detected.
func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
	if h == nil {
		panic(plainError("assignment to entry in nil map"))
	}
	// Another writer is active on this map: concurrent writes are fatal.
	if h.flags&hashWriting != 0 {
		throw("concurrent map writes")
	}
	// Hash the key with the per-map seed hash0.
	alg := t.key.alg
	hash := alg.hash(key, uintptr(h.hash0))
	// Set hashWriting after calling alg.hash, since alg.hash may panic,
	// in which case we have not actually done a write.
	h.flags |= hashWriting
	// No bucket array yet: allocate a single bucket now. This is the lazy
	// allocation path that makemap leaves for B == 0.
	if h.buckets == nil {
		h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)
	}
again:
	// The low bits of the hash give the index of the target bucket.
	bucket := hash & bucketMask(h.B)
	// While the map is growing, growWork performs a step of the migration.
	if h.growing() {
		growWork(t, h, bucket)
	}
	// Address of the target bucket.
	b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + bucket*uintptr(t.bucketsize)))
	// tophash of the key.
	top := tophash(hash)
	var inserti *uint8         // tophash slot to fill if the key turns out to be new
	var insertk unsafe.Pointer // key slot to fill if the key turns out to be new
	var val unsafe.Pointer     // value slot that will be returned
	for {
		for i := uintptr(0); i < bucketCnt; i++ {
			if b.tophash[i] != top {
				// Remember the first empty slot seen as the insertion
				// candidate, but keep scanning for an existing entry.
				if b.tophash[i] == empty && inserti == nil {
					inserti = &b.tophash[i]
					insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
					val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
				}
				continue
			}
			// tophash matches the key's tophash.
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			// The slot stores a pointer to the key: dereference it.
			if t.indirectkey {
				k = *((*unsafe.Pointer)(k))
			}
			// Compare the full keys; on mismatch keep looking.
			if !alg.equal(key, k) {
				continue
			}
			// already have a mapping for key. Update it.
			if t.needkeyupdate {
				typedmemmove(t.key, k, key)
			}
			// The value slot for index i sits after all bucketCnt keys.
			val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
			goto done
		}
		// Key not found in this bucket: continue with its overflow bucket.
		ovf := b.overflow(t)
		if ovf == nil {
			break
		}
		b = ovf
	}
	// Did not find mapping for key. Allocate new cell & add entry.
	// If we hit the max load factor or we have too many overflow buckets,
	// and we're not already in the middle of growing, start growing.
	if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {
		hashGrow(t, h)
		goto again // Growing the table invalidates everything, so try again
	}
	// inserti == nil means no empty slot was found in the bucket or any of
	// its overflow buckets, so a new overflow bucket is chained onto b.
	if inserti == nil {
		// all current buckets are full, allocate a new one.
		newb := h.newoverflow(t, b)
		inserti = &newb.tophash[0]
		insertk = add(unsafe.Pointer(newb), dataOffset)
		val = add(insertk, bucketCnt*uintptr(t.keysize))
	}
	// store new key/value at insert position
	if t.indirectkey {
		// Keys are stored indirectly: allocate the key and store its pointer.
		kmem := newobject(t.key)
		*(*unsafe.Pointer)(insertk) = kmem
		insertk = kmem
	}
	if t.indirectvalue {
		// Values are stored indirectly: allocate the value and store its pointer.
		vmem := newobject(t.elem)
		*(*unsafe.Pointer)(val) = vmem
	}
	typedmemmove(t.key, insertk, key)
	*inserti = top
	h.count++
done:
	// The flag was set above; if it is clear now, someone else cleared it.
	if h.flags&hashWriting == 0 {
		throw("concurrent map writes")
	}
	// Clear the writing flag.
	h.flags &^= hashWriting
	if t.indirectvalue {
		val = *((*unsafe.Pointer)(val))
	}
	return val
}
複製代碼
// newoverflow obtains an overflow bucket, links it as the overflow of b and
// returns it. A preallocated spare bucket is used when one is available;
// otherwise a new bucket is allocated.
func (h *hmap) newoverflow(t *maptype, b *bmap) *bmap {
	var ovf *bmap
	// First check whether a preallocated overflow bucket is left.
	if h.extra != nil && h.extra.nextOverflow != nil {
		// We have preallocated overflow buckets available.
		// See makeBucketArray for more details.
		// Take the spare bucket and advance nextOverflow to the next one.
		ovf = h.extra.nextOverflow
		if ovf.overflow(t) == nil {
			// We're not at the end of the preallocated overflow buckets. Bump the pointer.
			h.extra.nextOverflow = (*bmap)(add(unsafe.Pointer(ovf), uintptr(t.bucketsize)))
		} else {
			// This is the last preallocated overflow bucket.
			// Reset the overflow pointer on this bucket,
			// which was set to a non-nil sentinel value.
			ovf.setoverflow(t, nil)
			h.extra.nextOverflow = nil
		}
	} else {
		ovf = (*bmap)(newobject(t.bucket))
	}
	// Account for the additional overflow bucket.
	h.incrnoverflow()
	if t.bucket.kind&kindNoPointers != 0 {
		// The bucket type is marked pointer-free, so the overflow bucket is
		// also recorded in hmap.extra.overflow to keep it alive.
		h.createOverflow()
		*h.extra.overflow = append(*h.extra.overflow, ovf)
	}
	// Link the overflow bucket behind b.
	b.setoverflow(t, ovf)
	return ovf
}
複製代碼
overflow指向的就是一個bmap結構,而bmap結構的最後一個地址,存儲的是overflow的地址,經過bmap.overflow能夠將bmap的全部overflow串聯起來,hmap.extra.nextOverflow也是同樣的邏輯
在mapassign
函數中能夠看到,擴容發生的狀況有兩種
overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)
複製代碼
先來看一下這兩個函數
// overLoadFactor reports whether count items placed in 2^B buckets exceed the
// load factor. Maps that fit in a single bucket (count <= bucketCnt) are
// never over the load factor.
func overLoadFactor(count int, B uint8) bool {
	if count <= bucketCnt {
		return false
	}
	// loadFactorNum = 13; loadFactorDen = 2
	limit := loadFactorNum * (bucketShift(B) / loadFactorDen)
	return uintptr(count) > limit
}
複製代碼
uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen)
能夠簡化爲 count / (2^B) > 6.5
, 這個6.5即是表明loadFactor的負載係數
##tooManyOverflowBuckets
// tooManyOverflowBuckets reports whether noverflow overflow buckets are too
// many for a map with 2^B regular buckets.
//
// "too many" means (approximately) as many overflow buckets as regular
// buckets; B is capped at 15 for this comparison.
// If the threshold is too low, we do extraneous work.
// If the threshold is too high, maps that grow and shrink can hold on to lots
// of unused memory. See incrnoverflow for more details.
func tooManyOverflowBuckets(noverflow uint16, B uint8) bool {
	shift := B
	if shift > 15 {
		shift = 15
	}
	// The compiler doesn't see here that shift < 16; mask it to generate
	// shorter shift code.
	threshold := uint16(1) << (shift & 15)
	return noverflow >= threshold
}
複製代碼
經過判斷noverflow的數量來判斷overflow是否太多
咱們理解一下這兩種狀況擴容的緣由
超過設定的負載值
根據key查找的過程中,根據末B位確定bucket,高8位確定tophash,但是查找tophash的過程中,是需要遍歷整個bucket的,所以,最優的情況是每一個bucket只存儲一個key,這樣就達到了hash的O(1)的查找效率,但是空間卻大大的浪費了;如果所有的key都存儲到了一個bucket裏面,就退化成了鏈表,查找效率就變成了O(n),所以裝載係數就是爲了平衡查找效率和存儲空間的,當裝載係數過大,就需要增加bucket了,來提高查找效率,即增量擴容
有太多的overflow
當bucket的空位所有填滿的時候,裝載係數就達到了8,爲何還會有tooManyOverflowBuckets的判斷呢,map不只有增長還有刪除的操做,當某一個bucket的空位填滿後,開始填充到overflow裏面,這時候再刪除bucket裏面的數據,其實整個過程頗有可能並無觸發 超過負載擴容機制的,(由於有較多的buckets),可是查找overflow的數據,就首先要遍歷bucket的數據,這個就是無用功了,查找效率就低了,這時候須要不增長bucket數量的擴容,也就是等量擴容
擴容的工作是由hashGrow
開始的,但是真正進行遷移工作的是evacuate
, 由growWork
進行調用;在每一次的mapassign和mapdelete的時候,會判斷這個map是否正在進行擴容操作,如果是的,就遷移當前的bucket;所以,map的擴容並不是一蹴而就的,而是一個循序漸進的過程
// hashGrow starts a grow of h: it allocates the new bucket array, moves the
// current one to oldbuckets and resets the evacuation counters. No entries
// are copied here; that is done incrementally by growWork and evacuate.
func hashGrow(t *maptype, h *hmap) {
	// If we've hit the load factor, get bigger.
	// Otherwise, there are too many overflow buckets,
	// so keep the same number of buckets and "grow" laterally.
	// bigger is 1 for a doubling grow and 0 for a same-size grow.
	bigger := uint8(1)
	if !overLoadFactor(h.count+1, h.B) {
		bigger = 0
		h.flags |= sameSizeGrow
	}
	// Allocate the new bucket array (and spare overflow buckets) for the
	// new size exponent h.B+bigger.
	oldbuckets := h.buckets
	newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger, nil)

	// An iterator on the current buckets becomes an iterator on the old ones.
	flags := h.flags &^ (iterator | oldIterator)
	if h.flags&iterator != 0 {
		flags |= oldIterator
	}
	// commit the grow (atomic wrt gc)
	// Update the header fields.
	h.B += bigger
	h.flags = flags
	h.oldbuckets = oldbuckets
	h.buckets = newbuckets
	h.nevacuate = 0
	h.noverflow = 0

	// Move the overflow bookkeeping to the old generation and install the
	// newly preallocated spare overflow buckets.
	if h.extra != nil && h.extra.overflow != nil {
		// Promote current overflow buckets to the old generation.
		if h.extra.oldoverflow != nil {
			throw("oldoverflow is not nil")
		}
		h.extra.oldoverflow = h.extra.overflow
		h.extra.overflow = nil
	}
	if nextOverflow != nil {
		if h.extra == nil {
			h.extra = new(mapextra)
		}
		h.extra.nextOverflow = nextOverflow
	}

	// the actual copying of the hash table data is done incrementally
	// by growWork() and evacuate().
}
複製代碼
hashGrow
這個前菜已經準備完成了,接下來就交給growWork
和 evacuate
兩個函數來完成的
// growWork performs one step of an in-progress grow on behalf of an operation
// that is about to use the given bucket.
func growWork(t *maptype, h *hmap, bucket uintptr) {
	// First evacuate the old bucket that corresponds to the bucket the
	// caller is about to use.
	old := bucket & h.oldbucketmask()
	evacuate(t, h, old)

	// If the grow is already finished there is nothing more to do.
	if !h.growing() {
		return
	}
	// Otherwise evacuate one more old bucket to make progress on growing.
	evacuate(t, h, h.nevacuate)
}
複製代碼
### evacuate
將hmap中的一個bucket搬移到新的buckets中,老的bucket裏的key與新的buckets中位置的對應,同樣參考map的查找過程
這裏如何判斷這個bucket是否已經搬移過了呢,主要就是依據evacuated
函數來判斷
// evacuated reports whether bucket b has already been evacuated.
// The evacuation state is kept in tophash[0]: any value strictly between
// empty and minTopHash marks the bucket as evacuated.
func evacuated(b *bmap) bool {
	state := b.tophash[0]
	if state <= empty || state >= minTopHash {
		return false
	}
	return true
}
複製代碼
看了源碼就發現原理很簡單,就是對tophash[0]值的判斷,那麼肯定是在搬移之後設置的這個值,我們通過evacuate
函數來一探究竟吧
// evacuate moves every entry of old bucket number oldbucket (including its
// overflow chain) into the new bucket array and marks the old slots as
// evacuated. In a doubling grow each entry goes either to the bucket with
// the same index (x) or to index oldbucket+newbit (y); in a same-size grow
// only x is used.
func evacuate(t *maptype, h *hmap, oldbucket uintptr) {
	b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
	newbit := h.noldbuckets()
	// Skip the copy if this bucket has already been evacuated.
	if !evacuated(b) {
		// TODO: reuse overflow buckets instead of using new ones, if there
		// is no iterator using the old buckets.  (If !oldIterator.)

		// xy contains the x and y (low and high) evacuation destinations.
		// x is the new bucket with the same index as the old bucket.
		var xy [2]evacDst
		x := &xy[0]
		x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))
		x.k = add(unsafe.Pointer(x.b), dataOffset)
		x.v = add(x.k, bucketCnt*uintptr(t.keysize))

		// In a doubling grow one more hash bit takes part in the bucket
		// index. E.g. going from B=5 to B=6, the 6th lowest bit is either 0
		// or 1, so an entry lands in x or in y (x + newbit). Prepare y too.
		if !h.sameSizeGrow() {
			// Only calculate y pointers if we're growing bigger.
			// Otherwise GC can see bad pointers.
			y := &xy[1]
			y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize)))
			y.k = add(unsafe.Pointer(y.b), dataOffset)
			y.v = add(y.k, bucketCnt*uintptr(t.keysize))
		}

		// Move the entries, walking the bucket and its overflow chain.
		for ; b != nil; b = b.overflow(t) {
			k := add(unsafe.Pointer(b), dataOffset)
			v := add(k, bucketCnt*uintptr(t.keysize))
			for i := 0; i < bucketCnt; i, k, v = i+1, add(k, uintptr(t.keysize)), add(v, uintptr(t.valuesize)) {
				top := b.tophash[i]
				// Empty slot: just mark it as evacuated and move on.
				if top == empty {
					b.tophash[i] = evacuatedEmpty
					continue
				}
				if top < minTopHash {
					throw("bad map state")
				}
				k2 := k
				if t.indirectkey {
					k2 = *((*unsafe.Pointer)(k2))
				}
				var useY uint8
				if !h.sameSizeGrow() {
					// Compute hash to make our evacuation decision (whether we need
					// to send this key/value to bucket x or bucket y).
					// A same-size grow skips this and always uses x.
					hash := t.key.alg.hash(k2, uintptr(h.hash0))
					if h.flags&iterator != 0 && !t.reflexivekey && !t.key.alg.equal(k2, k2) {
						// If key != key (NaNs), then the hash could be (and probably
						// will be) entirely different from the old hash. Moreover,
						// it isn't reproducible. Reproducibility is required in the
						// presence of iterators, as our evacuation decision must
						// match whatever decision the iterator made.
						// Fortunately, we have the freedom to send these keys either
						// way. Also, tophash is meaningless for these kinds of keys.
						// We let the low bit of tophash drive the evacuation decision.
						// We recompute a new random tophash for the next level so
						// these keys will get evenly distributed across all buckets
						// after multiple grows.
						useY = top & 1
						top = tophash(hash)
					} else {
						if hash&newbit != 0 {
							useY = 1
						}
					}
				}

				if evacuatedX+1 != evacuatedY {
					throw("bad evacuatedN")
				}

				b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY
				dst := &xy[useY]                 // evacuation destination

				// Destination bucket is full: chain a new overflow bucket
				// onto it and continue filling that one.
				if dst.i == bucketCnt {
					dst.b = h.newoverflow(t, dst.b)
					dst.i = 0
					dst.k = add(unsafe.Pointer(dst.b), dataOffset)
					dst.v = add(dst.k, bucketCnt*uintptr(t.keysize))
				}
				// Copy the key and value and record the tophash in the
				// destination slot.
				dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check
				if t.indirectkey {
					*(*unsafe.Pointer)(dst.k) = k2 // copy pointer
				} else {
					typedmemmove(t.key, dst.k, k) // copy value
				}
				if t.indirectvalue {
					*(*unsafe.Pointer)(dst.v) = *(*unsafe.Pointer)(v)
				} else {
					typedmemmove(t.elem, dst.v, v)
				}
				dst.i++
				// These updates might push these pointers past the end of the
				// key or value arrays.  That's ok, as we have the overflow pointer
				// at the end of the bucket to protect against pointing past the
				// end of the bucket.
				dst.k = add(dst.k, uintptr(t.keysize))
				dst.v = add(dst.v, uintptr(t.valuesize))
			}
		}
		// Unlink the overflow buckets & clear key/value to help GC.
		if h.flags&oldIterator == 0 && t.bucket.kind&kindNoPointers == 0 {
			b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))
			// Preserve b.tophash because the evacuation
			// state is maintained there.
			ptr := add(b, dataOffset)
			n := uintptr(t.bucketsize) - dataOffset
			memclrHasPointers(ptr, n)
		}
	}

	// This was the bucket the progress counter pointed at: advance the counter.
	if oldbucket == h.nevacuate {
		advanceEvacuationMark(h, t, newbit)
	}
}
複製代碼
擴容是逐步進行的,一次搬運一個bucket
咱們以原先的B=5爲例,如今增量擴容後B=6,可是hash的倒數第6位只能是0或1,也就是說,若是原先計算出來的bucket索引爲6的話,即 00110,那麼新的bucket對應的索引只能是 100110(6+2^5)或 000110(6),x對應的就是6,y對應的就是(6+2^5);若是是等量擴容,那麼索引確定就是不變的,這時候就不須要y了
找到對應的新的bucket以後,按順序依次存放就ok了
刪除的邏輯比較簡單,根據key查找,找到就清空key和value及tophash
// mapdelete removes key from h if it is present; deleting from a nil or empty
// map is a no-op. It throws when a concurrent write is detected.
func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {
	if h == nil || h.count == 0 {
		return
	}
	// Another writer is active on this map: concurrent writes are fatal.
	if h.flags&hashWriting != 0 {
		throw("concurrent map writes")
	}

	// Hashing the key and locating the bucket and slot follow the same
	// logic as in mapassign.
	alg := t.key.alg
	hash := alg.hash(key, uintptr(h.hash0))

	// Set hashWriting after calling alg.hash, since alg.hash may panic,
	// in which case we have not actually done a write (delete).
	h.flags |= hashWriting

	bucket := hash & bucketMask(h.B)
	if h.growing() {
		growWork(t, h, bucket)
	}
	b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))
	top := tophash(hash)
search:
	for ; b != nil; b = b.overflow(t) {
		for i := uintptr(0); i < bucketCnt; i++ {
			if b.tophash[i] != top {
				continue
			}
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			k2 := k
			if t.indirectkey {
				k2 = *((*unsafe.Pointer)(k2))
			}
			if !alg.equal(key, k2) {
				continue
			}
			// Only clear key if there are pointers in it.
			// Found the key: an indirect key slot is set to nil, otherwise
			// the key memory is cleared when the key type contains pointers.
			if t.indirectkey {
				*(*unsafe.Pointer)(k) = nil
			} else if t.key.kind&kindNoPointers == 0 {
				memclrHasPointers(k, t.key.size)
			}
			// Clear the value slot in the same way.
			v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize))
			if t.indirectvalue {
				*(*unsafe.Pointer)(v) = nil
			} else if t.elem.kind&kindNoPointers == 0 {
				memclrHasPointers(v, t.elem.size)
			} else {
				memclrNoHeapPointers(v, t.elem.size)
			}
			// Mark the slot as empty and update the entry count.
			b.tophash[i] = empty
			h.count--
			break search
		}
	}

	// The flag was set above; if it is clear now, someone else cleared it.
	if h.flags&hashWriting == 0 {
		throw("concurrent map writes")
	}
	h.flags &^= hashWriting
}
複製代碼
按通常的思惟來考慮,遍歷值須要遍歷buckets數組裏面的每一個bucket以及bucket下掛的overflow鏈表便可,可是map存在擴容的狀況,這樣就會致使遍歷的難度增大了,咱們看一下go是怎麼實現的
根據go tool
的分析,咱們能夠簡單看一下遍歷時的流程信息
// mapiterinit initialises the iterator it for ranging over h and advances it
// to the first entry via mapiternext. For a nil or empty map it returns
// without touching it.
func mapiterinit(t *maptype, h *hmap, it *hiter) {
	if h == nil || h.count == 0 {
		return
	}

	if unsafe.Sizeof(hiter{})/sys.PtrSize != 12 {
		throw("hash_iter size incorrect") // see cmd/compile/internal/gc/reflect.go
	}
	// Fill in the iterator's fields.
	it.t = t
	it.h = h

	// grab snapshot of bucket state
	it.B = h.B
	it.buckets = h.buckets
	if t.bucket.kind&kindNoPointers != 0 {
		// Allocate the current slice and remember pointers to both current and old.
		// This preserves all relevant overflow buckets alive even if
		// the table grows and/or overflow buckets are added to the table
		// while we are iterating.
		h.createOverflow()
		it.overflow = h.extra.overflow
		it.oldoverflow = h.extra.oldoverflow
	}

	// decide where to start
	// A random number picks both the starting bucket and the starting slot
	// offset inside each bucket, which randomises the iteration order.
	r := uintptr(fastrand())
	if h.B > 31-bucketCntBits {
		r += uintptr(fastrand()) << 31
	}
	it.startBucket = r & bucketMask(h.B)
	it.offset = uint8(r >> h.B & (bucketCnt - 1))

	// iterator state
	it.bucket = it.startBucket

	// Remember we have an iterator.
	// Can run concurrently with another mapiterinit().
	if old := h.flags; old&(iterator|oldIterator) != iterator|oldIterator {
		atomic.Or8(&h.flags, iterator|oldIterator)
	}

	// Position the iterator on the first entry.
	mapiternext(it)
}
複製代碼
// mapiternext advances it to the next entry of the map and stores pointers to
// its key and value in it.key and it.value. When the iteration is complete
// both are set to nil. It throws if the map is being written concurrently.
func mapiternext(it *hiter) {
	h := it.h
	if raceenabled {
		callerpc := getcallerpc()
		racereadpc(unsafe.Pointer(h), callerpc, funcPC(mapiternext))
	}
	if h.flags&hashWriting != 0 {
		throw("concurrent map iteration and map write")
	}
	t := it.t
	bucket := it.bucket
	b := it.bptr
	i := it.i
	checkBucket := it.checkBucket
	alg := t.key.alg

next:
	// b == nil means the current bucket's overflow chain is exhausted (or
	// iteration has just started): move on to the next bucket.
	if b == nil {
		// Back at the starting bucket after wrapping around: every bucket
		// has been visited, so the iteration is over.
		if bucket == it.startBucket && it.wrapped {
			// end of iteration
			it.key = nil
			it.value = nil
			return
		}
		// While the map is growing, walk the old bucket if it has not been
		// evacuated yet; otherwise walk the new bucket.
		if h.growing() && it.B == h.B {
			// Iterator was started in the middle of a grow, and the grow isn't done yet.
			// If the bucket we're looking at hasn't been filled in yet (i.e. the old
			// bucket hasn't been evacuated) then we need to iterate through the old
			// bucket and only return the ones that will be migrated to this bucket.
			oldbucket := bucket & it.h.oldbucketmask()
			b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))
			if !evacuated(b) {
				checkBucket = bucket
			} else {
				b = (*bmap)(add(it.buckets, bucket*uintptr(t.bucketsize)))
				checkBucket = noCheck
			}
		} else {
			b = (*bmap)(add(it.buckets, bucket*uintptr(t.bucketsize)))
			checkBucket = noCheck
		}
		bucket++
		// Past the end of the bucket array: wrap around to the beginning.
		if bucket == bucketShift(it.B) {
			bucket = 0
			it.wrapped = true
		}
		i = 0
	}
	// Walk the slots of the current bucket (or overflow bucket).
	for ; i < bucketCnt; i++ {
		// Combine i with the random offset to get the actual slot index.
		offi := (i + it.offset) & (bucketCnt - 1)
		if b.tophash[offi] == empty || b.tophash[offi] == evacuatedEmpty {
			continue
		}
		// Locate the key and value slots for index offi.
		k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.keysize))
		if t.indirectkey {
			k = *((*unsafe.Pointer)(k))
		}
		v := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+uintptr(offi)*uintptr(t.valuesize))
		if checkBucket != noCheck && !h.sameSizeGrow() {
			// A doubling grow is in progress, so a further check is needed.
			// Special case: iterator was started during a grow to a larger size
			// and the grow is not done yet. We're working on a bucket whose
			// oldbucket has not been evacuated yet. Or at least, it wasn't
			// evacuated when we started the bucket. So we're iterating
			// through the oldbucket, skipping any keys that will go
			// to the other new bucket (each oldbucket expands to two
			// buckets during a grow).
			if t.reflexivekey || alg.equal(k, k) {
				// The entry has not been moved yet: rehash the key and skip
				// it unless it belongs to the new bucket being iterated.
				// If the item in the oldbucket is not destined for
				// the current new bucket in the iteration, skip it.
				hash := alg.hash(k, uintptr(h.hash0))
				if hash&bucketMask(it.B) != checkBucket {
					continue
				}
			} else {
				// Hash isn't repeatable if k != k (NaNs).  We need a
				// repeatable and randomish choice of which direction
				// to send NaNs during evacuation. We'll use the low
				// bit of tophash to decide which way NaNs go.
				// NOTE: this case is why we need two evacuate tophash
				// values, evacuatedX and evacuatedY, that differ in
				// their low bit.
				if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) {
					continue
				}
			}
		}
		if (b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) ||
			!(t.reflexivekey || alg.equal(k, k)) {
			// The slot has not been evacuated, so its contents can be used directly.
			// This is the golden data, we can return it.
			// OR
			// key!=key, so the entry can't be deleted or updated, so we can just return it.
			// That's lucky for us because when key!=key we can't look it up successfully.
			it.key = k
			if t.indirectvalue {
				v = *((*unsafe.Pointer)(v))
			}
			it.value = v
		} else {
			// The hash table has grown since the iterator was started.
			// The golden data for this key is now somewhere else.
			// Check the current hash table for the data.
			// This code handles the case where the key
			// has been deleted, updated, or deleted and reinserted.
			// NOTE: we need to regrab the key as it has potentially been
			// updated to an equal() but not identical key (e.g. +0.0 vs -0.0).
			rk, rv := mapaccessK(t, h, k)
			if rk == nil {
				continue // key has been deleted
			}
			it.key = rk
			it.value = rv
		}
		// Save the position so the next call resumes after this slot.
		it.bucket = bucket
		if it.bptr != b { // avoid unnecessary write barrier; see issue 14921
			it.bptr = b
		}
		it.i = i + 1
		it.checkBucket = checkBucket
		return
	}
	// Continue with the next overflow bucket in the chain.
	b = b.overflow(t)
	i = 0
	goto next
}
複製代碼
總體思路以下: