從源碼看redis的'map'結構

時間 2019-11-07

標籤源碼 redis map 結構欄目 Redis 简体版

原文原文鏈接

hset用來往map結構存入數據

> hset user:100 name paxi
(integer) 1
複製代碼

user:100是整個map結構的key,name是map中的一個字段，paxi是該字段的值，通過hget就能夠獲取存入的結果

> hget user:100 name
"paxi"
複製代碼

hset命令執行追蹤

hset的執行入口在 hsetCommand

Code.SLICE.source("robj *o = lookupKeyWrite(c->db,key);")
.interpretation("根據提供的dict自己的key，注意這裏不是dict中元素的key,而是查找dict的key,好比 user:100 age 12 這裏的key是 user:100");

Code.SLICE.source("if (o == NULL) {\n" +
        " o = createHashObject();\n" +
        " dbAdd(c->db,key,o);\n" +
        " } else {\n" +
        " if (o->type != OBJ_HASH) {\n" +
        " addReply(c,shared.wrongtypeerr);\n" +
        " return NULL;\n" +
        " }\n" +
        " }")
.interpretation("如果存在就僅校驗是不是hash，滿足條件就返回；如果不存在就創建一個hash對象，並把這個key的關係存到了自己的db中");
複製代碼

map是不能存在key是同樣的元素的，於是會先檢查是否有一樣的key,沒有就再建立一個HashObject

Code.SLICE.source("unsigned char *zl = ziplistNew();\n" +
                " robj *o = createObject(OBJ_HASH, zl);\n" +
                " o->encoding = OBJ_ENCODING_ZIPLIST;\n" +
                " return o;")
    .interpretation("默認建立的hash結構，它的編碼方式使用的是ziplist");
複製代碼

默認的map結構使用的是ziplist的編碼方式，當某個field或value的長度超過hash_max_ziplist_value(默認64)，或者元素個數超過hash_max_ziplist_entries(默認512)時，則會將編碼方式替換成 OBJ_ENCODING_HT。

key存儲

key這裏指的是map整個結構的key,而不是map中的一個字段

爲了方便區分，分別以key和field來稱呼，好比 user:100是整個map結構的key,name是map中的一個字段(field)

從 lookupKeyWrite 和 dbAdd 追蹤進去，key其實也是存在了一個dict的結構中

Code.SLICE.source("typedef struct dict {\n" +
                " dictType *type;\n" +
                " void *privdata;\n" +
                " dictht ht[2];\n" +
                " long rehashidx; /* rehashing not in progress if rehashidx == -1 */\n" +
                " unsigned long iterators; /* number of iterators currently running */\n" +
                "} dict;")
    .interpretation("字典結構")
    .interpretation("dictType使得redis能夠對任意類型的key和value對應類型來操做")
    .interpretation("privdata存儲用戶傳進來的值，key就是key,value就是value")
    .interpretation("dictht數組存儲兩個ht,在rehash的時候，ht[0]表示舊的，ht[1]表示新的，當rehash完成，再將ht[1]地址給ht[0]")
    .interpretation("rehashidx用來標識是否正在進行rehash,沒有進行的時候是-1")
    .interpretation("iterators表示當前正在進行遍歷的iterator的個數,若是要進行rehash，可是當前有迭代器正在進行遍歷，不會進行rehash");
複製代碼

注意到 dictht 和 rehashidx 這兩個字段的存在，使得redis方便進行擴容：dictht是redis存儲數據的地方，rehashidx用來表示當前擴容到哪兒了。如果一個map的field非常多，那麼擴容過程中需要的拷貝量非常大，所以redis選擇了使用兩個 dictht 來實現逐步的拷貝

field與value的存儲

map結構首先存儲的方式是使用ziplist,當數據過大，不適合ziplist的時候才選用 OBJ_ENCODING_HT,在存儲的時候也須要對應的作不一樣的處理

//...
Code.SLICE.source("if (o->encoding == OBJ_ENCODING_ZIPLIST){" +
        "..." +
        " if (hashTypeLength(o) > server.hash_max_ziplist_entries)\n" +
        " hashTypeConvert(o, OBJ_ENCODING_HT);" +
        "}")
        .interpretation("根據編碼方式來作不一樣的set,若是是 ZIPLIST,插入完成以後，會統計當前存儲的個數，若是超過了 hash_max_ziplist_entries （512) 那麼轉換爲 OBJ_ENCODING_HT ");
Code.SLICE.source("} else if (o->encoding == OBJ_ENCODING_HT) {")
    .interpretation("處理 HashTable的編碼方式");
Code.SLICE.source(" dictEntry *de = dictFind(o->ptr,field);")
    .interpretation("在當前key對應的dict中去查找，有沒有這個字段對應的值");
Code.SLICE.source(" if (de) {\n" +
                " sdsfree(dictGetVal(de));\n" +
                " if (flags & HASH_SET_TAKE_VALUE) {\n" +
                " dictGetVal(de) = value;\n" +
                " value = NULL;\n" +
                " } else {\n" +
                " dictGetVal(de) = sdsdup(value);\n" +
                " }\n" +
                " update = 1;\n" +
                " }")
.interpretation("若是存在釋放原來的dict中值的空間，插入新的值，並標識是更新");
//...
Code.SLICE.source("dictAdd(o->ptr,f,v);")
    .interpretation("將field和value加入到dict中");
//...
複製代碼

以HT爲例，field存儲以前，先要看容量是否是夠,不夠就須要先進行擴容

Code.SLICE.source("if (dictIsRehashing(d)) return DICT_OK;")
                .interpretation("若是已經在rehash了，那麼不須要再次擴容");
Code.SLICE.source("if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);")
        .interpretation("若是dict當前沒有分配空間，默認擴容爲4個數組長度");
Code.SLICE.source(" if (d->ht[0].used >= d->ht[0].size &&\n" +
        " (dict_can_resize ||\n" +
        " d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))")
        .interpretation("當已經使用的量不小於分配的量，並且允許進行resize(dict_can_resize)，或者used/size的比例已經超過強制擴容的比例(dict_force_resize_ratio，默認值爲5)時，進行擴容");
Code.SLICE.source(" return dictExpand(d, d->ht[0].used*2);")
        .interpretation("擴容爲使用量的2倍");
複製代碼

size:分配的空間，也就是每一個table的數組個數，它必定是2的冪次方
used:表示map中已經添加了的元素個數

當遇到知足的條件則進行擴容，擴容後再選擇存儲

Code.SLICE.source("if (dictIsRehashing(d)) _dictRehashStep(d);")
            .interpretation("若是dict正在執行Rehash先執行一步rehash");
Code.SLICE.source("if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)\n" +
                 " return NULL;")
        .interpretation("計算出當前key在dict中的下標,若是在那個下標已經有這個key了，返回添加失敗");
Code.SLICE.source("ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];")
        .interpretation("根據是否在rehash來保證新的元素只會放在新的entry列表裏面");
Code.SLICE.source(" entry = zmalloc(sizeof(*entry));")
        .interpretation("分配新的entry的空間");
Code.SLICE.source(" entry->next = ht->table[index];\n" +
                " ht->table[index] = entry;\n" +
                " ht->used++;")
        .interpretation("將新的entry放在對應下標(index)鏈表的第一位,並增長使用量");
Code.SLICE.source(" dictSetKey(d, entry, key);")
        .interpretation("把key存入entry");
複製代碼