Redis源碼閱讀-Dict哈希字典

Dict和Java中的HashMap很類似,都是數組開鏈法解決衝突。redis

 

可是Redis爲了高性能, 有不少比較微妙的方法,例如 數組的大小老是2的倍數,初始大小是4。數組

rehash並非一次就執行完,而是分屢次執行。每次執行一部分。其中rehashidx表示如今hash到哪個桶啦,-1表示如今並無rehash.數據結構

dict包含兩個dicttable, 編號爲0,1,  dictht0是直接存儲哈希表的地方, dictht1在rehash中用到,當rehashidx不爲-1時, 查找key,同時在dictht1和dictht0中查找。dom

 

 

函數

 

數據結構性能

 

typedef struct dictEntry {
    void *key;
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;
} dictEntry;

typedef struct dictType {
    unsigned int (*hashFunction)(const void *key);
    void *(*keyDup)(void *privdata, const void *key);
    void *(*valDup)(void *privdata, const void *obj);
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    void (*keyDestructor)(void *privdata, void *key);
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    dictEntry **table;
    unsigned long size;
    unsigned long sizemask;
    unsigned long used;
} dictht;

typedef struct dict {
    dictType *type;
    void *privdata;
    dictht ht[2];
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    int iterators; /* number of iterators currently running */
} dict;

/* If safe is set to 1 this is a safe iterator, that means, you can call
 * dictAdd, dictFind, and other functions against the dictionary even while
 * iterating. Otherwise it is a non safe iterator, and only dictNext()
 * should be called while iterating. */
typedef struct dictIterator {
    dict *d;
    long index;
    int table, safe;
    dictEntry *entry, *nextEntry;
    /* unsafe iterator fingerprint for misuse detection. */
    long long fingerprint;
} dictIterator;

typedef void (dictScanFunction)(void *privdata, const dictEntry *de);

 

查找keyui

 

dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *he;
    unsigned int h, idx, table;

    if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */
    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        he = d->ht[table].table[idx];
        while(he) {
            if (dictCompareKeys(d, key, he->key))
                return he;
            he = he->next;
        }
        if (!dictIsRehashing(d)) return NULL;
    }
    return NULL;
}

 

redis的rehash是增量rehash,每次rehash一部分this

rehash過程:spa

1. 從 dictht0的table 0到----N-1查找不爲NULL的位置(非空桶)code

2. 對該位置的鏈表進行處理, hash到dictht 1的table 1中。

rehash的函數,設置了n參數,表示要處理的非空桶的個數,可是在函數內部設置了最多訪問10*n個空桶。

int dictRehash(dict *d, int n) {
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    if (!dictIsRehashing(d)) return 0;

    while(n-- && d->ht[0].used != 0) {
        dictEntry *de, *nextde;

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            unsigned int h;

            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Check if we already rehashed the whole table... */
    if (d->ht[0].used == 0) {
        zfree(d->ht[0].table);
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        d->rehashidx = -1;
        return 0;
    }

    /* More to rehash... */
    return 1;
}

 

 

 

 

和adlist同樣,dict也有迭代器

 

迭代方法以下:

dictEntry *dictNext(dictIterator *iter)
{
    //對錶的桶進行遍歷,直到找到一個非空桶,返回
    while (1) {
        if (iter->entry == NULL) {
            dictht *ht = &iter->d->ht[iter->table];
            if (iter->index == -1 && iter->table == 0) {
                if (iter->safe)
                    iter->d->iterators++;
                else
                    iter->fingerprint = dictFingerprint(iter->d);//對dict進行指紋
            }
            iter->index++;
            //若是迭代到表的最後一個桶,就判斷要不要迭代第二個表
            if (iter->index >= (long) ht->size) {
                if (dictIsRehashing(iter->d) && iter->table == 0) {
                    iter->table++;
                    iter->index = 0;
                    ht = &iter->d->ht[1];
                } else {
                    break;
                }
            }
            iter->entry = ht->table[iter->index];
        } else {
            iter->entry = iter->nextEntry;
        }
        if (iter->entry) {
            /* We need to save the 'next' here, the iterator user
             * may delete the entry we are returning. */
            iter->nextEntry = iter->entry->next;
            return iter->entry;
        }
    }
    return NULL;
}

Dict的API以下:

/* API */
/* 字典建立, type參數制定各種對字典的自定義函數,會初始化dictht, dict */
dict *dictCreate(dictType *type, void *privDataPtr);
int dictExpand(dict *d, unsigned long size);
/* 添加鍵值對,內部調用addRaw和setvalue ,若是已經存在,返回NULL*/
int dictAdd(dict *d, void *key, void *val);
/* 添加鍵 ,若是已經存在,返回NULL*/
dictEntry *dictAddRaw(dict *d, void *key);
/* 添加一個key,若是存在,直接設置value,設置key的value */
int dictReplace(dict *d, void *key, void *val);
/* 添加一個key,若是存在,直接返回 */
dictEntry *dictReplaceRaw(dict *d, void *key);
/* 刪除一個節點,須要free那個節點 */
int dictDelete(dict *d, const void *key);
/* 刪除一個節點,不須要free那個節點 */
int dictDeleteNoFree(dict *d, const void *key);
/* 刪除dict*/
void dictRelease(dict *d);
/* 查找key*/
dictEntry * dictFind(dict *d, const void *key);
/* 查找key的value*/
void *dictFetchValue(dict *d, const void *key);
/* 將dict的size設置和元素數量同樣,可是符合2的倍數*/
int dictResize(dict *d);
dictIterator *dictGetIterator(dict *d);
dictIterator *dictGetSafeIterator(dict *d);
dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);
dictEntry *dictGetRandomKey(dict *d);
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
void dictPrintStats(dict *d);
unsigned int dictGenHashFunction(const void *key, int len);
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);
void dictEmpty(dict *d, void(callback)(void*));
void dictEnableResize(void);
void dictDisableResize(void);
int dictRehash(dict *d, int n);
/* rehash,設置一個最長時間*/
int dictRehashMilliseconds(dict *d, int ms);
void dictSetHashFunctionSeed(unsigned int initval);
unsigned int dictGetHashFunctionSeed(void);
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata);

 

上面的API不少函數內部都會判斷當前是否是還在rehash狀態,若是是,就rehash一步。

 

在rehash前,會判斷是否是有迭代器存在,若是有迭代器存在,就不rehash

static void _dictRehashStep(dict *d) { if (d->iterators == 0) dictRehash(d,1);}

相關文章
相關標籤/搜索