談Redis的rehash的增量式擴容

時間 2019-11-11

原文鏈接

談Redis的rehash的增量式擴容

最近在複習的時候，研究了一下redis的rehash爲何對性能影響小，原因之一在於它的增量式複製，也叫漸進式rehash吧！其實這種思想很值得借鑑：分清輕重，優化選擇。

/* Hash table node: a single key/value entry stored in a bucket chain. */
typedef struct dictEntry {
    // Key
    void *key;
    // Value: stored as one of a generic pointer, an unsigned 64-bit
    // integer, or a signed 64-bit integer (the members share storage)
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
    } v;
    // Pointer to the next hash table node, forming a singly linked list
    struct dictEntry *next;
} dictEntry;

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    // Bucket array: each slot holds the head of a dictEntry linked list
    // (hash collisions are resolved by separate chaining)
    dictEntry **table;
    // Size of the hash table (number of buckets)
    unsigned long size;
    // Size mask used to compute a bucket index from a hash value;
    // always equal to size - 1
    unsigned long sizemask;
    // Number of nodes currently stored in this hash table
    unsigned long used;
} dictht;
/* Dictionary */
typedef struct dict {
    // Type-specific functions
    dictType *type;
    // Private data
    void *privdata;
    // The two hash tables; dictRehash() moves entries from ht[0] to ht[1]
    dictht ht[2];
    // Rehash index: the next bucket of ht[0] to migrate.
    // Equal to -1 when no rehash is in progress.
    int rehashidx; /* rehashing not in progress if rehashidx == -1 */
    // Number of safe iterators currently running
    int iterators; /* number of iterators currently running */
} dict;

dict的結構大體如上，接下來分析一下其中最重要的幾個數據成員：

dictht::table：哈希表內部的table結構使用了鏈地址法來解決哈希衝突，剛開始看的時候我很奇怪，這怎麼是個二維數組？這實際上是一個指向數組的指針，數組中的每一項都是entry鏈表的頭結點。
dictht ht[2]：在dict的內部，維護了兩張哈希表，作用等同於一對滾動數組，一張表是舊表，一張表是新表，當hashtable的大小需要動態改變的時候，舊表中的元素就往新開闢的新表中遷移，當下一次變更大小時，當前的新表又變成了舊表，以此達到資源的複用和效率的提升。
字段rehashidx：由於是漸進式的哈希，數據的遷移並不是一步完成的，因此需要有一個索引來指示當前的rehash進度。當rehashidx爲-1時，代表沒有正在進行的rehash操作。

rehash的主體部分：

/* Run up to n steps of incremental rehashing on dictionary d.
 *
 * One step migrates a whole bucket (every entry chained in it) from ht[0]
 * to ht[1]. Empty buckets found on the way are skipped, but no more than
 * n*10 of them are visited in a single call; when that budget is used up
 * the function returns early, so a call may end without having moved any
 * bucket at all. This bounds the amount of work done per call.
 *
 * Returns 1 if there are still keys left to move from ht[0] to ht[1].
 * Returns 0 if no rehash is in progress, or if ht[0] has been fully
 * drained: in that case its bucket array is freed, ht[1] takes its place,
 * ht[1] is reset and rehashidx is set back to -1.
 */
int dictRehash(dict *d, int n) {
    int empty_budget = n*10; /* How many empty buckets we may still skip. */

    if (!dictIsRehashing(d)) return 0;

    /* Each pass handles one non-empty bucket, then advances rehashidx. */
    for (; n-- && d->ht[0].used != 0; d->rehashidx++) {
        dictEntry *cur, *following;

        /* rehashidx cannot run past the table: ht[0].used != 0 means at
         * least one non-empty bucket is still ahead of it. */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);

        /* Skip empty buckets, giving up once the budget is exhausted. */
        while (d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_budget == 0) return 1;
        }

        /* Push every entry of this bucket onto the head of its new chain
         * in ht[1], keeping both element counters up to date. */
        for (cur = d->ht[0].table[d->rehashidx]; cur; cur = following) {
            uint64_t slot;

            following = cur->next;
            /* Bucket index of this key in the new table. */
            slot = dictHashKey(d, cur->key) & d->ht[1].sizemask;
            cur->next = d->ht[1].table[slot];
            d->ht[1].table[slot] = cur;
            d->ht[0].used--;
            d->ht[1].used++;
        }

        /* The old bucket is now empty. */
        d->ht[0].table[d->rehashidx] = NULL;
    }

    /* Entries remain in the old table: the caller must call us again. */
    if (d->ht[0].used != 0) return 1;

    /* Whole table migrated: release the old bucket array and promote
     * ht[1] to ht[0]. */
    zfree(d->ht[0].table);
    d->ht[0] = d->ht[1];
    _dictReset(&d->ht[1]);
    d->rehashidx = -1;
    return 0;
}

接下來我們深扒一下這個函數的具體實現。

判斷dict是否正在rehashing，只有是，才能繼續往下進行，否則說明沒有正在進行的rehash，直接返回0。
接着是分n步進行的漸進式哈希主體部分（n由函數參數傳入），在while的條件裏面加入對.used（舊表中剩餘元素數目）的觀察，增加安全性。
一個runtime的斷言保證一下漸進式哈希的索引沒有越界。
接下來一個小while是爲了跳過空桶，同時更新剩餘可以訪問的空桶數，empty_visits這個變量的作用之前已經說過了。
現在我們來到了當前的bucket，在下一個while(de)中把其中的全部元素都遷移到ht[1]中，索引值是藉助哈希表的大小掩碼計算出來的，可以保證不會越界。同時更新了兩張表的當前元素數目。
每一步rehash結束，都要把舊表中已經遷移完畢的bucket置爲空指針，並且增加索引值。
最後判斷一下舊表是否全部遷移完畢，若是，則釋放舊表的空間，用新表取代舊表（ht[0] = ht[1]）並重置ht[1]，同時把漸進式哈希的索引重置爲-1；否則用返回值告訴調用方，dict內仍然有數據未遷移。

漸進式哈希的精髓在於：數據的遷移不是一次性完成的，而是可以通過dictRehash()這個函數分步規劃的，並且調用方可以及時知道是否需要繼續進行漸進式哈希操作。如果dict數據結構中存儲了海量的數據，那麼一次性遷移勢必帶來redis性能的下降，別忘了redis是單線程模型，在實時性要求高的場景下這可能是致命的。而漸進式哈希則將這種代價可控地分攤了，調用方可以在dict做插入、刪除、更新的時候執行dictRehash()，最小化數據遷移的代價。
在遷移的過程中，數據是在新表還是舊表中並不是一個非常急迫的需求，遷移的過程並不會丟失數據，在舊表中找不到再到新表中尋找就是了。
參考博客：https://blog.csdn.net/cqk0100/article/details/8040081

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。