HashTable結構定義在zend_types.h中
/* HashTable is the public alias for struct _zend_array. */
typedef struct _zend_array HashTable;

/*
 * The array / hash table structure.
 * NOTE(review): field order and widths determine the in-memory layout;
 * do not reorder.
 */
struct _zend_array {
	zend_refcounted_h gc;                  /* header written via GC_REFCOUNT() / GC_TYPE_INFO() */
	union {
		struct {
			ZEND_ENDIAN_LOHI_4(
				zend_uchar    flags,
				zend_uchar    nApplyCount,
				zend_uchar    nIteratorsCount,
				zend_uchar    reserve)
		} v;
		uint32_t flags;                    /* HASH_FLAG_* bits (e.g. INITIALIZED, PACKED, PERSISTENT) */
	} u;
	uint32_t          nTableMask;          /* OR-ed with a key's hash to pick a hash slot; set to -nTableSize for hash tables */
	Bucket           *arData;              /* bucket storage; hash slots are addressed at negative indexes relative to it */
	uint32_t          nNumUsed;            /* presumably the number of bucket slots consumed so far - TODO confirm */
	uint32_t          nNumOfElements;      /* presumably the number of live elements - TODO confirm */
	uint32_t          nTableSize;          /* capacity, as returned by zend_hash_check_size() */
	uint32_t          nInternalPointer;    /* initialised to HT_INVALID_IDX; presumably the iteration cursor - TODO confirm */
	zend_long         nNextFreeElement;    /* initialised to 0; presumably the next free integer key - TODO confirm */
	dtor_func_t       pDestructor;         /* destructor callback supplied at init time */
};
重要的幾個字段:
nTableSize: 哈希表最多容納元素數量,歸整爲2的冪。
nTableMask: 掩碼, 對key求哈希獲得的值h, h | nTableMask獲得偏移量。
arData:存儲實際數據。
arData指向的數據實際分爲兩部分,Hash部分和Data部分。
ht->arData
-----------------Hash部分------------------------|-------------------Data部分--------------------
{Hash部分}{arData指針}{Data部分}。
哈希運算不可避免會產生衝突,有兩個以上的key映射到同一位置, PHP採用的是鏈地址法,即把衝突的鍵值用鏈表鏈接起來。鏈地址法會有內存碎片,還須要不斷分配釋放內存,因此這裏直接分配了nTableSize個Bucket空間,存放全部的key-val對。
因此這裏分配了兩個空間,Data部分存儲key-val對,Hash部分存儲映射到某個位置的元素鏈表的頭部。
zend_hash_init對HashTable進行初始化賦值。這是一個宏,實際調用_zend_hash_init.
ht->nTableSize存放取整後的nSize,此時沒有爲哈希表元素分配內存。 添加元素時經過ht->u.flags & HASH_FLAG_INITIALIZED判斷爲false時分配內存。
zend_hash_check_size(nSize) 將nSize取整爲2的冪。
#define zend_hash_init(ht, nSize, pHashFunction, pDestructor, persistent) \ _zend_hash_init((ht), (nSize), (pDestructor), (persistent) ZEND_FILE_LINE_CC) ZEND_API void ZEND_FASTCALL _zend_hash_init(HashTable *ht, uint32_t nSize, dtor_func_t pDestructor, zend_bool persistent ZEND_FILE_LINE_DC) { GC_REFCOUNT(ht) = 1; GC_TYPE_INFO(ht) = IS_ARRAY; ht->u.flags = (persistent ? HASH_FLAG_PERSISTENT : 0) | HASH_FLAG_APPLY_PROTECTION | HASH_FLAG_STATIC_KEYS; ht->nTableSize = zend_hash_check_size(nSize); ht->nTableMask = HT_MIN_MASK; HT_SET_DATA_ADDR(ht, &uninitialized_bucket); ht->nNumUsed = 0; ht->nNumOfElements = 0; ht->nInternalPointer = HT_INVALID_IDX; ht->nNextFreeElement = 0; ht->pDestructor = pDestructor; }
第一次向表中添加元素時調用, 分配空間。
//第一次向表中添加元素時調用, 分配空間。 static void zend_always_inline zend_hash_real_init_ex(HashTable *ht, int packed) { HT_ASSERT(GC_REFCOUNT(ht) == 1); ZEND_ASSERT(!((ht)->u.flags & HASH_FLAG_INITIALIZED)); if (packed) { HT_SET_DATA_ADDR(ht, pemalloc(HT_SIZE(ht), (ht)->u.flags & HASH_FLAG_PERSISTENT)); (ht)->u.flags |= HASH_FLAG_INITIALIZED | HASH_FLAG_PACKED; HT_HASH_RESET_PACKED(ht); } else { (ht)->nTableMask = -(ht)->nTableSize; HT_SET_DATA_ADDR(ht, pemalloc(HT_SIZE(ht), (ht)->u.flags & HASH_FLAG_PERSISTENT)); (ht)->u.flags |= HASH_FLAG_INITIALIZED; if (EXPECTED(ht->nTableMask == -8)) { Bucket *arData = ht->arData; HT_HASH_EX(arData, -8) = -1; HT_HASH_EX(arData, -7) = -1; HT_HASH_EX(arData, -6) = -1; HT_HASH_EX(arData, -5) = -1; HT_HASH_EX(arData, -4) = -1; HT_HASH_EX(arData, -3) = -1; HT_HASH_EX(arData, -2) = -1; HT_HASH_EX(arData, -1) = -1; } else { HT_HASH_RESET(ht); } } }
HashTable如何查找元素:
/*
 * Look up the bucket stored under a string key.
 *
 * ht  - table to search
 * key - string key to look for
 *
 * Returns the matching Bucket, or NULL when the key is not present.
 */
static zend_always_inline Bucket *zend_hash_find_bucket(const HashTable *ht, zend_string *key)
{
	zend_ulong h;
	uint32_t nIndex;
	uint32_t idx;
	Bucket *p, *arData;

	/* Hash of the key. */
	h = zend_string_hash_val(key);
	arData = ht->arData;
	/* OR-ing the hash with nTableMask decides which hash slot the key maps to. */
	nIndex = h | ht->nTableMask;
	/*
	 * For reference:
	 *   #define HT_HASH_EX(data, idx) ((uint32_t*)(data))[(int32_t)(idx)]
	 * The index is reinterpreted as a signed 32-bit value, so the slot is
	 * read relative to arData. The slot holds the index of the first
	 * element of the collision chain for this hash slot.
	 */
	idx = HT_HASH_EX(arData, nIndex);
	while (EXPECTED(idx != HT_INVALID_IDX)) {
		/* Bucket for this chain position (presumably arData[idx] - TODO confirm). */
		p = HT_HASH_TO_BUCKET_EX(arData, idx);
		if (EXPECTED(p->key == key)) { /* check for the same interned string */
			return p;
		} else if (EXPECTED(p->h == h) &&
		     EXPECTED(p->key) &&
		     EXPECTED(ZSTR_LEN(p->key) == ZSTR_LEN(key)) &&
		     EXPECTED(memcmp(ZSTR_VAL(p->key), ZSTR_VAL(key), ZSTR_LEN(key)) == 0)) {
			/* Same hash, same length, same bytes: a match. */
			return p;
		}
		/* Follow the chain to the next element that hashed to this slot. */
		idx = Z_NEXT(p->val);
	}
	return NULL;
}
經過_zend_hash_add_or_update_i添加元素。
/*
 * Prototype only; the definition is not shown here.
 * NOTE(review): presumably inserts or updates the entry for `key` with
 * `pData` according to `flag` - confirm against the definition.
 */
static zend_always_inline zval *_zend_hash_add_or_update_i(HashTable *ht, zend_string *key, zval *pData, uint32_t flag ZEND_FILE_LINE_DC);