散列函數滿足如下條件:
一、對輸入值運算,獲得一個固定長度的摘要(Hash value);
二、不同的輸入值可能對應相同的輸出值;
三、散列函數的輸出值儘可能接近均勻分佈,即輸出值y的分佈函數F(y)=y/m, m爲散列函數的最大值;
四、x的微小變化可使f(x)發生非常大的變化,即所謂「雪崩效應」(Avalanche effect),即|df(x)/dx| >> 1;
哈希衝突(Hash collision)是無法避免的。哈希衝突的處理方法:
1, 鏈地址法
// HashTable.h
#include <cassert>
#include <iostream>
#include <string>

typedef unsigned int UINT;

/// One key/value entry in a bucket's singly linked collision chain.
class Node {
public:
    Node(int key, const std::string& str);

    int key;
    std::string value;
    Node* next;  // next entry in the same bucket, or 0 at the end of the chain
};

/// Fixed-size hash table mapping int keys to strings.
///
/// Collisions are resolved by separate chaining: each of the SIZE buckets
/// heads a singly linked list of Node objects that the table allocates with
/// `new` and releases in its destructor.
class HashTable {
public:
    HashTable();
    ~HashTable();

    /// Stores `value` under `key`.
    /// @return true if a new entry was created; false if `key` is already
    ///         present, in which case the stored value is left untouched.
    bool Insert(int key, const std::string& value);

    /// @return true if an entry with `key` exists.
    bool Find(int key);

    /// Returns a mutable reference to the value stored under `key`.
    /// The key must already be present; this is checked with assert().
    std::string& operator[](int key);

private:
    // The table owns its nodes, so a member-wise copy would make two tables
    // delete the same chains. Declared but never defined to forbid copying.
    HashTable(const HashTable&);
    HashTable& operator=(const HashTable&);

    Node* FindNode(int key);
    unsigned int hasher(int key);  // hash function

    enum { SIZE = 100 };
    Node* nodes[SIZE];
};

// HashTable.cpp
Node::Node(int Key, const std::string& str) : key(Key), value(str), next(0) {}

// Value-initialising the array sets every bucket head to a null pointer.
HashTable::HashTable() : nodes() {}

HashTable::~HashTable() {
    for (int bucket = 0; bucket < SIZE; ++bucket) {
        Node* node = nodes[bucket];
        while (node != 0) {
            Node* const following = node->next;
            delete node;
            node = following;
        }
    }
}

// The simplest possible hash function: |key| modulo the bucket count.
// The magnitude is computed in unsigned arithmetic because abs(INT_MIN)
// overflows; for every other key the result equals abs(key) % SIZE.
unsigned int HashTable::hasher(int key) {
    const unsigned int bits = static_cast<unsigned int>(key);
    const unsigned int magnitude = key < 0 ? 0u - bits : bits;
    return magnitude % static_cast<unsigned int>(SIZE);
}

bool HashTable::Insert(int key, const std::string& value) {
    // `link` always points at the pointer that would have to be rewritten to
    // append a node: first the bucket head, then each node's `next` field.
    Node** link = &nodes[hasher(key)];
    while (*link != 0) {
        if ((*link)->key == key) {
            return false;  // a second node with this key could never be reached
        }
        link = &(*link)->next;
    }
    *link = new Node(key, value);
    return true;
}

bool HashTable::Find(int key) {
    return FindNode(key) != 0;
}

// Walks the key's bucket chain and returns the matching node, or 0 if the
// key is not stored.
Node* HashTable::FindNode(int key) {
    const UINT adr = hasher(key);
    for (Node* node = nodes[adr]; node != 0; node = node->next) {
        if (node->key == key) {
            return node;
        }
    }
    return 0;
}

std::string& HashTable::operator[](int key) {
    Node* node = FindNode(key);
    assert(node != 0);
    return node->value;
}

// main.cpp
// Define HASHTABLE_NO_DEMO_MAIN to compile the table without this demo, e.g.
// when embedding the listing in another program or in a test harness.
#ifndef HASHTABLE_NO_DEMO_MAIN
int main() {
    HashTable ht;

    // 1, 101 and 201 all land in bucket 1, so this exercises the chain.
    ht.Insert(1, "you");
    std::string value = ht[1];
    std::cout << value << std::endl;

    ht.Insert(101, "girl");
    value = ht[101];
    std::cout << value << std::endl;

    ht.Insert(201, "boy");
    value = ht[201];
    std::cout << value << std::endl;

    ht[201] = "man";
    std::cout << ht[201] << std::endl;

    std::cin.get();
    return 0;
}
#endif  // HASHTABLE_NO_DEMO_MAIN
2, 開放地址法
爲每一個Hash值,建立一個Hash桶(Bucket),哈希桶的個數是固定的,桶的容量也是固定的。
好處是查表的最大開銷是可以確定的,因爲最多處理的衝突數是確定的,所以算法的時間複雜度爲O(1)+O(m),其中m爲Hash桶容量。
壞處是新建的表項可能會因爲衝突過多,而不能裝入Hash表中。
http://www.360doc.com/content/13/0108/16/8363527_258987810.shtml
3,線性探測再散列算法
// StringHash.h
#include <cctype>
#include <cstddef>
#include <string>

#define MAXTABLELEN 1024  // default length of the hash index table

// One slot of the hash index table. A slot stores two check hashes of the
// string that occupies it, not the string itself.
typedef struct _HASHTABLE {
    long nHashA;
    long nHashB;
    bool bExists;
} HASHTABLE, *PHASHTABLE;

/// Set of strings stored in a fixed-length open-addressing table that uses
/// linear probing.
///
/// Each string is reduced to three hashes: one chooses the starting slot and
/// the other two are stored in the slot and compared on lookup. Matching is
/// case-insensitive because every character is passed through toupper().
class StringHash {
public:
    /// @param nTableLength number of slots; a value <= 0 creates an empty
    ///        table in which Hash() always fails and Hashed() never matches.
    StringHash(const long nTableLength = MAXTABLELEN);
    ~StringHash(void);

private:
    // The object owns m_HashIndexTable, so a member-wise copy would delete
    // the array twice. Declared but never defined to forbid copying.
    StringHash(const StringHash&);
    StringHash& operator=(const StringHash&);

    // Values accepted as HashString()'s dwHashType argument.
    enum { HASH_OFFSET = 0, HASH_A = 1, HASH_B = 2 };

    unsigned long cryptTable[0x500];
    unsigned long m_tablelength;     // length of the hash index table
    HASHTABLE* m_HashIndexTable;

    void InitCryptTable();           // fills cryptTable before any hashing
    unsigned long HashString(const std::string& lpszString,
                             unsigned long dwHashType);  // hash function

public:
    /// Adds `url` to the table.
    /// @return true if `url` is stored (newly added or already present);
    ///         false if no free slot was found.
    bool Hash(const std::string& url);

    /// Checks whether `url` has been hashed before.
    /// @return the slot index if found, otherwise (unsigned long)-1.
    unsigned long Hashed(const std::string& url);
};

// StringHash.c
StringHash::StringHash(const long nTableLength)
    : m_tablelength(nTableLength > 0 ? static_cast<unsigned long>(nTableLength) : 0),
      m_HashIndexTable(NULL) {
    InitCryptTable();
    if (m_tablelength == 0) {
        return;  // nothing to allocate; lookups and inserts guard against this
    }
    m_HashIndexTable = new HASHTABLE[m_tablelength];
    for (unsigned long i = 0; i < m_tablelength; ++i) {
        m_HashIndexTable[i].nHashA = -1;
        m_HashIndexTable[i].nHashB = -1;
        m_HashIndexTable[i].bExists = false;
    }
}

StringHash::~StringHash(void) {
    // Release the index table.
    delete[] m_HashIndexTable;
    m_HashIndexTable = NULL;
    m_tablelength = 0;
}

void StringHash::InitCryptTable() {
    unsigned long seed = 0x00100001;
    for (unsigned long index1 = 0; index1 < 0x100; ++index1) {
        // Each of the 0x100 columns receives 5 entries spaced 0x100 apart,
        // which fills all 0x500 elements of cryptTable.
        unsigned long index2 = index1;
        for (unsigned long i = 0; i < 5; ++i, index2 += 0x100) {
            seed = (seed * 125 + 3) % 0x2AAAAB;
            const unsigned long temp1 = (seed & 0xFFFF) << 0x10;
            seed = (seed * 125 + 3) % 0x2AAAAB;
            const unsigned long temp2 = (seed & 0xFFFF);
            cryptTable[index2] = (temp1 | temp2);
        }
    }
}

// dwHashType selects which 0x100-entry section of cryptTable is used, i.e.
// which of the hash functions is computed. Hashing stops at the first NUL
// character of lpszString.
unsigned long StringHash::HashString(const std::string& lpszString,
                                     unsigned long dwHashType) {
    // Read the bytes as unsigned char so toupper() gets a valid argument and
    // the table index below can never be negative.
    const unsigned char* key =
        reinterpret_cast<const unsigned char*>(lpszString.c_str());
    unsigned long seed1 = 0x7FED7FED, seed2 = 0xEEEEEEEE;
    while (*key != 0) {
        const int ch = toupper(*key++);
        seed1 = cryptTable[(dwHashType << 8) + ch] ^ (seed1 + seed2);
        seed2 = ch + seed1 + seed2 + (seed2 << 5) + 3;
    }
    return seed1;
}

unsigned long StringHash::Hashed(const std::string& lpszString) {
    const unsigned long kNotFound = static_cast<unsigned long>(-1);
    if (m_tablelength == 0) {
        return kNotFound;  // avoids a modulo by zero on an empty table
    }

    // The chance that two different strings agree on all three hashes is
    // negligible.
    const unsigned long nHash = HashString(lpszString, HASH_OFFSET);
    const unsigned long nHashA = HashString(lpszString, HASH_A);
    const unsigned long nHashB = HashString(lpszString, HASH_B);

    const unsigned long nHashStart = nHash % m_tablelength;
    unsigned long nHashPos = nHashStart;
    while (m_HashIndexTable[nHashPos].bExists) {
        const HASHTABLE& slot = m_HashIndexTable[nHashPos];
        if (static_cast<unsigned long>(slot.nHashA) == nHashA &&
            static_cast<unsigned long>(slot.nHashB) == nHashB) {
            return nHashPos;
        }
        nHashPos = (nHashPos + 1) % m_tablelength;
        if (nHashPos == nHashStart) {
            break;  // probed every slot once
        }
    }
    return kNotFound;  // not found
}

bool StringHash::Hash(const std::string& lpszString) {
    if (m_tablelength == 0) {
        return false;  // avoids a modulo by zero on an empty table
    }

    const unsigned long nHash = HashString(lpszString, HASH_OFFSET);
    const unsigned long nHashA = HashString(lpszString, HASH_A);
    const unsigned long nHashB = HashString(lpszString, HASH_B);

    const unsigned long nHashStart = nHash % m_tablelength;
    unsigned long nHashPos = nHashStart;
    while (m_HashIndexTable[nHashPos].bExists) {
        const HASHTABLE& slot = m_HashIndexTable[nHashPos];
        if (static_cast<unsigned long>(slot.nHashA) == nHashA &&
            static_cast<unsigned long>(slot.nHashB) == nHashB) {
            // Already stored. Hashed() would stop at this slot anyway, so a
            // second copy further along would only waste a slot.
            return true;
        }
        nHashPos = (nHashPos + 1) % m_tablelength;
        if (nHashPos == nHashStart) {
            // Came full circle: the table has no free slot left.
            return false;
        }
    }

    m_HashIndexTable[nHashPos].bExists = true;
    m_HashIndexTable[nHashPos].nHashA = static_cast<long>(nHashA);
    m_HashIndexTable[nHashPos].nHashB = static_cast<long>(nHashB);
    return true;
}
註解:字符串的哈希函數
你可以把哈希表存儲在字符串數組中,然後你可以計算字符串的哈希值,然後與已經存儲的字符串的哈希值進行比較。如果有匹配的哈希值,就可以通過字符串比較進行匹配驗證。這種方法叫索引,根據數組的大小以及字符串的平均長度,查找速度可以提高約100倍。
/// Shift-and-add string hash: for every byte of the string the running value
/// is shifted left by one bit and the byte is added to it.
///
/// @param lpszString NUL-terminated string to hash; must not be null.
/// @return the hash value; an empty string yields the seed 0xf1e2d3c4.
unsigned long HashString(const char *lpszString) {
    unsigned long ulHash = 0xf1e2d3c4;
    // Bytes are read as unsigned char so that values >= 0x80 contribute the
    // same amount whether or not plain char is signed on the platform.
    const unsigned char *cursor =
        reinterpret_cast<const unsigned char *>(lpszString);
    for (; *cursor != 0; ++cursor) {
        ulHash <<= 1;
        ulHash += *cursor;
    }
    return ulHash;
}
上面代碼中的散列算法在遍歷字符串過程中,將哈希值左移一位,然後加上字符值。它會在較低的數據範圍內產生相對可預測的輸出,從而可能會產生大量衝突。
MPQ格式,使用了一種非常複雜的散列算法(如下所示),產生完全不可預測的哈希值,這個算法十分有效,這就是所謂的單向散列算法。
/// Table-driven string hash. Each character is upper-cased with toupper(),
/// so strings that differ only in letter case hash to the same value.
///
/// Reads `cryptTable`, which is not defined in this snippet; it must hold at
/// least (dwHashType << 8) + 0x100 entries, since the index below is
/// (dwHashType << 8) plus a character value in 0..255.
///
/// @param lpszFileName NUL-terminated string to hash; must not be null.
/// @param dwHashType   selects the 0x100-entry section of cryptTable to use.
/// @return the final value of seed1.
unsigned long HashString(const char *lpszFileName, unsigned long dwHashType) {
    // Read the bytes as unsigned char so toupper() gets a valid argument and
    // the table index can never be negative.
    const unsigned char *key =
        reinterpret_cast<const unsigned char *>(lpszFileName);
    unsigned long seed1 = 0x7FED7FED, seed2 = 0xEEEEEEEE;
    while (*key != 0) {
        const int ch = toupper(*key++);
        seed1 = cryptTable[(dwHashType << 8) + ch] ^ (seed1 + seed2);
        seed2 = ch + seed1 + seed2 + (seed2 << 5) + 3;
    }
    return seed1;
}