還是直接上代碼吧:
#include <stdlib.h>
#include <string.h>
#include <cstring>
#include <string>
#include <iostream>
#include <fstream>
#include <vector>
#include <list>

using namespace std;

// Number of buckets in the hash table.
static const int hashtable_length = 6151;
// Modulus of the secondary (length-based) hash used to pre-filter comparisons.
static const int hashtable_compare = 769;

// Primary hash: used to locate a bucket.
//
// Sums the string one `unsigned int`-sized word at a time, reducing modulo
// hashtable_length after every addition. Trailing bytes that do not fill a
// whole word are ignored, so any string shorter than one word hashes to 0.
//
// str must be a non-null, NUL-terminated string.
// Returns a bucket index in [0, hashtable_length).
unsigned int hash1(const char* str)
{
    const size_t word_size = sizeof(unsigned int);
    size_t remaining = strlen(str);
    unsigned int sum = 0;
    while (remaining >= word_size)
    {
        // memcpy instead of casting to unsigned int*: the string is not
        // guaranteed to be suitably aligned, and the cast breaks aliasing rules.
        unsigned int word = 0;
        memcpy(&word, str, word_size);
        sum = (sum + word) % hashtable_length;
        str += word_size;
        remaining -= word_size;
    }
    return sum;
}

// Secondary hash of a string whose length is already known.
// Shared by hash2() and find_in_bucket() so both always agree.
static unsigned int hash2_of_length(size_t length)
{
    return static_cast<unsigned int>(length % hashtable_compare);
}

// Secondary hash: used to compare two strings quickly, which in theory speeds
// up lookup. Using the length is a simple approach; a more sophisticated
// function could be designed later.
//
// str must be a non-null, NUL-terminated string.
unsigned int hash2(const char* str)
{
    return hash2_of_length(strlen(str));
}

// Returns true if `str` is already stored in bucket `l`.
//
// Each stored string is first pre-filtered by its secondary hash; the full
// character comparison only runs when the secondary hashes match.
bool find_in_bucket(list<string>& l, const char* str)
{
    const unsigned int wanted_key2 = hash2(str);
    for (list<string>::const_iterator it = l.begin(); it != l.end(); ++it)
    {
        // size() is already known for a std::string, so the stored string is
        // not re-scanned just to compute its secondary hash.
        if (hash2_of_length(it->size()) != wanted_key2)
            continue;
        if (strcmp(str, it->c_str()) == 0)
            return true;
    }
    return false;
}

// Appends `str` to bucket `l` unless it is already present.
//
// Returns the bucket's size after the insertion, or -1 if `str` was already
// in the bucket (nothing is inserted in that case).
int insert_in_bucket(list<string>& l, const char* str)
{
    if (find_in_bucket(l, str))
        return -1;

    l.push_back(string(str));
    return static_cast<int>(l.size());
}

// Returns true if `str` is stored in hash table `v`.
// `v` must hold at least hashtable_length buckets.
bool find_in_hashtable(vector<list<string> >& v, const char* str)
{
    return find_in_bucket(v[hash1(str)], str);
}

// Inserts `str` into hash table `v`.
// `v` must hold at least hashtable_length buckets.
// Returns the size of the target bucket after insertion, or -1 on duplicate.
int insert_in_hashtable(vector<list<string> >& v, const char* str)
{
    return insert_in_bucket(v[hash1(str)], str);
}

// Builds a hash table from the lines of D:\input.txt, then looks up every
// line of D:\test.txt in it, writing a log to D:\log.txt.
//
// Returns 0 on success, -1 if the input file cannot be opened, -2 if the
// test file cannot be opened.
int main()
{
    // Learning exercise: list
    // Learning exercise: hashtable

    vector<list<string> > hashtable(hashtable_length, list<string>());

    // The log is best-effort: if it cannot be opened, writes to it are no-ops.
    ofstream log_file;
    log_file.open("D:\\log.txt");

    ifstream input_file;
    input_file.open("D:\\input.txt");
    if (!input_file)
        return -1;

    // Read into a std::string rather than a fixed stack buffer: a line longer
    // than a fixed buffer would put the stream into a failed state and
    // silently end the loop, dropping the rest of the file.
    string line;
    int max_of_bucket = -1;
    while (getline(input_file, line))
    {
        // Use the hash table to store a large amount of data with fast
        // lookup and insertion.
        const int len = insert_in_hashtable(hashtable, line.c_str());
        if (len > max_of_bucket)
            max_of_bucket = len;
        // '\n' instead of endl: same bytes, without a flush per record.
        log_file << "hashkey\t= \t" << hash1(line.c_str())
                 << "\nlength\t=\t" << len << "\n\n";
    }
    log_file << "max_of_bucket = " << max_of_bucket << '\n';
    input_file.close();

    // Fast lookup.
    // A fresh stream object is used: the first stream ended in an EOF/fail
    // state, which older standard libraries do not clear on open().
    ifstream test_file;
    test_file.open("D:\\test.txt");
    if (!test_file)
        return -2;
    while (getline(test_file, line))
    {
        // Use the hash table for fast lookup.
        if (find_in_hashtable(hashtable, line.c_str()))
            log_file << "Found it !\n";
        else
            log_file << "Missed it !\n";
    }
    test_file.close();
    log_file.close();

    return 0;
}