首先,在看這篇文章前,你須要瞭解緩存是幹嗎的?node
衆所周知,程序運行時,數據通常存在內存或磁盤裏,而內存中的數據老是能夠被更快速的獲取。可是內存空間是有限的,大多數人PC的內存可能在4G~16G之間,這意味着你必需要捨棄一部分不頻繁被訪問的數據,把它們存在磁盤裏;而把常常須要被訪問的數據存在內存裏,這就是緩存的基本思路。git
但對於程序(和你)而言,沒法預測哪些數據是被常常訪問的,因此你只能根據訪問數據的歷史來推測和統計哪些數據是被常常訪問的,並把它存在內存裏,若是內存滿了就找個最不被常常訪問的數據替換掉。這個統計過程和改寫內存的策略一般被稱爲「頁面置換算法」,事實上,全部可實現的緩存策略(頁面置換算法)都是基於歷史訪問信息來實現的。科學家通過幾十年的努力,發明了許許多多的頁面置換算法,好比:FIFO、LFU、LRU、ARC、MQ、GD、GDSF....,它們各有所長,沒有孰優孰劣。github
缺頁數(率):爲了評判頁面置換算法的優劣,針對訪問數據的次數n,和數據未被緩存命中的次數(缺頁數)p,缺頁率=p/n。顯然,缺頁率越小,頁面置換算法越優秀。web
本文基於哈希表的內存管理結構,簡單地實現線程安全的緩存算法(LFU, LRU, ARC, MQ, GD, GDSF):算法
首先看API:(cache.h)緩存
1 #pragma once 2 3 #include <sys/time.h> 4 #ifdef __cplusplus 5 extern "C" { 6 #endif 7 8 typedef struct _cache *cache_t, _cache; 9 typedef struct _cache_ele cache_pair; 10 typedef struct _cache_ret { /// 讀緩存返回結果 11 long cost; /// 代價 12 const char*cache; /// 數據 13 }cache_ret; 14 /** 15 * @API 16 * create, delete, search, read 17 */ 18 cache_t new_cache (unsigned capacity, cache_ret(*model)(cache_t, const char*)); /// 新建 19 void del_cache (cache_t cache); /// 刪除 20 unsigned cache_page_fault (cache_t cache); /// 缺頁數 21 cache_ret read_cache (cache_t cache, const char*filename); /// 讀緩存 22 23 /** 24 * @Cache_Algorithm_Model 25 * cache_ret(*)(cache_t, const char*) 26 */ 27 cache_ret LRU (cache_t cache, const char*request); /// 頁面替換算法模型 28 cache_ret LFU (cache_t cache, const char*request); 29 cache_ret ARC (cache_t cache, const char*request); 30 cache_ret MQ (cache_t cache, const char*request); 31 cache_ret GD (cache_t cache, const char*request); 32 cache_ret GDSF(cache_t cache, const char*request); 33 34 #ifdef __cplusplus 35 } 36 #endif
數據結構:安全
緩存: 數據結構
1 struct _cache_ele { /// 數據單元 2 char *key, *file_cache; /// 鍵值、數據 3 long cost; /// 代價(長度) 4 struct timeval pre_t; /// 上次訪問時間 5 unsigned int cnt; /// 訪問次數 6 struct _cache_ele *nxt, *pre; 7 }; 8 9 struct _cache { 10 cache_pair table[HASHTABELSIZE], *first_table; /// 哈希表,first_table根據須要生成 11 cache_ret (*f)(cache_t, const char *); /// 頁面置換策略 12 pthread_mutex_t mutex; /// 線程安全的鎖 13 unsigned int capacity, _cur, first_cur, page_fault; /// 容量、當前量、ft當前量、缺頁數 14 };/// *cache_t
緩存策略實現:ide
緩存策略其實是一個選擇問題,若是緩存沒有滿,那麼顯然能夠直接把新請求的數據直接讀到緩存中,若是滿了,則按照策略選一個數據替換掉。spa
(cache.c完整代碼)
1 #include "cache.h" 2 #include <zconf.h> 3 #include "stdlib.h" 4 #include <sys/mman.h> 5 #include "pthread.h" 6 #include "string.h" 7 #include <sys/time.h> 8 #include <fcntl.h> 9 10 #define RMALLOC(type,n) (type*)malloc(sizeof(type)*(n)) 11 #define MALLOC(p,type,n) type*p = RMALLOC(type, n) 12 #define MAX_BUFFER_LEN 1024ll * 1024 13 #ifndef HASHTABLESZIE 14 #define HASHTABELSIZE 10005 15 #endif 16 17 unsigned int string_hash(const char *str) { 18 unsigned int hash = 0; 19 int i; 20 for (i = 0; *str; i++) { 21 if ((i & 1) == 0)hash ^= ((hash << 7) ^ (*str++) ^ (hash >> 3)); 22 else hash ^= (~((hash << 11) ^ (*str++) ^ (hash >> 5))); 23 } 24 return (hash & 0x7FFFFFFF); 25 } 26 27 struct _cache_ele { 28 char *key, *file_cache; 29 long cost; 30 struct timeval pre_t; 31 unsigned int cnt; 32 struct _cache_ele *nxt, *pre; 33 }; 34 35 cache_pair*new_cache_pair(){ 36 MALLOC(res, cache_pair, 1); 37 res->key = res->file_cache = NULL; 38 res->cnt = res->cost = 0; 39 res->nxt = res->pre = NULL; 40 return res; 41 } 42 43 void del_cache_pair(cache_pair *del) { 44 free(del->key); 45 free(del->file_cache); 46 free(del); 47 } 48 49 struct _cache { 50 cache_pair table[HASHTABELSIZE], *first_table; /// hash table 51 cache_ret (*f)(cache_t, const char *); /// function pointer 52 pthread_mutex_t mutex; 53 unsigned int capacity, _cur, first_cur, page_fault; 54 };/// *cache_t 55 56 cache_t new_cache(unsigned capacity, cache_ret(*model)(cache_t, const char *)) { 57 if (model) { 58 MALLOC(ret, _cache, 1); 59 pthread_mutex_init(&ret->mutex, NULL); 60 ret->capacity = capacity; 61 ret->page_fault = ret->first_cur = ret->_cur = 0; 62 memset(ret->table, 0, sizeof(cache_pair) * HASHTABELSIZE); 63 if (model == ARC)ret->first_table = RMALLOC(cache_pair, HASHTABELSIZE); 64 else if(model == MQ)ret->first_table = RMALLOC(cache_pair, HASHTABELSIZE * 3); 65 else ret->first_table = NULL; 66 ret->f = model; 67 return ret; 68 } 69 return NULL; 70 } 71 72 cache_ret read_cache(cache_t cache, const char *filename) { 73 pthread_mutex_lock(&cache->mutex); 74 cache_ret res = cache->f(cache, filename); 75 pthread_mutex_unlock(&cache->mutex); 76 return res; 77 } 78 79 unsigned cache_page_fault(cache_t cache){ 80 return cache->page_fault; 81 } 82 83 void del_cache(cache_t cache) { 84 pthread_mutex_destroy(&cache->mutex); 85 for (int i = 0; i < HASHTABELSIZE; ++i) { 86 cache_pair *p = cache->table[i].nxt; 87 while (p) { 88 cache_pair *tmp = p; 89 p = p->nxt; 90 del_cache_pair(tmp); 91 } 92 } 93 if (cache->first_table) { 94 for (int i = 0; i < HASHTABELSIZE; ++i) { 95 cache_pair *p = cache->first_table[i].nxt; 96 while (p) { 97 cache_pair *tmp = p; 98 p = p->nxt; 99 del_cache_pair(tmp); 100 } 101 } 102 free(cache->first_table); 103 } 104 free(cache); 105 } 106 107 cache_pair *is_in_table(cache_pair *table, const char *request, int *ret) { 108 unsigned int index = string_hash(request) % HASHTABELSIZE; 109 cache_pair *src = table + index; 110 if (!src->nxt) { 111 *ret = 0; 112 return src; 113 } 114 src = src->nxt; 115 while (strcmp(src->key, request)) { 116 cache_pair *pre = src; 117 src = src->nxt; 118 if (!src) { /// not in table: return pre node 119 *ret = 0; 120 return pre; 121 } 122 } 123 *ret = 1; 124 return src; 125 } 126 127 void replace_after_src(cache_pair *src, const char *request) { 128 src = src->nxt; 129 src->cnt = 1; 130 gettimeofday(&src->pre_t, NULL); 131 src->key = src->key ? (char *) realloc(src->key, strlen(request) + 1) : RMALLOC(char, strlen(request) + 1); 132 strcpy(src->key, request); 133 int fd = open(request, O_RDONLY); 134 if (fd > 0) { 135 char *fp = mmap(NULL, MAX_BUFFER_LEN, PROT_READ, MAP_SHARED, fd, 0); 136 src->cost = strlen(fp) + 1; 137 src->file_cache = src->file_cache ? (char *) realloc(src->file_cache, src->cost) : RMALLOC(char, src->cost); 138 strcpy(src->file_cache, fp); 139 munmap(fp, MAX_BUFFER_LEN); 140 close(fd); 141 } else { 142 src->cost = -1; 143 if (src->file_cache)free(src->file_cache); 144 src->file_cache = NULL; 145 } 146 } 147 148 void add_after_src(cache_pair *src, const char *request) { 149 src->nxt = new_cache_pair(); 150 src->nxt->pre = src; 151 replace_after_src(src, request); 152 } 153 154 void replace_copy(cache_pair *src, cache_pair *aim) { 155 src = src->nxt; 156 src->cnt = aim->cnt; 157 gettimeofday(&src->pre_t, NULL); 158 src->cost = aim->cost; 159 free(src->key); 160 free(src->file_cache); 161 src->key = aim->key; 162 src->file_cache = aim->file_cache; 163 aim->pre->nxt = aim->nxt; 164 free(aim); 165 } 166 167 void add_copy(cache_pair *src, cache_pair *aim) { 168 src->nxt = new_cache_pair(); 169 src->nxt->pre = src; 170 replace_copy(src, aim); 171 } 172 173 cache_pair *LRU_CHOOSE(cache_pair *table) { 174 double mn = -1; 175 cache_pair *res = NULL; 176 for (int i = 0; i < HASHTABELSIZE; ++i) 177 if (table[i].nxt) { 178 cache_pair *ptr = table + i; 179 while (ptr->nxt) { 180 cache_pair *pre = ptr; 181 ptr = ptr->nxt; 182 double cur = ptr->pre_t.tv_sec * 1000.0 + ptr->pre_t.tv_usec / 1000.0; 183 if (mn < 0 || cur < mn) { 184 mn = cur; 185 res = pre; 186 } 187 } 188 } 189 return res; 190 } 191 192 cache_pair *LFU_CHOOSE(cache_pair *table) { 193 int mn = -1; 194 cache_pair *res = NULL; 195 for (int i = 0; i < HASHTABELSIZE; ++i) 196 if (table[i].nxt) { 197 cache_pair *ptr = table + i; 198 while (ptr->nxt) { 199 cache_pair *pre = ptr; 200 ptr = ptr->nxt; 201 int cur = ptr->cnt; 202 if (mn < 0 || cur < mn) { 203 mn = cur; 204 res = pre; 205 } 206 } 207 } 208 return res; 209 } 210 211 cache_pair *GD_CHOOSE(cache_pair *table) { 212 double mn = -1; 213 cache_pair *res = NULL; 214 for (int i = 0; i < HASHTABELSIZE; ++i) 215 if (table[i].nxt) { 216 cache_pair *ptr = table + i; 217 while (ptr->nxt) { 218 cache_pair *pre = ptr; 219 ptr = ptr->nxt; 220 double cur = ptr->cost + ptr->pre_t.tv_sec; 221 if (mn < 0 || cur < mn) { 222 mn = cur; 223 res = pre; 224 } 225 } 226 } 227 return res; 228 } 229 230 cache_pair *GDSF_CHOOSE(cache_pair *table) { 231 double mn = -1; 232 cache_pair *res = NULL; 233 for (int i = 0; i < HASHTABELSIZE; ++i) 234 if (table[i].nxt) { 235 cache_pair *ptr = table + i; 236 while (ptr->nxt) { 237 cache_pair *pre = ptr; 238 ptr = ptr->nxt; 239 double cur = ptr->cnt * ptr->cost + ptr->pre_t.tv_sec; 240 if (mn < 0 || cur < mn) { 241 mn = cur; 242 res = pre; 243 } 244 } 245 } 246 return res; 247 } 248 249 cache_ret LRU(cache_t set, const char *request) { 250 int flag; 251 cache_pair *src = is_in_table(set->table, request, &flag); 252 if (flag) { /// real node 253 src->cnt++; 254 gettimeofday(&src->pre_t, NULL); 255 } else { /// pre node 256 ++set->page_fault; 257 if (set->_cur == set->capacity) { /// choose and replace 258 src = LRU_CHOOSE(set->table); 259 replace_after_src(src, request); 260 } else { /// add node 261 add_after_src(src, request); 262 ++set->_cur; 263 } 264 src = src->nxt; 265 } 266 return (cache_ret) {src->cost, src->file_cache}; 267 } 268 269 cache_ret LFU(cache_t set, const char *request) { 270 int flag; 271 cache_pair *src = is_in_table(set->table, request, &flag); 272 if (flag) { 273 src->cnt++; 274 gettimeofday(&src->pre_t, NULL); 275 } else { 276 ++set->page_fault; 277 if (set->_cur == set->capacity) { 278 src = LFU_CHOOSE(set->table); 279 replace_after_src(src, request); 280 } else { 281 add_after_src(src, request); 282 ++set->_cur; 283 } 284 src = src->nxt; 285 } 286 return (cache_ret) {src->cost, src->file_cache}; 287 } 288 289 cache_ret ARC(cache_t set, const char *request) { 290 int flag; 291 cache_pair *first_table = set->first_table; 292 cache_pair *src = is_in_table(set->table, request, &flag); 293 if (flag) { /// in second table 294 src->cnt++; 295 gettimeofday(&src->pre_t, NULL); 296 } else { 297 cache_pair *first_src = is_in_table(first_table, request, &flag); 298 if (flag) { /// in first table 299 ++first_src->cnt; 300 if (set->_cur == set->capacity) { /// choose and replace 301 src = LRU_CHOOSE(set->table); 302 replace_copy(src, first_src); /// copy data to nxt src and delete first_src 303 } else { /// add node 304 add_copy(src, first_src); /// create node and replace 305 ++set->_cur; 306 } 307 src = src->nxt; 308 } else { /// not in first table 309 ++set->page_fault; 310 if (set->first_cur == set->capacity) { 311 first_src = LRU_CHOOSE(first_table); 312 replace_after_src(first_src, request); 313 } else { 314 add_after_src(first_src, request); 315 ++set->first_cur; 316 } 317 src = first_src->nxt; 318 } 319 } 320 return (cache_ret) {src->cost, src->file_cache}; 321 } 322 323 cache_ret MQ(cache_t set, const char *request) { 324 int flag; 325 cache_pair *first_table = set->first_table; 326 cache_pair *src = is_in_table(set->table, request, &flag); 327 if (flag) { /// in second table 328 src->cnt++; 329 gettimeofday(&src->pre_t, NULL); 330 } else { 331 cache_pair *first_src = is_in_table(first_table, request, &flag); 332 if (flag) { /// in first table 333 ++first_src->cnt; 334 if (set->_cur == set->capacity) { /// choose and replace 335 src = LRU_CHOOSE(set->table); 336 replace_copy(src, first_src); /// copy data to nxt src and delete first_src 337 } else { /// add node 338 add_copy(src, first_src); /// create node and replace 339 ++set->_cur; 340 } 341 src = src->nxt; 342 } else { /// not in first table 343 ++set->page_fault; 344 if (set->first_cur == set->capacity) { 345 first_src = LRU_CHOOSE(first_table); 346 replace_after_src(first_src, request); 347 } else { 348 add_after_src(first_src, request); 349 ++set->first_cur; 350 } 351 src = first_src->nxt; 352 } 353 } 354 return (cache_ret) {src->cost, src->file_cache}; 355 } 356 357 cache_ret GD(cache_t set, const char *request) { 358 int flag; 359 cache_pair *src = is_in_table(set->table, request, &flag); 360 if (flag) { 361 src->cnt++; 362 gettimeofday(&src->pre_t, NULL); 363 } else { 364 ++set->page_fault; 365 if (set->_cur == set->capacity) { 366 src = GD_CHOOSE(set->table); 367 replace_after_src(src, request); 368 } else { 369 add_after_src(src, request); 370 ++set->_cur; 371 } 372 src = src->nxt; 373 } 374 return (cache_ret) {src->cost, src->file_cache}; 375 } 376 377 cache_ret GDSF(cache_t set, const char *request) { 378 int flag; 379 cache_pair *src = is_in_table(set->table, request, &flag); 380 if (flag) { 381 src->cnt++; 382 gettimeofday(&src->pre_t, NULL); 383 } else { 384 ++set->page_fault; 385 if (set->_cur == set->capacity) { 386 src = GDSF_CHOOSE(set->table); 387 replace_after_src(src, request); 388 } else { 389 add_after_src(src, request); 390 ++set->_cur; 391 } 392 src = src->nxt; 393 } 394 return (cache_ret) {src->cost, src->file_cache}; 395 }
---版權:代碼聽從MIT協議開源,能夠按需使用。(地址:cache.c)
---轉載需標明出處,侵權必究
---做者:RhythmLian: https://github.com/Rhythmicc