基於哈希表實現頁面置換算法

首先,在閱讀這篇文章之前,你需要瞭解緩存是做什麼用的。

緩存

    衆所周知,程序運行時,數據通常存放在內存或磁盤裏,而內存中的數據總是能夠被更快速地獲取。但是內存空間是有限的,大多數人PC的內存可能在4G~16G之間,這意味着你必須捨棄一部分不常被訪問的數據,把它們存在磁盤裏;而把經常需要被訪問的數據存在內存裏,這就是緩存的基本思路。

    但對於程序(和你)而言,無法預測哪些數據會被經常訪問,所以只能根據訪問數據的歷史來推測和統計哪些數據是被經常訪問的,並把它們存在內存裏;如果內存滿了,就找一個最不常被訪問的數據替換掉。這個統計過程和改寫內存的策略通常被稱爲「頁面置換算法」。事實上,所有可實現的緩存策略(頁面置換算法)都是基於歷史訪問信息來實現的。科學家經過幾十年的努力,發明了許許多多的頁面置換算法,例如:FIFO、LFU、LRU、ARC、MQ、GD、GDSF 等,它們各有所長,並無絕對的優劣。

    缺頁數(率):爲了評判頁面置換算法的優劣,設訪問數據的總次數爲 $n$,數據未被緩存命中的次數(缺頁數)爲 $p$,則缺頁率 $= p/n$。顯然,缺頁率越小,頁面置換算法越優秀。

 

正事

    本文基於哈希表的內存管理結構,簡單地實現線程安全的緩存算法(LFU、LRU、ARC、MQ、GD、GDSF):

  首先看API(cache.h):

 1 #pragma once
 2 
 3 #include <sys/time.h>
 4 #ifdef __cplusplus
 5 extern "C" {
 6 #endif
 7 
 8 typedef struct _cache *cache_t, _cache;
 9 typedef struct _cache_ele cache_pair;
10 typedef struct _cache_ret { /// 讀緩存返回結果
11     long cost;        /// 代價
12     const char*cache; /// 數據
13 }cache_ret;
14 /**
15  * @API
16  * create, delete, search, read
17  */
18 cache_t   new_cache        (unsigned capacity, cache_ret(*model)(cache_t, const char*)); /// 新建
19 void      del_cache        (cache_t cache);                                              /// 刪除
20 unsigned  cache_page_fault (cache_t cache);                                              /// 缺頁數
21 cache_ret read_cache       (cache_t cache, const char*filename);                         /// 讀緩存
22 
23 /**
24  * @Cache_Algorithm_Model
25  * cache_ret(*)(cache_t, const char*)
26  */
27 cache_ret LRU (cache_t cache, const char*request); /// 頁面替換算法模型
28 cache_ret LFU (cache_t cache, const char*request);
29 cache_ret ARC (cache_t cache, const char*request);
30 cache_ret MQ  (cache_t cache, const char*request);
31 cache_ret GD  (cache_t cache, const char*request);
32 cache_ret GDSF(cache_t cache, const char*request);
33 
34 #ifdef __cplusplus
35 }
36 #endif

 

    數據結構:

    緩存:

 1 struct _cache_ele {                   /// 數據單元
 2     char *key, *file_cache;           /// 鍵值、數據 
 3     long cost;                        /// 代價(長度)
 4     struct timeval pre_t;             /// 上次訪問時間
 5     unsigned int cnt;                 /// 訪問次數
 6     struct _cache_ele *nxt, *pre;
 7 };
 8 
 9 struct _cache {
10     cache_pair table[HASHTABELSIZE], *first_table;      /// 哈希表,first_table根據須要生成
11     cache_ret (*f)(cache_t, const char *);              /// 頁面置換策略
12     pthread_mutex_t mutex;                              /// 線程安全的鎖
13     unsigned int capacity, _cur, first_cur, page_fault; /// 容量、當前量、ft當前量、缺頁數
14 };/// *cache_t

     緩存策略實現:

     緩存策略實際上是一個選擇問題:如果緩存沒有滿,那麼顯然可以直接把新請求的數據讀到緩存中;如果滿了,則按照策略選一個數據替換掉。

     (cache.c完整代碼)

#include "cache.h"
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
#include <zconf.h>
#include "pthread.h"
#include "stdlib.h"
#include "string.h"
  9 
 10 #define RMALLOC(type,n) (type*)malloc(sizeof(type)*(n))
 11 #define MALLOC(p,type,n) type*p = RMALLOC(type, n)
 12 #define MAX_BUFFER_LEN 1024ll * 1024
 13 #ifndef HASHTABLESZIE
 14 #define HASHTABELSIZE 10005
 15 #endif
 16 
 17 unsigned int string_hash(const char *str) {
 18     unsigned int hash = 0;
 19     int i;
 20     for (i = 0; *str; i++) {
 21         if ((i & 1) == 0)hash ^= ((hash << 7) ^ (*str++) ^ (hash >> 3));
 22         else hash ^= (~((hash << 11) ^ (*str++) ^ (hash >> 5)));
 23     }
 24     return (hash & 0x7FFFFFFF);
 25 }
 26 
 27 struct _cache_ele {
 28     char *key, *file_cache;
 29     long cost;
 30     struct timeval pre_t;
 31     unsigned int cnt;
 32     struct _cache_ele *nxt, *pre;
 33 };
 34 
 35 cache_pair*new_cache_pair(){
 36     MALLOC(res, cache_pair, 1);
 37     res->key = res->file_cache = NULL;
 38     res->cnt = res->cost = 0;
 39     res->nxt = res->pre = NULL;
 40     return res;
 41 }
 42 
 43 void del_cache_pair(cache_pair *del) {
 44     free(del->key);
 45     free(del->file_cache);
 46     free(del);
 47 }
 48 
 49 struct _cache {
 50     cache_pair table[HASHTABELSIZE], *first_table;      /// hash table
 51     cache_ret (*f)(cache_t, const char *);              /// function pointer
 52     pthread_mutex_t mutex;
 53     unsigned int capacity, _cur, first_cur, page_fault;
 54 };/// *cache_t
 55 
 56 cache_t new_cache(unsigned capacity, cache_ret(*model)(cache_t, const char *)) {
 57     if (model) {
 58         MALLOC(ret, _cache, 1);
 59         pthread_mutex_init(&ret->mutex, NULL);
 60         ret->capacity = capacity;
 61         ret->page_fault = ret->first_cur = ret->_cur = 0;
 62         memset(ret->table, 0, sizeof(cache_pair) * HASHTABELSIZE);
 63         if (model == ARC)ret->first_table = RMALLOC(cache_pair, HASHTABELSIZE);
 64         else if(model == MQ)ret->first_table = RMALLOC(cache_pair, HASHTABELSIZE * 3);
 65         else ret->first_table = NULL;
 66         ret->f = model;
 67         return ret;
 68     }
 69     return NULL;
 70 }
 71 
 72 cache_ret read_cache(cache_t cache, const char *filename) {
 73     pthread_mutex_lock(&cache->mutex);
 74     cache_ret res = cache->f(cache, filename);
 75     pthread_mutex_unlock(&cache->mutex);
 76     return res;
 77 }
 78 
 79 unsigned cache_page_fault(cache_t cache){
 80     return cache->page_fault;
 81 }
 82 
 83 void del_cache(cache_t cache) {
 84     pthread_mutex_destroy(&cache->mutex);
 85     for (int i = 0; i < HASHTABELSIZE; ++i) {
 86         cache_pair *p = cache->table[i].nxt;
 87         while (p) {
 88             cache_pair *tmp = p;
 89             p = p->nxt;
 90             del_cache_pair(tmp);
 91         }
 92     }
 93     if (cache->first_table) {
 94         for (int i = 0; i < HASHTABELSIZE; ++i) {
 95             cache_pair *p = cache->first_table[i].nxt;
 96             while (p) {
 97                 cache_pair *tmp = p;
 98                 p = p->nxt;
 99                 del_cache_pair(tmp);
100             }
101         }
102         free(cache->first_table);
103     }
104     free(cache);
105 }
106 
107 cache_pair *is_in_table(cache_pair *table, const char *request, int *ret) {
108     unsigned int index = string_hash(request) % HASHTABELSIZE;
109     cache_pair *src = table + index;
110     if (!src->nxt) {
111         *ret = 0;
112         return src;
113     }
114     src = src->nxt;
115     while (strcmp(src->key, request)) {
116         cache_pair *pre = src;
117         src = src->nxt;
118         if (!src) { /// not in table: return pre node
119             *ret = 0;
120             return pre;
121         }
122     }
123     *ret = 1;
124     return src;
125 }
126 
127 void replace_after_src(cache_pair *src, const char *request) {
128     src = src->nxt;
129     src->cnt = 1;
130     gettimeofday(&src->pre_t, NULL);
131     src->key = src->key ? (char *) realloc(src->key, strlen(request) + 1) : RMALLOC(char, strlen(request) + 1);
132     strcpy(src->key, request);
133     int fd = open(request, O_RDONLY);
134     if (fd > 0) {
135         char *fp = mmap(NULL, MAX_BUFFER_LEN, PROT_READ, MAP_SHARED, fd, 0);
136         src->cost = strlen(fp) + 1;
137         src->file_cache = src->file_cache ? (char *) realloc(src->file_cache, src->cost) : RMALLOC(char, src->cost);
138         strcpy(src->file_cache, fp);
139         munmap(fp, MAX_BUFFER_LEN);
140         close(fd);
141     } else {
142         src->cost = -1;
143         if (src->file_cache)free(src->file_cache);
144         src->file_cache = NULL;
145     }
146 }
147 
148 void add_after_src(cache_pair *src, const char *request) {
149     src->nxt = new_cache_pair();
150     src->nxt->pre = src;
151     replace_after_src(src, request);
152 }
153 
154 void replace_copy(cache_pair *src, cache_pair *aim) {
155     src = src->nxt;
156     src->cnt = aim->cnt;
157     gettimeofday(&src->pre_t, NULL);
158     src->cost = aim->cost;
159     free(src->key);
160     free(src->file_cache);
161     src->key = aim->key;
162     src->file_cache = aim->file_cache;
163     aim->pre->nxt = aim->nxt;
164     free(aim);
165 }
166 
167 void add_copy(cache_pair *src, cache_pair *aim) {
168     src->nxt = new_cache_pair();
169     src->nxt->pre = src;
170     replace_copy(src, aim);
171 }
172 
173 cache_pair *LRU_CHOOSE(cache_pair *table) {
174     double mn = -1;
175     cache_pair *res = NULL;
176     for (int i = 0; i < HASHTABELSIZE; ++i)
177         if (table[i].nxt) {
178             cache_pair *ptr = table + i;
179             while (ptr->nxt) {
180                 cache_pair *pre = ptr;
181                 ptr = ptr->nxt;
182                 double cur = ptr->pre_t.tv_sec * 1000.0 + ptr->pre_t.tv_usec / 1000.0;
183                 if (mn < 0 || cur < mn) {
184                     mn = cur;
185                     res = pre;
186                 }
187             }
188         }
189     return res;
190 }
191 
192 cache_pair *LFU_CHOOSE(cache_pair *table) {
193     int mn = -1;
194     cache_pair *res = NULL;
195     for (int i = 0; i < HASHTABELSIZE; ++i)
196         if (table[i].nxt) {
197             cache_pair *ptr = table + i;
198             while (ptr->nxt) {
199                 cache_pair *pre = ptr;
200                 ptr = ptr->nxt;
201                 int cur = ptr->cnt;
202                 if (mn < 0 || cur < mn) {
203                     mn = cur;
204                     res = pre;
205                 }
206             }
207         }
208     return res;
209 }
210 
211 cache_pair *GD_CHOOSE(cache_pair *table) {
212     double mn = -1;
213     cache_pair *res = NULL;
214     for (int i = 0; i < HASHTABELSIZE; ++i)
215         if (table[i].nxt) {
216             cache_pair *ptr = table + i;
217             while (ptr->nxt) {
218                 cache_pair *pre = ptr;
219                 ptr = ptr->nxt;
220                 double cur = ptr->cost + ptr->pre_t.tv_sec;
221                 if (mn < 0 || cur < mn) {
222                     mn = cur;
223                     res = pre;
224                 }
225             }
226         }
227     return res;
228 }
229 
230 cache_pair *GDSF_CHOOSE(cache_pair *table) {
231     double mn = -1;
232     cache_pair *res = NULL;
233     for (int i = 0; i < HASHTABELSIZE; ++i)
234         if (table[i].nxt) {
235             cache_pair *ptr = table + i;
236             while (ptr->nxt) {
237                 cache_pair *pre = ptr;
238                 ptr = ptr->nxt;
239                 double cur = ptr->cnt * ptr->cost + ptr->pre_t.tv_sec;
240                 if (mn < 0 || cur < mn) {
241                     mn = cur;
242                     res = pre;
243                 }
244             }
245         }
246     return res;
247 }
248 
249 cache_ret LRU(cache_t set, const char *request) {
250     int flag;
251     cache_pair *src = is_in_table(set->table, request, &flag);
252     if (flag) { /// real node
253         src->cnt++;
254         gettimeofday(&src->pre_t, NULL);
255     } else { /// pre node
256         ++set->page_fault;
257         if (set->_cur == set->capacity) { /// choose and replace
258             src = LRU_CHOOSE(set->table);
259             replace_after_src(src, request);
260         } else { /// add node
261             add_after_src(src, request);
262             ++set->_cur;
263         }
264         src = src->nxt;
265     }
266     return (cache_ret) {src->cost, src->file_cache};
267 }
268 
269 cache_ret LFU(cache_t set, const char *request) {
270     int flag;
271     cache_pair *src = is_in_table(set->table, request, &flag);
272     if (flag) {
273         src->cnt++;
274         gettimeofday(&src->pre_t, NULL);
275     } else {
276         ++set->page_fault;
277         if (set->_cur == set->capacity) {
278             src = LFU_CHOOSE(set->table);
279             replace_after_src(src, request);
280         } else {
281             add_after_src(src, request);
282             ++set->_cur;
283         }
284         src = src->nxt;
285     }
286     return (cache_ret) {src->cost, src->file_cache};
287 }
288 
289 cache_ret ARC(cache_t set, const char *request) {
290     int flag;
291     cache_pair *first_table = set->first_table;
292     cache_pair *src = is_in_table(set->table, request, &flag);
293     if (flag) { /// in second table
294         src->cnt++;
295         gettimeofday(&src->pre_t, NULL);
296     } else {
297         cache_pair *first_src = is_in_table(first_table, request, &flag);
298         if (flag) { /// in first table
299             ++first_src->cnt;
300             if (set->_cur == set->capacity) { /// choose and replace
301                 src = LRU_CHOOSE(set->table);
302                 replace_copy(src, first_src); /// copy data to nxt src and delete first_src
303             } else { /// add node
304                 add_copy(src, first_src); /// create node and replace
305                 ++set->_cur;
306             }
307             src = src->nxt;
308         } else { /// not in first table
309             ++set->page_fault;
310             if (set->first_cur == set->capacity) {
311                 first_src = LRU_CHOOSE(first_table);
312                 replace_after_src(first_src, request);
313             } else {
314                 add_after_src(first_src, request);
315                 ++set->first_cur;
316             }
317             src = first_src->nxt;
318         }
319     }
320     return (cache_ret) {src->cost, src->file_cache};
321 }
322 
323 cache_ret MQ(cache_t set, const char *request) {
324     int flag;
325     cache_pair *first_table = set->first_table;
326     cache_pair *src = is_in_table(set->table, request, &flag);
327     if (flag) { /// in second table
328         src->cnt++;
329         gettimeofday(&src->pre_t, NULL);
330     } else {
331         cache_pair *first_src = is_in_table(first_table, request, &flag);
332         if (flag) { /// in first table
333             ++first_src->cnt;
334             if (set->_cur == set->capacity) { /// choose and replace
335                 src = LRU_CHOOSE(set->table);
336                 replace_copy(src, first_src); /// copy data to nxt src and delete first_src
337             } else { /// add node
338                 add_copy(src, first_src); /// create node and replace
339                 ++set->_cur;
340             }
341             src = src->nxt;
342         } else { /// not in first table
343             ++set->page_fault;
344             if (set->first_cur == set->capacity) {
345                 first_src = LRU_CHOOSE(first_table);
346                 replace_after_src(first_src, request);
347             } else {
348                 add_after_src(first_src, request);
349                 ++set->first_cur;
350             }
351             src = first_src->nxt;
352         }
353     }
354     return (cache_ret) {src->cost, src->file_cache};
355 }
356 
357 cache_ret GD(cache_t set, const char *request) {
358     int flag;
359     cache_pair *src = is_in_table(set->table, request, &flag);
360     if (flag) {
361         src->cnt++;
362         gettimeofday(&src->pre_t, NULL);
363     } else {
364         ++set->page_fault;
365         if (set->_cur == set->capacity) {
366             src = GD_CHOOSE(set->table);
367             replace_after_src(src, request);
368         } else {
369             add_after_src(src, request);
370             ++set->_cur;
371         }
372         src = src->nxt;
373     }
374     return (cache_ret) {src->cost, src->file_cache};
375 }
376 
377 cache_ret GDSF(cache_t set, const char *request) {
378     int flag;
379     cache_pair *src = is_in_table(set->table, request, &flag);
380     if (flag) {
381         src->cnt++;
382         gettimeofday(&src->pre_t, NULL);
383     } else {
384         ++set->page_fault;
385         if (set->_cur == set->capacity) {
386             src = GDSF_CHOOSE(set->table);
387             replace_after_src(src, request);
388         } else {
389             add_after_src(src, request);
390             ++set->_cur;
391         }
392         src = src->nxt;
393     }
394     return (cache_ret) {src->cost, src->file_cache};
395 }
View Code

 

 

---版權:代碼遵從MIT協議開源,可以按需使用。(地址:cache.c)

---轉載需標明出處,侵權必究

---作者:RhythmLian: https://github.com/Rhythmicc

相關文章
相關標籤/搜索