實踐中更高效、實現起來相對簡單的基於末尾壞字符原則的BM算法實現

以前在網上看過若干算法,無非兩個原則:壞字符原則、好後綴原則。按照算法所述實現了一個版本,但發現其效率還不如本文所述的實現方式。我分析效率較低的原因可能是由於不斷地向前找壞字符或者好後綴來確定跳躍距離導致的,不斷的比對操作應該是影響效率的根源。

下面貼一段實現較簡單的方法,感謝以前的領導磊哥,實現參照了他的代碼。

PS:大概看了下ClamAV的BM實現,感覺很複雜。

 1 #define BM_TAB_LEN  (256)
 2 
 3 uint64_t *InitBMTab(const uint8_t *In_ui8Pattern, uint64_t In_ui64PattLen)
 4 {
 5     uint64_t    *pui64RetVal    = NULL;
 6 
 7     if (In_ui8Pattern == NULL || In_ui64PattLen == 0)
 8     {
 9         goto fun_ret;
10     }
11 
12     pui64RetVal = (uint64_t *)malloc(sizeof(uint64_t) * BM_TAB_LEN);
13     if (pui64RetVal == NULL)
14     {
15         goto fun_ret;
16     }
17 
18     for (uint16_t i = 0; i < BM_TAB_LEN; i ++)
19     {
20         pui64RetVal[i] = In_ui64PattLen;
21     }
22 
23     for (uint64_t i = 0; i < In_ui64PattLen; i ++)
24     {
25         pui64RetVal[In_ui8Pattern[i]] = In_ui64PattLen - i - 1;
26     }
27 
28 fun_ret:
29     return pui64RetVal;
30 }
31 
32 int8_t ReBuildBMTab(uint64_t *Out_pui64BMJmpTab, const uint8_t *In_ui8Pattern, uint64_t In_ui64PattLen)
33 {
34     int8_t  i8RetVal    = 0;
35 
36     if (Out_pui64BMJmpTab == NULL || In_ui8Pattern == NULL || In_ui64PattLen == 0)
37     {
38         i8RetVal = -1;
39         goto fun_ret;
40     }
41 
42     for (uint16_t i = 0; i < BM_TAB_LEN; i ++)
43     {
44         Out_pui64BMJmpTab[i] = In_ui64PattLen;
45     }
46 
47     for (uint64_t i = 0; i < In_ui64PattLen; i ++)
48     {
49         Out_pui64BMJmpTab[In_ui8Pattern[i]] = In_ui64PattLen - i - 1;
50     }
51 
52 fun_ret:
53     return i8RetVal;
54 }
55 
56 void ReleaseBMTab(uint64_t *Out_pui64BMJmpTab)
57 {
58     if (Out_pui64BMJmpTab != NULL)
59     {
60         free(Out_pui64BMJmpTab);
61     }
62 }
63 
64 uint64_t BMSearch(const uint64_t *In_pui64BMJmpTab, const uint8_t *In_pui8Pattern, uint64_t In_ui64PattLen,
65     const uint8_t *In_pui8Buf, uint64_t In_ui64BufLen)
66 {
67     uint64_t    ui64RetVal  = -1;
68     uint64_t    ui64EndIdx  = 0;
69 
70     if (In_pui64BMJmpTab == NULL || In_pui8Pattern == NULL
71         || In_ui64PattLen == 0 || In_pui8Buf == NULL || In_ui64BufLen == 0
72         || In_ui64BufLen < In_ui64PattLen)
73     {
74         goto fun_ret;
75     }
76 
77     ui64EndIdx = In_ui64PattLen - 1;
78     do 
79     {
80         if (In_pui64BMJmpTab[In_pui8Buf[ui64EndIdx]] != 0)
81         {
82             ui64EndIdx += In_pui64BMJmpTab[In_pui8Buf[ui64EndIdx]];
83             continue;
84         }
85         if (memcmp(In_pui8Pattern, In_pui8Buf + ui64EndIdx - In_ui64PattLen + 1, In_ui64PattLen) == 0)
86         {
87             ui64RetVal = ui64EndIdx - In_ui64PattLen + 1;
88             goto fun_ret;
89         }
90         ui64EndIdx ++;
91     } while (ui64EndIdx < In_ui64BufLen);
92 
93 fun_ret:
94     return ui64RetVal;
95 }
相關文章
相關標籤/搜索