整理日: 2015年2月16日算法
假設文本串text長度爲n,模式串pattern長度爲m,BM算法的主要特徵爲:數組
常規的匹配算法移動模式串的時候是從左到右,而進行比較的時候也是從左到右的,基本框架是:框架
while(j <= strlen(text) - strlen(pattern)){ for (i = 0; i < strlen(pattern) && pattern[i] == text[i + j]; ++i); if (i == strlen(pattern)) { Match; break; } else ++j; }
而BM算法在移動模式串的時候是從左到右,而進行比較的時候是從右到左的,基本框架是:code
while(j <= strlen(text) - strlen(pattern)){ for (i = strlen(pattern); i >= 0 && pattern[i] == text[i + j]; --i); if (i < 0)) { Match; break; } else j += BM(); }
#include <stdio.h> #include <string.h> #define MAX_CHAR 256 #define SIZE 256 #define MAX(x, y) (x) > (y) ? (x) : (y) void BoyerMoore(char *pattern, int m, char *text, int n); int main() { char text[256], pattern[256]; while(1) { scanf("%s%s", text, pattern); if(text == 0 || pattern == 0) break; BoyerMoore(pattern, strlen(pattern), text, strlen(text)); printf("\n"); } return 0; } void print(int *array, int n, char *arrayName) { int i; printf("%s: ", arrayName); for(i = 0; i < n; i++) { printf("%d ", array[i]); } printf("\n"); } void PreBmBc(char *pattern, int m, int bmBc[]) { int i; for(i = 0; i < MAX_CHAR; i++) { bmBc[i] = m; } for(i = 0; i < m - 1; i++) { bmBc[pattern[i]] = m - 1 - i; } /* printf("bmBc[]: "); for(i = 0; i < m; i++) { printf("%d ", bmBc[pattern[i]]); } printf("\n"); */ } void suffix_old(char *pattern, int m, int suff[]) { int i, j; suff[m - 1] = m; for(i = m - 2; i >= 0; i--) { j = i; while(j >= 0 && pattern[j] == pattern[m - 1 - i + j]) j--; suff[i] = i - j; } } void suffix(char *pattern, int m, int suff[]) { int f, g, i; suff[m - 1] = m; g = m - 1; for (i = m - 2; i >= 0; --i) { if (i > g && suff[i + m - 1 - f] < i - g) suff[i] = suff[i + m - 1 - f]; else { if (i < g) g = i; f = i; while (g >= 0 && pattern[g] == pattern[g + m - 1 - f]) --g; suff[i] = f - g; } } // print(suff, m, "suff[]"); } void PreBmGs(char *pattern, int m, int bmGs[]) { int i, j; int suff[SIZE]; // 計算後綴數組 suffix(pattern, m, suff); // 先所有賦值爲m,包含Case3 for(i = 0; i < m; i++) { bmGs[i] = m; } // Case2 j = 0; for(i = m - 1; i >= 0; i--) { if(suff[i] == i + 1) { for(; j < m - 1 - i; j++) { if(bmGs[j] == m) bmGs[j] = m - 1 - i; } } } // Case1 for(i = 0; i <= m - 2; i++) { bmGs[m - 1 - suff[i]] = m - 1 - i; } // print(bmGs, m, "bmGs[]"); } void BoyerMoore(char *pattern, int m, char *text, int n) { int i, j, bmBc[MAX_CHAR], bmGs[SIZE]; // Preprocessing PreBmBc(pattern, m, bmBc); PreBmGs(pattern, m, bmGs); // Searching j = 0; while(j <= n - m) { for(i = m - 1; i >= 0 && pattern[i] == text[i + j]; i--); if(i < 0) { printf("Find it, the position is %d\n", j); j += bmGs[0]; return; } else { j += MAX(bmBc[text[i + j]] - m + 1 + i, bmGs[i]); } } printf("No find.\n"); }