KMP算法

KMP算法學習

KMP中用到的函數詳解

1. prefix_table()

/*
 * Build the (unshifted) KMP prefix table for `pattern`.
 *
 * After the call, prefix[i] holds the length of the longest proper prefix of
 * pattern[0..i] that is also a suffix of pattern[0..i].
 *
 * pattern: the pattern characters; only indices 0..n-1 are read.
 * prefix:  output array with room for at least n ints.
 * n:       number of pattern characters to process; nothing is written
 *          when n <= 0.
 */
void prefix_table(char pattern[], int prefix[], int n ) {
    if (n <= 0) {
        return; /* no characters: prefix[0] must not be touched */
    }

    prefix[0] = 0; /* a single character has no proper prefix/suffix */
    int len = 0;   /* length of the prefix/suffix match carried so far */
    int i = 1;     /* cursor into pattern, starting at the second character */

    while (i < n) {
        if (pattern[i] == pattern[len]) {
            /* The match grows by one. Record it for position i BEFORE
             * advancing i, so the value is stored in the right slot and
             * prefix[n] is never written. */
            len++;
            prefix[i] = len;
            i++;
        } else if (len > 0) {
            /* Mismatch: fall back to the next shorter candidate prefix and
             * retry the same i. */
            len = prefix[len - 1];
        } else {
            /* Nothing left to fall back to; record 0 and advance so the
             * loop always makes progress. */
            prefix[i] = 0;
            i++;
        }
    }
}

2. move_prefix_table()

/*
 * Shift the whole prefix table one slot to the right and put -1 in slot 0.
 *
 * The last entry of the original table is discarded. Nothing is written
 * when n <= 0.
 *
 * prefix: table of n ints, modified in place.
 * n:      number of entries in the table.
 */
void move_prefix_table (int prefix[], int n) {
    if (n <= 0) {
        return; /* empty table: prefix[0] does not exist */
    }
    /* Walk from the back towards the front so every source slot is read
     * before it is overwritten; the index must DEcrease to terminate. */
    for (int i = n - 1; i > 0; i--) {
        prefix[i] = prefix[i - 1];
    }
    prefix[0] = -1;
}
/*
 * Search `text` for every occurrence of `pattern` using KMP and print the
 * start index of each occurrence to stdout.
 *
 * The prefix table is built with prefix_table() and then shifted with
 * move_prefix_table(), so prefix[0] == -1 and prefix[j] is the pattern index
 * to resume from after a mismatch at pattern[j].
 *
 * text:    NUL-terminated string to search in (length m).
 * pattern: NUL-terminated string to search for (length n). An empty pattern
 *          produces no output.
 */
void kmp_search(char text[], char pattern[]) {
    int n = (int)strlen(pattern); /* pattern length */
    int m = (int)strlen(text);    /* text length */

    if (n == 0) {
        return; /* no table can be built for an empty pattern */
    }

    int *prefix = malloc(sizeof *prefix * (size_t)n);
    if (prefix == NULL) {
        perror("malloc");
        return;
    }
    prefix_table(pattern, prefix, n);
    move_prefix_table(prefix, n);

    int i = 0; /* cursor into text */
    int j = 0; /* cursor into pattern */

    while (i < m) {
        if (j == n - 1 && text[i] == pattern[j]) { /* last pattern char matched */
            printf("Found patten at %d.\n", i - j); /* i - j is the start of the match */
            j = prefix[j]; /* keep going to find further occurrences */
            if (j == -1) {
                /* Only possible for a one-character pattern: there is no
                 * fallback position, so move on to the next text character
                 * instead of reading pattern[-1]. */
                i++;
                j = 0;
                continue;
            }
        }
        if (text[i] == pattern[j]) { /* ordinary match: advance both cursors */
            i++;
            j++;
        }
        else { /* mismatch: jump the pattern cursor back via the table */
            j = prefix[j];
            if (j == -1) { /* mismatch on the first pattern char: restart at next text char */
                i++;
                j++;
            }
        }
    }

    free(prefix);
}
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Compute the (unshifted) KMP prefix table of `pattern`.
 *
 * On return, prefix[i] is the length of the longest proper prefix of
 * pattern[0..i] that is also a suffix of pattern[0..i].
 *
 * pattern: pattern characters; indices 0..n-1 are read.
 * prefix:  output array holding at least n ints.
 * n:       number of characters to process; the function is a no-op when
 *          n <= 0.
 */
void prefix_table(char pattern[], int prefix[], int n ) {
    if (n <= 0) {
        return;
    }

    prefix[0] = 0;
    int len = 0; /* current matched prefix/suffix length */
    int i = 1;   /* position in pattern being filled in */

    while (i < n) {
        if (pattern[i] == pattern[len]) {
            /* Store the new length at position i first, then advance;
             * advancing first would skip slot i and write past slot n-1. */
            len++;
            prefix[i] = len;
            i++;
        } else if (len > 0) {
            /* Retry position i against the next shorter border. */
            len = prefix[len - 1];
        } else {
            /* No border at all for this position. */
            prefix[i] = 0;
            i++;
        }
    }
}
/*
 * Shift every entry of `prefix` one position to the right (dropping the last
 * entry) and store -1 in prefix[0]. Does nothing when n <= 0.
 *
 * prefix: table of n ints, modified in place.
 * n:      number of entries in the table.
 */
void move_prefix_table (int prefix[], int n) {
    if (n <= 0) {
        return;
    }
    /* Copy back-to-front; the index counts down so the loop terminates and
     * no entry is overwritten before it has been moved. */
    for (int i = n - 1; i > 0; i--) {
        prefix[i] = prefix[i - 1];
    }
    prefix[0] = -1;
}
/*
 * Print the start index of every occurrence of `pattern` in `text`, found
 * with the KMP algorithm.
 *
 * Uses prefix_table() followed by move_prefix_table() to obtain a shifted
 * table in which prefix[0] == -1 and prefix[j] is the pattern position to
 * continue from after a mismatch at pattern[j].
 *
 * text:    NUL-terminated string to search in (length m).
 * pattern: NUL-terminated string to search for (length n). Nothing is
 *          printed for an empty pattern.
 */
void kmp_search(char text[], char pattern[]) {
    int n = (int)strlen(pattern);
    int m = (int)strlen(text);

    if (n == 0) {
        return; /* an empty pattern has no prefix table */
    }

    int *prefix = malloc(sizeof *prefix * (size_t)n);
    if (prefix == NULL) {
        perror("malloc");
        return;
    }
    prefix_table(pattern, prefix, n);
    move_prefix_table(prefix, n);

    /* text[i]      len(text)    = m */
    /* pattern[j]   len(pattern) = n */
    int i = 0, j = 0;

    while (i < m) {
        if (j == n - 1 && text[i] == pattern[j]) {
            /* Whole pattern matched, ending at text[i]. */
            printf("Found patten at %d.\n", i - j);
            j = prefix[j];
            if (j == -1) {
                /* One-character pattern: no fallback exists, so step to
                 * the next text character rather than index pattern[-1]. */
                i++;
                j = 0;
                continue;
            }
        }
        if (text[i] == pattern[j]) {
            i++;
            j++;
        }
        else {
            /* Mismatch: resume from the position given by the table. */
            j = prefix[j];
            if (j == -1) {
                /* Even the first pattern character failed: advance text. */
                i++;
                j++;
            }
        }
    }

    free(prefix); /* release the table allocated above */
}

/* Demo driver: runs kmp_search() once on a fixed sample text and pattern. */
int main() {
    char haystack[] = "ABABABCABAABABABAB"; /* text to search in */
    char needle[]   = "ABABCABABA";         /* pattern to look for */

    kmp_search(haystack, needle);

    return 0;
}
相關文章
相關標籤/搜索