AC自動機

AC自動機

image

1.根據字符構造trie樹
2.構建失敗匹配指針
   1.根節點的所有第一代子節點的失敗指針都指向root
   2.爲其餘子節點構建失敗指針時,先取父節點的失敗指針所指節點,看它是否有相同字符的子節點;沒有就繼續沿失敗指針往上找,直到root仍匹配不到,就直接指向root
3.文本串匹配
   1.若是已經匹配到完整的模式串或者壓根匹配不到,根據失敗指針切換線路繼續向下查找
   2.若是匹配到了,那麼就繼續向下匹配
/**
 * A single trie node used by the Aho-Corasick automaton.
 *
 * Fields:
 *  - data:         the character stored on the edge leading to this node
 *  - isEndingChar: true when a complete pattern ends at this node
 *  - children:     Map from character to child node
 *  - length:       length of the pattern ending here (0 when none)
 *  - fail:         failure pointer, null until it is assigned
 */
class ACNode {
    /**
     * @param {string} data - character represented by this node
     */
    constructor(data){
        // Property order is kept the same as the field list above.
        Object.assign(this, {
            data,
            isEndingChar: false,
            children: new Map(),
            length: 0,
            fail: null,
        });
    }
}

/**
 * Aho-Corasick automaton: a trie of patterns plus failure pointers, used to
 * find every occurrence of every pattern in a text in a single pass.
 *
 * Usage: call insert() for each pattern, then failurePointer() once, then
 * match(). Failure pointers are not rebuilt automatically, so
 * failurePointer() must be called again after inserting further patterns.
 *
 * Patterns and text are both processed as UTF-16 code units (the same units
 * used by String.prototype.length and indexing), so reported offsets can be
 * used directly with slice()/substring().
 */
class ACTree {
    constructor(){
        // Sentinel root; its character is never matched against the text.
        this.root = new ACNode('/');
    }

    /**
     * Adds one pattern to the trie.
     *
     * @param {string} text - the pattern to add; an empty pattern is ignored
     */
    insert(text){
        // An empty pattern can never be reported, so do not mark the root.
        if (text.length === 0) {
            return;
        }
        let node = this.root;
        // Walk by code unit so the trie keys agree with the text[i] lookups
        // performed in match() (for...of would split by code point instead).
        for (let i = 0; i < text.length; i++) {
            const char = text[i];
            let next = node.children.get(char);
            if (!next) {
                next = new ACNode(char);
                node.children.set(char, next);
            }
            node = next;
        }
        node.isEndingChar = true;
        node.length = text.length;
    }

    /**
     * Builds the failure pointer of every node with a breadth-first walk,
     * so that a node's ancestors are always processed before the node.
     */
    failurePointer(){
        const root = this.root;
        // Array used as a FIFO through a moving head index (avoids shift()).
        const queue = [root];
        for (let head = 0; head < queue.length; head++) {
            const parent = queue[head];
            for (const [char, child] of parent.children) {
                // First-generation children always fall back to the root.
                // Deeper nodes follow the parent's failure chain until some
                // node on it has a child for the same character.
                let fallback = parent === root ? null : parent.fail;
                while (fallback && !fallback.children.has(char)) {
                    fallback = fallback.fail;
                }
                // Chain exhausted (walked past the root): point at the root.
                child.fail = fallback ? fallback.children.get(char) : root;
                queue.push(child);
            }
        }
    }

    /**
     * Scans the text and reports every occurrence of every inserted pattern.
     * Each hit is logged with console.log and also collected in the result.
     * failurePointer() must have been called beforehand.
     *
     * @param {string} text - the text to scan
     * @returns {Array<{from: number, length: number, str: string}>} hits in
     *          the order they are found; `from` is the start offset
     */
    match(text){
        const root = this.root;
        const matches = [];
        let node = root;
        for (let i = 0; i < text.length; i++) {
            const char = text[i];

            // No transition for this character: switch lines via the
            // failure pointers until one exists or the root is reached.
            while (node !== root && !node.children.has(char)) {
                node = node.fail;
            }

            // If even the root has no transition, stay on the root instead
            // of continuing with an undefined node.
            node = node.children.get(char) || root;

            // Follow the failure chain to report every pattern that ends at
            // this position, including shorter ones that are suffixes.
            for (let hit = node; hit !== root; hit = hit.fail) {
                if (hit.isEndingChar) {
                    const from = i - hit.length + 1;
                    const str = text.slice(from, from + hit.length);
                    console.log(`from ${from} length: ${hit.length} str: ${str}`);
                    matches.push({ from, length: hit.length, str });
                }
            }
        }
        return matches;
    }
}

/**
 * Builds an Aho-Corasick automaton from the given patterns and runs it over
 * the text.
 *
 * @param {string} text - the text to scan
 * @param {Iterable<string>} patterns - the patterns to search for
 * @returns {*} whatever ACTree#match returns for the text
 */
function match(text,patterns){
    const automaton = new ACTree();
    // Declare the loop variable: the undeclared form leaks a global in
    // sloppy mode and throws a ReferenceError in strict mode / ES modules.
    for (const pattern of patterns) {
        automaton.insert(pattern);
    }
    // Failure pointers are built once, after every pattern is in the trie.
    automaton.failurePointer();
    return automaton.match(text);
}

// Demo 1: overlapping ASCII patterns in a short text.
const patterns = ["at", "art", "oars", "soar"];
const text = "soarsoars";
match(text, patterns);

// Demo 2: patterns mixing Latin and CJK characters in a longer text.
const patterns2 = ["Fxtec Pro1", "谷歌Pixel"];
const text2 = "一家總部位於倫敦的公司Fxtex在MWC上就推出了一款名爲Fxtec Pro1的手機,該機最大的亮點就是採用了側滑式全鍵盤設計。DxOMark年度總榜發佈 華爲P20 Pro/谷歌Pixel 3爭冠";
match(text2, patterns2);