码迷,mamicode.com
首页 > 其他好文 > 详细

AC自动机

时间:2019-05-05 01:00:37      阅读:182      评论:0      收藏:0      [点我收藏+]

标签:dsa   串匹配   image   length   struct   ati   完整   ns2   str   

AC自动机

技术图片

1.根据所有模式串的字符构造trie树
2.构建失败匹配指针
   1.根节点的所有第一代子节点的失败指针都指向root
   2.其他子节点:从父节点的失败指针所指节点开始,查找是否有相同字符的子节点,找到就把失败指针指向它;找不到就沿失败指针继续向上找,直到root仍然匹配不到,就直接指向root
3.文本串匹配
   1.如果已经匹配到完整的模式串,根据失败指针切换线路继续向下查找
   2.如果还没有匹配完,那么就向下匹配
/**
 * A single node of the Aho–Corasick trie.
 *
 * Fields:
 *  - data:         the character stored on the edge leading to this node
 *  - children:     Map from character to child ACNode
 *  - isEndingChar: true when a pattern ends at this node
 *  - length:       length of the pattern ending here (0 until one is recorded)
 *  - fail:         failure pointer; null until failure pointers are built
 */
class ACNode {
    constructor(data){
        // Initialise every field in one place; each node gets its own Map.
        Object.assign(this, {
            data,
            children: new Map(),
            isEndingChar: false,
            length: 0,
            fail: null,
        })
    }
}

/**
 * Aho–Corasick automaton for finding every occurrence of a set of patterns
 * in a text in a single pass.
 *
 * Usage: insert() each pattern, then call match(). Failure pointers are
 * (re)built automatically by match() when patterns were inserted since the
 * last build; buildFailurePointer() may still be called explicitly.
 */
class ACTree {
    constructor(){
        this.root = new ACNode('/');
        // True when patterns were inserted after the last failure-pointer build.
        this.needsBuild = false;
    }

    /**
     * Adds a pattern to the trie.
     * @param {string} text - the pattern to add
     */
    insert(text){
        let node = this.root;
        for (const char of text) {
            let child = node.children.get(char);
            if (!child) {
                child = new ACNode(char);
                node.children.set(char, child);
            }
            node = child;
        }
        node.isEndingChar = true;
        // Length in UTF-16 code units, so match() can slice the text directly.
        node.length = text.length;
        this.needsBuild = true;
    }

    /**
     * Computes the failure pointer of every node with a breadth-first walk.
     * First-level nodes fail to the root; a deeper node fails to the child
     * (with the same character) of the nearest node on its parent's failure
     * chain that has such a child, or to the root when none exists.
     */
    buildFailurePointer(){
        const root = this.root;
        const queue = [root];

        // Index-based queue: avoids the O(n) cost of Array.prototype.shift().
        for (let head = 0; head < queue.length; head++) {
            const parentNode = queue[head];
            for (const childNode of parentNode.children.values()) {
                let failNode = root;
                if (parentNode !== root) {
                    let candidate = parentNode.fail;
                    while (candidate) {
                        const next = candidate.children.get(childNode.data);
                        if (next) {
                            failNode = next;
                            break;
                        }
                        candidate = candidate.fail;
                    }
                }
                childNode.fail = failNode;
                queue.push(childNode);
            }
        }
        this.needsBuild = false;
    }

    /**
     * Scans the text and reports every occurrence of every inserted pattern.
     * Each hit is logged with console.log and also collected into the
     * returned array.
     * @param {string} text - the text to scan
     * @returns {Array<{start: number, length: number, str: string}>}
     *   hits in the order found; start and length are UTF-16 code unit offsets
     */
    match(text){
        // Stale or missing failure pointers would make the walk below
        // dereference null, so rebuild them first when needed.
        if (this.needsBuild) {
            this.buildFailurePointer();
        }

        const root = this.root;
        const matches = [];
        let currentNode = root;
        let offset = 0; // exclusive end offset (UTF-16 units) of the current char

        // Iterate by code point, exactly as insert() does, so patterns that
        // contain surrogate pairs can be matched.
        for (const char of text) {
            offset += char.length;

            // Follow failure pointers until some node can consume this char.
            while (currentNode !== root && !currentNode.children.has(char)) {
                currentNode = currentNode.fail;
            }
            currentNode = currentNode.children.get(char) || root;

            // Every node on the failure chain is a suffix of what has been
            // matched so far; report each one that ends a pattern, using that
            // node's own length.
            for (let tmp = currentNode; tmp !== root; tmp = tmp.fail) {
                if (tmp.isEndingChar === true) {
                    const start = offset - tmp.length;
                    const str = text.slice(start, offset);
                    console.log(`from ${start} length: ${tmp.length} str: ${str}`);
                    matches.push({ start, length: tmp.length, str });
                }
            }
        }
        return matches;
    }
}

/**
 * Builds an Aho–Corasick automaton from the given patterns and runs it over
 * the text; reporting of hits is done by ACTree#match.
 * @param {string} text - the text to scan
 * @param {Iterable<string>} patterns - the patterns to search for
 */
function match(text,patterns){
    const tree = new ACTree();
    for (const word of patterns) {
        tree.insert(word);
    }
    // Failure pointers must be in place before scanning.
    tree.buildFailurePointer();
    tree.match(text);
}

// Demo inputs: a small ASCII example and a mixed Chinese/Latin example.
const patterns = ["at", "art", "oars", "soar"];
const text = "soarsoars";

const patterns2 = ["Fxtec Pro1", "谷歌Pixel"];
const text2 = "一家总部位于伦敦的公司Fxtex在MWC上就推出了一款名为Fxtec Pro1的手机,该机最大的亮点就是采用了侧滑式全键盘设计。DxOMark年度总榜发布 华为P20 Pro/谷歌Pixel 3争冠";

// Run each demo in the same order as before; hits are printed by match().
for (const [haystack, needles] of [[text, patterns], [text2, patterns2]]) {
    match(haystack, needles);
}

AC自动机

标签:dsa   串匹配   image   length   struct   ati   完整   ns2   str   

原文地址:https://www.cnblogs.com/pluslius/p/10810471.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!