以关键词为 key 构建字典数组(哈希表),这样对每个关键词的查找都可以在常数时间 O(1) 内完成。匹配时采用正向最长匹配算法:从当前位置开始,优先尝试最长的候选子串,命中后跳过该词继续匹配。具体代码如下:
/**
 * Dictionary-based keyword matcher using forward longest-match scanning.
 *
 * Keywords are stored as array keys so that each candidate substring can be
 * checked with a single hash lookup. All length/substring operations go
 * through the mbstring functions, so they count characters according to the
 * current mb_internal_encoding() rather than bytes.
 */
class WordMatcher
{
    /** @var array<string,int> Registered keywords, stored as keys (value is always 1). */
    public $dict = [];

    /**
     * @var int Length in characters of the longest keyword ever added.
     *          It is not lowered by removeWord(), so it is an upper bound on
     *          the candidate length tried during matching.
     */
    public $wordMaxLen = 0;

    /**
     * @throws \RuntimeException When the mbstring extension is not available.
     */
    public function __construct()
    {
        // Throw instead of exit(): library code should not terminate the host
        // script in a way the caller cannot intercept.
        if (!extension_loaded('mbstring')) {
            throw new \RuntimeException('extension mbstring is not loaded');
        }
    }

    /**
     * Register a keyword.
     *
     * @param string $word Keyword to add; an empty string is ignored.
     * @return void
     */
    public function addWord($word)
    {
        $len = mb_strlen($word);
        if ($len === 0) {
            // An empty keyword can never be produced by match(); don't store it.
            return;
        }

        if ($len > $this->wordMaxLen) {
            $this->wordMaxLen = $len;
        }
        $this->dict[$word] = 1;
    }

    /**
     * Remove a keyword; removing an unknown keyword is a no-op.
     *
     * @param string $word Keyword to remove.
     * @return void
     */
    public function removeWord($word)
    {
        unset($this->dict[$word]);
    }

    /**
     * Scan $str from left to right and collect every keyword found.
     *
     * At each position the longest candidate (up to $wordMaxLen characters)
     * is tried first; on a hit the keyword is appended to $matched and the
     * scan resumes right after it, otherwise the scan advances by one
     * character.
     *
     * @param string     $str     Text to scan.
     * @param array|null $matched Output list. Hits are appended to an existing
     *                            array; any non-array value is replaced by an
     *                            empty array first, so it is always an array
     *                            after the call.
     * @return void
     */
    public function match($str, &$matched)
    {
        if (!is_array($matched)) {
            $matched = [];
        }

        // Iterative scan: the previous recursive version used one stack frame
        // per consumed token/character, i.e. depth grew with the input length.
        $rest = (string) $str;
        while ($rest !== '') {
            $step = 1; // no keyword here: skip a single character

            for ($len = $this->wordMaxLen; $len > 0; $len--) {
                // If fewer than $len characters remain, mb_substr() simply
                // returns the whole remainder, which is still a valid candidate.
                $candidate = mb_substr($rest, 0, $len);
                if (isset($this->dict[$candidate])) {
                    $matched[] = $candidate;
                    $step = $len;
                    break;
                }
            }

            $rest = mb_substr($rest, $step);
        }
    }
}

// Usage example.
$matcher = new WordMatcher;
$matcher->addWord('PHP');
$matcher->addWord('语言');

$matched = [];
$matcher->match('PHP是最好的语言', $matched);
// $matched is now ['PHP', '语言']