百度百科的关键词链接是怎样实现的呢
百度百科的关键词带有链接。我在想,如果只有少量关键词,只需要简单地替换成链接就可以。可是百度的关键词是很多的,可能有成千上万个。如果替换上万次,那效率也太低了吧。请教这样的功能是怎样实现的呢?谢谢!
附截图:
// Load the TTrie base class. require_once avoids a "cannot redeclare class"
// fatal error if TTrie.php has already been loaded elsewhere.
require_once 'TTrie.php';

/**
 * Keyword marker built on TTrie.
 *
 * Each keyword is registered with the action name 'b', which refers to the
 * b() method below.
 */
class wordkey extends TTrie
{
    /**
     * Wrap the most recent buffer entry in <b>...</b>.
     *
     * Pops the last element of $this->buffer and pushes it back surrounded
     * by bold tags.
     *
     * NOTE(review): presumably TTrie::match() invokes this right after it has
     * pushed the matched keyword onto the buffer; the dispatch code is not
     * visible here, so confirm against TTrie.
     */
    public function b()
    {
        $t = array_pop($this->buffer);
        $this->buffer[] = "<b>$t</b>";
    }
}

// Demo: register two keywords, then scan a sentence containing both.
$p = new wordkey;
$p->set('秦始皇', 'b');
$p->set('洛阳', 'b');

// NOTE(review): match() is assumed to return the list of text fragments
// (the processed buffer); verify against TTrie::match().
$t = $p->match('秦始皇东巡洛阳');
echo implode('', $t);
class TTrie {<br> protected $buffer = array();<br> protected $dict = array( array() );<br> protected $input = 0; //字符串当前偏移<br> protected $backtracking = 0; //字符串回溯位置<br> public $debug = 0;<br> public $savematch = 1;<br><br> function set($word, $action='') {<br> if(is_array($word)) {<br> foreach($word as $k=>$v) $this->set($k, $v);<br> return;<br> }<br> $p = count($this->dict);<br> $cur = 0; //当前节点号<br> foreach(str_split($word) as $c) {<br> if (isset($this->dict[$cur][$c])) { //已存在就下移<br> $cur = $this->dict[$cur][$c];<br> continue;<br> }<br> $this->dict[$p]= array(); //创建新节点<br> $this->dict[$cur][$c] = $p; //在父节点记录子节点号<br> $cur = $p; //把当前节点设为新插入的<br> $p++;<br> }<br> $this->dict[$cur]['acc'] = $action; //一个词结束,标记叶子节点<br> }<br> function getto($ch) {<br> $i =& $this->input; //字符串当前偏移<br> $p =& $this->backtracking; //字符串回溯位置<br> $len = strlen($this->doc);<br> $t = '';<br> $this->input++;<br>// while($this->inputdoc{$this->input} != $ch) $t .= $this->doc{$this->input++};<br>// $t .= $this->doc{$this->input++};<br> do {<br> if($this->input >= $len) break;<br> $t .= $this->doc{$this->input};<br> }while($this->doc{$this->input++} != $ch);<br> return trim($t);<br> } <br> function match($s) {<br> $this->doc =& $s;<br> $this->buffer = array();<br> $ret = array();<br> $cur = 0; //当前节点,初始为根节点<br> $i =& $this->input; //字符串当前偏移<br> $p =& $this->backtracking; //字符串回溯位置<br> $i = $p = 0;<br> $s .= "\0"; //附加结束符<br> $len = strlen($s);<br> $buf = '';<br> while($i $c = $s{$i};<br> if(isset($this->dict[$cur][$c])) { //如果存在<br> $cur = $this->dict[$cur][$c]; //转到对应的位置<br> if(isset($this->dict[$cur][$s[$i+1]])) {//检查下一个字符是否也能匹配,长度优先<br> $i++;<br> continue;<br> }<br> if(isset($this->dict[$cur]['acc'])) { //是叶子节点,单词匹配!<br> if($buf != '') {<br> $this->buffer[] = $buf;<br> $buf = '';<br> }<br> if($this->savematch) $this->buffer[] = substr($s, $p, $i - $p + 1); //取出匹配位置和匹配的词<div class="clear"> </div>