Reprinted from: http://www.169it.com/blog_article/601549531.html
Main content of this section:
A keyword replacement class
It can be mainly used for keyword filtering or keyword search and replacement.
Implementation process analysis:
Keyword replacement is essentially a str_replace() process. With plain str_replace, applying 100,000 (10W) keywords to a 10,000-word (1W) article takes only about 2 seconds.
The problem:
A keyword may be replaced more than once. For example, a should become <a>a</a>, but the result may end up as <a><a>a</a></a> and so on.
For this reason, a method is needed to protect the replaced tags. Then before processing the article, replace the tag first, such as [_tnum_], and then restore it after the article is processed.
Another question, what if there is [_tnum_] itself in the keyword or article, then you need to exclude this. str_replace cannot be used here, but preg_replace needs to be used to exclude it with regular expressions.
The third question is, what if there are two keywords a and ab? I hope to match the long ones first and then match the short ones, so I need to sort them before matching.
The last problem: once str_replace is changed to preg_replace, processing becomes slower, and matching the same paragraph 100,000 times takes 5 seconds. Among the string functions, strpos is faster, so strpos is used first to find which keywords actually occur in the text: 100,000 lookups take less than 1 second, and even 1 million take only a little over 8 seconds.
A keyword matching replacement class, code:
/*
* Keyword matching class
* @author ylx
* @package mipang
* Usage examples
* $str = "The green-shelled egg chicken spreads Van der Sar next year, and the next year 1 spreads the room Lucas local army";
* $key = new KeyReplace($str,array("xxxx" =>"sadf","Next year 1"=>'http://baidu.com',"Next year"=>'google.com'));
* echo $key->getResultText();
* echo $key->getRuntime();
*/
class KeyReplace
{
    /** @var array Dictionary: keyword => replacement/URL, or keyword => array of candidate values. */
    private $keys = array();
    /** @var string Article text; overwritten with the processed text by getResultText(). */
    private $text = "";
    /** @var float Seconds spent in the most recent getResultText() call. */
    private $runtime = 0;
    /** @var bool True: wrap each hit in a link to its value; false: substitute the value verbatim. */
    private $url = true;
    /** @var array Stop words: never used as keywords and never altered inside the text. */
    private $stopkeys = array();
    /** @var bool True: replace every occurrence of a keyword; false: only the first one. */
    private $all = false;

    /**
     * @access public
     * @param string  $text     the article to be processed
     * @param array   $keys     dictionary array(key=>url,...); a url may itself be an array,
     *                          in which case one of its entries is picked at random
     * @param boolean $url      true turns each hit into a link, false substitutes the value as-is
     * @param array   $stopkeys stop words array(key,...); these words are never processed
     * @param boolean $all      true replaces every occurrence, false only the first one
     */
    public function __construct($text = '', $keys = array(), $url = true, $stopkeys = array(), $all = false)
    {
        $this->keys = $keys;
        $this->text = $text;
        $this->url = $url;
        $this->stopkeys = $stopkeys;
        $this->all = $all;
    }

    /**
     * Process the article and return it.
     *
     * Keywords that occur in the text are applied longest first, so that "ab" is
     * handled before "a". The processed text replaces the stored text, so a second
     * call works on the already processed article.
     *
     * @access public
     * @return string processed text
     */
    public function getResultText()
    {
        $start = microtime(true);
        $hits = $this->hits_keys();
        foreach ($this->sortLongestFirst(array_keys($hits)) as $key) {
            $target = $hits[$key];
            if (is_array($target)) {
                if (!$target) {
                    // No candidate to choose from: leave this keyword alone.
                    continue;
                }
                // array_rand() works for any array keys, not only 0..n-1.
                $target = $target[array_rand($target)];
            }
            // Numeric-looking keywords come back from array_keys() as integers.
            $this->text = $this->r_s($this->text, (string) $key, $target);
        }
        $this->runtime = microtime(true) - $start;
        return $this->text;
    }

    /**
     * Get the processing time of the last getResultText() call.
     * @access public
     * @return float seconds
     */
    public function getRuntime()
    {
        return $this->runtime;
    }

    /**
     * Set keywords
     * @access public
     * @param array $keys array(key=>url,...)
     */
    public function setKeys($keys)
    {
        $this->keys = $keys;
    }

    /**
     * Set stop words
     * @access public
     * @param array $keys array(key,...)
     */
    public function setStopKeys($keys)
    {
        $this->stopkeys = $keys;
    }

    /**
     * Set article
     * @access public
     * @param string $text
     */
    public function setText($text)
    {
        $this->text = $text;
    }

    /**
     * Find the dictionary keywords that occur in the text.
     *
     * Keywords are trimmed; keywords that are empty (or "0") after trimming and
     * keywords that are also stop words are left out.
     *
     * @access public
     * @return array matched words array(key=>url,...)
     */
    public function hits_keys()
    {
        $hits = array();
        $dictionary = is_array($this->keys) ? $this->keys : array();
        $stopWords = $this->normalizedStopKeys();
        $haystack = (string) $this->text;
        foreach ($dictionary as $word => $target) {
            $word = trim((string) $word);
            if (!$word) {
                continue;
            }
            // strpos is a cheap pre-filter: only words present in the text are processed later.
            if (strpos($haystack, $word) !== false && !in_array($word, $stopWords, true)) {
                $hits[$word] = $target;
            }
        }
        return $hits;
    }

    /**
     * Find the stop words that occur in the text.
     *
     * @access public
     * @return array matched stop words array(key,...)
     */
    public function hits_stop_keys()
    {
        $hits = array();
        $haystack = (string) $this->text;
        foreach ($this->normalizedStopKeys() as $word) {
            if (strpos($haystack, $word) !== false) {
                $hits[] = $word;
            }
        }
        return $hits;
    }

    /**
     * Replace one keyword in the text.
     *
     * Complete <a>...</a> elements, any other HTML tag and every stop word found in
     * the stored text are protected: the text is split on those spans and the
     * keyword is only looked for in the pieces between them. No placeholder
     * markers are inserted, so nothing in the article or in a keyword can collide
     * with a marker. Links created for earlier keywords are protected the same
     * way; in plain-substitution mode inserted values are ordinary text and can be
     * hit by later keywords.
     *
     * @access private
     * @param string $text text to work on
     * @param string $key  keyword
     * @param string $url  link target (link mode) or replacement text (plain mode)
     * @return string processed text
     */
    private function r_s($text, $key, $url)
    {
        $text = (string) $text;
        $key = (string) $key;
        if ($key === '') {
            return $text;
        }

        $protected = array(
            // A whole link, including nested markup, so links are never nested.
            '(?i:<a(?=[\s>])[^>]*>.*?</a\s*>)',
            // Any other tag: attribute values must not be rewritten.
            '<[^>]+>',
        );
        // Longest stop word first, so "abc" is protected as a whole before "ab".
        foreach ($this->sortLongestFirst($this->hits_stop_keys()) as $word) {
            $protected[] = preg_quote($word, '#');
        }

        // With one capturing group, even indexes are free text and odd indexes are protected spans.
        $pieces = preg_split('#(' . implode('|', $protected) . ')#su', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($pieces === false) {
            // PCRE failure (for example invalid UTF-8): hand the text back untouched.
            return $text;
        }

        if ($this->url) {
            $href = htmlspecialchars((string) $url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
            $replacement = '<a href="' . $href . '">' . $key . '</a>';
        } else {
            $replacement = (string) $url;
        }

        foreach ($pieces as $index => $piece) {
            if ($index % 2 === 1 || $piece === '') {
                continue;
            }
            if ($this->all) {
                $pieces[$index] = str_replace($key, $replacement, $piece);
                continue;
            }
            $position = strpos($piece, $key);
            if ($position !== false) {
                $pieces[$index] = substr_replace($piece, $replacement, $position, strlen($key));
                break;
            }
        }
        return implode('', $pieces);
    }

    /**
     * Return the stop words as trimmed strings, without entries that are empty
     * (or "0") after trimming.
     *
     * @access private
     * @return array list of stop words
     */
    private function normalizedStopKeys()
    {
        $words = array();
        $source = is_array($this->stopkeys) ? $this->stopkeys : array();
        foreach ($source as $word) {
            if (!is_scalar($word)) {
                continue;
            }
            $word = trim((string) $word);
            if ($word) {
                $words[] = $word;
            }
        }
        return $words;
    }

    /**
     * Sort words by length, longest first.
     *
     * Length is counted in characters when mbstring is available and in bytes
     * otherwise; either way a word always sorts before any shorter word it contains.
     *
     * @access private
     * @param array $words list of words
     * @return array the same words, longest first
     */
    private function sortLongestFirst(array $words)
    {
        $measure = function_exists('mb_strlen') ? 'mb_strlen' : 'strlen';
        usort($words, function ($a, $b) use ($measure) {
            return $measure((string) $b) - $measure((string) $a);
        });
        return $words;
    }
}