-
/**
 * Banned-word ("black word") filter.
 *
 * Banned words are read from Model_BlackWord_BlackWord in id-range batches.
 * Each batch is turned into a character trie and cached through Rds. Input
 * text is stripped of markup and punctuation and scanned against every batch.
 *
 * Original author's measurement: about 0.05 seconds per post.
 *
 * NOTE(review): matching is case-sensitive (neither the text nor the stored
 * words are lower-cased) - confirm that this is intended.
 *
 * @author liuxu
 */
class Logic_BlackWord
{
    // Application identifiers. They are not referenced inside this class;
    // presumably callers use them to tag where the text came from.
    const APP_FORUM = 1;
    const APP_BLOG = 2;
    const APP_VOTE = 3;

    // Longest run of characters, starting at a candidate position, that is
    // matched against the trie.
    const MAX_WORD_LENGTH = 50;

    // TTL handed to Rds::setex() for cached values (seconds, assuming Rds
    // follows Redis SETEX semantics).
    const CACHE_TTL = 300;

    /**
     * Find every banned word contained in a piece of text.
     *
     * @param string $txt Raw text; may contain HTML.
     * @return array Map of word type => list of matched words (strings).
     *               Empty array when nothing matched.
     */
    public function getHitList($txt)
    {
        // Normalise once here instead of once per batch.
        $txt = $this->normalizeText($txt);
        if ($txt === '') {
            return array();
        }

        $hitList = array();

        // Scan the banned words batch by batch, keyed on id ranges.
        $max = $this->getMax();
        if ($max) {
            $size = 1000;
            $last = (int) ceil($max / $size);
            for ($page = 1; $page <= $last; $page++) {
                // Deliberately not array_merge(): PHP stores a purely numeric
                // word such as "110" under an integer key, and array_merge()
                // renumbers integer keys, which would lose the word itself.
                foreach ($this->getHitListByPage($txt, $page, $size) as $hit => $type) {
                    $hitList[$hit] = $type;
                }
            }
        }

        // Regroup from word => type to type => list of words.
        $grouped = array();
        foreach ($hitList as $hit => $type) {
            $grouped[$type][] = (string) $hit;
        }

        return $grouped;
    }

    /**
     * Highest banned-word id, used to decide how many batches to scan.
     *
     * The value is cached under 'blackWord_max'; a cache miss is detected by
     * Rds::get() returning false.
     *
     * @return int 0 when the table yields no maximum.
     */
    private function getMax()
    {
        $redis = Rds::factory();
        $memKey = 'blackWord_max';
        $max = $redis->get($memKey);
        if ($max === false) {
            $max = 0;
            $blackWord = new Model_BlackWord_BlackWord();
            $para = array('field' => "MAX(id) AS max");
            $result = $blackWord->search($para);
            if (isset($result[0]['max'])) {
                $max = $result[0]['max'];
            }

            $redis->setex($memKey, self::CACHE_TTL, $max);
        }

        return (int) $max;
    }

    /**
     * Scan text against one batch of banned words.
     *
     * @param string $txt  Text already passed through normalizeText().
     * @param int    $page 1-based batch number.
     * @param int    $size Width of the id range covered by one batch.
     * @return array Map of matched word => word type.
     */
    private function getHitListByPage($txt, $page = 1, $size = 1000)
    {
        $hitList = array();

        $wordTree = $this->getWordTreeByPage($page, $size);
        if (!is_array($wordTree) || !$wordTree) {
            return $hitList;
        }

        // Split once up front; calling mb_substr() per position is quadratic
        // on long texts.
        $chars = $this->splitChars($txt);
        foreach ($chars as $i => $char) {
            // Only positions whose character starts some banned word are worth
            // walking the trie for.
            if (!isset($wordTree[$char])) {
                continue;
            }

            $window = implode('', array_slice($chars, $i, self::MAX_WORD_LENGTH));
            foreach ($this->getHitListByTree($window, $wordTree) as $hit => $type) {
                $hitList[$hit] = $type;
            }
        }

        return $hitList;
    }

    /**
     * Collect every banned word that is a prefix of $txt.
     *
     * Walks the trie one character at a time and stops at the first character
     * that has no branch. Every node carrying a 'type' entry on the way is a
     * complete banned word.
     *
     * @param string $txt      Text whose first character is the match start.
     * @param array  $wordTree Trie as built by buildWordTree(); taken by
     *                         reference only to avoid copying, never modified.
     * @return array Map of matched word => word type.
     */
    private function getHitListByTree($txt, &$wordTree)
    {
        $hitList = array();
        $hit = '';
        $point = &$wordTree;

        foreach ($this->splitChars($txt) as $char) {
            if (!isset($point[$char])) {
                break;
            }

            $hit .= $char;
            // Safe to descend by reference: isset() above guarantees the key
            // exists, so no node is created in the shared trie.
            $point = &$point[$char];

            if (isset($point['type'])) {
                $hitList[$hit] = $point['type'];
            }
        }

        return $hitList;
    }

    /**
     * Load (or build and cache) the trie for one batch of banned words.
     *
     * A batch covers active rows (status=1) with ($page-1)*$size < id <= $page*$size.
     * The trie is cached under 'blackWord_tree_<page>_<size>'.
     *
     * @param int $page 1-based batch number.
     * @param int $size Width of the id range covered by one batch.
     * @return array Trie for the batch.
     */
    private function getWordTreeByPage($page = 1, $size = 1000)
    {
        // The values are concatenated into a SQL condition below.
        $page = (int) $page;
        $size = (int) $size;

        $redis = Rds::factory();
        $memKey = 'blackWord_tree_' . $page . '_' . $size;
        $wordTree = $redis->get($memKey);
        if ($wordTree === false) {
            $blackWord = new Model_BlackWord_BlackWord();
            $start = ($page - 1) * $size;
            $end = $start + $size;
            $para = array();
            $para['where'] = "status=1 AND id>" . $start . " AND id<=" . $end;
            $result = $blackWord->search($para);

            $wordTree = $this->buildWordTree($result ? $result : array());

            $redis->setex($memKey, self::CACHE_TTL, $wordTree);
        }

        return $wordTree;
    }

    /**
     * Build a character trie from banned-word rows.
     *
     * Each character of a word is one level of nesting; the node reached by
     * the last character gets a 'type' entry holding the row's type.
     *
     * @param array $rows Rows with at least the keys 'word' and 'type'.
     * @return array The trie.
     */
    private function buildWordTree($rows)
    {
        $wordTree = array();

        foreach ($rows as $value) {
            if (!isset($value['word']) || $value['word'] === '') {
                continue;
            }

            $chars = $this->splitChars($value['word']);
            if (!$chars) {
                // Nothing to index; without this guard 'type' would be written
                // onto the trie root.
                continue;
            }

            $point = &$wordTree;
            foreach ($chars as $char) {
                $point = &$point[$char];
            }
            $point['type'] = $value['type'];
            unset($point);
        }

        return $wordTree;
    }

    /**
     * Reduce text to the characters the filter compares.
     *
     * Removes HTML tags, then everything except ASCII letters, digits and the
     * CJK ideographs U+4E00..U+9FA5.
     *
     * @param string $txt Raw text.
     * @return string Normalised text, possibly empty.
     */
    private function normalizeText($txt)
    {
        $pattern = '/[^a-zA-Z0-9\x{4e00}-\x{9fa5}]/iu';

        $txt = strip_tags((string) $txt);
        $clean = preg_replace($pattern, '', $txt);
        if ($clean === null) {
            // preg_replace() returns null for invalid UTF-8, which would let
            // such text skip the filter entirely. Substitute the invalid bytes
            // and try again.
            $clean = preg_replace($pattern, '', mb_convert_encoding($txt, 'UTF-8', 'UTF-8'));
        }

        return $clean === null ? '' : $clean;
    }

    /**
     * Split a UTF-8 string into a list of single characters.
     *
     * @param string $txt
     * @return array List of characters; empty for '' or invalid UTF-8.
     */
    private function splitChars($txt)
    {
        $chars = preg_split('//u', (string) $txt, -1, PREG_SPLIT_NO_EMPTY);

        return $chars === false ? array() : $chars;
    }
}
-
-
复代码
|