分享一个PHP中文分词
Release: 2016-06-13 09:43:06
Original
1085 people have browsed it
-
-
-
/**
 * Extract the top-ranked keywords from a UTF-8 text using the PSCWS4
 * Chinese word segmenter.
 *
 * The PSCWS4 library, its dictionary and its rule file are loaded from the
 * relative directory ./pscws4/ ; the library has to be downloaded separately.
 *
 * @param string $text         Text to segment.
 * @param int    $top          Limit passed to get_tops() (default 5).
 * @param bool   $return_array Pass exactly false (default) to receive a
 *                             delimited string; any other value yields an
 *                             array of words.
 * @param string $sep          Delimiter placed between words in string mode.
 *                             May be empty or longer than one byte.
 *
 * @return string|array Words joined by $sep, or an array of words. When no
 *                      keyword is found the result is '' or an empty array.
 */
function scws($text, $top = 5, $return_array = false, $sep = ',') {
    // Load the library only once: a plain include on every call would try to
    // redeclare the class and abort on the second invocation.
    if (!class_exists('pscws4', false)) {
        require_once './pscws4/pscws4.php';
    }

    $cws = new pscws4('utf-8');
    $cws->set_charset('utf-8');
    $cws->set_dict('./pscws4/etc/dict.utf8.xdb');
    $cws->set_rule('./pscws4/etc/rules.utf8.ini');
    $cws->set_ignore(true);
    $cws->send_text($text);

    // 'r,v,p' is forwarded as get_tops()' second argument.
    // NOTE(review): presumably a part-of-speech attribute filter - confirm
    // against the PSCWS4 documentation.
    $ret = $cws->get_tops($top, 'r,v,p');

    // Release the dictionary handle held by the segmenter.
    $cws->close();

    $words = [];
    if (is_array($ret)) {
        foreach ($ret as $value) {
            $words[] = $value['word'];
        }
    }

    // implode() copes with separators of any length, unlike stripping one
    // leading byte from a "$sep . word" concatenation.
    return false === $return_array ? implode((string) $sep, $words) : $words;
}
-
// Demo: print the keywords extracted from a sample phrase; with the default
// arguments scws() returns them as a single comma-separated string.
print_r(scws('青花夔龙纹香炉'));
-
Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Latest Articles by Author
-
2024-10-22 09:46:29
-
2024-10-13 13:53:41
-
2024-10-12 12:15:51
-
2024-10-11 22:47:31
-
2024-10-11 19:36:51
-
2024-10-11 15:50:41
-
2024-10-11 15:07:41
-
2024-10-11 14:21:21
-
2024-10-11 12:59:11
-
2024-10-11 12:17:31