/**
 * Truncate an HTML-escaped string to a maximum display width without
 * splitting a multi-byte character or an HTML entity.
 *
 * Width is counted as follows when the global $charset is "utf-8"
 * (case-insensitive): tab, newline and printable ASCII (32..126) count as 1,
 * every multi-byte sequence counts as 2, and any other byte (control
 * characters, stray continuation bytes, 0xC0/0xC1/0xFE/0xFF) is kept but
 * counts as 0. For any other $charset value the string is treated as a
 * double-byte encoding (GBK-style): a byte above 127 starts a two-byte
 * character, and the width equals the number of bytes.
 *
 * The entities &amp;, &quot;, &lt; and &gt; are decoded before measuring and
 * re-encoded in the returned prefix, so each one counts as a single column
 * and is never cut in half. As a consequence, bare & " < > characters in a
 * truncated result come back entity-encoded.
 *
 * @param string $string Text to shorten; expected to be HTML-escaped.
 * @param int    $length Maximum display width of the kept prefix.
 * @param string $dot    Suffix appended only when text was actually removed.
 *
 * @return string The input unchanged when it fits, otherwise the kept
 *                prefix followed by $dot.
 */
function cutstr($string, $length, $dot = ' ...') {
    global $charset;

    $string = (string) $string;
    $length = (int) $length;

    // Fast path: the byte length is an upper bound for the display width.
    if (strlen($string) <= $length) {
        return $string;
    }

    $original = $string;

    // Decode "&amp;" last so that "&amp;lt;" becomes the text "&lt;" rather
    // than being decoded twice into "<".
    $string = str_replace(
        array('&quot;', '&lt;', '&gt;', '&amp;'),
        array('"', '<', '>', '&'),
        $string
    );
    $total = strlen($string);

    $n = 0; // number of bytes to keep

    if (strtolower(is_string($charset) ? $charset : '') === 'utf-8') {
        $tn = 0;  // byte length of the most recently consumed character
        $noc = 0; // display width consumed so far

        while ($n < $total && $noc < $length) {
            $t = ord($string[$n]);

            // Classify by lead byte. 248..253 are the legacy 5- and 6-byte
            // forms of RFC 2279; RFC 3629 no longer allows them, but they
            // are skipped as a unit so old data is not cut mid-sequence.
            if ($t == 9 || $t == 10 || (32 <= $t && $t <= 126)) {
                $tn = 1;
                $noc += 1;
            } elseif (194 <= $t && $t <= 223) {
                $tn = 2;
                $noc += 2;
            } elseif (224 <= $t && $t <= 239) {
                $tn = 3;
                $noc += 2;
            } elseif (240 <= $t && $t <= 247) {
                $tn = 4;
                $noc += 2;
            } elseif (248 <= $t && $t <= 251) {
                $tn = 5;
                $noc += 2;
            } elseif ($t == 252 || $t == 253) {
                $tn = 6;
                $noc += 2;
            } else {
                // Unclassified byte: keep it, but give it no width.
                $tn = 1;
            }

            $n += $tn;
        }

        // A wide character pushed the width past the limit: drop it again.
        if ($noc > $length) {
            $n -= $tn;
        }

        // A truncated trailing sequence may have advanced past the end.
        $n = min($n, $total);
    } else {
        // Decoding entities may have made the string shorter than $length.
        $limit = min($length, $total);

        while ($n < $limit) {
            if (ord($string[$n]) > 127) {
                // Take a double-byte character only if both bytes fit.
                if ($n + 1 >= $limit) {
                    break;
                }
                $n += 2;
            } else {
                $n++;
            }
        }
    }

    // Nothing was removed, so hand back the caller's string untouched.
    if ($n >= $total) {
        return $original;
    }

    $strcut = str_replace(
        array('&', '"', '<', '>'),
        array('&amp;', '&quot;', '&lt;', '&gt;'),
        substr($string, 0, $n)
    );

    return $strcut . $dot;
}
// Demo: shorten a mixed Chinese/ASCII sample to a display width of 10.

// cutstr() reads the global $charset; default it to UTF-8 for this demo
// unless it has already been configured.
if (!isset($charset)) {
    $charset = 'utf-8';
}

$testStr = "好好学习!(good good study)天天向上!(day day up)";

// When this file is saved as UTF-8, the literal above is already UTF-8.
echo cutstr($testStr, 10);

// When this file is saved in a DOS/GBK encoding instead, use the following
// lines to exercise the UTF-8 path: convert to UTF-8, cut, convert back.
//$testStr = iconv('GBK', 'UTF-8', $testStr);
//echo iconv('UTF-8', 'GBK', cutstr($testStr, 10));