-
// Replace every CJK ideograph in the range U+4E00..U+9FA5 with the placeholder "<@>".
// The /u modifier makes PCRE treat both the pattern and the subject as UTF-8,
// so the character class matches whole characters rather than single bytes.
$tmp = preg_replace('/[一-龥]/u', '<@>', 'Hello who am I? 123abc');
- The /u modifier tells the regex engine to treat the pattern and the subject string as UTF-8.
Copy the code
Code 1: truncating a UTF-8 string in PHP without cutting a multi-byte character in half
-
-
/**
 * Truncate a UTF-8 string to a display width without splitting a multi-byte
 * character in half.
 *
 * Width is counted the way the original routine counted it: an ASCII
 * (half-width) character is 1 unit, any multi-byte (full-width, e.g. Chinese)
 * character is 2 units. A multi-byte character that would not fit completely
 * inside $len is dropped rather than cut.
 *
 * @param string $str Source string (UTF-8).
 * @param int    $len Maximum width of the prefix to keep.
 * @return string The prefix; when $len is less than or equal to 0 the entire
 *                string is returned.
 * @edit bbs.it-home.org
 */
function utf_substr($str, $len)
{
    $str = (string) $str;

    // Documented contract: a non-positive length means "no truncation".
    if ($len <= 0) {
        return $str;
    }

    $pieces = array();
    $total = strlen($str); // length in bytes
    $pos = 0;              // current byte offset
    $width = 0;            // display width collected so far

    while ($pos < $total && $width < $len) {
        $lead = ord($str[$pos]);

        // The lead byte determines how many bytes the character occupies.
        if ($lead < 0x80) {
            $bytes = 1;
            $cost = 1;
        } elseif ($lead >= 0xF0) {
            $bytes = 4;
            $cost = 2;
        } elseif ($lead >= 0xE0) {
            $bytes = 3;
            $cost = 2;
        } elseif ($lead >= 0xC0) {
            $bytes = 2;
            $cost = 2;
        } else {
            // Stray continuation byte (malformed input): pass it through as a
            // single byte so the loop always makes progress.
            $bytes = 1;
            $cost = 1;
        }

        // Never emit half of a wide character.
        if ($width + $cost > $len) {
            break;
        }

        $pieces[] = substr($str, $pos, $bytes);
        $pos += $bytes;
        $width += $cost;
    }

    return join($pieces);
}
// Calling example: keep at most 4 width units of the string, then print it.
$str = utf_substr('Hello', 4);
echo $str;
- ?>
-
Copy code
Code 2: a function that truncates a UTF-8 string to a given display width
-
-
/**
 * Truncate a UTF-8 string to an approximate display width.
 *
 * Characters are copied whole, one at a time, while a running width is kept:
 * a multi-byte character or an uppercase ASCII letter counts as 1, every
 * other single byte (lowercase letters, digits, half-width punctuation)
 * counts as 0.5. Copying stops once the running width reaches $cutlength.
 * "..." is appended only when part of the source string was actually left out.
 *
 * edit bbs.it-home.org
 *
 * @param string    $sourcestr UTF-8 source string.
 * @param int|float $cutlength Target width, in the units described above.
 * @return string The truncated string, with "..." appended if it was shortened.
 */
function cut_str($sourcestr, $cutlength)
{
    $sourcestr = (string) $sourcestr;
    $returnstr = '';
    $i = 0; // current byte offset
    $n = 0; // accumulated display width
    $str_length = strlen($sourcestr); // number of bytes in the string

    while ($n < $cutlength && $i < $str_length) {
        $ascnum = ord($sourcestr[$i]); // value of the lead byte at offset $i

        if ($ascnum >= 240) {
            // 4-byte UTF-8 sequence (e.g. emoji), counted as one wide character.
            $bytes = 4;
            $width = 1;
        } elseif ($ascnum >= 224) {
            // 3-byte UTF-8 sequence, counted as one wide character.
            $bytes = 3;
            $width = 1;
        } elseif ($ascnum >= 192) {
            // 2-byte UTF-8 sequence, counted as one wide character.
            $bytes = 2;
            $width = 1;
        } elseif ($ascnum >= 65 && $ascnum <= 90) {
            // Uppercase letters are visually wide, so they count as a full unit.
            $bytes = 1;
            $width = 1;
        } else {
            // Lowercase letters, digits and half-width punctuation: half a unit.
            $bytes = 1;
            $width = 0.5;
        }

        $returnstr .= substr($sourcestr, $i, $bytes);
        $i += $bytes;
        $n += $width;
    }

    // Only mark the result as shortened when bytes were really dropped;
    // comparing the byte length against the display width gave false positives.
    if ($i < $str_length) {
        $returnstr .= "...";
    }

    return $returnstr;
}
// Call example: shorten the string to a display width of 3 and print it.
// Double quotes are required here because the text contains an apostrophe.
$str = "Hello! I'm good";
$str = cut_str($str, 3);
echo $str;
- ? >
-
Copy code
|