php汉字转拼音首字母问题。
PHP获取拼音首字母
程序运行后,发现字符串中有些文字转换不了。如果是单独输入这些字,直接去转换可以读取出拼音开头字母。在字符串中就不行。程序代码如下:
function getfirstchar($s0){
$fchar = ord($s0{0});
if($fchar >= ord("a") and $fchar
$s1 = iconv("UTF-8","gb2312", $s0);
$s2 = iconv("gb2312","UTF-8", $s1);
if($s2 == $s0){$s = $s1;}else{$s = $s0;}
$asc = ord($s{0}) * 256 + ord($s{1}) - 65536;
if($asc >= -20319 and $asc
if($asc >= -20283 and $asc
if($asc >= -19775 and $asc
if($asc >= -19218 and $asc
if($asc >= -18710 and $asc
if($asc >= -18526 and $asc
if($asc >= -18239 and $asc
if($asc >= -17922 and $asc
if($asc >= -17417 and $asc
if($asc >= -16474 and $asc
if($asc >= -16212 and $asc
if($asc >= -15640 and $asc
if($asc >= -15165 and $asc
if($asc >= -14922 and $asc
if($asc >= -14914 and $asc
if($asc >= -14630 and $asc
if($asc >= -14149 and $asc
if($asc >= -14090 and $asc
if($asc >= -13318 and $asc
if($asc >= -12838 and $asc
if($asc >= -12556 and $asc
if($asc >= -11847 and $asc
if($asc >= -11055 and $asc
return null;
}
function pinyin1($zh){
$ret = "";
$s1 = iconv("UTF-8","gb2312", $zh);
$s2 = iconv("gb2312","UTF-8", $s1);
if($s2 == $zh){$zh = $s1;}
for($i = 0; $i
$s1 = substr($zh,$i,1);
$p = ord($s1);
if($p > 160){
$s2 = substr($zh,$i++,2);
$ret .= getfirstchar($s2);
}else{
$ret .= $s1;
}
}
return $ret;
}
echo "邮政路小学";
echo pinyin1('邮政路小学');
?>
输出结果为:邮政路小学YZLX
单独输入小字,用getfirstchar("小")可以转换成功。
回复讨论(解决方案)
你的算法只适用于 gb2312 一级字库
没有研习的必要
用qq提供的词典
<?php if ( ! defined('BASEPATH')) exit('No direct script access allowed'); /** * * 汉字转换拼音函数 * * @param $str string 需要转换的字符串 * @param $ishead bool 是否只显示第一个字母 * @param $ucfirst bool 是否第一个字母大写,默认为以下划线分割字之间:eg wo_men_da_jia,如果第三个参数是TRUE则为驼峰式 * @return String 把汉字转化为拼音的字符串 */ function GetPinyin($str,$ishead = FALSE,$ucfirst = FALSE) { $pinyins = array(); $restr = ''; $str = trim(iconv('UTF-8','GB2312',$str)); $slen = strlen($str); if($slen<2) { return $str; } if(count($pinyins)==0) { $fp = fopen(APPPATH.'/resources/pinyin.dat','r'); while(!feof($fp)) { $line = trim(fgets($fp)); $pinyins[$line[0].$line[1]] = substr($line,3,strlen($line)-3); } fclose($fp); } for($i=0;$i<$slen;$i++) { if(ord($str[$i])>0x80) { $c = $str[$i].$str[$i+1]; $i++; if(isset($pinyins[$c])) { if( ! $ishead AND $ucfirst) { $restr .= ucfirst($pinyins[$c]); } elseif( ! $ishead AND ! $ucfirst) { $restr .= '_'.$pinyins[$c]; } else { $restr .= $pinyins[$c][0]; } } else { $restr .= "_"; } } else if( preg_match("/[a-z0-9]/",$str[$i]) ) { $restr .= $str[$i]; } else { $restr .= "_"; } } if($restr[0] == '_') { return substr($restr,1); } else { return $restr; } }/* End of file pinyin_helper.php *//* Location ./application/helpers/pinyin_helper.php */
function Pinyin($_String,$_Code='gb2312') { $_DataKey = "a|ai|an|ang|ao|ba|bai|ban|bang|bao|bei|ben|beng|bi|bian|biao|bie|bin|bing|bo|bu|ca|cai|can|cang|cao|ce|ceng|cha". "|chai|chan|chang|chao|che|chen|cheng|chi|chong|chou|chu|chuai|chuan|chuang|chui|chun|chuo|ci|cong|cou|cu|". "cuan|cui|cun|cuo|da|dai|dan|dang|dao|de|deng|di|dian|diao|die|ding|diu|dong|dou|du|duan|dui|dun|duo|e|en|er". "|fa|fan|fang|fei|fen|feng|fo|fou|fu|ga|gai|gan|gang|gao|ge|gei|gen|geng|gong|gou|gu|gua|guai|guan|guang|gui". "|gun|guo|ha|hai|han|hang|hao|he|hei|hen|heng|hong|hou|hu|hua|huai|huan|huang|hui|hun|huo|ji|jia|jian|jiang". "|jiao|jie|jin|jing|jiong|jiu|ju|juan|jue|jun|ka|kai|kan|kang|kao|ke|ken|keng|kong|kou|ku|kua|kuai|kuan|kuang". "|kui|kun|kuo|la|lai|lan|lang|lao|le|lei|leng|li|lia|lian|liang|liao|lie|lin|ling|liu|long|lou|lu|lv|luan|lue". "|lun|luo|ma|mai|man|mang|mao|me|mei|men|meng|mi|mian|miao|mie|min|ming|miu|mo|mou|mu|na|nai|nan|nang|nao|ne". "|nei|nen|neng|ni|nian|niang|niao|nie|nin|ning|niu|nong|nu|nv|nuan|nue|nuo|o|ou|pa|pai|pan|pang|pao|pei|pen". "|peng|pi|pian|piao|pie|pin|ping|po|pu|qi|qia|qian|qiang|qiao|qie|qin|qing|qiong|qiu|qu|quan|que|qun|ran|rang". "|rao|re|ren|reng|ri|rong|rou|ru|ruan|rui|run|ruo|sa|sai|san|sang|sao|se|sen|seng|sha|shai|shan|shang|shao|". "she|shen|sheng|shi|shou|shu|shua|shuai|shuan|shuang|shui|shun|shuo|si|song|sou|su|suan|sui|sun|suo|ta|tai|". "tan|tang|tao|te|teng|ti|tian|tiao|tie|ting|tong|tou|tu|tuan|tui|tun|tuo|wa|wai|wan|wang|wei|wen|weng|wo|wu". "|xi|xia|xian|xiang|xiao|xie|xin|xing|xiong|xiu|xu|xuan|xue|xun|ya|yan|yang|yao|ye|yi|yin|ying|yo|yong|you". "|yu|yuan|yue|yun|za|zai|zan|zang|zao|ze|zei|zen|zeng|zha|zhai|zhan|zhang|zhao|zhe|zhen|zheng|zhi|zhong|". "zhou|zhu|zhua|zhuai|zhuan|zhuang|zhui|zhun|zhuo|zi|zong|zou|zu|zuan|zui|zun|zuo"; $_DataValue = "-20319|-20317|-20304|-20295|-20292|-20283|-20265|-20257|-20242|-20230|-20051|-20036|-20032|-20026|-20002|-19990". "|-19986|-19982|-19976|-19805|-19784|-19775|-19774|-19763|-19756|-19751|-19746|-19741|-19739|-19728|-19725". "|-19715|-19540|-19531|-19525|-19515|-19500|-19484|-19479|-19467|-19289|-19288|-19281|-19275|-19270|-19263". "|-19261|-19249|-19243|-19242|-19238|-19235|-19227|-19224|-19218|-19212|-19038|-19023|-19018|-19006|-19003". "|-18996|-18977|-18961|-18952|-18783|-18774|-18773|-18763|-18756|-18741|-18735|-18731|-18722|-18710|-18697". "|-18696|-18526|-18518|-18501|-18490|-18478|-18463|-18448|-18447|-18446|-18239|-18237|-18231|-18220|-18211". "|-18201|-18184|-18183|-18181|-18012|-17997|-17988|-17970|-17964|-17961|-17950|-17947|-17931|-17928|-17922". "|-17759|-17752|-17733|-17730|-17721|-17703|-17701|-17697|-17692|-17683|-17676|-17496|-17487|-17482|-17468". "|-17454|-17433|-17427|-17417|-17202|-17185|-16983|-16970|-16942|-16915|-16733|-16708|-16706|-16689|-16664". "|-16657|-16647|-16474|-16470|-16465|-16459|-16452|-16448|-16433|-16429|-16427|-16423|-16419|-16412|-16407". "|-16403|-16401|-16393|-16220|-16216|-16212|-16205|-16202|-16187|-16180|-16171|-16169|-16158|-16155|-15959". "|-15958|-15944|-15933|-15920|-15915|-15903|-15889|-15878|-15707|-15701|-15681|-15667|-15661|-15659|-15652". "|-15640|-15631|-15625|-15454|-15448|-15436|-15435|-15419|-15416|-15408|-15394|-15385|-15377|-15375|-15369". "|-15363|-15362|-15183|-15180|-15165|-15158|-15153|-15150|-15149|-15144|-15143|-15141|-15140|-15139|-15128". "|-15121|-15119|-15117|-15110|-15109|-14941|-14937|-14933|-14930|-14929|-14928|-14926|-14922|-14921|-14914". "|-14908|-14902|-14894|-14889|-14882|-14873|-14871|-14857|-14678|-14674|-14670|-14668|-14663|-14654|-14645". "|-14630|-14594|-14429|-14407|-14399|-14384|-14379|-14368|-14355|-14353|-14345|-14170|-14159|-14151|-14149". "|-14145|-14140|-14137|-14135|-14125|-14123|-14122|-14112|-14109|-14099|-14097|-14094|-14092|-14090|-14087". "|-14083|-13917|-13914|-13910|-13907|-13906|-13905|-13896|-13894|-13878|-13870|-13859|-13847|-13831|-13658". "|-13611|-13601|-13406|-13404|-13400|-13398|-13395|-13391|-13387|-13383|-13367|-13359|-13356|-13343|-13340". "|-13329|-13326|-13318|-13147|-13138|-13120|-13107|-13096|-13095|-13091|-13076|-13068|-13063|-13060|-12888". "|-12875|-12871|-12860|-12858|-12852|-12849|-12838|-12831|-12829|-12812|-12802|-12607|-12597|-12594|-12585". "|-12556|-12359|-12346|-12320|-12300|-12120|-12099|-12089|-12074|-12067|-12058|-12039|-11867|-11861|-11847". "|-11831|-11798|-11781|-11604|-11589|-11536|-11358|-11340|-11339|-11324|-11303|-11097|-11077|-11067|-11055". "|-11052|-11045|-11041|-11038|-11024|-11020|-11019|-11018|-11014|-10838|-10832|-10815|-10800|-10790|-10780". "|-10764|-10587|-10544|-10533|-10519|-10331|-10329|-10328|-10322|-10315|-10309|-10307|-10296|-10281|-10274". "|-10270|-10262|-10260|-10256|-10254"; $_TDataKey = explode('|', $_DataKey); $_TDataValue = explode('|', $_DataValue); $_Data = (php_VERSION>='5.0') ? array_combine($_TDataKey, $_TDataValue) : _Array_Combine($_TDataKey, $_TDataValue); arsort($_Data); reset($_Data); if($_Code != 'gb2312') $_String = _U2_Utf8_Gb($_String); $_Res = ''; for($i=0; $i<strlen($_String); $i++) { $_P = ord(substr($_String, $i, 1)); if($_P>160) { $_Q = ord(substr($_String, ++$i, 1)); $_P = $_P*256 + $_Q - 65536; } $_Res .= _Pinyin($_P, $_Data); } return preg_replace("/[^a-z0-9]*/",'', $_Res); } function _Pinyin($_Num, $_Data) { if ($_Num>0 && $_Num<160 ) return chr($_Num); elseif($_Num<-20319 || $_Num>-10247) return ''; else { foreach($_Data as $k=>$v){ if($v<=$_Num) break; } return $k; } } function _U2_Utf8_Gb($_C) { $_String = ''; if($_C < 0x80) $_String .= $_C; elseif($_C < 0x800) { $_String .= chr(0xC0 | $_C>>6); $_String .= chr(0x80 | $_C & 0x3F); }elseif($_C < 0x10000){ $_String .= chr(0xE0 | $_C>>12); $_String .= chr(0x80 | $_C>>6 & 0x3F); $_String .= chr(0x80 | $_C & 0x3F); } elseif($_C < 0x200000) { $_String .= chr(0xF0 | $_C>>18); $_String .= chr(0x80 | $_C>>12 & 0x3F); $_String .= chr(0x80 | $_C>>6 & 0x3F); $_String .= chr(0x80 | $_C & 0x3F); } return iconv('UTF-8', 'GB2312', $_String); } function _Array_Combine($_Arr1, $_Arr2) { for($i=0; $i<count($_Arr1); $i++) $_Res[$_Arr1[$i]] = $_Arr2[$i]; return $_Res; } echo Pinyin('山东菏泽牡丹区'); //默认是gb编码echo Pinyin('这是中国山东菏泽牡丹区',1); //第二个参数随意设置即为utf8编码?>
谢谢。我试试
用qq提供的词典
词典在哪,给个下载地址啊。。。源码中bat地址换上去就可以了吗
pinyin.dat 下载地址搜索
http://www.baidu.com/s?wd=pinyin.dat&ie=utf-8

Hot AI Tools

Undresser.AI Undress
AI-powered app for creating realistic nude photos

AI Clothes Remover
Online AI tool for removing clothes from photos.

Undress AI Tool
Undress images for free

Clothoff.io
AI clothes remover

AI Hentai Generator
Generate AI Hentai for free.

Hot Article

Hot Tools

Notepad++7.3.1
Easy-to-use and free code editor

SublimeText3 Chinese version
Chinese version, very easy to use

Zend Studio 13.0.1
Powerful PHP integrated development environment

Dreamweaver CS6
Visual web development tools

SublimeText3 Mac version
God-level code editing software (SublimeText3)

Hot Topics

11 Best PHP URL Shortener Scripts (Free and Premium)

Working with Flash Session Data in Laravel

Build a React App With a Laravel Back End: Part 2, React

Simplified HTTP Response Mocking in Laravel Tests

cURL in PHP: How to Use the PHP cURL Extension in REST APIs

12 Best PHP Chat Scripts on CodeCanyon

Announcement of 2025 PHP Situation Survey
