PHP 중국어 병음 첫 글자(이니셜) 도구 클래스

WBOY
게시일: 2016-08-08 09:32:44
원본
909명이 탐색했습니다.
<?php

/**
 * Utility class for the pinyin initials of Chinese text.
 *  Notes: English-only strings (digits included) are returned unchanged     e.g. abc123 => abc123
 *         Chinese strings yield their pinyin initials                       e.g. 测试字符串 => CSZFC
 *         Mixed strings yield the pinyin initials plus the English letters  e.g. 我i我j => WIWJ
 *  Example:
 *  $py = new str2py();
 *
 *  $result = $py->getInitials('周杰伦');
 *
 *  // Get the first letter
 *  $result = $py->getFirstString('abc');    // A
 *  $result = $py->getFirstString("周杰伦"); // Z
 */

/**
 * Converts Chinese text to the upper-case initial letters of its pinyin.
 *
 * Every Chinese character is reduced to its two-byte GB2312 code, which is
 * matched against a table of range boundaries: GB2312 level-1 characters are
 * ordered by pinyin, so each letter owns one contiguous range of codes.
 *
 *   - input without Chinese characters is returned unchanged  e.g. abc123 => abc123
 *   - Chinese input yields its initials                       e.g. 测试字符串 => CSZFC
 *   - mixed input keeps the ASCII part, upper-cased           e.g. 我i我j => WIWJ
 *
 * Characters that have no entry in the table (full-width punctuation, GB2312
 * level-2 characters, characters that do not exist in GB2312) are left out of
 * the result.
 *
 * Usage:
 *   $py = new str2py();
 *   $py->getInitials('周杰伦');     // "ZJL"
 *   $py->getFirstString('abc');     // "A"
 *   $py->getFirstString('周杰伦');  // "Z"
 */
class str2py
{
    /** Bytes below this value are handled as single-byte (non-Chinese) characters. */
    const SINGLE_BYTE_LIMIT = 160;

    /**
     * Code (lead byte * 1000 + trail byte) of 0xD7F9, the last GB2312 level-1
     * character. Codes above it belong to level 2, which is not ordered by
     * pinyin, so the boundary table cannot be applied to them.
     */
    const LAST_LEVEL1_CODE = 215249;

    /**
     * First GB2312 code (lead byte * 1000 + trail byte) of each pinyin initial,
     * in ascending order. A letter covers every code from its own key up to the
     * key of the next entry.
     */
    private $_pinyins = array(
        176161 => 'A',
        176197 => 'B',
        178193 => 'C',
        180238 => 'D',
        182234 => 'E',
        183162 => 'F',
        184193 => 'G',
        185254 => 'H',
        187247 => 'J',
        191166 => 'K',
        192172 => 'L',
        194232 => 'M',
        196195 => 'N',
        197182 => 'O',
        197190 => 'P',
        198218 => 'Q',
        200187 => 'R',
        200246 => 'S',
        203250 => 'T',
        205218 => 'W',
        206244 => 'X',
        209185 => 'Y',
        212209 => 'Z',
    );

    /** Keys of $_pinyins as a plain list; built lazily on the first lookup. */
    private $_codes = null;

    /** Charset name handed to the constructor. */
    private $_charset = null;

    /**
     * @param string $charset Encoding of the strings passed in later. "utf-8"
     *                        (any letter case, "utf8" accepted too) triggers a
     *                        conversion to GB2312; any other value means the
     *                        input already is GB2312.
     */
    public function __construct($charset = 'utf-8')
    {
        $this->_charset = $charset;
    }

    /**
     * Tells whether the configured charset is UTF-8, ignoring letter case so
     * that 'UTF-8' and 'utf-8' behave the same.
     *
     * @return bool
     */
    private function _isUtf8()
    {
        $name = strtolower(trim((string) $this->_charset));

        return $name === 'utf-8' || $name === 'utf8';
    }

    /**
     * Tells whether the first byte of $char is a single-byte character, i.e.
     * not the lead byte of a double-byte GB2312 character.
     *
     * @param string $char
     * @return bool
     */
    private function _isAscii($char)
    {
        return ord(substr($char, 0, 1)) < self::SINGLE_BYTE_LIMIT;
    }

    /**
     * Runs iconv() from UTF-8 to GB2312 without letting its notice about
     * unconvertible input reach the caller's error output.
     *
     * @param string $text
     * @return string|false Converted bytes, or false when iconv() gave up.
     */
    private function _quietIconv($text)
    {
        set_error_handler(function () {
            return true;
        });
        $converted = iconv('UTF-8', 'GB2312', $text);
        restore_error_handler();

        return $converted;
    }

    /**
     * Returns $str as GB2312 bytes.
     *
     * The whole string is converted in one call when possible. If that fails
     * because some character has no GB2312 form, the string is converted one
     * character at a time and only the offending characters are dropped,
     * instead of losing the entire input.
     *
     * @param string $str
     * @return string GB2312 bytes; '' when $str is not valid UTF-8.
     */
    private function _toGb2312($str)
    {
        if (!$this->_isUtf8())
        {
            return $str;
        }

        $whole = $this->_quietIconv($str);
        if ($whole !== false)
        {
            return $whole;
        }

        $chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false)
        {
            return '';
        }

        $bytes = '';
        foreach ($chars as $char)
        {
            if (strlen($char) === 1)
            {
                $bytes .= $char;
                continue;
            }
            $piece = $this->_quietIconv($char);
            if ($piece !== false)
            {
                $bytes .= $piece;
            }
        }

        return $bytes;
    }

    /**
     * Splits a GB2312 byte string into characters: one byte for single-byte
     * characters, two bytes for Chinese characters.
     *
     * @param string $str GB2312 bytes
     * @return array List of one- or two-byte strings.
     */
    private function _cutWord($str)
    {
        $words = array();
        $length = strlen($str);
        $pos = 0;

        while ($pos < $length)
        {
            if ($this->_isAscii($str[$pos]))
            {
                $words[] = $str[$pos];
                $pos++;
            }
            elseif ($pos + 1 < $length)
            {
                $words[] = substr($str, $pos, 2);
                $pos += 2;
            }
            else
            {
                /* Lead byte without its trail byte: truncated input, nothing to look up. */
                break;
            }
        }

        return $words;
    }

    /**
     * Returns the pinyin initials of $str.
     *
     * @param string $str Text in the charset given to the constructor.
     * @return string $str itself when it contains no multi-byte character;
     *                otherwise the upper-cased mix of single-byte characters
     *                and pinyin initials. '' for empty or non-scalar input.
     */
    public function getInitials($str)
    {
        if (!is_scalar($str))
        {
            return '';
        }
        $str = (string) $str;
        /* Compare against '' explicitly: empty() would also discard the valid input "0". */
        if ($str === '')
        {
            return '';
        }

        /* No byte can start a Chinese character, so there is nothing to transliterate. */
        if (!preg_match('/[\xA0-\xFF]/', $str))
        {
            return $str;
        }

        $result = '';
        foreach ($this->_cutWord($this->_toGb2312($str)) as $word)
        {
            if ($this->_isAscii($word))
            {
                $result .= $word;
                continue;
            }

            $code = ord($word[0]) * 1000 + ord($word[1]);
            $key = $this->_search($code);
            if ($key !== -1)
            {
                $result .= $this->_pinyins[$key];
            }
        }

        return strtoupper($result);
    }

    /**
     * Returns the upper-cased first character of getInitials($str).
     *
     * @param string $str
     * @return string One character, or '' when getInitials() yields nothing.
     */
    public function getFirstString($str)
    {
        $initials = $this->getInitials($str);
        if ($initials === '')
        {
            return '';
        }

        return strtoupper(substr($initials, 0, 1));
    }

    /**
     * Maps an ASCII code to the character used for it: digits and upper-case
     * letters as they are, lower-case letters upper-cased, anything else '-'.
     * Not called from anywhere inside this class.
     *
     * @param int $ascii
     * @return string
     */
    private function _getChar($ascii)
    {
        if (($ascii >= 48 && $ascii <= 57) || ($ascii >= 65 && $ascii <= 90))
        {
            return chr($ascii); /* digit or A--Z */
        }
        if ($ascii >= 97 && $ascii <= 122)
        {
            return chr($ascii - 32); /* a--z */
        }

        return '-'; /* anything else */
    }

    /**
     * Finds the boundary of the pinyin range that contains $code (binary search).
     *
     * @param int $code lead byte * 1000 + trail byte of a GB2312 character
     * @return int The greatest key of $_pinyins that is <= $code, or -1 when
     *             $code lies outside the GB2312 level-1 area.
     */
    private function _search($code)
    {
        if ($this->_codes === null)
        {
            $this->_codes = array_keys($this->_pinyins);
        }
        $codes = $this->_codes;

        if ($code < $codes[0] || $code > self::LAST_LEVEL1_CODE)
        {
            return -1;
        }

        /* Invariant: $codes[$low] <= $code. Rounding the midpoint up guarantees progress. */
        $low = 0;
        $high = count($codes) - 1;
        while ($low < $high)
        {
            $mid = (int) (($low + $high + 1) / 2);
            if ($codes[$mid] <= $code)
            {
                $low = $mid;
            }
            else
            {
                $high = $mid - 1;
            }
        }

        return $codes[$low];
    }

}
로그인 후 복사

이상으로 PHP 중국어 병음 이니셜 도구 클래스와 관련 내용을 소개했습니다. PHP 튜토리얼에 관심 있는 분들께 도움이 되기를 바랍니다.

관련 라벨:
출처: php.cn
본 웹사이트의 성명
본 글의 내용은 네티즌들의 자발적인 기여로 작성되었으며, 저작권은 원저작자에게 있습니다. 본 사이트는 이에 상응하는 법적 책임을 지지 않습니다. 표절이나 침해가 의심되는 콘텐츠를 발견한 경우 admin@php.cn으로 문의하세요.
인기 튜토리얼
더>
최신 다운로드
더>
웹 효과
웹사이트 소스 코드
웹사이트 자료
프론트엔드 템플릿