GBK と UTF-8 を相互に変換する、PHP で書かれたエンコード変換プログラムのコードを見てみましょう。
/**
 * Decodes a JavaScript escape()-style string byte by byte.
 *
 * - "%uXXXX" is replaced by the decimal HTML numeric entity "&#N;" of that
 *   code point.
 * - "%XX" is replaced by the single byte whose hex value is XX.
 * - Every other character is copied through unchanged.
 *
 * @param string $source Escaped input string.
 * @return string Decoded string.
 */
function phpUnescape_no($source) {
    $decodedStr = "";
    $pos = 0;
    $len = strlen($source);
    while ($pos < $len) {
        $charAt = substr($source, $pos, 1);
        if ($charAt == '%') {
            $pos++;
            $charAt = substr($source, $pos, 1);
            if ($charAt == 'u') {
                // Unicode escape: four hex digits follow the "u".
                $pos++;
                $unicodeHexVal = substr($source, $pos, 4);
                $unicode = hexdec($unicodeHexVal);
                // The entity consists of ASCII characters only, so it is
                // appended directly; no utf8_encode() round trip is needed.
                $decodedStr .= "&#" . $unicode . ';';
                $pos += 4;
            } else {
                // Escaped single-byte character: two hex digits.
                $hexVal = substr($source, $pos, 2);
                $decodedStr .= chr(hexdec($hexVal));
                $pos += 2;
            }
        } else {
            $decodedStr .= $charAt;
            $pos++;
        }
    }
    return $decodedStr;
}
/////////// /////////////////////////////////// ////////////// //////////
/**
 * Decodes a JavaScript escape()-style string and returns it via UTFtoGBK().
 *
 * The input is tokenised into "%uXXXX" escapes, "%XX" escapes, a literal "*",
 * and runs of characters in the class [0-9a-zA-Z.+-_]; characters matching
 * none of these are dropped by preg_match_all(). The decoded tokens are
 * assembled as a UTF-8 byte string, which is then passed to UTFtoGBK().
 *
 * @param string $escstr Escaped input string.
 * @return mixed Whatever UTFtoGBK() returns for the assembled UTF-8 string.
 */
function phpUnescape($escstr){
    preg_match_all('/%u[0-9A-Za-z]{4}|%.{2}|\*|[0-9a-zA-Z.+-_]+/', $escstr, $matches);
    $c = "";
    foreach ($matches[0] as $val) {
        if (substr($val, 0, 1) != "%") {
            // Unescaped run (alphanumerics, + - _ and friends): copy as-is.
            $c .= $val;
        } elseif (substr($val, 1, 1) != "u") {
            // "%XX": a single byte given as two hex digits.
            $c .= chr(hexdec(substr($val, 1, 2)));
        } else {
            // "%uXXXX": a 16-bit code point, emitted as UTF-8.
            $code = intval(substr($val, 2), 16);
            if ($code < 0x80) { // 0000-007F
                $c .= chr($code);
            } elseif ($code < 0x800) { // 0080-07FF
                // Integer shifts/masks instead of "/" and "%" so no float is
                // ever fed to the bitwise OR.
                $c .= chr(0xC0 | ($code >> 6));
                $c .= chr(0x80 | ($code & 0x3F));
            } else { // 0800-FFFF
                $c .= chr(0xE0 | ($code >> 12));
                $c .= chr(0x80 | (($code >> 6) & 0x3F));
                $c .= chr(0x80 | ($code & 0x3F));
            }
        }
    }
    return UTFtoGBK($c);
}
/**
 * Runs a string through a Chinese("UTF8","GBK") converter object.
 *
 * The Chinese class is defined outside this file section; presumably it
 * converts UTF-8 input to GBK -- confirm against that class.
 *
 * @param string $CS Input string.
 * @return mixed Result of Chinese::Convert() for $CS.
 */
function UTFtoGBK($CS){
    $utf8ToGbk = new Chinese("UTF8", "GBK");
    $converted = $utf8ToGbk->Convert($CS);
    return $converted;
}
/**
 * Escapes a string, choosing the implementation by mbstring availability.
 *
 * Delegates to phpEscape_yes() when mb_convert_encoding() exists, otherwise
 * to phpEscape_no().
 *
 * @param string $str String to escape.
 * @return mixed The value returned by the selected implementation.
 */
function phpEscape($str){
    if (function_exists('mb_convert_encoding')) {
        return phpEscape_yes($str);
    }
    return phpEscape_no($str);
}
/**
 * Escapes a string character by character using mbstring.
 *
 * Each multi-byte character (in $encoding) becomes "%u" followed by the
 * upper-case hex of its UCS-2 representation; each single-byte character
 * becomes "%" followed by its upper-case two-digit hex value.
 *
 * @param string $string   Input string encoded in $encoding.
 * @param string $encoding mbstring encoding name of the input.
 * @return string Escaped string ('' for empty input).
 */
function phpEscape_yes($string, $encoding = 'GBK') {
    // Initialise the accumulator so empty input yields '' rather than an
    // undefined variable.
    $return = '';
    // Character count is loop-invariant; compute it once.
    $length = mb_strlen($string, $encoding);
    for ($x = 0; $x < $length; $x++) {
        $str = mb_substr($string, $x, 1, $encoding);
        if (strlen($str) > 1) {
            $return .= '%u' . strtoupper(bin2hex(mb_convert_encoding($str, 'UCS-2', $encoding)));
        } else {
            $return .= '%' . strtoupper(bin2hex($str));
        }
    }
    return $return;
}
/**
 * Escapes a GBK string without mbstring.
 *
 * The input is split into two-byte sequences whose first byte is 0x80-0xFF
 * and runs of bytes 0x01-0x7F. Two-byte sequences are converted with
 * GBKtoUCS2() and emitted as "%u" + hex; single-byte runs are passed through
 * rawurlencode().
 *
 * @param string $str Input string (GBK bytes).
 * @return string Escaped string ('' for empty input).
 */
function phpEscape_no($str){
    $reString = '';
    // High-byte lead + one trailing byte, or a run of 7-bit bytes.
    preg_match_all('/[\x80-\xff].|[\x01-\x7f]+/', $str, $newstr);
    foreach ($newstr[0] as $v) {
        if (ord($v) >= 0x80) {
            $tmpString = bin2hex(GBKtoUCS2($v));
            // Case-insensitive substring test, same matching rule as the
            // removed eregi("WIN", PHP_OS).
            // NOTE(review): this also matches "Darwin"; confirm whether the
            // byte swap is really meant to depend on the OS at all.
            if (stripos(PHP_OS, "WIN") === false) {
                $tmpString = substr($tmpString, 2, 2) . substr($tmpString, 0, 2);
            }
            $reString .= "%u" . $tmpString;
        } else {
            $reString .= rawurlencode($v);
        }
    }
    return $reString;
}
/**
 * Runs a string through a Chinese("GBK","UTF8") converter object and passes
 * the result to utf8ToUnicode() with its default byte order.
 *
 * The Chinese class is defined outside this file section; presumably it
 * converts GBK input to UTF-8 -- confirm against that class.
 *
 * @param string $CS Input string.
 * @return string Result of utf8ToUnicode() on the converted string.
 */
function GBKtoUCS2($CS){
    $gbkToUtf8 = new Chinese("GBK", "UTF8");
    $utf8 = $gbkToUtf8->Convert($CS);
    return utf8ToUnicode($utf8);
}
/**
 * Converts a UTF-8 byte string to a UCS-2 byte string (two bytes per
 * character).
 *
 * Handles one-byte (0xxxxxxx), two-byte (110xxxxx 10xxxxxx) and three-byte
 * (1110xxxx 10xxxxxx 10xxxxxx) sequences. Any byte that does not start one of
 * these well-formed sequences is skipped.
 *
 * @param string $str   UTF-8 input.
 * @param string $order "little" for low byte first; any other value
 *                      (including the default) gives high byte first.
 * @return string UCS-2 bytes.
 */
function utf8ToUnicode($str,$order="big ")
{
    $ucs2string = "";
    $n = strlen($str);
    $lowByteFirst = ($order == "little");
    for ($i = 0; $i < $n; $i++) {
        $ord = ord($str[$i]);
        if ($ord <= 0x7F) { // 0xxxxxxx
            $ucsCode = $ord;
        } elseif (($ord & 0xE0) == 0xC0
            && $i + 1 < $n
            && (ord($str[$i + 1]) & 0xC0) == 0x80
        ) { // 110xxxxx 10xxxxxx
            $ucsCode = (($ord & 0x1F) << 6) | (ord($str[$i + 1]) & 0x3F);
            $i++;
        } elseif (($ord & 0xF0) == 0xE0
            && $i + 2 < $n
            && (ord($str[$i + 1]) & 0xC0) == 0x80
            && (ord($str[$i + 2]) & 0xC0) == 0x80
        ) { // 1110xxxx 10xxxxxx 10xxxxxx
            $ucsCode = (($ord & 0x0F) << 12)
                | ((ord($str[$i + 1]) & 0x3F) << 6)
                | (ord($str[$i + 2]) & 0x3F);
            $i += 2;
        } else {
            // Malformed or unsupported lead byte: nothing is emitted.
            continue;
        }
        // Split numerically; slicing unpadded dechex() output would put the
        // digits of code points below U+1000 into the wrong byte.
        $h = chr($ucsCode >> 8);
        $l = chr($ucsCode & 0xFF);
        $ucs2string .= $lowByteFirst ? $l . $h : $h . $l;
    }
    return $ucs2string;
}
/////////////////////////////////////////// ///////////
// Decodes a string through a character-substitution table and then passes
// the result to phpUnescape().
//
// Input whose third character is not "*" is returned unchanged. Otherwise the
// first two characters of $Txt are used as an offset into a lookup string
// built by doubling the part of $MTxt that starts at index 70.
//
// NOTE(review): this copy of the function is damaged and cannot run as-is:
//   - the "for" header is cut off after "$ii",
//   - $w is read but never assigned in the visible code,
//   - the final return statement has an unbalanced ")".
// Restore these from the original source before relying on this function.
function unescapeFuncMake($Txt){
if ($Txt[2]!="*") return $Txt;
$ETxt = "";
// Substitution alphabet; only the part from index 70 onward feeds $TTxt.
$MTxt = " egy+nb@QwXvCWjKPRxVzDl/h7EOMtSa9f6*FpNr81i_0kqdG2LBcuZIAJYo34m-sT%5.UH3SYZ0hzt/y@qDTNECf1BpujiO.X6ks+oIR8GPVg9wbm%xJvKLWrn*F4HAe-QladM27Uc5_";
$TTxtnum = substr($Txt,0,2);
$TTxt = substr ((substr($MTxt,70).substr($MTxt,70)),$TTxtnum);
for ($ii=3; $ii
// Find the current character in $TTxt starting at offset $w, remove that
// offset again, and use the result as an index into $MTxt.
$k = strpos($TTxt,$Txt[$ii],$w)-$w;
$ETxt .= $MTxt[$k];
}
return phpUnescape($ETxt) );
}
// Builds an encoded string $ETxt one character at a time and returns it.
//
// Input that is empty, or whose third character is "*", is returned
// unchanged. Each loop iteration appends $ETxtstr[$k], where $ETxtstr is the
// part of $MTxt from index 70 onward, doubled, then cut at offset $TTxt.
//
// NOTE(review): this copy of the function is damaged and cannot run as-is:
//   - the "function" keyword is fused with the function name on the first line
//     (presumably "function escapeFuncMake" -- confirm),
//   - the $MTxt string literal is truncated and has run into a later
//     statement, so the assignments of $TTxt and the initial $ETxt are lost,
//   - the "for" header is cut off after "$ii",
//   - $k is read but never assigned in the visible code.
// Restore these from the original source before relying on this function.
functionscapeFuncMake($Txt){
if ($Txt=="" || $Txt[2]=="*") return $Txt;
$MTxt = "egy+nb@QwXvCWjKPRxVzDl/h7EOMtSa9f6*FpNr81i_0kqdG2LBcuZIAJYo34m-sT %5.UH3SYZ0hzt/y@qDTNECf1BpujiO. $TTxt .($TTxt>9?"*":"**");
for ($ii=0; $ii
$ETxtstr = substr((substr($MTxt,70).substr($MTxt,70)),$TTxt) ;
$ETxt .= $ETxtstr[$k ];
}
return $ETxt;
}