/**
 * Convert a UTF-8 encoded string to GBK.
 *
 * The Unicode => GB lookup table is loaded lazily into the global
 * $UC2GBTABLE from CODETABLEDIR . 'gb-unicode.table' the first time it is
 * needed. Each table line is read as two hex fields: characters 0-5 hold the
 * GB code (stored without the 0x8080 offset) and characters 7-12 hold the
 * Unicode code point.
 *
 * Handling of the input bytes:
 *  - ASCII bytes (< 0x80) are copied through unchanged.
 *  - A well-formed multi-byte sequence found in the table becomes its
 *    two-byte GBK form.
 *  - A well-formed sequence missing from the table becomes a decimal
 *    numeric entity ("&#NNNN;").
 *  - A malformed or truncated byte becomes "?" and decoding resumes at the
 *    next byte.
 *
 * @param string $utfstr UTF-8 encoded input.
 * @return string GBK encoded result, passed through trim().
 */
function utf8_to_gbk($utfstr)
{
    global $UC2GBTABLE;

    if (empty($UC2GBTABLE)) {
        $UC2GBTABLE = array();
        $fp = fopen(CODETABLEDIR . 'gb-unicode.table', 'rb');
        // If the table cannot be opened, carry on with an empty table
        // instead of passing false to fgets()/fclose().
        if ($fp !== false) {
            while (($l = fgets($fp, 15)) !== false) {
                $UC2GBTABLE[hexdec(substr($l, 7, 6))] = hexdec(substr($l, 0, 6));
            }
            fclose($fp);
        }
    }

    $utfstr = (string) $utfstr;
    $ulen = strlen($utfstr);
    $okstr = '';
    $i = 0;

    while ($i < $ulen) {
        $lead = ord($utfstr[$i]);

        if ($lead < 0x80) {
            $okstr .= $utfstr[$i];
            $i++;
            continue;
        }

        // Number of continuation bytes announced by the lead byte.
        if (($lead & 0xE0) === 0xC0) {
            $extra = 1;
        } elseif (($lead & 0xF0) === 0xE0) {
            $extra = 2;
        } elseif (($lead & 0xF8) === 0xF0) {
            $extra = 3;
        } else {
            $extra = 0; // stray continuation byte or unsupported lead byte
        }

        // The whole sequence must fit in the input and every trailing byte
        // must look like 10xxxxxx.
        $valid = $extra > 0 && $i + $extra < $ulen;
        for ($j = 1; $valid && $j <= $extra; $j++) {
            $valid = (ord($utfstr[$i + $j]) & 0xC0) === 0x80;
        }

        if (!$valid) {
            $okstr .= '?';
            $i++;
            continue;
        }

        $code = utf8_to_unicode(substr($utfstr, $i, $extra + 1));
        $i += $extra + 1;

        if (isset($UC2GBTABLE[$code])) {
            // The table stores codes without the high bits; adding 0x8080
            // sets the top bit of both GBK bytes.
            $gb = $UC2GBTABLE[$code] + 0x8080;
            $okstr .= chr(($gb >> 8) & 0xFF) . chr($gb & 0xFF);
        } else {
            $okstr .= '&#' . $code . ';';
        }
    }

    return trim($okstr);
}

/**
 * Convert a GBK encoded string to UTF-8.
 *
 * The GB => Unicode lookup table is loaded lazily into the global $CODETABLE
 * from CODETABLEDIR . 'gb-unicode.table' the first time it is needed. The
 * key is the GB code (characters 0-5 of each line, parsed as hex) and the
 * value is the raw Unicode hex field (characters 7-12).
 *
 * Bytes <= 0x80 are copied through unchanged. Any byte > 0x80 is taken as the
 * first byte of a two-byte character; pairs that are not in the table, and a
 * dangling lead byte at the very end of the input, are dropped.
 *
 * @param string $gbstr GBK encoded input.
 * @return string UTF-8 encoded result.
 */
function gbk_to_utf8($gbstr)
{
    global $CODETABLE;

    if (empty($CODETABLE)) {
        $CODETABLE = array();
        $fp = fopen(CODETABLEDIR . 'gb-unicode.table', 'rb');
        // If the table cannot be opened, carry on with an empty table
        // instead of passing false to fgets()/fclose().
        if ($fp !== false) {
            while (($l = fgets($fp, 15)) !== false) {
                $CODETABLE[hexdec(substr($l, 0, 6))] = substr($l, 7, 6);
            }
            fclose($fp);
        }
    }

    $gbstr = (string) $gbstr;
    $len = strlen($gbstr);
    $ret = '';
    $i = 0;

    // Walk by index: testing the remaining string for truthiness would stop
    // early on a trailing "0", and re-slicing the string is quadratic.
    while ($i < $len) {
        if (ord($gbstr[$i]) <= 0x80) {
            $ret .= $gbstr[$i];
            $i++;
            continue;
        }

        $thisW = substr($gbstr, $i, 2);
        $i += 2;
        if (strlen($thisW) < 2) {
            break; // dangling lead byte at end of input
        }

        $key = hexdec(bin2hex($thisW)) - 0x8080;
        if (!isset($CODETABLE[$key])) {
            continue; // no mapping for this pair
        }

        // unicode_to_utf8() returns the UTF-8 bytes as concatenated decimal
        // numbers, three digits per byte for anything >= 0x80.
        $utf8 = unicode_to_utf8(hexdec($CODETABLE[$key]));
        for ($k = 0, $n = strlen($utf8); $k < $n; $k += 3) {
            $ret .= chr((int) substr($utf8, $k, 3));
        }
    }

    return $ret;
}

/**
 * Encode a Unicode code point as UTF-8 byte values.
 *
 * NOTE: the result is NOT a binary UTF-8 string. Each byte value is appended
 * in decimal, so U+4E2D yields "228184173" (228, 184, 173). Code points
 * below 0x80 yield their own decimal value (one to three digits). The caller
 * is expected to split the result into three-digit groups and chr() each
 * one, as gbk_to_utf8() does.
 *
 * @param int $c Unicode code point.
 * @return string Decimal byte values concatenated; '' when $c >= 0x200000.
 */
function unicode_to_utf8($c)
{
    $str = '';
    if ($c < 0x80) {
        $str .= $c;
    } elseif ($c < 0x800) {
        $str .= (0xC0 | $c >> 6);
        $str .= (0x80 | $c & 0x3F);
    } elseif ($c < 0x10000) {
        $str .= (0xE0 | $c >> 12);
        $str .= (0x80 | $c >> 6 & 0x3F);
        $str .= (0x80 | $c & 0x3F);
    } elseif ($c < 0x200000) {
        $str .= (0xF0 | $c >> 18);
        $str .= (0x80 | $c >> 12 & 0x3F);
        $str .= (0x80 | $c >> 6 & 0x3F);
        $str .= (0x80 | $c & 0x3F);
    }
    return $str;
}

/**
 * Decode one UTF-8 encoded character into its Unicode code point.
 *
 * The sequence length is taken from strlen($c); the bytes themselves are not
 * validated.
 *
 * @param string $c A single character as 1 to 4 UTF-8 bytes.
 * @return int|null The code point, or null when $c is not 1-4 bytes long.
 */
function utf8_to_unicode($c)
{
    switch (strlen($c)) {
        case 1:
            return ord($c);
        case 2:
            // 110xxxxx 10xxxxxx
            $n = (ord($c[0]) & 0x1f) << 6;
            $n += ord($c[1]) & 0x3f;
            return $n;
        case 3:
            // 1110xxxx 10xxxxxx 10xxxxxx
            $n = (ord($c[0]) & 0x0f) << 12;
            $n += (ord($c[1]) & 0x3f) << 6;
            $n += ord($c[2]) & 0x3f;
            return $n;
        case 4:
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            $n = (ord($c[0]) & 0x07) << 18;
            $n += (ord($c[1]) & 0x3f) << 12;
            $n += (ord($c[2]) & 0x3f) << 6;
            $n += ord($c[3]) & 0x3f;
            return $n;
        default:
            return null;
    }
}