php根據漢字獲取拼音(php基於拼音搜索實現原理)

php根據漢字獲取拼音(php基於拼音搜索實現原理)php

代碼一:獲取字符串漢字首字母,兼容GBK和UTF-8ide

<?php
function getfirstchar($s0){   //獲取單個漢字拼音首字母。注意:此處不要糾結。漢字拼音是沒有以U和V開頭的
    $fchar = ord($s0{0});
    if($fchar >= ord("A") and $fchar <= ord("z") )return strtoupper($s0{0});
    $s1 = iconv("UTF-8","gb2312", $s0);
    $s2 = iconv("gb2312","UTF-8", $s1);
    if($s2 == $s0){$s = $s1;}else{$s = $s0;}
    $asc = ord($s{0}) * 256 + ord($s{1}) - 65536;
    if($asc >= -20319 and $asc <= -20284) return "A";
    if($asc >= -20283 and $asc <= -19776) return "B";
    if($asc >= -19775 and $asc <= -19219) return "C";
    if($asc >= -19218 and $asc <= -18711) return "D";
    if($asc >= -18710 and $asc <= -18527) return "E";
    if($asc >= -18526 and $asc <= -18240) return "F";
    if($asc >= -18239 and $asc <= -17923) return "G";
    if($asc >= -17922 and $asc <= -17418) return "H";
    if($asc >= -17922 and $asc <= -17418) return "I";
    if($asc >= -17417 and $asc <= -16475) return "J";
    if($asc >= -16474 and $asc <= -16213) return "K";
    if($asc >= -16212 and $asc <= -15641) return "L";
    if($asc >= -15640 and $asc <= -15166) return "M";
    if($asc >= -15165 and $asc <= -14923) return "N";
    if($asc >= -14922 and $asc <= -14915) return "O";
    if($asc >= -14914 and $asc <= -14631) return "P";
    if($asc >= -14630 and $asc <= -14150) return "Q";
    if($asc >= -14149 and $asc <= -14091) return "R";
    if($asc >= -14090 and $asc <= -13319) return "S";
    if($asc >= -13318 and $asc <= -12839) return "T";
    if($asc >= -12838 and $asc <= -12557) return "W";
    if($asc >= -12556 and $asc <= -11848) return "X";
    if($asc >= -11847 and $asc <= -11056) return "Y";
    if($asc >= -11055 and $asc <= -10247) return "Z";
    return NULL;
    //return $s0;
}
function pinyin_long($zh){  //獲取整條字符串漢字拼音首字母
    $ret = "";
    $s1 = iconv("UTF-8","gb2312", $zh);
    $s2 = iconv("gb2312","UTF-8", $s1);
    if($s2 == $zh){$zh = $s1;}
    for($i = 0; $i < strlen($zh); $i++){
        $s1 = substr($zh,$i,1);
        $p = ord($s1);
        if($p > 160){
            $s2 = substr($zh,$i++,2);
            $ret .= getfirstchar($s2);
        }else{
            $ret .= $s1;
        }
    }
    return $ret;
}
echo pinyin_long('*《,@#$123HAHadf一年後');
?>


輸出:
ui


代碼二:獲取字符串全拼,僅支持GBKthis

<?php
class ChineseSpell {//注意僅支持:GBK,故傳入參數前須轉碼
    /**
    * @var array $chineseSpellList 拼音編碼對應表
    * @access private
    */
    private $chineseSpellList = array(
    'a'=>-20319,
    'ai'=>-20317,
    'an'=>-20304,
    'ang'=>-20295,
    'ao'=>-20292,
    'ba'=>-20283,
    'bai'=>-20265,
    'ban'=>-20257,
    'bang'=>-20242,
    'bao'=>-20230,
    'bei'=>-20051,
    'ben'=>-20036,
    'beng'=>-20032,
    'bi'=>-20026,
    'bian'=>-20002,
    'biao'=>-19990,
    'bie'=>-19986,
    'bin'=>-19982,
    'bing'=>-19976,
    'bo'=>-19805,
    'bu'=>-19784,
    'ca'=>-19775,
    'cai'=>-19774,
    'can'=>-19763,
    'cang'=>-19756,
    'cao'=>-19751,
    'ce'=>-19746,
    'ceng'=>-19741,
    'cha'=>-19739,
    'chai'=>-19728,
    'chan'=>-19725,
    'chang'=>-19715,
    'chao'=>-19540,
    'che'=>-19531,
    'chen'=>-19525,
    'cheng'=>-19515,
    'chi'=>-19500,
    'chong'=>-19484,
    'chou'=>-19479,
    'chu'=>-19467,
    'chuai'=>-19289,
    'chuan'=>-19288,
    'chuang'=>-19281,
    'chui'=>-19275,
    'chun'=>-19270,
    'chuo'=>-19263,
    'ci'=>-19261,
    'cong'=>-19249,
    'cou'=>-19243,
    'cu'=>-19242,
    'cuan'=>-19238,
    'cui'=>-19235,
    'cun'=>-19227,
    'cuo'=>-19224,
    'da'=>-19218,
    'dai'=>-19212,
    'dan'=>-19038,
    'dang'=>-19023,
    'dao'=>-19018,
    'de'=>-19006,
    'deng'=>-19003,
    'di'=>-18996,
    'dian'=>-18977,
    'diao'=>-18961,
    'die'=>-18952,
    'ding'=>-18783,
    'diu'=>-18774,
    'dong'=>-18773,
    'dou'=>-18763,
    'du'=>-18756,
    'duan'=>-18741,
    'dui'=>-18735,
    'dun'=>-18731,
    'duo'=>-18722,
    'e'=>-18710,
    'en'=>-18697,
    'er'=>-18696,
    'fa'=>-18526,
    'fan'=>-18518,
    'fang'=>-18501,
    'fei'=>-18490,
    'fen'=>-18478,
    'feng'=>-18463,
    'fo'=>-18448,
    'fou'=>-18447,
    'fu'=>-18446,
    'ga'=>-18239,
    'gai'=>-18237,
    'gan'=>-18231,
    'gang'=>-18220,
    'gao'=>-18211,
    'ge'=>-18201,
    'gei'=>-18184,
    'gen'=>-18183,
    'geng'=>-18181,
    'gong'=>-18012,
    'gou'=>-17997,
    'gu'=>-17988,
    'gua'=>-17970,
    'guai'=>-17964,
    'guan'=>-17961,
    'guang'=>-17950,
    'gui'=>-17947,
    'gun'=>-17931,
    'guo'=>-17928,
    'ha'=>-17922,
    'hai'=>-17759,
    'han'=>-17752,
    'hang'=>-17733,
    'hao'=>-17730,
    'he'=>-17721,
    'hei'=>-17703,
    'hen'=>-17701,
    'heng'=>-17697,
    'hong'=>-17692,
    'hou'=>-17683,
    'hu'=>-17676,
    'hua'=>-17496,
    'huai'=>-17487,
    'huan'=>-17482,
    'huang'=>-17468,
    'hui'=>-17454,
    'hun'=>-17433,
    'huo'=>-17427,
    'ji'=>-17417,
    'jia'=>-17202,
    'jian'=>-17185,
    'jiang'=>-16983,
    'jiao'=>-16970,
    'jie'=>-16942,
    'jin'=>-16915,
    'jing'=>-16733,
    'jiong'=>-16708,
    'jiu'=>-16706,
    'ju'=>-16689,
    'juan'=>-16664,
    'jue'=>-16657,
    'jun'=>-16647,
    'ka'=>-16474,
    'kai'=>-16470,
    'kan'=>-16465,
    'kang'=>-16459,
    'kao'=>-16452,
    'ke'=>-16448,
    'ken'=>-16433,
    'keng'=>-16429,
    'kong'=>-16427,
    'kou'=>-16423,
    'ku'=>-16419,
    'kua'=>-16412,
    'kuai'=>-16407,
    'kuan'=>-16403,
    'kuang'=>-16401,
    'kui'=>-16393,
    'kun'=>-16220,
    'kuo'=>-16216,
    'la'=>-16212,
    'lai'=>-16205,
    'lan'=>-16202,
    'lang'=>-16187,
    'lao'=>-16180,
    'le'=>-16171,
    'lei'=>-16169,
    'leng'=>-16158,
    'li'=>-16155,
    'lia'=>-15959,
    'lian'=>-15958,
    'liang'=>-15944,
    'liao'=>-15933,
    'lie'=>-15920,
    'lin'=>-15915,
    'ling'=>-15903,
    'liu'=>-15889,
    'long'=>-15878,
    'lou'=>-15707,
    'lu'=>-15701,
    'lv'=>-15681,
    'luan'=>-15667,
    'lue'=>-15661,
    'lun'=>-15659,
    'luo'=>-15652,
    'ma'=>-15640,
    'mai'=>-15631,
    'man'=>-15625,
    'mang'=>-15454,
    'mao'=>-15448,
    'me'=>-15436,
    'mei'=>-15435,
    'men'=>-15419,
    'meng'=>-15416,
    'mi'=>-15408,
    'mian'=>-15394,
    'miao'=>-15385,
    'mie'=>-15377,
    'min'=>-15375,
    'ming'=>-15369,
    'miu'=>-15363,
    'mo'=>-15362,
    'mou'=>-15183,
    'mu'=>-15180,
    'na'=>-15165,
    'nai'=>-15158,
    'nan'=>-15153,
    'nang'=>-15150,
    'nao'=>-15149,
    'ne'=>-15144,
    'nei'=>-15143,
    'nen'=>-15141,
    'neng'=>-15140,
    'ni'=>-15139,
    'nian'=>-15128,
    'niang'=>-15121,
    'niao'=>-15119,
    'nie'=>-15117,
    'nin'=>-15110,
    'ning'=>-15109,
    'niu'=>-14941,
    'nong'=>-14937,
    'nu'=>-14933,
    'nv'=>-14930,
    'nuan'=>-14929,
    'nue'=>-14928,
    'nuo'=>-14926,
    'o'=>-14922,
    'ou'=>-14921,
    'pa'=>-14914,
    'pai'=>-14908,
    'pan'=>-14902,
    'pang'=>-14894,
    'pao'=>-14889,
    'pei'=>-14882,
    'pen'=>-14873,
    'peng'=>-14871,
    'pi'=>-14857,
    'pian'=>-14678,
    'piao'=>-14674,
    'pie'=>-14670,
    'pin'=>-14668,
    'ping'=>-14663,
    'po'=>-14654,
    'pu'=>-14645,
    'qi'=>-14630,
    'qia'=>-14594,
    'qian'=>-14429,
    'qiang'=>-14407,
    'qiao'=>-14399,
    'qie'=>-14384,
    'qin'=>-14379,
    'qing'=>-14368,
    'qiong'=>-14355,
    'qiu'=>-14353,
    'qu'=>-14345,
    'quan'=>-14170,
    'que'=>-14159,
    'qun'=>-14151,
    'ran'=>-14149,
    'rang'=>-14145,
    'rao'=>-14140,
    're'=>-14137,
    'ren'=>-14135,
    'reng'=>-14125,
    'ri'=>-14123,
    'rong'=>-14122,
    'rou'=>-14112,
    'ru'=>-14109,
    'ruan'=>-14099,
    'rui'=>-14097,
    'run'=>-14094,
    'ruo'=>-14092,
    'sa'=>-14090,
    'sai'=>-14087,
    'san'=>-14083,
    'sang'=>-13917,
    'sao'=>-13914,
    'se'=>-13910,
    'sen'=>-13907,
    'seng'=>-13906,
    'sha'=>-13905,
    'shai'=>-13896,
    'shan'=>-13894,
    'shang'=>-13878,
    'shao'=>-13870,
    'she'=>-13859,
    'shen'=>-13847,
    'sheng'=>-13831,
    'shi'=>-13658,
    'shou'=>-13611,
    'shu'=>-13601,
    'shua'=>-13406,
    'shuai'=>-13404,
    'shuan'=>-13400,
    'shuang'=>-13398,
    'shui'=>-13395,
    'shun'=>-13391,
    'shuo'=>-13387,
    'si'=>-13383,
    'song'=>-13367,
    'sou'=>-13359,
    'su'=>-13356,
    'suan'=>-13343,
    'sui'=>-13340,
    'sun'=>-13329,
    'suo'=>-13326,
    'ta'=>-13318,
    'tai'=>-13147,
    'tan'=>-13138,
    'tang'=>-13120,
    'tao'=>-13107,
    'te'=>-13096,
    'teng'=>-13095,
    'ti'=>-13091,
    'tian'=>-13076,
    'tiao'=>-13068,
    'tie'=>-13063,
    'ting'=>-13060,
    'tong'=>-12888,
    'tou'=>-12875,
    'tu'=>-12871,
    'tuan'=>-12860,
    'tui'=>-12858,
    'tun'=>-12852,
    'tuo'=>-12849,
    'wa'=>-12838,
    'wai'=>-12831,
    'wan'=>-12829,
    'wang'=>-12812,
    'wei'=>-12802,
    'wen'=>-12607,
    'weng'=>-12597,
    'wo'=>-12594,
    'wu'=>-12585,
    'xi'=>-12556,
    'xia'=>-12359,
    'xian'=>-12346,
    'xiang'=>-12320,
    'xiao'=>-12300,
    'xie'=>-12120,
    'xin'=>-12099,
    'xing'=>-12089,
    'xiong'=>-12074,
    'xiu'=>-12067,
    'xu'=>-12058,
    'xuan'=>-12039,
    'xue'=>-11867,
    'xun'=>-11861,
    'ya'=>-11847,
    'yan'=>-11831,
    'yang'=>-11798,
    'yao'=>-11781,
    'ye'=>-11604,
    'yi'=>-11589,
    'yin'=>-11536,
    'ying'=>-11358,
    'yo'=>-11340,
    'yong'=>-11339,
    'you'=>-11324,
    'yu'=>-11303,
    'yuan'=>-11097,
    'yue'=>-11077,
    'yun'=>-11067,
    'za'=>-11055,
    'zai'=>-11052,
    'zan'=>-11045,
    'zang'=>-11041,
    'zao'=>-11038,
    'ze'=>-11024,
    'zei'=>-11020,
    'zen'=>-11019,
    'zeng'=>-11018,
    'zha'=>-11014,
    'zhai'=>-10838,
    'zhan'=>-10832,
    'zhang'=>-10815,
    'zhao'=>-10800,
    'zhe'=>-10790,
    'zhen'=>-10780,
    'zheng'=>-10764,
    'zhi'=>-10587,
    'zhong'=>-10544,
    'zhou'=>-10533,
    'zhu'=>-10519,
    'zhua'=>-10331,
    'zhuai'=>-10329,
    'zhuan'=>-10328,
    'zhuang'=>-10322,
    'zhui'=>-10315,
    'zhun'=>-10309,
    'zhuo'=>-10307,
    'zi'=>-10296,
    'zong'=>-10281,
    'zou'=>-10274,
    'zu'=>-10270,
    'zuan'=>-10262,
    'zui'=>-10260,
    'zun'=>-10256,
    'zuo'=>-10254
    );
                      
    /**
    * 取漢字全部拼音
    * @param string $chinese 要轉換的漢字
    * @param string $delimiter 分隔符
    * @param int $length 返回的長度
    * @return string
    */
    public function getFullSpell($chinese, $delimiter = '', $length = 0){
        $spell = $this->getChineseSpells($chinese, $delimiter);
        if($length){
            $spell = substr($spell, 0, $length);
        }
        return $spell;
    }
      
    /**
    * 取漢字第【一個】漢字完整拼音
    * @param string $chinese 要轉換的漢字
    * @param int $length 返回的長度
    * @return string
    */
    public function getFirstSpell($chinese, $length = 0){
        $spell = $this->getChineseSpells($chinese, ' ', 1);
        if($length){
            $spell = substr($spell, 0, $length);
        }
        return $spell;
    }
      
    /**
    * 取一個漢字碼對應的拼音
    * @param int $num 漢字碼
    * @param string $blank 空白字符
    * @return string
    */
    public function getChineseSpell ($num, $blank = ''){
        if($num>0 && $num<160){
            return chr($num);
        }
        elseif($num<-20319||$num>-10247){
            return $blank;
        }
        else{
            foreach ($this->chineseSpellList as $spell => $code){
                if ($code > $num) break;
                $result = $spell;
            }
            return $result;
        }
    }
      
    /**
    * 取漢字拼音
    * @param string $chinese 要轉換的漢字
    * @param string $delimiter 分隔符
    * @param int $first 是否只返回第一個
    * @return string
    */
    public function getChineseSpells($chinese, $delimiter = '', $first=0){
        $result = array();
        for($i=0; $i<strlen($chinese); $i++){
            $p = ord(substr($chinese,$i,1));
            if($p>160){
                $q = ord(substr($chinese,++$i,1));
                $p = $p*256 + $q - 65536;
            }
            $result[] = $this->getChineseSpell($p);
            if($first){
                return $result[0];
            }
        }
        return implode($delimiter, $result);
    }
}
$ok=new ChineseSpell();
$str='%^&《》:「{}AGDFasdfas===取漢字全部拼音';
echo $ok->getFullSpell($str);
echo '<p />';
echo $ok->getChineseSpells($str);
?>

輸出:編碼

相關文章
相關標籤/搜索