PHP切割漢字

<?php
/*
@A UTF-8 encoded character occupies 1 to 4 bytes (most CJK ideographs take 3; characters outside the BMP take 4).

*/
/*--------------------------方法一截取中文字符串方法------------------------------*/
/**
 * Byte-oriented substring for legacy double-byte text (GB2312/GBK style).
 *
 * Any byte greater than 0xA0 is treated as the first byte of a two-byte
 * character; every other byte is a single-byte character. The string is
 * always scanned from byte 0 so that double-byte characters are never split.
 *
 * @param string $str   Double-byte encoded input.
 * @param int    $start Byte offset at which copying begins. A character that
 *                      starts before this offset is skipped as a whole.
 * @param int    $len   Number of bytes to scan after $start. A double-byte
 *                      character that begins on the last scanned byte is
 *                      kept whole, so the result may be one byte longer.
 *
 * @return string The extracted portion of $str.
 */
function msubstr($str, $start, $len)
{
    $tmpstr = "";
    $end = $start + $len;
    for ($i = 0; $i < $end; $i++) {
        // A lead byte above 0xA0 announces a two-byte character.
        $width = ord(substr($str, $i, 1)) > 0xa0 ? 2 : 1;
        // Only collect characters that begin at or after $start; the scan
        // itself still begins at 0 to stay aligned on character boundaries.
        if ($i >= $start) {
            $tmpstr .= substr($str, $i, $width);
        }
        // Step over the trail byte of a double-byte character.
        $i += $width - 1;
    }
    return $tmpstr;
}


/*----------------------------第二種方法-----------------------------------*/
//截取的是UTF-8字符串
/**
 * Returns the leading part of a UTF-8 string that fits in a width budget.
 *
 * An ASCII byte costs 1 unit of $len; a multi-byte character costs 2 units
 * and is only emitted when both units fit in the budget.
 *
 * @param string $str UTF-8 encoded input.
 * @param int    $len Width budget (ASCII = 1, multi-byte character = 2).
 *
 * @return string Prefix of $str that fits in the budget, never ending in the
 *                middle of a well-formed multi-byte sequence.
 */
function utf_substr($str, $len)
{
    $new_str = [];
    $total = strlen($str);
    $pos = 0;
    for ($i = 0; $i < $len && $pos < $total; $i++) {
        $lead = ord(substr($str, $pos, 1));
        if ($lead > 127) {
            // A multi-byte character consumes a second unit of the budget.
            $i++;
            if ($i < $len) {
                // Sequence length is taken from the lead byte; a stray
                // continuation byte (0x80-0xBF) is copied on its own.
                if ($lead >= 240) {
                    $size = 4;
                } elseif ($lead >= 224) {
                    $size = 3;
                } elseif ($lead >= 192) {
                    $size = 2;
                } else {
                    $size = 1;
                }
                $new_str[] = substr($str, $pos, $size);
                $pos += $size;
            }
        } else {
            $new_str[] = substr($str, $pos, 1);
            $pos++;
        }
    }
    return join($new_str);
}


/*-------------------------------------第三種方法(UTF-8)--------------------------------*/
/**
 * Truncates a UTF-8 string to a display-width budget and appends "..." when
 * characters were actually dropped.
 *
 * The input is tokenised into whole UTF-8 characters (1 to 4 bytes). An ASCII
 * character costs 1 unit and a multi-byte character costs 2. Copying stops as
 * soon as the accumulated cost exceeds $length - 3, which leaves room for the
 * ellipsis. Bytes that do not form a well-formed UTF-8 character (including
 * NUL) are not matched by the tokeniser and are therefore left out.
 *
 * @param string $string UTF-8 encoded input.
 * @param int    $length Width budget (ASCII = 1, multi-byte character = 2).
 *
 * @return string The input, or its truncated prefix followed by "...".
 */
function cutstr($string, $length)
{
    // Two-byte characters are matched as one lead byte plus one continuation
    // byte so they can never be split or counted twice.
    preg_match_all("/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/", $string, $info);
    $chars = $info[0];
    $count = count($chars);
    $wordscut = "";
    $j = 0;
    for ($i = 0; $i < $count; $i++) {
        $wordscut .= $chars[$i];
        $j += ord($chars[$i]) > 127 ? 2 : 1;
        if ($j > $length - 3) {
            // Only mark the result as truncated when characters remain.
            return $i < $count - 1 ? $wordscut . "..." : $wordscut;
        }
    }
    return join('', $chars);
}

// Demo: pass a mixed digit/CJK sample string to cutstr() with a width budget
// of 10 and print whatever it returns.
$string = "312哈哈,這個組合很難切割哦";
echo cutstr($string, 10);


/*---------------------------------下面是曾經用過的截取第三個的字符串的------------------------------*/
// $name1 = mysql_result($my_rst,0,"name");
// $name = preg_match("/([1-9][0-9]+)/",$name1,$r);
// $name = $r[0];
// if($name == ""){
// $name=preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,2}'.
// '((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,1}).*#s',
// '$1',$name1);
// }

/*--------------------------------------------第四種方法(UTF-8)---------------------------------------------*/
/**
 * Truncates a UTF-8 string to a visual-width budget, appending "..." only
 * when part of the input was left out.
 *
 * Width accounting: every multi-byte character and every uppercase ASCII
 * letter costs 1; any other single byte (lowercase letters, digits,
 * half-width punctuation) costs 0.5.
 *
 * @param string    $sourcestr UTF-8 encoded input.
 * @param int|float $cutlength Width budget in full-width character units.
 *
 * @return string The input, or its truncated prefix followed by "...".
 */
function cut_str($sourcestr, $cutlength)
{
    $returnstr = '';
    $i = 0; // byte offset into $sourcestr
    $n = 0; // accumulated visual width
    $str_length = strlen($sourcestr);
    while ($n < $cutlength && $i < $str_length) {
        $ascnum = ord(substr($sourcestr, $i, 1));
        if ($ascnum >= 240) {
            // 4-byte sequence (characters outside the BMP).
            $bytes = 4;
            $n++;
        } elseif ($ascnum >= 224) {
            // 3-byte sequence (most CJK characters).
            $bytes = 3;
            $n++;
        } elseif ($ascnum >= 192) {
            // 2-byte sequence.
            $bytes = 2;
            $n++;
        } elseif ($ascnum >= 65 && $ascnum <= 90) {
            // Uppercase letters are counted as a full-width character.
            $bytes = 1;
            $n++;
        } else {
            // Everything else is counted as half a full-width character.
            $bytes = 1;
            $n = $n + 0.5;
        }
        $returnstr .= substr($sourcestr, $i, $bytes);
        $i += $bytes;
    }
    // Unconsumed bytes mean the text was really truncated. Comparing the
    // byte length with the character budget would flag short CJK strings.
    if ($i < $str_length) {
        $returnstr .= "...";
    }
    return $returnstr;
}


/*--------------------第五種方法(UTF-8)---------------------------------------------*/

/**
 * Cuts a UTF-8 title to a character budget, treating HTML entities as single
 * characters and never splitting a multi-byte sequence or an entity.
 *
 * Counting rules:
 *  - $magic = false: every character (ASCII byte, multi-byte sequence, or
 *    entity) counts as 1 and copying stops after $len characters.
 *  - $magic = true: $len is measured in full-width units. Multi-byte
 *    characters, numeric entities (&#NNN;) and uppercase ASCII letters cost
 *    a full unit; other ASCII bytes and the named entities &lt; &gt; &amp;
 *    &quot; cost half a unit. A multi-byte character or numeric entity that
 *    would overshoot the budget by half a unit is dropped again.
 *
 * Stray continuation bytes (0x80-0xBF) are copied but never counted.
 *
 * @param string     $title UTF-8 encoded input, possibly containing entities.
 * @param int        $start Byte offset to start from; if it points into the
 *                          middle of a multi-byte sequence it is moved back
 *                          to that sequence's lead byte.
 * @param int|string $len   Budget as described above; "" means "no limit".
 * @param bool       $magic Selects the width-aware counting mode.
 *
 * @return string The selected portion of $title.
 */
function FSubstr($title, $start, $len = "", $magic = true)
{
    if ($len == "") $len = strlen($title);

    // Re-align $start onto a lead byte when it lands on a continuation byte.
    if ($start != 0) {
        $startv = ord(substr($title, $start, 1));
        if ($startv >= 128 && $startv < 192) {
            for ($i = $start - 1; $i > 0; $i--) {
                if (ord(substr($title, $i, 1)) >= 192) break;
            }
            $start = $i;
        }
    }

    // Fewer bytes than the budget: plain substr() cannot split a character.
    if (strlen($title) <= $len) return substr($title, $start, $len);

    $entities = ["&lt;", "&gt;", "&amp;", "&quot;"];
    $total = strlen($title);
    $units = 0;   // consumed width in half units (magic mode)
    $realnum = 0; // consumed characters (plain mode)
    $length = 0;  // number of bytes to return

    for ($i = $start; $i < $total; $i++) {
        $cur = substr($title, $i, 1);
        $code = ord($cur);
        $cstep = 0;         // bytes occupied by the current character
        $weight = 0;        // half units the character costs in magic mode
        $removable = false; // may be dropped again if it overshoots

        if ($cur === "&") {
            // A bare ampersand is an ordinary half-width character.
            $cstep = 1;
            $weight = 1;
            foreach ($entities as $entity) {
                if (substr($title, $i, strlen($entity)) === $entity) {
                    $cstep = strlen($entity);
                    break;
                }
            }
            // Anchored so that only an entity starting at $i is recognised.
            if ($cstep === 1 && preg_match('/^&#\d+;?/', substr($title, $i), $match)) {
                $cstep = strlen($match[0]);
                $weight = 2;
                $removable = true;
            }
        } elseif ($code >= 192) {
            // Sequence length from the lead byte; the legacy 5- and 6-byte
            // forms are still recognised.
            if ($code >= 252) {
                $cstep = 6;
            } elseif ($code >= 248) {
                $cstep = 5;
            } elseif ($code >= 240) {
                $cstep = 4;
            } elseif ($code >= 224) {
                $cstep = 3;
            } else {
                $cstep = 2;
            }
            $weight = 2;
            $removable = true;
        } elseif ($code < 128) {
            $cstep = 1;
            // Uppercase letters are as wide as a full-width character.
            $weight = ($code >= 65 && $code <= 90) ? 2 : 1;
        }

        if ($cstep > 0) {
            $length += $cstep;
            $i += $cstep - 1;
            $realnum++;
            $units += $weight;
        } else {
            // Stray continuation byte: copy it, but do not count it.
            $length += 1;
        }

        if ($magic) {
            if ($units == $len * 2) break;
            if ($units == ($len * 2) + 1) {
                // Overshoot by half a unit: give back a wide character.
                if ($removable) {
                    $length -= $cstep;
                }
                break;
            }
        } else {
            if ($realnum == $len) break;
        }
    }

    return substr($title, $start, $length);
}
/**
 * Character-based substring for UTF-8 text.
 *
 * A character is either one ASCII byte or a lead byte (0xC0-0xFF) followed by
 * one or more continuation bytes (0x80-0xBF). Tokenising stops at the first
 * byte that does not start such a character, so nothing at or after a
 * malformed byte is returned.
 *
 * The offsets are applied to the token list rather than being interpolated
 * into a regex quantifier, so values above PCRE's repeat limit (65535) and
 * non-integer input cannot break the pattern.
 *
 * @param string $str  UTF-8 encoded input.
 * @param int    $from Number of leading characters to skip (negative values
 *                     are treated as 0).
 * @param int    $len  Maximum number of characters to return (negative
 *                     values are treated as 0).
 *
 * @return string Up to $len characters of $str starting at character $from.
 */
function utf8Substr($str, $from, $len)
{
    $from = max(0, (int) $from);
    $len = max(0, (int) $len);

    // \G chains every match to the end of the previous one, so the scan
    // halts at the first byte that cannot start a character.
    if (!preg_match_all('/\G(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+)/', $str, $chars)) {
        return '';
    }

    return implode('', array_slice($chars[0], $from, $len));
}

// Demo input mixing CJK characters, ASCII letters, digits, spaces and punctuation.
$title = "你哈珀niad1納斯達wop asdni你愛誰都沒阿斯頓撒旦12ccs- sd";

// Ask utf8Substr() for at most 15 characters starting at character 0, then print the result.
$title = utf8Substr($title, 0, 15);
echo $title;
 
 
?>
相關文章
相關標籤/搜索