// GBK <-> UTF-8 text conversion by way of Unicode escape sequences

package top.hh;

import java.io.UnsupportedEncodingException;

/**
 * description: converts text between a "GBK" form and a "UTF-8" form by way of
 * an intermediate string of Unicode escapes (a backslash, the letter u and
 * four hexadecimal digits).
 *
 * <p>All methods take and return Java {@code String}s; none of them performs a
 * byte-level charset conversion. The escaped form is plain text in which
 * selected characters are replaced by their four-digit escape.
 *
 * <p>Limitation: neither encoder escapes backslashes, so input that already
 * contains backslash sequences is interpreted by the decoders and does not
 * round-trip unchanged.
 *
 * @author: dawn.he QQ:       905845006
 * @email: dawn.he@cloudwise.com
 * @email: 905845006@qq.com
 * @date: 2019/9/8    9:31 AM
 */
public class UTFToGBK {

    /** Lower-case hexadecimal digits used when writing an escape. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /** First full-width ASCII variant (full-width exclamation mark). */
    private static final char FULLWIDTH_ASCII_FIRST = 0xFF01;

    /** Last full-width ASCII variant (full-width tilde). */
    private static final char FULLWIDTH_ASCII_LAST = 0xFF5E;

    /** Distance between a full-width ASCII variant and its ASCII counterpart. */
    private static final int FULLWIDTH_ASCII_OFFSET = 65248;

    /**
     * Escapes the text with {@link #GBKToUnicode(String)} and decodes the
     * result with {@link #unicodeToUtf8(String)}.
     *
     * @param gbk text to convert; must not be {@code null}
     * @return the converted text
     * @throws IllegalArgumentException if the input contains a backslash-u
     *         sequence that is not followed by four hexadecimal digits
     */
    public String gbkToUtf8(String gbk) {
        return unicodeToUtf8(GBKToUnicode(gbk));
    }

    /**
     * Escapes the text with {@link #utf8ToUnicode(String)} and decodes the
     * result with {@link #UnicodeToGBK(String)}. Full-width ASCII variants are
     * folded to plain ASCII along the way.
     *
     * @param utf text to convert; must not be {@code null}
     * @return the converted text
     * @throws NumberFormatException if the input contains a backslash-u
     *         sequence that is not followed by four hexadecimal digits
     */
    public String utf8ToGbk(String utf) {
        return UnicodeToGBK(utf8ToUnicode(utf));
    }

    /**
     * Replaces every character above U+00FF with its four-digit, lower-case
     * escape; characters up to U+00FF are copied unchanged.
     *
     * @param str text to escape; must not be {@code null}
     * @return the escaped text
     */
    public static String GBKToUnicode(String str) {
        StringBuilder result = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char current = str.charAt(i);
            if (isNeedConvert(current)) {
                // Always four digits: the decoders read a fixed-width escape.
                appendEscape(result, current);
            } else {
                result.append(current);
            }
        }
        return result.toString();
    }

    /**
     * Decodes every backslash-u escape in the text into the character it
     * names; everything else, including other backslash sequences, is copied
     * unchanged.
     *
     * @param dataStr escaped text; must not be {@code null}
     * @return the decoded text
     * @throws NumberFormatException if a backslash-u is not followed by four
     *         hexadecimal digits
     */
    public static String UnicodeToGBK(String dataStr) {
        int length = dataStr.length();
        StringBuilder buffer = new StringBuilder(length);

        int index = 0;
        while (index < length) {
            if (!dataStr.startsWith("\\u", index)) {
                buffer.append(dataStr.charAt(index));
                index++;
                continue;
            }

            int value = parseHex4(dataStr, index + 2);
            if (value < 0) {
                // NumberFormatException is what malformed digits raised before;
                // truncated escapes now report the same way instead of
                // failing with an index error.
                throw new NumberFormatException(
                        "Malformed \\uxxxx encoding at index " + index);
            }
            buffer.append((char) value);
            index += 6;
        }

        return buffer.toString();
    }

    /**
     * Tells whether a character must be written as an escape by
     * {@link #GBKToUnicode(String)}.
     *
     * @param para character to test
     * @return {@code true} if the character is above U+00FF
     */
    public static boolean isNeedConvert(char para) {
        return para > 0x00FF;
    }

    /**
     * Escapes text for {@link #UnicodeToGBK(String)}.
     *
     * <ul>
     *   <li>Basic Latin characters (up to U+007F) are copied unchanged.</li>
     *   <li>Full-width ASCII variants (U+FF01 to U+FF5E) are folded to the
     *       corresponding ASCII character.</li>
     *   <li>Every other character is written as a four-digit, lower-case
     *       escape.</li>
     * </ul>
     *
     * @param inStr text to escape; must not be {@code null}
     * @return the escaped text
     */
    public static String utf8ToUnicode(String inStr) {
        StringBuilder sb = new StringBuilder(inStr.length());
        for (int i = 0; i < inStr.length(); i++) {
            char current = inStr.charAt(i);
            if (current < 0x80) {
                // Letters, digits and other Basic Latin characters.
                sb.append(current);
            } else if (current >= FULLWIDTH_ASCII_FIRST && current <= FULLWIDTH_ASCII_LAST) {
                // Only this sub-range of the half-width/full-width block has an
                // ASCII counterpart at a fixed offset; the rest of the block
                // (half-width katakana, full-width currency signs, ...) is
                // escaped below like any other character.
                sb.append((char) (current - FULLWIDTH_ASCII_OFFSET));
            } else {
                // CJK ideographs and everything else.
                appendEscape(sb, current);
            }
        }
        return sb.toString();
    }

    /**
     * Decodes backslash sequences: a backslash-u escape becomes the character
     * it names, backslash followed by {@code t}, {@code r}, {@code n} or
     * {@code f} becomes tab, carriage return, line feed or form feed, and a
     * backslash followed by any other character becomes that character. A
     * lone backslash at the very end of the text is kept as is.
     *
     * @param theString escaped text; must not be {@code null}
     * @return the decoded text
     * @throws IllegalArgumentException if a backslash-u is not followed by
     *         four hexadecimal digits
     */
    public static String unicodeToUtf8(String theString) {
        int len = theString.length();
        StringBuilder outBuffer = new StringBuilder(len);

        int pos = 0;
        while (pos < len) {
            char current = theString.charAt(pos++);
            if (current != '\\') {
                outBuffer.append(current);
                continue;
            }
            if (pos == len) {
                // Nothing follows the backslash, so there is no escape to decode.
                outBuffer.append(current);
                break;
            }

            char escaped = theString.charAt(pos++);
            switch (escaped) {
                case 'u': {
                    int value = parseHex4(theString, pos);
                    if (value < 0) {
                        throw new IllegalArgumentException(
                                "Malformed   \\uxxxx   encoding.");
                    }
                    outBuffer.append((char) value);
                    pos += 4;
                    break;
                }
                case 't':
                    outBuffer.append('\t');
                    break;
                case 'r':
                    outBuffer.append('\r');
                    break;
                case 'n':
                    outBuffer.append('\n');
                    break;
                case 'f':
                    outBuffer.append('\f');
                    break;
                default:
                    outBuffer.append(escaped);
                    break;
            }
        }
        return outBuffer.toString();
    }

    /** Appends the four-digit, lower-case escape of {@code c} to {@code out}. */
    private static void appendEscape(StringBuilder out, char c) {
        out.append('\\').append('u')
                .append(HEX_DIGITS[(c >> 12) & 0xF])
                .append(HEX_DIGITS[(c >> 8) & 0xF])
                .append(HEX_DIGITS[(c >> 4) & 0xF])
                .append(HEX_DIGITS[c & 0xF]);
    }

    /**
     * Reads four ASCII hexadecimal digits of {@code text} starting at
     * {@code start}; returns their value, or -1 if fewer than four characters
     * remain or one of them is not a hexadecimal digit.
     */
    private static int parseHex4(String text, int start) {
        if (start + 4 > text.length()) {
            return -1;
        }
        int value = 0;
        for (int i = start; i < start + 4; i++) {
            int digit = hexValue(text.charAt(i));
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }

    /** Returns the value of an ASCII hexadecimal digit, or -1 for any other character. */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return 10 + c - 'a';
        }
        if (c >= 'A' && c <= 'F') {
            return 10 + c - 'A';
        }
        return -1;
    }

    /**
     * Demo: pushes a mixed ASCII/CJK sample through both conversions and
     * prints each result.
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String old = "fass3242342半f角ffff變拳jaf;sdfjaojv asdz腳,quanjiao沙拉醬阿迪 ";
        byte[] gbkDecode = old.getBytes("gbk");
        String utf8 = new UTFToGBK().gbkToUtf8(new String(gbkDecode, "gbk"));
        System.out.println(utf8);
        String gbk = new UTFToGBK().utf8ToGbk(utf8);
        System.out.println(gbk);
    }

}
// Related articles
// Related tags / search