java UnicodeHtml.java
/**
 * Converts between plain text and HTML hexadecimal numeric character
 * references of the form {@code &#xhhhh;}.
 *
 * <p>All methods are static and stateless.
 */
public class UnicodeHtml {

    /** Text that opens a hexadecimal numeric character reference. */
    private static final String ENTITY_PREFIX = "&#x";

    /** Character that closes a numeric character reference. */
    private static final char ENTITY_SUFFIX = ';';

    /** Largest number of hex digits accepted in a reference (0x10FFFF needs six). */
    private static final int MAX_HEX_DIGITS = 6;

    /** First code unit of the CJK range escaped by {@link #chinaToUnicode(String)} (U+4E00). */
    private static final int CJK_FIRST = 0x4E00;

    /** Last code unit of the CJK range escaped by {@link #chinaToUnicode(String)} (U+9FA5). */
    private static final int CJK_LAST = 0x9FA5;

    /**
     * Demo entry point: encodes a sample string, prints the encoded form,
     * then prints the result of decoding it again.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String inStr = "dfadf\\uCN吉林長春|None|CERNET|1|None。";
        String unicodeStr = UnicodeHtml.string2unicode(inStr);
        System.out.println(unicodeStr);
        System.out.println(unicode2string(unicodeStr));
    }

    /**
     * Encodes every UTF-16 code unit of the input as a fixed-width reference
     * {@code &#xhhhh;} (four lowercase hex digits, zero padded).
     *
     * <p>Characters outside the Basic Multilingual Plane are emitted as two
     * references, one per surrogate; {@link #unicode2string(String)} turns
     * them back into the original pair.
     *
     * @param str text to encode; {@code null} is treated as the empty string
     * @return the encoded text, eight characters per input code unit
     */
    public static String string2unicode(String str) {
        if (str == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder(str.length() * 8);
        for (int i = 0; i < str.length(); i++) {
            String hex = Integer.toHexString(str.charAt(i));
            sb.append(ENTITY_PREFIX);
            // Left-pad to four digits so every reference has the same width.
            for (int pad = hex.length(); pad < 4; pad++) {
                sb.append('0');
            }
            sb.append(hex).append(ENTITY_SUFFIX);
        }
        return sb.toString();
    }

    /**
     * Decodes hexadecimal numeric character references back into text.
     *
     * <p>A reference is {@code &#x}, one to six hex digits (either case) and
     * a closing {@code ;}, naming a value no greater than U+10FFFF. Anything
     * that is not a well-formed reference is copied to the output unchanged,
     * so the input may freely mix references with ordinary text.
     *
     * @param str text to decode; {@code null} is treated as the empty string
     * @return the decoded text
     */
    public static String unicode2string(String str) {
        if (str == null) {
            return "";
        }
        // Fast path: nothing that could possibly be a reference.
        if (!str.contains(ENTITY_PREFIX) || str.indexOf(ENTITY_SUFFIX) == -1) {
            return str;
        }
        StringBuilder sb = new StringBuilder(str.length());
        int pos = 0;
        while (pos < str.length()) {
            if (str.startsWith(ENTITY_PREFIX, pos)) {
                int digitsStart = pos + ENTITY_PREFIX.length();
                int semicolon = str.indexOf(ENTITY_SUFFIX, digitsStart);
                int codePoint = parseHexCodePoint(str, digitsStart, semicolon);
                if (codePoint >= 0) {
                    sb.appendCodePoint(codePoint);
                    pos = semicolon + 1;
                    continue;
                }
            }
            // Not a well-formed reference here: keep the character as it is.
            sb.append(str.charAt(pos));
            pos++;
        }
        return sb.toString();
    }

    /**
     * Escapes only characters in the range U+4E00 to U+9FA5 as
     * {@code &#xhhhh;}; every other character is copied unchanged.
     *
     * @param str text to process; {@code null} is treated as the empty string
     * @return the text with characters in that range replaced by references
     */
    public static String chinaToUnicode(String str) {
        if (str == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c >= CJK_FIRST && c <= CJK_LAST) {
                sb.append(ENTITY_PREFIX).append(Integer.toHexString(c)).append(ENTITY_SUFFIX);
            } else {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Parses {@code str[start, end)} as one to six hex digits.
     *
     * @return the parsed value, or -1 when {@code end} is negative, the digit
     *         count is out of range, a non-hex character is present, or the
     *         value is not a valid Unicode code point
     */
    private static int parseHexCodePoint(String str, int start, int end) {
        int digits = end - start;
        if (end < 0 || digits < 1 || digits > MAX_HEX_DIGITS) {
            return -1;
        }
        int value = 0;
        for (int i = start; i < end; i++) {
            int digit = hexValue(str.charAt(i));
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return Character.isValidCodePoint(value) ? value : -1;
    }

    /** Returns the value of an ASCII hex digit (either case), or -1 for any other character. */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }
}