// java UnicodeHtml

// java UnicodeHtml.java

/**
 * Converts text to and from hexadecimal HTML numeric character references
 * of the form {@code &#xhhhh;}.
 *
 * <p>All methods are static and keep no state.
 */
public class UnicodeHtml {

	/** Opening delimiter of a hexadecimal numeric character reference. */
	private static final String REF_PREFIX = "&#x";

	/** Closing delimiter of a numeric character reference. */
	private static final char REF_SUFFIX = ';';

	/** Minimum number of hex digits written for one reference. */
	private static final int MIN_HEX_DIGITS = 4;

	/** First code unit that {@link #chinaToUnicode(String)} encodes (U+4E00). */
	private static final int HAN_FIRST = 0x4E00;

	/** Last code unit that {@link #chinaToUnicode(String)} encodes (U+9FA5). */
	private static final int HAN_LAST = 0x9FA5;

	/**
	 * Demo entry point: encodes a sample string, prints the encoded form and
	 * then prints the result of decoding it again.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		// Non-ASCII characters are written as Unicode escapes so that the
		// sample compiles identically whatever the source file encoding is.
		String inStr = "dfadf\\uCN\u5409\u6797\u9577\u6625|None|CERNET|1|None\u3002";

		String unicodeStr = UnicodeHtml.string2unicode(inStr);
		System.out.println(unicodeStr);
		System.out.println(unicode2string(unicodeStr));
	}

	/**
	 * Encodes every character of the input as a hexadecimal numeric character
	 * reference.
	 *
	 * <p>Each code point yields one reference with lowercase hex digits,
	 * zero-padded to at least four digits (for example {@code &#x0061;} for
	 * 'a'). A supplementary character (surrogate pair) is written as a single
	 * reference to its code point rather than as two surrogate references,
	 * which would not be valid HTML.
	 *
	 * @param str text to encode; {@code null} is treated as the empty string
	 * @return the encoded text, never {@code null}
	 */
	public static String string2unicode(String str) {
		if (str == null || str.length() == 0) {
			return "";
		}
		// Each BMP character expands to exactly 8 output characters.
		StringBuilder out = new StringBuilder(str.length() * 8);
		int index = 0;
		while (index < str.length()) {
			int codePoint = str.codePointAt(index);
			appendReference(out, codePoint);
			index += Character.charCount(codePoint);
		}
		return out.toString();
	}

	/**
	 * Decodes hexadecimal numeric character references back into characters.
	 *
	 * <p>References may be mixed freely with literal text, may use upper- or
	 * lowercase hex digits and may have any number of digits as long as the
	 * value is a valid code point. Text that is not a well-formed reference
	 * (no digits, missing terminating semicolon, value out of range) is copied
	 * to the result unchanged. Decoding is a single pass: characters produced
	 * by a reference are never re-scanned.
	 *
	 * @param str text to decode; {@code null} is treated as the empty string
	 * @return the decoded text, never {@code null}
	 */
	public static String unicode2string(String str) {
		if (str == null) {
			return "";
		}
		int refStart = str.indexOf(REF_PREFIX);
		if (refStart < 0) {
			return str;
		}

		final int length = str.length();
		StringBuilder out = new StringBuilder(length);
		int copiedUpTo = 0; // everything before this index is already in 'out'

		while (refStart >= 0) {
			int cursor = refStart + REF_PREFIX.length();
			int codePoint = 0;
			int digitCount = 0;
			while (cursor < length) {
				int digit = hexValue(str.charAt(cursor));
				if (digit < 0) {
					break;
				}
				// Stop accumulating once out of range: keeps the value
				// invalid and rules out int overflow on long digit runs.
				if (codePoint <= Character.MAX_CODE_POINT) {
					codePoint = codePoint * 16 + digit;
				}
				digitCount++;
				cursor++;
			}

			boolean wellFormed = digitCount > 0
					&& cursor < length
					&& str.charAt(cursor) == REF_SUFFIX
					&& codePoint <= Character.MAX_CODE_POINT;
			if (wellFormed) {
				out.append(str, copiedUpTo, refStart);
				out.appendCodePoint(codePoint);
				copiedUpTo = cursor + 1;
			}
			// A malformed reference is left in place; it is copied verbatim
			// together with the following literal text.
			refStart = str.indexOf(REF_PREFIX, cursor);
		}

		out.append(str, copiedUpTo, length);
		return out.toString();
	}

	/**
	 * Encodes only Chinese characters as hexadecimal numeric character
	 * references and leaves every other character untouched.
	 *
	 * <p>A character counts as Chinese when it lies in U+4E00..U+9FA5
	 * inclusive.
	 *
	 * @param str text to encode; {@code null} is treated as the empty string
	 * @return the text with Chinese characters replaced by references,
	 *         never {@code null}
	 */
	public static String chinaToUnicode(String str) {
		if (str == null) {
			return "";
		}
		StringBuilder out = new StringBuilder(str.length());
		for (int i = 0; i < str.length(); i++) {
			char ch = str.charAt(i);
			if (ch >= HAN_FIRST && ch <= HAN_LAST) {
				appendReference(out, ch);
			} else {
				out.append(ch);
			}
		}
		return out.toString();
	}

	/** Appends one reference for the given code point to {@code out}. */
	private static void appendReference(StringBuilder out, int codePoint) {
		String hex = Integer.toHexString(codePoint);
		out.append(REF_PREFIX);
		for (int pad = hex.length(); pad < MIN_HEX_DIGITS; pad++) {
			out.append('0');
		}
		out.append(hex).append(REF_SUFFIX);
	}

	/** Returns the value of an ASCII hex digit, or -1 if {@code ch} is not one. */
	private static int hexValue(char ch) {
		if (ch >= '0' && ch <= '9') {
			return ch - '0';
		}
		if (ch >= 'a' && ch <= 'f') {
			return ch - 'a' + 10;
		}
		if (ch >= 'A' && ch <= 'F') {
			return ch - 'A' + 10;
		}
		return -1;
	}

}
// Related articles
// Related tags / search