引言
可以根據各類字符在Unicode字符編碼表中的區間來進行判斷,如數字爲'0'~'9'之間,英文字母爲'a'~'z'或'A'~'Z'等。Java判斷一個字符串是否有中文是利用Unicode編碼來判斷,因爲中文的編碼區間爲:0x4e00--0x9fbb。但用區間來判斷中文並不是非常精確,因爲有些中文的標點符號利用區間判斷會得到錯誤的結果,所以通過Character.UnicodeBlock來進行判斷。代碼如下:
package cn.csrc.base.count;
/**
 * Classifies the characters of a string as Chinese, English letters, digits,
 * spaces or "other", prints a summary to standard output, and keeps the
 * counts of the most recent {@link #count(String)} call available through
 * the accessor methods.
 *
 * <p>Chinese detection is based on {@link Character.UnicodeBlock} rather than
 * a single numeric range, so that CJK punctuation and full-width forms are
 * recognised as well. Instances hold mutable state and are not thread-safe.
 */
public class CountCharacter {

    public static void main(String[] args) {
        String str = "我愛你abcd123中國 #!";
        CountCharacter countCharacter = new CountCharacter();
        countCharacter.count(str);
    }

    /** Number of Chinese characters (including CJK punctuation). */
    private int chCharacter = 0;
    /** Number of ASCII English letters. */
    private int enCharacter = 0;
    /** Number of ASCII space characters. */
    private int spaceCharacter = 0;
    /** Number of ASCII digits. */
    private int numberCharacter = 0;
    /** Number of characters that fit none of the other categories. */
    private int otherCharacter = 0;

    // Collected Chinese characters, each followed by a space.
    private final StringBuilder sb1 = new StringBuilder();
    // Collected English letters, each followed by a space.
    private final StringBuilder sb2 = new StringBuilder();
    // Collected digits, each followed by a space.
    private final StringBuilder sb3 = new StringBuilder();
    // Collected "other" characters, each followed by a space.
    private final StringBuilder sb4 = new StringBuilder();

    /**
     * Counts the Chinese characters, English letters, digits, spaces and
     * other characters in the given string and prints the result.
     *
     * <p>The string is walked by Unicode code point, so a supplementary
     * character (surrogate pair) is counted once. Results of any previous
     * call on this instance are discarded first.
     *
     * @param str the string to analyse; when {@code null} or empty a notice
     *            is printed and all counts are left at zero
     */
    public void count(String str) {
        reset();
        // Null must be tested before any method is invoked on str.
        if (str == null || str.isEmpty()) {
            System.out.println("字符串爲空");
            return;
        }
        for (int i = 0; i < str.length(); ) {
            int cp = str.codePointAt(i);
            i += Character.charCount(cp);
            if ((cp >= 'A' && cp <= 'Z') || (cp >= 'a' && cp <= 'z')) {
                enCharacter++;
                sb2.appendCodePoint(cp).append(' ');
            } else if (cp >= '0' && cp <= '9') {
                numberCharacter++;
                sb3.appendCodePoint(cp).append(' ');
            } else if (cp == ' ') {
                spaceCharacter++;
            } else if (isChinese(cp)) {
                chCharacter++;
                sb1.appendCodePoint(cp).append(' ');
            } else {
                otherCharacter++;
                sb4.appendCodePoint(cp).append(' ');
            }
        }
        System.out.println("字符串:" + str + " \r\n");
        System.out.println("中文字符有:" + chCharacter + "個 (" + sb1.toString() + ")");
        System.out.println("英文字符有:" + enCharacter + "個 (" + sb2.toString() + ")");
        System.out.println("數字有:" + numberCharacter + "個 (" + sb3.toString() + ")");
        System.out.println("空格有:" + spaceCharacter + "個");
        System.out.println("其餘字符有:" + otherCharacter + "個 (" + sb4.toString() + ")");
    }

    /** @return the number of Chinese characters found by the last {@link #count(String)} call */
    public int getChineseCount() {
        return chCharacter;
    }

    /** @return the number of English letters found by the last {@link #count(String)} call */
    public int getEnglishCount() {
        return enCharacter;
    }

    /** @return the number of digits found by the last {@link #count(String)} call */
    public int getDigitCount() {
        return numberCharacter;
    }

    /** @return the number of spaces found by the last {@link #count(String)} call */
    public int getSpaceCount() {
        return spaceCharacter;
    }

    /** @return the number of other characters found by the last {@link #count(String)} call */
    public int getOtherCount() {
        return otherCharacter;
    }

    /** Clears all counters and collected characters so each count() call starts fresh. */
    private void reset() {
        chCharacter = 0;
        enCharacter = 0;
        spaceCharacter = 0;
        numberCharacter = 0;
        otherCharacter = 0;
        sb1.setLength(0);
        sb2.setLength(0);
        sb3.setLength(0);
        sb4.setLength(0);
    }

    /**
     * Tells whether a code point is treated as Chinese.
     *
     * <p>Besides the CJK ideograph blocks, the CJK symbols/punctuation,
     * half-width/full-width forms and general punctuation blocks are
     * accepted, so that punctuation such as the ideographic full stop and
     * curly quotation marks is counted as Chinese. A diagnostic line is
     * printed for every match.
     *
     * @param codePoint the Unicode code point to test
     * @return {@code true} if the code point is in one of the accepted blocks
     */
    private boolean isChinese(int codePoint) {
        // Look up the Unicode block that contains this code point.
        Character.UnicodeBlock ub = Character.UnicodeBlock.of(codePoint);
        if (ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION) {
            System.out.println(new String(Character.toChars(codePoint)) + " 是中文");
            return true;
        }
        return false;
    }
}
結果如下: