package test; import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import org.mozilla.intl.chardet.nsDetector; import org.mozilla.intl.chardet.nsICharsetDetectionObserver; // jChardet 火狐所用字符編碼檢測算法 public class FileCharsetDetector { private boolean found = false; private String encoding = null; public static void main(String[] argv) throws Exception { File file1 = new File("C:\\Users\\Administrator\\Desktop\\VideoViewDemo\\VideoViewDemo\\src\\org\\apache\\android\\media\\AudioPlayer.java"); System.out.println("文件編碼:" + new FileCharsetDetector().guessFileEncoding(file1)); } /** * 傳入一個文件(File)對象,檢查文件編碼 * * @param file * File對象實例 * @return 文件編碼,若無,則返回null * @throws FileNotFoundException * @throws IOException */ public String guessFileEncoding(File file) throws FileNotFoundException, IOException { return guessFileEncoding(file, new nsDetector()); } /** * <pre> * 獲取文件的編碼 * @param file * File對象實例 * @param languageHint * 語言提示區域代碼 @see #nsPSMDetector ,取值以下: * 1 : Japanese * 2 : Chinese * 3 : Simplified Chinese * 4 : Traditional Chinese * 5 : Korean * 6 : Dont know(default) * </pre> * * @return 文件編碼,eg:UTF-8,GBK,GB2312形式(不肯定的時候,返回可能的字符編碼序列);若無,則返回null * @throws FileNotFoundException * @throws IOException */ public String guessFileEncoding(File file, int languageHint) throws FileNotFoundException, IOException { return guessFileEncoding(file, new nsDetector(languageHint)); } /** * 獲取文件的編碼 * * @param file * @param det * @return * @throws FileNotFoundException * @throws IOException */ private String guessFileEncoding(File file, nsDetector det) throws FileNotFoundException, IOException { // Set an observer... // The Notify() will be called when a matching charset is found. det.Init(new nsICharsetDetectionObserver() { public void Notify(String charset) { encoding = charset; found = true; } }); BufferedInputStream imp = new BufferedInputStream(new FileInputStream(file)); byte[] buf = new byte[1024]; int len; boolean done = false; boolean isAscii = false; while ((len = imp.read(buf, 0, buf.length)) != -1) { // Check if the stream is only ascii. isAscii = det.isAscii(buf, len); if (isAscii) { break; } // DoIt if non-ascii and not done yet. done = det.DoIt(buf, len, false); if (done) { break; } } imp.close(); det.DataEnd(); if (isAscii) { encoding = "ASCII"; found = true; } if (!found) { String[] prob = det.getProbableCharsets(); //這裏將可能的字符集組合起來返回 for (int i = 0; i < prob.length; i++) { if (i == 0) { encoding = prob[i]; } else { encoding += "," + prob[i]; } } if (prob.length > 0) { // 在沒有發現狀況下,也能夠只取第一個可能的編碼,這裏返回的是一個可能的序列 return encoding; } else { return null; } } return encoding; } }
參考文章:html