是否有過這樣的經歷:寫了半天代碼,才發現用的居然是GBK編碼,然後放到UTF-8環境下,發現中文全部變成亂碼了……
下面這個程序,只要輸入src目錄的位置,就能瞬間把其中的文件轉換成UTF-8。
package tools;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.List;

import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;

/**********************************************
 * Maven
 * <!-- used for file encoding detection -->
 * <dependency>
 *   <groupId>net.sourceforge.jchardet</groupId>
 *   <artifactId>jchardet</artifactId>
 *   <version>1.0</version>
 * </dependency>
 *
 *********************************************/

/**
 * Detects the character set of a file with JCharDet and converts a source
 * tree from GBK to UTF-8.
 *
 * <p>JCharDet is a Java port of Mozilla's automatic charset detection
 * algorithm; its home page is http://jchardet.sourceforge.net/
 *
 * <p>Detection state is kept local to each call, so the result for one file
 * can never leak into the result for the next one.
 */
public class FileCharsetDetector {

    /** Directory converted by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_SOURCE_DIR =
            "C:\\Users\\chenhuan001\\workspace\\CrawlSinaBySelenium\\src";

    /**
     * Converts every non-UTF-8 file below a source directory to UTF-8.
     *
     * @param argv optional; {@code argv[0]} is the directory to convert. When
     *             absent, the built-in default directory is used.
     * @throws Exception if a file cannot be read or written
     */
    public static void main(String[] argv) throws Exception {
        String filepath = (argv != null && argv.length > 0) ? argv[0] : DEFAULT_SOURCE_DIR;
        GBKFold_to_UTF8(filepath);
    }

    /**
     * Walks all files reported by {@code FileUtil.getAllFileNameInFold} for the
     * given directory. Files detected as UTF-8 are only printed; files whose
     * encoding cannot be determined are skipped with a warning; every other
     * file is read as GBK and written back as UTF-8.
     *
     * <p>NOTE(review): {@code FileUtil} is a project helper that is not part of
     * this file; the third argument of {@code writeLog} is presumably an
     * "append" flag - confirm against its definition.
     *
     * @param filepath root directory whose files should be converted
     * @throws FileNotFoundException if a listed file cannot be opened
     * @throws IOException           if reading a file fails
     */
    public static void GBKFold_to_UTF8(String filepath) throws FileNotFoundException, IOException {
        List<String> files = FileUtil.getAllFileNameInFold(filepath);
        for (String file : files) {
            String detected = guestFileEncoding(new File(file));
            if (detected == null) {
                // Re-encoding a file of unknown encoding as GBK could corrupt it,
                // so leave it untouched and report it instead.
                System.err.println("Skipped (encoding not detected): " + file);
            } else if ("UTF-8".equals(detected)) {
                System.out.println(file);
            } else {
                String file_content = FileUtil.readLogByStringAndEncode(file, "gbk");
                FileUtil.writeLog(file, file_content, false, "utf-8");
            }
        }
    }

    /**
     * Detects the encoding of a file using a detector with default settings.
     *
     * @param file the file to inspect
     * @return the encoding name, or {@code null} if none could be determined
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    static public String guestFileEncoding(File file) throws FileNotFoundException, IOException {
        return geestFileEncoding(file, new nsDetector());
    }

    /**
     * Detects the encoding of a file using a language hint.
     *
     * @param file         the file to inspect
     * @param languageHint language hint code, e.g. 1: Japanese; 2: Chinese;
     *                     3: Simplified Chinese; 4: Traditional Chinese;
     *                     5: Korean; 6: don't know (default)
     * @return the encoding name, e.g. UTF-8, GBK, GB2312, or {@code null} if
     *         none could be determined
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    public String guestFileEncoding(File file, int languageHint) throws FileNotFoundException, IOException {
        return geestFileEncoding(file, new nsDetector(languageHint));
    }

    /**
     * Detects the encoding of the file at the given path.
     *
     * @param path path of the file to inspect
     * @return the encoding name, e.g. UTF-8, GBK, GB2312, or {@code null} if
     *         none could be determined
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    public String guestFileEncoding(String path) throws FileNotFoundException, IOException {
        return guestFileEncoding(new File(path));
    }

    /**
     * Detects the encoding of the file at the given path using a language hint.
     *
     * @param path         path of the file to inspect
     * @param languageHint language hint code, e.g. 1: Japanese; 2: Chinese;
     *                     3: Simplified Chinese; 4: Traditional Chinese;
     *                     5: Korean; 6: don't know (default)
     * @return the encoding name, or {@code null} if none could be determined
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    public String guestFileEncoding(String path, int languageHint) throws FileNotFoundException, IOException {
        return guestFileEncoding(new File(path), languageHint);
    }

    /**
     * Feeds the file to the detector in 1 KiB chunks and reports the result.
     *
     * <p>The result is, in order of precedence: {@code "ASCII"} when every
     * chunk was pure ASCII; the charset passed to the observer's
     * {@code Notify} callback; otherwise the first entry of
     * {@code getProbableCharsets()}.
     *
     * @param file the file to inspect
     * @param det  the detector to use; it is initialised by this method
     * @return the encoding name, or {@code null} if the detector offers no
     *         candidate at all
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException           if reading the file fails
     */
    private static String geestFileEncoding(File file, nsDetector det)
            throws FileNotFoundException, IOException {
        // Per-call result holder (an array so the anonymous observer can write
        // to it). Shared static state would carry one file's result over to
        // the next file.
        final String[] notified = new String[1];

        // Set an observer: Notify() is called when a matching charset is found.
        det.Init(new nsICharsetDetectionObserver() {
            public void Notify(String charset) {
                notified[0] = charset;
            }
        });

        boolean isAscii = true;
        BufferedInputStream in = new BufferedInputStream(new FileInputStream(file));
        try {
            byte[] buf = new byte[1024];
            boolean done = false;
            int len;
            while ((len = in.read(buf, 0, buf.length)) != -1) {
                // Check whether the stream is still ASCII-only.
                if (isAscii) {
                    isAscii = det.isAscii(buf, len);
                }
                // Run the detector once non-ASCII data was seen and it is not done yet.
                if (!isAscii && !done) {
                    done = det.DoIt(buf, len, false);
                }
            }
            det.DataEnd();
        } finally {
            // Always release the file handle; an open handle can prevent the
            // caller from rewriting or deleting the file afterwards.
            in.close();
        }

        if (isAscii) {
            return "ASCII";
        }
        if (notified[0] != null) {
            return notified[0];
        }

        // Nothing was matched with certainty: fall back to the first probable charset.
        // NOTE(review): the detector may report a placeholder such as "nomatch"
        // here rather than an empty array - confirm against the jchardet version in use.
        String[] prob = det.getProbableCharsets();
        if (prob != null && prob.length > 0) {
            return prob[0];
        }
        return null;
    }
}
另外還要導入三個包:
http://blog.csdn.net/luojia_wang/article/details/spa
這裏面有。
把上一篇文章中的FileUtil加進去。另外好像還用到了log4j.jar,應該也要導入一下。