最近在將 HTML 腳本導入庫時,讀取內容總會出現亂碼。這裏記錄找到的一些方法,可以將亂碼轉爲正確的字符串輸出。
參考原文:
https://blog.csdn.net/ajaxhu/article/details/12446917
<!--GetByteEncode--> <dependency> <groupId>com.googlecode.juniversalchardet</groupId> <artifactId>juniversalchardet</artifactId> <version>1.0.3</version> </dependency>
@Slf4j public class Test { @Test public void encode() throws IOException { String file = "C:\\Users\\Victory-x\\Desktop\\code.html"; byte[] bytes = file2byte(file); //編碼判斷 String encoding = GetByteEncode.getEncoding(bytes); System.out.println("字符編碼是:" + encoding); System.out.println("原亂碼輸出:" + new String(bytes)); System.out.println("//***********************//"); System.out.println("根據文件編碼輸出:" + new String(bytes, encoding)); } public static byte[] file2byte(String filePath) throws IOException { byte[] buffer = null; try { File file = new File(filePath); FileInputStream fis = new FileInputStream(file); ByteArrayOutputStream bos = new ByteArrayOutputStream(); byte[] b = new byte[1024]; int n; while ((n = fis.read(b)) != -1) { bos.write(b, 0, n); } fis.close(); bos.close(); buffer = bos.toByteArray(); } catch (FileNotFoundException e) { e.printStackTrace(); } return buffer; } }
GetByteEncode:
import lombok.extern.slf4j.Slf4j;
import org.mozilla.universalchardet.UniversalDetector;

/**
 * Utility for guessing the character encoding of raw file content.
 *
 * @author XSL
 * @version Id: GetByteEncode.java, V 1.0 2018/11/30 10:03 XSL Exp $$
 */
@Slf4j
public class GetByteEncode {

    /**
     * Guesses the character encoding of the given bytes.
     *
     * <p>The whole array is handed to a {@link UniversalDetector}. The detector's result is
     * logged as-is (it may be {@code null}), and {@code "UTF-8"} is returned when the detector
     * reports no charset.
     *
     * @param bytes the file content as a byte array
     * @return the detected charset name, or {@code "UTF-8"} if none was detected
     */
    public static String getEncoding(byte[] bytes) {
        final UniversalDetector charsetDetector = new UniversalDetector(null);
        charsetDetector.handleData(bytes, 0, bytes.length);
        charsetDetector.dataEnd();

        final String detected = charsetDetector.getDetectedCharset();
        charsetDetector.reset();

        // Logged before the fallback is applied, so the raw detector result is what gets recorded.
        log.info("字符編碼是:{}", detected);
        return detected != null ? detected : "UTF-8";
    }
}
其它亂碼轉換方法,原文:
http://daikainan.iteye.com/blog/1439322