Java 判斷 byte[] 數組的原字符串編碼類型

時間 2019-12-06

原文鏈接

最近作 HTML 腳本導入庫時，讀取時總會有亂碼的狀況。找到一些方法，能將亂碼轉爲正確的字符串輸出。

參考原文：

https://blog.csdn.net/ajaxhu/article/details/12446917

<!-- juniversalchardet: encoding-detection library; GetByteEncode uses its UniversalDetector class -->
<dependency>
    <groupId>com.googlecode.juniversalchardet</groupId>
    <artifactId>juniversalchardet</artifactId>
    <version>1.0.3</version>
</dependency>

/**
 * Demo test: reads an HTML file as raw bytes, detects its character encoding,
 * and prints the content decoded with and without the detected charset.
 */
@Slf4j
public class Test {
    /**
     * Reads a hard-coded local file, detects its encoding via
     * {@code GetByteEncode.getEncoding}, and prints the text decoded first with
     * the platform default charset and then with the detected charset.
     *
     * @throws IOException if the file cannot be read or the detected charset
     *                     name is not supported by this JVM
     */
    // NOTE(review): this class shares its simple name with the @Test annotation
    // below, which likely clashes at compile time - confirm the imports and
    // consider renaming the class (e.g. EncodeTest).
    @Test
    public void encode() throws IOException {
        String file = "C:\\Users\\Victory-x\\Desktop\\code.html";
        byte[] bytes = file2byte(file);
        // Detect the encoding of the raw bytes.
        String encoding = GetByteEncode.getEncoding(bytes);
        System.out.println("字符編碼是：" + encoding);
        // No charset argument: decodes with the platform default charset.
        System.out.println("原亂碼輸出：" + new String(bytes));
        System.out.println("//***********************//");
        // Decode again using the detected charset.
        System.out.println("根據文件編碼輸出：" + new String(bytes, encoding));
    }

    /**
     * Reads the whole file at {@code filePath} into a byte array.
     *
     * <p>The streams are closed via try-with-resources even when reading fails,
     * and a missing file is reported to the caller as an exception instead of
     * being swallowed and turned into a {@code null} result.
     *
     * @param filePath path of the file to read
     * @return the complete file content; never {@code null}
     * @throws IOException if the file does not exist, cannot be opened, or an
     *                     error occurs while reading
     */
    public static byte[] file2byte(String filePath) throws IOException {
        try (FileInputStream fis = new FileInputStream(new File(filePath));
             ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
            byte[] chunk = new byte[1024];
            int n;
            while ((n = fis.read(chunk)) != -1) {
                bos.write(chunk, 0, n);
            }
            return bos.toByteArray();
        }
    }
}

GetByteEncode：

import lombok.extern.slf4j.Slf4j;
import org.mozilla.universalchardet.UniversalDetector;

/**
 * 獲取文件編碼類型
 *
 * @author XSL
 * @version Id: GetByteEncode.java, V 1.0 2018/11/30 10:03 XSL Exp $$
 */
@Slf4j
public class GetByteEncode {

    /**
     * 獲取文件編碼類型
     *
     * @param bytes 文件bytes數組
     * @return      編碼類型
     */
    public static String getEncoding(byte[] bytes) {
        String defaultEncoding = "UTF-8";
        UniversalDetector detector = new UniversalDetector(null);
        detector.handleData(bytes, 0, bytes.length);
        detector.dataEnd();
        String encoding = detector.getDetectedCharset();
        detector.reset();
        log.info("字符編碼是：{}", encoding);
        if (encoding == null) {
            encoding = defaultEncoding;
        }
        return encoding;
    }
}