Java快速讀取大文件

Java快速讀取大文件

最近公司服務器監控系統需要做一個東西來分析Java應用程序的日誌。

第一步探索:

    首先我想到的是使用RandomAccessFile,因為它能夠很方便地去獲取和設置文件指針,下面是我的代碼。

 

package cn.mucang.exception.analyzer;

import cn.mucang.exception.analyzer.analyze.LogAnalyzer;
import cn.mucang.exception.analyzer.config.AnalyseConfig;
import cn.mucang.exception.analyzer.support.DefaultLogLineBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;

/**
 * @author Gao Youbo
 * @since 2015/3/16.
 */
public class LogUtils {

    private static final Logger LOG = LoggerFactory.getLogger(LogUtils.class);

    /**
     * Analyses the log file named by the analyzer's configuration.
     *
     * <p>The file is read line by line through a {@link RandomAccessFile}, starting at the
     * file pointer stored in the configuration (negative pointers are reset to 0). Lines are
     * grouped into log entries: whenever {@code ParseUtils.isNewLine} reports the start of a
     * new entry, the previously accumulated entry is handed to {@code analyzer.analyse}.
     * The last accumulated entry is handed over once the end of the file is reached.
     *
     * @param analyzer the analyzer supplying the configuration and receiving the log entries
     * @throws IOException if the log file does not exist or cannot be read
     * @throws IllegalArgumentException if the configured start pointer lies beyond the file length
     */
    public static void analyse(LogAnalyzer analyzer) throws IOException {
        AnalyseConfig analyseConfig = analyzer.getAnalyseConfig();
        File file = new File(analyseConfig.getPath());
        LOG.info("開始分析日誌文件...{}", file.getAbsolutePath());
        if (!file.exists()) {
            throw new IOException("日誌文件不存在:" + analyseConfig);
        }
        if (analyseConfig.getFilePointer() < 0) {
            analyseConfig.setFilePointer(0);
        }

        // Only the RandomAccessFile is opened. The original also opened an unused
        // FileInputStream/BufferedReader chain that was never closed (a descriptor leak)
        // and read-and-discarded one extra line from it per iteration.
        try (RandomAccessFile logFile = new RandomAccessFile(file, "r")) {
            long length = logFile.length();
            analyzer.getAnalyseConfig().setFileLenght(length); // record the file length in bytes
            if (analyseConfig.getFilePointer() > length) {
                throw new IllegalArgumentException("開始指針位置越界");
            }
            logFile.seek(analyseConfig.getFilePointer());

            String line; // current line content
            int lineNumber = analyseConfig.getLineNumber(); // running line number
            DefaultLogLineBuilder lb = null; // entry currently being accumulated
            long start = System.currentTimeMillis();
            while ((line = logFile.readLine()) != null) {
                lineNumber++;
                long filePointer = logFile.getFilePointer();
                if (ParseUtils.isNewLine(lineNumber, line)) {
                    // A new entry begins: the previous one is complete, hand it over.
                    if (lb != null) {
                        analyzer.analyse(lb.getLogLine());
                    }
                    lb = new DefaultLogLineBuilder();
                }
                if (lb != null) {
                    lb.append(lineNumber, filePointer, line);
                }
                if (lineNumber % 10000 == 0) {
                    // Progress/timing output for every 10000 lines.
                    long end = System.currentTimeMillis();
                    System.out.println(String.format("line=%s, used=%sms", lineNumber, end - start));
                    start = System.currentTimeMillis();
                }
            }
            // End of file: flush the entry that is still pending. Doing this after the loop
            // (instead of comparing the pointer with the length snapshot inside the loop)
            // guarantees the last entry is analysed exactly once even if the file size
            // changed while it was being read.
            if (lb != null) {
                analyzer.analyse(lb.getLogLine());
            }
        }
    }

}

下面看一下性能,分析一萬行日誌平均需要1500毫秒,由於我的日誌分析使用到了正則,開始速度慢我以為是大量的正則運算造成的。

 

 

第二步探索:

我自己寫了一個LogReader,自己控制指針位置。下面看一下代碼:

 

package cn.mucang.exception.analyzer;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * @author Gao Youbo
 * @since 2015-03-25 09:02
 */
public class LogReader implements Closeable {
    /**
     * Size of the log file in bytes, as reported by {@link File#length()} at construction.
     */
    private final long length;
    /**
     * Number of chars consumed so far (read or skipped). This is a char count, not a byte
     * offset; the two only coincide when every char is encoded as a single byte.
     */
    private long filePointer;
    private final FileInputStream inputStream;
    private final InputStreamReader inputStreamReader;
    private final BufferedReader bufferedReader;

    /**
     * Opens the given log file for buffered reading, decoding it with the platform default
     * charset.
     *
     * @param logFile the file to read
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    public LogReader(File logFile) throws FileNotFoundException {
        this.inputStream = new FileInputStream(logFile);
        this.inputStreamReader = new InputStreamReader(inputStream);
        this.bufferedReader = new BufferedReader(inputStreamReader);
        this.length = logFile.length();
    }

    /**
     * Reads a single char and advances the pointer.
     *
     * @return the char read, or -1 at end of stream (the pointer is not advanced in that case)
     * @throws IOException if an I/O error occurs
     */
    public int read() throws IOException {
        int c = bufferedReader.read();
        if (c != -1) {
            filePointer++;
        }
        return c;
    }

    /**
     * Reads the next line. A line ends at {@code '\n'}, {@code '\r'}, {@code "\r\n"} or the
     * end of the stream; the terminator is consumed but not included in the result.
     *
     * @return the line content without its terminator, or {@code null} if the end of the
     *         stream was reached before any char was read
     * @throws IOException if an I/O error occurs
     */
    public String readLine() throws IOException {
        StringBuilder input = new StringBuilder();
        int c = -1;
        boolean eol = false; // end of line
        while (!eol) {
            switch (c = read()) {
                case -1:
                case '\n':
                    eol = true;
                    break;
                case '\r':
                    eol = true;
                    consumeLineFeedIfPresent();
                    break;
                default:
                    input.append((char) c);
                    break;
            }
        }
        if ((c == -1) && (input.length() == 0)) {
            return null;
        }
        return input.toString();
    }

    /**
     * After a {@code '\r'}, swallows a directly following {@code '\n'} so that
     * {@code "\r\n"} counts as one terminator. Any other char is pushed back via
     * mark/reset and is not counted in the pointer.
     */
    private void consumeLineFeedIfPresent() throws IOException {
        bufferedReader.mark(1);
        int next = bufferedReader.read();
        if (next == '\n') {
            filePointer++;
        } else if (next != -1) {
            bufferedReader.reset();
        }
    }

    /**
     * Returns the current read position.
     *
     * @return the number of chars consumed so far, including skipped chars
     * @throws IOException declared for interface compatibility; not thrown by this implementation
     */
    public long getFilePointer() throws IOException {
        return filePointer;
    }

    /**
     * Skips up to {@code n} chars from the current position and advances the pointer by the
     * number of chars actually skipped.
     *
     * @param n the number of chars to skip
     * @return the number of chars actually skipped
     * @throws IOException if an I/O error occurs
     */
    public long skip(long n) throws IOException {
        // Skip on the buffered reader: skipping on the underlying reader would ignore
        // chars that are already sitting in the buffer.
        long skipped = bufferedReader.skip(n);
        filePointer += skipped;
        return skipped;
    }

    /**
     * Returns the size of the log file.
     *
     * @return the file length in bytes, captured when this reader was created
     */
    public long length() {
        return length;
    }

    /**
     * Closes the reader chain and the underlying file stream.
     *
     * @throws IOException if closing fails
     */
    @Override
    public void close() throws IOException {
        // Closing the outermost reader closes the wrapped reader and stream as well.
        bufferedReader.close();
    }
}

package cn.mucang.exception.analyzer;

import cn.mucang.exception.analyzer.analyze.LogAnalyzer;
import cn.mucang.exception.analyzer.config.AnalyseConfig;
import cn.mucang.exception.analyzer.support.DefaultLogLineBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;

/**
 * @author Gao Youbo
 * @since 2015/3/16.
 */
public class LogUtils {

    private static final Logger LOG = LoggerFactory.getLogger(LogUtils.class);


    /**
     * Analyses the log file named by the analyzer's configuration.
     *
     * <p>The file is read line by line through a {@code LogReader}, after skipping to the
     * position stored in the configuration (negative positions are reset to 0). Lines are
     * grouped into log entries: whenever {@code ParseUtils.isNewLine} reports the start of a
     * new entry, the previously accumulated entry is handed to {@code analyzer.analyse}.
     * The last accumulated entry is handed over once the end of the file is reached.
     *
     * @param analyzer the analyzer supplying the configuration and receiving the log entries
     * @throws java.io.IOException if the log file does not exist or cannot be read
     * @throws IllegalArgumentException if the configured start position lies beyond the file length
     */
    public static void analyse(LogAnalyzer analyzer) throws IOException {
        AnalyseConfig analyseConfig = analyzer.getAnalyseConfig();
        File file = new File(analyseConfig.getPath());
        System.out.println(file.getAbsolutePath());
        LOG.info("開始分析日誌文件...{}", file.getAbsolutePath());
        if (!file.exists()) {
            throw new IOException("日誌文件不存在:" + analyseConfig);
        }
        if (analyseConfig.getFilePointer() < 0) {
            analyseConfig.setFilePointer(0);
        }
        try (LogReader logReader = new LogReader(file)) {
            long length = logReader.length();
            analyzer.getAnalyseConfig().setFileLenght(length); // record the file length in bytes
            if (analyseConfig.getFilePointer() > length) {
                throw new IllegalArgumentException("開始指針位置越界");
            }
            logReader.skip(analyseConfig.getFilePointer());

            String line; // current line content
            int lineNumber = analyseConfig.getLineNumber(); // running line number
            DefaultLogLineBuilder lb = null; // entry currently being accumulated
            long start = System.currentTimeMillis();
            while ((line = logReader.readLine()) != null) {
                lineNumber++;
                long filePointer = logReader.getFilePointer();
                if (ParseUtils.isNewLine(lineNumber, line)) {
                    // A new entry begins: the previous one is complete, hand it over.
                    if (lb != null) {
                        analyzer.analyse(lb.getLogLine());
                    }
                    lb = new DefaultLogLineBuilder();
                }
                if (lb != null) {
                    lb.append(lineNumber, filePointer, line);
                }
                if (lineNumber % 10000 == 0) {
                    // Progress/timing output for every 10000 lines.
                    long end = System.currentTimeMillis();
                    System.out.println(String.format("line=%s, used=%s", lineNumber, end - start));
                    start = System.currentTimeMillis();
                }
            }
            // End of file: flush the entry that is still pending. The reader's pointer counts
            // chars while the length is in bytes, so an in-loop "pointer == length" check can
            // miss the end of the file (multi-byte content, no trailing newline, resumed
            // reads) and silently drop the last entry; flushing here does not depend on it.
            if (lb != null) {
                analyzer.analyse(lb.getLogLine());
            }
        }
    }
}
接下來是測試的性能:

 

 

 

日誌解析速度提升了10倍。

相關文章
相關標籤/搜索