[TOC]
最近研究JDK源碼,發現IO體系中的 BufferedInputStream 頗有意思,平時對這個類有很多誤解,因而寫下這篇博客,以供學習
/**
 * This class extends FilterInputStream, which applies the decorator design
 * pattern; the source of FilterInputStream itself is very simple.
 */
public class BufferedInputStream extends FilterInputStream {

    // Default size of the buf[] cache array
    private static int DEFAULT_BUFFER_SIZE = 8192;

    /**
     * The maximum size of array to allocate.
     * Some VMs reserve some header words in an array.
     * Attempts to allocate larger arrays may result in
     * OutOfMemoryError: Requested array size exceeds VM limit
     *
     * Maximum size of the buf[] cache array. Why subtract 8? Because some
     * JVMs store header data at the start of an array.
     */
    private static int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;

    /**
     * The internal buffer array where the data is stored. When necessary,
     * it may be replaced by another array of
     * a different size.
     *
     * The cache array: the core member variable. Every operation in this
     * class revolves around buf[].
     */
    protected volatile byte buf[];

    /**
     * Atomic updater to provide compareAndSet for buf. This is
     * necessary because closes can be asynchronous. We use nullness
     * of buf[] as primary indicator that this stream is closed. (The
     * "in" field is also nulled out on close.)
     *
     * Multithreading-related: used to swap buf atomically via compareAndSet
     * (see fill() and close()).
     */
    private static final
        AtomicReferenceFieldUpdater<BufferedInputStream, byte[]> bufUpdater =
        AtomicReferenceFieldUpdater.newUpdater
        (BufferedInputStream.class, byte[].class, "buf");

    /**
     * The index one greater than the index of the last valid byte in
     * the buffer.
     * This value is always
     * in the range <code>0</code> through <code>buf.length</code>;
     * elements <code>buf[0]</code> through <code>buf[count-1]
     * </code>contain buffered input data obtained
     * from the underlying input stream.
     *
     * The number of valid bytes currently held in buf[].
     */
    protected int count;

    /**
     * The current position in the buffer. This is the index of the next
     * character to be read from the <code>buf</code> array.
     * <p>
     * This value is always in the range <code>0</code>
     * through <code>count</code>. If it is less
     * than <code>count</code>, then <code>buf[pos]</code>
     * is the next byte to be supplied as input;
     * if it is equal to <code>count</code>, then
     * the next <code>read</code> or <code>skip</code>
     * operation will require more bytes to be
     * read from the contained input stream.
     * <p>
     * The current read position inside buf[].
     *
     * @see java.io.BufferedInputStream#buf
     */
    protected int pos;

    /**
     * The value of the <code>pos</code> field at the time the last
     * <code>mark</code> method was called.
     * <p>
     * This value is always
     * in the range <code>-1</code> through <code>pos</code>.
     * If there is no marked position in the input
     * stream, this field is <code>-1</code>. If
     * there is a marked position in the input
     * stream, then <code>buf[markpos]</code>
     * is the first byte to be supplied as input
     * after a <code>reset</code> operation. If
     * <code>markpos</code> is not <code>-1</code>,
     * then all bytes from positions <code>buf[markpos]</code>
     * through <code>buf[pos-1]</code> must remain
     * in the buffer array (though they may be
     * moved to another place in the buffer array,
     * with suitable adjustments to the values
     * of <code>count</code>, <code>pos</code>,
     * and <code>markpos</code>); they may not
     * be discarded unless and until the difference
     * between <code>pos</code> and <code>markpos</code>
     * exceeds <code>marklimit</code>.
     * <p>
     * The position recorded by the most recent call to mark().
     *
     * @see java.io.BufferedInputStream#mark(int)
     * @see java.io.BufferedInputStream#pos
     */
    protected int markpos = -1;

    /**
     * The maximum read ahead allowed after a call to the
     * <code>mark</code> method before subsequent calls to the
     * <code>reset</code> method fail.
     * Whenever the difference between <code>pos</code>
     * and <code>markpos</code> exceeds <code>marklimit</code>,
     * then the mark may be dropped by setting
     * <code>markpos</code> to <code>-1</code>.
     * <p>
     * This field is assigned only in mark(int readlimit). For example, after
     * mark(1024), once more than 1024 bytes have been read past the mark, the
     * mark counts as invalid and an implementation may discard it. What
     * actually happens is implementation-specific; in BufferedInputStream,
     * fill() may drop the mark by setting markpos back to -1 (see its
     * <code>buffer.length >= marklimit</code> branch).
     *
     * @see java.io.BufferedInputStream#mark(int)
     * @see java.io.BufferedInputStream#reset()
     */
    protected int marklimit;

    /**
     * Check to make sure that underlying input stream has not been
     * nulled out due to close; if not return it;
     *
     * Returns the real (underlying) input stream.
     */
    private InputStream getInIfOpen() throws IOException {
        InputStream input = in;
        if (input == null)
            throw new IOException("Stream closed");
        return input;
    }

    /**
     * Check to make sure that buffer has not been nulled out due to
     * close; if not return it;
     *
     * Returns the cache array.
     */
    private byte[] getBufIfOpen() throws IOException {
        byte[] buffer = buf;
        if (buffer == null)
            throw new IOException("Stream closed");
        return buffer;
    }

    /**
     * Creates a <code>BufferedInputStream</code>
     * and saves its argument, the input stream
     * <code>in</code>, for later use. An internal
     * buffer array is created and stored in <code>buf</code>.
     * <p>
     * The default cache array size is 8 KB (DEFAULT_BUFFER_SIZE = 8192).
     *
     * @param   in   the underlying input stream.
     */
    public BufferedInputStream(InputStream in) {
        this(in, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Creates a <code>BufferedInputStream</code>
     * with the specified buffer size,
     * and saves its argument, the input stream
     * <code>in</code>, for later use.  An internal
     * buffer array of length  <code>size</code>
     * is created and stored in <code>buf</code>.
     *
     * @param   in     the underlying input stream.
     * @param   size   the buffer size.
     * @exception IllegalArgumentException if {@code size <= 0}.
     */
    public BufferedInputStream(InputStream in, int size) {
        super(in);
        if (size <= 0) {
            throw new IllegalArgumentException("Buffer size <= 0");
        }
        buf = new byte[size];
    }

    /**
     * Fills the buffer with more data, taking into account
     * shuffling and other tricks for dealing with marks.
     * Assumes that it is being called by a synchronized method.
     * This method also assumes that all data has already been read in,
     * hence pos > count.
     *
     * Purpose: make room, either by discarding data held in buf[] or by
     * growing buf[], and then store new data from the input stream in buf[].
     */
    private void fill() throws IOException {
        byte[] buffer = getBufIfOpen();
        if (markpos < 0)
            // No mark is set, so the contents of buf[] are simply discarded
            pos = 0;            /* no mark: throw away the buffer */
        else if (pos >= buffer.length)  /* no room left in buffer */
            if (markpos > 0) {  /* can throw away early part of the buffer */
                int sz = pos - markpos;
                System.arraycopy(buffer, markpos, buffer, 0, sz);
                pos = sz;
                markpos = 0;
                // !!! In every branch below this one, markpos is equal to 0
            } else if (buffer.length >= marklimit) {
                markpos = -1;   /* buffer got too big, invalidate mark */
                pos = 0;        /* drop buffer contents */
            } else if (buffer.length >= MAX_BUFFER_SIZE) {
                throw new OutOfMemoryError("Required array size too large");
            } else {            /* grow buffer */
                int nsz = (pos <= MAX_BUFFER_SIZE - pos) ?
                        pos * 2 : MAX_BUFFER_SIZE;
                if (nsz > marklimit)
                    // Cap the new buf[] length at marklimit
                    nsz = marklimit;
                byte nbuf[] = new byte[nsz];
                System.arraycopy(buffer, 0, nbuf, 0, pos);
                if (!bufUpdater.compareAndSet(this, buffer, nbuf)) {
                    // Can't replace buf if there was an async close.
                    // Note: This would need to be changed if fill()
                    // is ever made accessible to multiple threads.
                    // But for now, the only way CAS can fail is via close.
                    // assert buf == null;
                    throw new IOException("Stream closed");
                }
                buffer = nbuf;
            }
        count = pos;
        // Read data from the input stream into buf[]
        int n = getInIfOpen().read(buffer, pos, buffer.length - pos);
        if (n > 0)
            count = n + pos;
    }

    /**
     * See
     * the general contract of the <code>read</code>
     * method of <code>InputStream</code>.
     *
     * @return     the next byte of data, or <code>-1</code> if the end of the
     *             stream is reached.
     * @exception  IOException  if this input stream has been closed by
     *                          invoking its {@link #close()} method,
     *                          or an I/O error occurs.
     * @see        java.io.FilterInputStream#in
     */
    public synchronized int read() throws IOException {
        // Every buffered byte has been consumed, so fill() is needed
        if (pos >= count) {
            fill();
            // fill() did not obtain any data
            if (pos >= count)
                return -1;
        }
        return getBufIfOpen()[pos++] & 0xff;
    }

    /**
     * Read characters into a portion of an array, reading from the underlying
     * stream at most once if necessary.
     */
    private int read1(byte[] b, int off, int len) throws IOException {
        int avail = count - pos;
        if (avail <= 0) {
            /* If the requested length is at least as large as the buffer, and
               if there is no mark/reset activity, do not bother to copy the
               bytes into the local buffer.  In this way buffered streams will
               cascade harmlessly. */
            // !!! The code at this point is very important
            // !!! The code at this point is very important
            // !!! The code at this point is very important
            /*
             * When the requested length len is at least the length of the
             * core cache array buf[] and markpos < 0, the data is read
             * straight from the underlying stream into b without passing
             * through buf[].
             */
            if (len >= getBufIfOpen().length && markpos < 0) {
                return getInIfOpen().read(b, off, len);
            }
            fill();
            avail = count - pos;
            if (avail <= 0) return -1;
        }
        int cnt = (avail < len) ? avail : len;
        System.arraycopy(getBufIfOpen(), pos, b, off, cnt);
        pos += cnt;
        return cnt;
    }

    /**
     * Reads bytes from this byte-input stream into the specified byte array,
     * starting at the given offset.
     *
     * <p> This method implements the general contract of the corresponding
     * <code>{@link InputStream#read(byte[], int, int) read}</code> method of
     * the <code>{@link InputStream}</code> class.  As an additional
     * convenience, it attempts to read as many bytes as possible by repeatedly
     * invoking the <code>read</code> method of the underlying stream.  This
     * iterated <code>read</code> continues until one of the following
     * conditions becomes true: <ul>
     *
     *   <li> The specified number of bytes have been read,
     *
     *   <li> The <code>read</code> method of the underlying stream returns
     *   <code>-1</code>, indicating end-of-file, or
     *
     *   <li> The <code>available</code> method of the underlying stream
     *   returns zero, indicating that further input requests would block.
     *
     * </ul> If the first <code>read</code> on the underlying stream returns
     * <code>-1</code> to indicate end-of-file then this method returns
     * <code>-1</code>.  Otherwise this method returns the number of bytes
     * actually read.
     *
     * <p> Subclasses of this class are encouraged, but not required, to
     * attempt to read as many bytes as possible in the same fashion.
     *
     * <p> This method mainly delegates to read1(byte[] b, int off, int len).
     *
     * @param      b     destination buffer.
     * @param      off   offset at which to start storing bytes.
     * @param      len   maximum number of bytes to read.
     * @return     the number of bytes read, or <code>-1</code> if the end of
     *             the stream has been reached.
     * @exception  IOException  if this input stream has been closed by
     *                          invoking its {@link #close()} method,
     *                          or an I/O error occurs.
     */
    public synchronized int read(byte b[], int off, int len)
        throws IOException
    {
        getBufIfOpen(); // Check for closed stream
        if ((off | len | (off + len) | (b.length - (off + len))) < 0) {
            throw new IndexOutOfBoundsException();
        } else if (len == 0) {
            return 0;
        }

        int n = 0;
        for (;;) {
            int nread = read1(b, off + n, len - n);
            if (nread <= 0)
                return (n == 0) ? nread : n;
            n += nread;
            if (n >= len)
                return n;
            // if not closed but no bytes available, return
            InputStream input = in;
            if (input != null && input.available() <= 0)
                return n;
        }
    }

    /**
     * See the general contract of the <code>skip</code>
     * method of <code>InputStream</code>.
     *
     * <p> Skips the given number of bytes in the stream. (Author's note:
     * this method seems to be of limited use; so far I have never needed
     * skip() myself.)
     *
     * @exception  IOException  if the stream does not support seek,
     *                          or if this input stream has been closed by
     *                          invoking its {@link #close()} method, or an
     *                          I/O error occurs.
     */
    public synchronized long skip(long n) throws IOException {
        getBufIfOpen(); // Check for closed stream
        if (n <= 0) {
            return 0;
        }
        long avail = count - pos;

        if (avail <= 0) {
            // If no mark position set then don't keep in buffer
            if (markpos <0)
                return getInIfOpen().skip(n);

            // Fill in buffer to save bytes for reset
            fill();
            avail = count - pos;
            if (avail <= 0)
                return 0;
        }

        long skipped = (avail < n) ? avail : n;
        pos += skipped;
        return skipped;
    }

    /**
     * Returns an estimate of the number of bytes that can be read (or
     * skipped over) from this input stream without blocking by the next
     * invocation of a method for this input stream. The next invocation might be
     * the same thread or another thread.  A single read or skip of this
     * many bytes will not block, but may read or skip fewer bytes.
     * <p>
     * This method returns the sum of the number of bytes remaining to be read in
     * the buffer (<code>count&nbsp;- pos</code>) and the result of calling the
     * {@link java.io.FilterInputStream#in in}.available().
     * <p>
     * In short: the bytes remaining in buf[] plus the available() count of
     * the underlying input stream.
     *
     * @return     an estimate of the number of bytes that can be read (or skipped
     *             over) from this input stream without blocking.
     * @exception  IOException  if this input stream has been closed by
     *                          invoking its {@link #close()} method,
     *                          or an I/O error occurs.
     */
    public synchronized int available() throws IOException {
        int n = count - pos;
        int avail = getInIfOpen().available();
        return n > (Integer.MAX_VALUE - avail)
                    ? Integer.MAX_VALUE
                    : n + avail;
    }

    /**
     * See the general contract of the <code>mark</code>
     * method of <code>InputStream</code>.
     *
     * <p> Marks the current position. marklimit is assigned only here.
     * readlimit is the maximum number of bytes that may be read after
     * mark() before the mark may become invalid; beyond that, fill() can
     * treat the mark as invalid and set markpos = -1 and pos = 0.
     *
     * @param   readlimit   the maximum limit of bytes that can be read before
     *                      the mark position becomes invalid.
     * @see     java.io.BufferedInputStream#reset()
     */
    public synchronized void mark(int readlimit) {
        marklimit = readlimit;
        markpos = pos;
    }

    /**
     * See the general contract of the <code>reset</code>
     * method of <code>InputStream</code>.
     * <p>
     * If <code>markpos</code> is <code>-1</code>
     * (no mark has been set or the mark has been
     * invalidated), an <code>IOException</code>
     * is thrown. Otherwise, <code>pos</code> is
     * set equal to <code>markpos</code>.
     *
     * @exception  IOException  if this stream has not been marked or,
     *                  if the mark has been invalidated, or the stream
     *                  has been closed by invoking its {@link #close()}
     *                  method, or an I/O error occurs.
     * @see        java.io.BufferedInputStream#mark(int)
     */
    public synchronized void reset() throws IOException {
        getBufIfOpen(); // Cause exception if closed
        if (markpos < 0)
            throw new IOException("Resetting to invalid mark");
        pos = markpos;
    }

    /**
     * Tests if this input stream supports the <code>mark</code>
     * and <code>reset</code> methods. The <code>markSupported</code>
     * method of <code>BufferedInputStream</code> returns
     * <code>true</code>.
     *
     * @return  a <code>boolean</code> indicating if this stream type supports
     *          the <code>mark</code> and <code>reset</code> methods.
     * @see     java.io.InputStream#mark(int)
     * @see     java.io.InputStream#reset()
     */
    public boolean markSupported() {
        return true;
    }

    /**
     * Closes this input stream and releases any system resources
     * associated with the stream.
     * Once the stream has been closed, further read(), available(), reset(),
     * or skip() invocations will throw an IOException.
     * Closing a previously closed stream has no effect.
     *
     * @exception  IOException  if an I/O error occurs.
     */
    public void close() throws IOException {
        byte[] buffer;
        while ( (buffer = buf) != null) {
            if (bufUpdater.compareAndSet(this, buffer, null)) {
                InputStream input = in;
                in = null;
                if (input != null)
                    input.close();
                return;
            }
            // Else retry in case a new buf was CASed in fill()
        }
    }
}
網上不少博客,說 BufferedInputStream 頗有用,能夠一次性從IO中讀入不少數據,而後緩存在buf[]中,這樣就減小了IO消耗。不少博主甚至給出了一些代碼實操,證實 BufferedInputStream 確實能夠提升效率。這自己沒有任何問題,可是經我深刻源碼研究事後,卻發現實際場景中,該類使用頻率不多,根本不須要 BufferedInputStream
我將結合代碼,進行更有力的說明:
// file文件大小1個G private static String file = "D:\\StudySoftware\\VMware_virtualbox\\Data_vmware\\VMwareMachine\\kafka_single\\kafka-single-103-da5cf665.vmem"; private static void file() throws IOException{ long beginTime = System.currentTimeMillis(); FileInputStream input = new FileInputStream(file); byte[] bytes = new byte[1024 * 1]; int read = 0; while ((read = input.read(bytes, 0, bytes.length)) != -1) { // 不執行任何操做,僅僅讀取文件 } long endTime = System.currentTimeMillis(); System.out.println("file: 耗費時間:" + (endTime - beginTime)); } private static void bufferd() throws IOException{ long beginTime = System.currentTimeMillis(); FileInputStream input = new FileInputStream(file); BufferedInputStream bufferedInput = new BufferedInputStream(input); byte[] bytes = new byte[1024 * 1]; int read = 0; while ((read = bufferedInput.read(bytes, 0, bytes.length)) != -1) { //不執行任何操做,僅僅讀取文件 } long endTime = System.currentTimeMillis(); System.out.println("buffered: 耗費時間:" + (endTime - beginTime)); }
注意:
代碼操做的時候,兩個方法不可以對同一個文件進行操做,防止JVM會自動優化,由於第一個方法讀完整個文件,第二個方法再讀的時候,JVM可能保存了部分信息,從而形成測試數據的不許確。而且爲了最大程度保證測試數據的準確性,一次JVM啓動,只測試一個方法
結果:
①當 byte[] bytes = new byte[1024 * 1]; 數組大小爲1024
buffered: 耗費時間:855 file: 耗費時間:3073
②當 byte[] bytes = new byte[1024 * 2]; 數組大小爲2048
buffered: 耗費時間:813 file: 耗費時間:1909
③當 byte[] bytes = new byte[1024 * 3]; 數組大小爲3072
buffered: 耗費時間:1304 file: 耗費時間:1476
④當 byte[] bytes = new byte[1024 * 4]; 數組大小爲4096
buffered: 耗費時間:844 file: 耗費時間:1287
⑤當 byte[] bytes = new byte[1024 * 5]; 數組大小爲5120
buffered: 耗費時間:1343 file: 耗費時間:1061
⑥當 byte[] bytes = new byte[1024 * 6]; 數組大小爲6144
buffered: 耗費時間:1280 file: 耗費時間:985
⑦當 byte[] bytes = new byte[1024 * 7]; 數組大小爲7168
buffered: 耗費時間:1443 file: 耗費時間:851
⑧當 byte[] bytes = new byte[1024 * 8]; 數組大小爲8192
buffered: 耗費時間:774 file: 耗費時間:739
⑨當 byte[] bytes = new byte[1024 * 9]; 數組大小爲9216
buffered: 耗費時間:734 file: 耗費時間:749
⑩當 byte[] bytes = new byte[1024 * 10]; 數組大小爲10240
buffered: 耗費時間:739 file: 耗費時間:697
... ... ...
咱們能夠得出如下重要結論:
<font color="red">當bytes比較小時,使用 BufferedInputStream 讀取文件確實要快很多,但是當bytes逐步增大,尤爲是達到8kb的時候,咱們會發現 BufferedInputStream 和 FileInputStream 讀取文件速度差很少了,沒有明顯差別</font>
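咱們深刻源碼,便可發現:在 read1() 方法中,當 len >= buf.length 而且 markpos < 0 時,會直接調用底層輸入流的 read(b, off, len),數據根本不會通過buf[]緩存數組。
所以當咱們把 while ((read = input.read(bytes, 0, bytes.length)) != -1) 中的bytes增大到不小於buf[]的長度(默認8192)時,BufferedInputStream 沒有任何做用(<font color="red">除非有mark、reset需求</font>)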
有的小夥伴,確定會說,那我將 BufferedInputStream 中的buf[]大小提升不就好了嗎?
能夠是能夠,可是我將 while ((read = input.read(bytes, 0, bytes.length)) != -1) 中的bytes大小增大不就好了? 說到底都是字節數組,一個是在 BufferedInputStream 外面,一個是在 BufferedInputStream 內部,而如今咱們進行流讀取的時候,不少時候是不須要mark、reset操做的,而且咱們設置外部bytes大小一般會比較大,這個時候,徹底能夠不使用 BufferedInputStream
①第一種使用場景,就是當咱們須要mark、reset特性時。不過要特別注意,mark、reset的使用,裏面涉及到不少東西,特別是當 BufferedInputStream 執行fill()操做時
/**
 * Demonstrates how mark()/reset() interact with fill() when the internal
 * buffer is small: the bytes 0..20 are read one at a time through a
 * BufferedInputStream whose buffer holds 5 bytes. mark(2) is called when
 * the byte value 4 is read, and reset() is called on the tenth read
 * (i == 9). Every byte read is printed with "%d" and no separator.
 *
 * An IOException, for example from reset() when the mark is no longer
 * valid, is caught and its stack trace printed.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final byte[] src = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20};
    // try-with-resources closes both streams on every exit path;
    // previously neither stream was closed.
    try (ByteArrayInputStream bis = new ByteArrayInputStream(src);
         BufferedInputStream bufis = new BufferedInputStream(bis, 5)) {
        int data = -1;
        int i = 0;
        while ((data = bufis.read()) != -1) {
            if (data == 4) {
                bufis.mark(2);
            }
            // Change the constant here to explore the other cases
            if (i++ == 9) {
                bufis.reset();
            }
            System.out.printf("%d", data);
        }
    } catch (IOException ioex) {
        ioex.printStackTrace();
    }
}
// Original source: https://blog.csdn.net/qq_26971305/article/details/79472696
有興趣的朋友,能夠debug上面的代碼,並debug下面的幾種狀況,相信你對 BufferedInputStream 會有更深的理解
if(i++ == 5)
if(i++ == 6)
if(i++ == 7)
if(i++ == 8)
if(i++ == 9)
if(i++ == 10)
... ... ... 時間多的朋友,能夠調整 BufferedInputStream 中buf[]的大小長度和if(i++ == xx)判斷語句中的值,來看看 BufferedInputStream 類的執行流程
mark、reset特性不可亂用,否則會拋出異常的
/**
 * Moves the read position back to the marked position.
 * If markpos is negative, an IOException with the message
 * "Resetting to invalid mark" is thrown; otherwise pos is set to markpos.
 *
 * @throws IOException if markpos is negative, or if getBufIfOpen() throws
 */
public synchronized void reset() throws IOException {
    getBufIfOpen(); // Cause exception if closed
    if (markpos < 0)
        throw new IOException("Resetting to invalid mark");
    pos = markpos;
}
②第二種使用場景,當 BufferedInputStream 配合 DataInputStream 和 ObjectInputStream 使用時
// ObjectInputStream layered on a BufferedInputStream over the file.
// The two examples use distinct variable names so that they can live in
// the same scope; both were previously named "input".
ObjectInputStream objectInput = new ObjectInputStream(new BufferedInputStream(new FileInputStream("E:\\obejct.txt")));
// DataInputStream layered on a BufferedInputStream over the same file
DataInputStream dataInput = new DataInputStream(new BufferedInputStream(new FileInputStream("E:\\obejct.txt")));
當 DataXxxStream 從管道讀取字節流的時候,是一個一個字節讀取的,而 ObjectInputStream 底層依賴了 DataXxxStream 對象
<br /> 參考連接: <https://blog.csdn.net/qq_26971305/article/details/79472696> <br />
<div style="font-size:18px;background-color: #CCFFFF"> 做者:<font color="#551A8B" size="5px">一杯熱咖啡AAA</font> <br /> 出處:<a href="https://www.cnblogs.com/AdaiCoffee/">https://www.cnblogs.com/AdaiCoffee/</a> <br /> 本文以學習、研究和分享爲主,歡迎轉載。若是文中有不妥或者錯誤的地方還望指出,以避免誤人子弟。若是你有更好的想法和意見,能夠留言討論,謝謝! </div>
原文出處:https://www.cnblogs.com/AdaiCoffee/p/11369699.html