系統需要一個導入 Excel 文件的功能。使用 POI 組件的常規方式(usermodel)讀取大文件時,會耗盡內存並拋出 OutOfMemoryError,或者讀取非常慢。
因此寫了一個工具類,使用 POI 的 SAX(事件模型)方式讀取 Excel,速度快很多,內存消耗也可以接受。
測試結果以下:
.xlsx 文件,35M 大小,共 4 個 sheet,
只讀取第一個 sheet,37434 行,54 列
總行數:37434
讀取耗時:39秒
打印耗時:17秒
主要代碼以下:
ExcelUtils.class 主入口
package com.xxx.bi.utils.excel;

import java.util.List;
import java.util.Objects;

import org.apache.commons.lang3.StringUtils;

import com.google.common.collect.Lists;

/**
 * Entry point for reading large {@code .xlsx} files.
 *
 * <p>All work is delegated to {@link LargeExcelFileReadUtil}. Both read methods are
 * best-effort: a failure while reading is printed to standard error and an empty, mutable
 * list is returned instead of propagating the exception.
 */
public class ExcelUtils {

    /** Workbook read by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_DEMO_FILE = "C:/Users/Administrator/Desktop/05-做業調配表 -快遞.xlsx";

    /** Kept public for backward compatibility; every method of this class is static. */
    public ExcelUtils() {
    }

    /**
     * Reads a single row (normally the header) from the sheet handled by
     * {@link LargeExcelFileReadUtil#getRowFromSheetOne(String, Integer)}.
     *
     * @param filePath
     *            path of the workbook; must not be blank
     * @param headerNum
     *            1-based number of the row to read; values below 1 are treated as 1
     * @return the cell values of that row as strings, or an empty list when reading fails
     * @throws IllegalArgumentException
     *             if {@code filePath} is blank
     */
    public static List<String> getHeader(String filePath, int headerNum) {
        if (StringUtils.isBlank(filePath)) {
            throw new IllegalArgumentException("傳入文件路徑不能爲空");
        }
        // headerNum is a primitive, so only the range needs checking (a null check is meaningless).
        int rowNumber = Math.max(headerNum, 1);
        try {
            return LargeExcelFileReadUtil.getRowFromSheetOne(filePath, rowNumber);
        } catch (Exception e) {
            // Deliberate best-effort: report the problem and fall through to the empty result.
            e.printStackTrace();
        }
        return Lists.newArrayList();
    }

    /**
     * Reads every row from the sheet handled by
     * {@link LargeExcelFileReadUtil#getRowsFromSheetOne(String)}.
     *
     * <p>All values are returned as strings. The column layout is derived from the first row,
     * so the first row should not contain empty cells.
     *
     * @param filePath
     *            path of the workbook; must not be blank
     * @return one list of cell values per row, or an empty list when reading fails
     * @throws IllegalArgumentException
     *             if {@code filePath} is blank
     */
    public static List<List<String>> getAllData(String filePath) {
        if (StringUtils.isBlank(filePath)) {
            throw new IllegalArgumentException("傳入文件路徑不能爲空");
        }
        try {
            return LargeExcelFileReadUtil.getRowsFromSheetOne(filePath);
        } catch (Exception e) {
            // Deliberate best-effort: report the problem and fall through to the empty result.
            e.printStackTrace();
        }
        return Lists.newArrayList();
    }

    /**
     * Small timing demo: reads all rows, prints them, then reports row count and timings.
     *
     * @param args
     *            optional; {@code args[0]} is the workbook path, otherwise a built-in default
     *            path is used
     */
    public static void main(String[] args) {
        String filepath = (args != null && args.length > 0) ? args[0] : DEFAULT_DEMO_FILE;

        long start = System.currentTimeMillis();
        List<List<String>> result = ExcelUtils.getAllData(filepath);
        long end = System.currentTimeMillis();

        for (List<String> list : result) {
            System.out.println(list.toString());
        }
        long end1 = System.currentTimeMillis();

        try {
            // Pause before the summary is written to stderr, after the rows went to stdout.
            Thread.sleep(1000L);
        } catch (InterruptedException e) {
            // Restore the interrupt flag instead of discarding the interruption.
            Thread.currentThread().interrupt();
        }

        System.err.println("總行數:" + result.size());
        System.err.println(("讀取耗時:" + (end - start) / 1000) + "秒");
        System.err.println(("打印耗時:" + (end1 - end) / 1000) + "秒");
    }
}
LargeExcelFileReadUtil.class 真正的工具類
package com.xxx.bi.utils.excel;

import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;

import javax.xml.XMLConstants;
import javax.xml.parsers.SAXParserFactory;

import org.apache.log4j.Logger;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * Reads the first sheet of an {@code .xlsx} workbook with POI's SAX event API, so the
 * workbook is streamed instead of being loaded into memory as a whole.
 */
public class LargeExcelFileReadUtil {

    /** Logger of this class. */
    public static final Logger LOGGER = Logger.getLogger(LargeExcelFileReadUtil.class);

    /**
     * Reads one row of the first sheet.
     *
     * @param filename
     *            path of the workbook
     * @param rowNum
     *            1-based row number; when {@code null} the handler collects the values of
     *            every row into the returned list
     * @return the values collected by {@link SingleRowHandler}
     * @throws Exception
     *             if the workbook cannot be opened or parsed
     */
    public static List<String> getRowFromSheetOne(String filename, Integer rowNum) throws Exception {
        // SingleRowHandler aborts parsing by throwing a SAXException carrying
        // FINISH_ROW_MESSAGE once it is past the requested row; that is a success signal.
        SingleRowHandler handler = parseFirstSheet(filename, sst -> new SingleRowHandler(sst, rowNum),
                SingleRowHandler.FINISH_ROW_MESSAGE);
        return handler.getRow();
    }

    /**
     * Reads all rows of the first sheet.
     *
     * @param filename
     *            path of the workbook
     * @return the rows collected by {@link MultiRowHandler}
     * @throws Exception
     *             if the workbook cannot be opened or parsed
     */
    public static List<List<String>> getRowsFromSheetOne(String filename) throws Exception {
        MultiRowHandler handler = parseFirstSheet(filename, MultiRowHandler::new, null);
        return handler.getRows();
    }

    /**
     * Opens the workbook read-only, streams its first sheet through the handler produced by
     * {@code handlerFactory} and returns that handler.
     *
     * @param stopMessage
     *            message of the {@link SAXException} a handler throws to stop parsing early;
     *            such an exception is swallowed. {@code null} means no early stop is expected.
     */
    private static <H extends ContentHandler> H parseFirstSheet(String filename,
            Function<SharedStringsTable, H> handlerFactory, String stopMessage) throws Exception {
        // Open read-only: a package opened with the default access is saved back to the
        // file by close(), which is both slow and unwanted for a pure read.
        OPCPackage pkg = OPCPackage.open(filename, PackageAccess.READ);
        try {
            XSSFReader reader = new XSSFReader(pkg);
            H handler = handlerFactory.apply(reader.getSharedStringsTable());

            XMLReader parser = newXmlReader();
            parser.setContentHandler(handler);

            // Take the first sheet in workbook order rather than assuming its relationship
            // id is "rId1", which the file format does not guarantee.
            Iterator<InputStream> sheets = reader.getSheetsData();
            if (!sheets.hasNext()) {
                throw new IllegalArgumentException("Workbook contains no sheet: " + filename);
            }
            try (InputStream sheet = sheets.next()) {
                parser.parse(new InputSource(sheet));
            } catch (SAXException e) {
                boolean stoppedOnPurpose = Objects.nonNull(stopMessage)
                        && stopMessage.equalsIgnoreCase(e.getMessage());
                if (!stoppedOnPurpose) {
                    throw e;
                }
            }
            return handler;
        } finally {
            // revert() releases the package without attempting to save it.
            pkg.revert();
        }
    }

    /** Creates a namespace-aware SAX reader through JAXP with secure processing enabled. */
    private static XMLReader newXmlReader() throws Exception {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(true);
        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        return factory.newSAXParser().getXMLReader();
    }
}
SingleRowHandler.class 單行處理類,可以只獲取表頭或表格中的某一行數據
package com.xxx.bi.utils.excel;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.regex.Pattern;

import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that collects the {@code <v>} values of one worksheet row.
 *
 * <p>When a target row number is given, only values of that row are collected and parsing is
 * aborted by throwing a {@link SAXException} whose message is {@link #FINISH_ROW_MESSAGE} as
 * soon as a later row is reached; callers must treat that exception as success. When the
 * target row is {@code null}, the values of all rows are appended to the same list.
 *
 * <p>Cells without a {@code <v>} element contribute nothing, so the returned list can be
 * shorter than the number of columns.
 */
public class SingleRowHandler extends DefaultHandler {

    /** Message of the exception used to stop parsing once the requested row is complete. */
    public final static String FINISH_ROW_MESSAGE = "row data process finish";

    /** 1-based target row; {@code null} means "collect every row". */
    private final Integer rowNum;
    /** 1-based number of the row currently being parsed; 0 before the first row. */
    private int curRowNum = 0;
    private final SharedStringsTable sst;
    /** Character data of the element currently being parsed. */
    private final StringBuilder lastContents = new StringBuilder();
    /** True while inside a cell whose value is an index into the shared strings table. */
    private boolean nextIsString;
    private final List<String> row = new ArrayList<>();

    /**
     * @param sst
     *            shared strings table of the workbook, used to resolve {@code t="s"} cells
     * @param rowNum
     *            1-based row to collect, or {@code null} to collect all rows
     */
    public SingleRowHandler(SharedStringsTable sst, Integer rowNum) {
        this.sst = sst;
        this.rowNum = rowNum;
    }

    /** @return the values collected so far (the handler's own list, not a copy) */
    public List<String> getRow() {
        return row;
    }

    @Override
    public void startElement(String uri, String localName, String name, Attributes attributes)
            throws SAXException {
        if ("row".equals(name)) {
            // Prefer the explicit row number; fall back to counting rows when it is absent.
            int declared = parsePositiveInt(attributes.getValue("r"));
            curRowNum = declared > 0 ? declared : curRowNum + 1;
        } else if ("c".equals(name)) {
            // Track the row from every cell reference, not only from column A, so rows that
            // start in a later column are still attributed correctly.
            int refRow = rowOfReference(attributes.getValue("r"));
            if (refRow > 0) {
                curRowNum = refRow;
            }
            nextIsString = "s".equals(attributes.getValue("t"));
        }
        // Start collecting text for the new element.
        lastContents.setLength(0);

        if (Objects.nonNull(rowNum) && curRowNum > rowNum) {
            // There is no regular way to stop a SAX parse, so an exception is used as signal.
            throw new SAXException(FINISH_ROW_MESSAGE);
        }
    }

    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {
        if ("v".equals(name)) {
            boolean wanted = Objects.isNull(rowNum) || rowNum.intValue() == curRowNum;
            if (wanted) {
                String value = lastContents.toString();
                if (nextIsString) {
                    // The text of a shared-string cell is an index into the shared table.
                    int idx = Integer.parseInt(value);
                    value = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                }
                row.add(value);
            }
        } else if ("c".equals(name)) {
            nextIsString = false;
        }
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        lastContents.append(ch, start, length);
    }

    /** Returns the row part of a cell reference such as {@code "AB12"}, or 0 if unavailable. */
    private static int rowOfReference(String cellReference) {
        if (cellReference == null) {
            return 0;
        }
        int firstDigit = 0;
        while (firstDigit < cellReference.length() && !Character.isDigit(cellReference.charAt(firstDigit))) {
            firstDigit++;
        }
        return parsePositiveInt(cellReference.substring(firstDigit));
    }

    /** Parses a decimal number, returning 0 for {@code null}, empty or malformed input. */
    private static int parsePositiveInt(String text) {
        if (text == null || text.isEmpty()) {
            return 0;
        }
        try {
            return Integer.parseInt(text);
        } catch (NumberFormatException e) {
            return 0;
        }
    }
}
MultiRowHandler.class 獲取excel全部行的數據
package com.xxx.bi.utils.excel;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that collects every non-blank row of a worksheet as a list of strings.
 *
 * <p>A row is "blank" when none of its cells has a {@code <v>} element; blank rows are
 * skipped. The first non-blank row is the header and fixes the column count: every later row
 * starts as that many empty strings and each value is stored at the column index taken from
 * its cell reference. Rows wider than the header are extended with empty strings rather than
 * failing.
 */
public class MultiRowHandler extends DefaultHandler {

    /** Unicode code point of 'A', the first column letter. */
    private static final int FIRST_COLUMN_LETTER = 65;

    /** 0-based index of the cell being parsed; -1 before the first cell of a row. */
    private int curColIndex = -1;
    /** Number of columns, taken from the header row; 0 until the header is complete. */
    private int colCnt = 0;
    private final SharedStringsTable sst;
    /** Character data of the element currently being parsed. */
    private final StringBuilder lastContents = new StringBuilder();
    /** True while inside a cell whose value is an index into the shared strings table. */
    private boolean nextIsString;
    private List<String> head = null;
    private List<String> curRowData = null;
    /** True until the current row has produced at least one value. */
    private boolean curRowIsBlank = true;
    private final List<List<String>> rows = new ArrayList<>();

    /**
     * @param sst
     *            shared strings table of the workbook, used to resolve {@code t="s"} cells
     */
    public MultiRowHandler(SharedStringsTable sst) {
        this.sst = sst;
    }

    /** @return the rows collected so far (the handler's own list, not a copy) */
    public List<List<String>> getRows() {
        return rows;
    }

    @Override
    public void startElement(String uri, String localName, String name, Attributes attributes)
            throws SAXException {
        if ("row".equals(name)) {
            beginRow();
        } else if ("c".equals(name)) {
            if (Objects.isNull(curRowData)) {
                beginRow(); // defensive: a cell that is not wrapped in a row element
            }
            int declared = getColIndex(attributes.getValue("r"));
            // Without a usable reference the cell is taken to follow the previous one.
            curColIndex = declared >= 0 ? declared : curColIndex + 1;
            nextIsString = "s".equals(attributes.getValue("t"));
        }
        // Start collecting text for the new element.
        lastContents.setLength(0);
    }

    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {
        if ("v".equals(name)) {
            String value = lastContents.toString();
            if (nextIsString) {
                // The text of a shared-string cell is an index into the shared table.
                int idx = Integer.parseInt(value);
                value = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
            }
            storeValue(value);
        } else if ("c".equals(name)) {
            nextIsString = false;
        } else if ("row".equals(name)) {
            finishRow();
        }
    }

    @Override
    public void endDocument() throws SAXException {
        finishRow(); // no-op when the last row was already closed by its end tag
        super.endDocument();
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        lastContents.append(ch, start, length);
    }

    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        // Ignorable whitespace never contributes to a cell value.
    }

    /** Starts a new row pre-filled to the header width. */
    private void beginRow() {
        curRowData = getBlankRow(colCnt);
        curColIndex = -1;
    }

    /** Stores a value at the current column, growing the row when it is too short. */
    private void storeValue(String value) {
        if (Objects.isNull(curRowData)) {
            beginRow();
        }
        int column = Math.max(curColIndex, 0);
        while (curRowData.size() <= column) {
            curRowData.add("");
        }
        curRowData.set(column, value);
        curRowIsBlank = false;
    }

    /** Adds the current row to the result exactly once, unless it is blank. */
    private void finishRow() {
        if (Objects.nonNull(curRowData) && !curRowIsBlank) {
            rows.add(curRowData);
            if (Objects.isNull(head)) {
                head = curRowData;
                colCnt = head.size();
            }
        }
        curRowData = null;
        curRowIsBlank = true;
    }

    /**
     * Converts the column letters of a cell reference into a 0-based column index:
     * {@code A1 -> 0}, {@code B1 -> 1}, {@code AA1 -> 26}.
     *
     * @param cellPosition
     *            cell reference such as {@code A1} or {@code B23}; may be {@code null}
     * @return the 0-based column index, or -1 when the reference has no valid column letters
     */
    private static int getColIndex(String cellPosition) {
        if (cellPosition == null) {
            return -1;
        }
        int index = -1;
        for (int i = 0; i < cellPosition.length(); i++) {
            char c = cellPosition.charAt(i);
            if (Character.isDigit(c)) {
                break; // the row part starts here
            }
            if (c < 'A' || c > 'Z') {
                return -1;
            }
            index = (index + 1) * 26 + (int) c - FIRST_COLUMN_LETTER;
        }
        return index;
    }

    /**
     * Creates a row of {@code cnt} empty strings and marks the current row as blank.
     *
     * @param cnt
     *            number of empty cells to create
     * @return the new, mutable row
     */
    private List<String> getBlankRow(int cnt) {
        List<String> result = new ArrayList<>(cnt);
        for (int i = 0; i < cnt; i++) {
            result.add("");
        }
        curRowIsBlank = true;
        return result;
    }

    /** Prints the column index of {@code BC2} as a quick manual check. */
    public static void main(String[] args) {
        System.out.println(getColIndex("BC2"));
    }
}