不解釋,直接上代碼。樓主在參考原文的基礎上修改了幾個BUG,以下是參考的原作者(牧夢者)的文章路徑:
http://www.javashuo.com/article/p-vskdnsxh-z.html
DefaultReader.java
package com.cgtax.file.reader;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
 * No-op {@link Reader}: it ignores the file it is given and always reports an
 * empty data set.
 */
public class DefaultReader implements Reader {

    /**
     * Does nothing; the file is never opened.
     *
     * @param file ignored
     */
    @Override
    public void process(File file) throws Exception {
        // Intentionally empty.
    }

    /**
     * @return a new, empty, mutable list on every call
     */
    @Override
    public List<List<String>> getData() {
        List<List<String>> noRows = new ArrayList<>();
        return noRows;
    }
}
Reader.java
package com.cgtax.file.reader;

import java.io.File;
import java.util.List;

/**
 * Contract shared by the file readers in this package: parse a file, then hand
 * back the rows that were collected.
 */
public interface Reader {

    /**
     * Parses the given file.
     *
     * @param file the file to read
     * @throws Exception if the file cannot be read or parsed
     */
    void process(File file) throws Exception;

    /**
     * @return the rows gathered by {@link #process(File)}, one inner list per row
     */
    List<List<String>> getData();
}
ReaderFactory.java
package com.cgtax.file.reader;

import java.io.File;
import java.util.Locale;

import org.apache.commons.lang.IllegalClassException;

/**
 * Creates the {@link Reader} whose class name is derived from a file's
 * extension: {@code foo.xls} maps to {@code XlsReader}, {@code foo.xlsx} to
 * {@code XlsxReader}, and so on.
 */
public class ReaderFactory {

    /** Package in which the extension-specific readers are looked up. */
    private static final String READER_PACKAGE = "com.cgtax.file.reader.";

    /**
     * Instantiates the reader matching the extension of {@code file}.
     *
     * <p>The extension is everything after the last {@code '.'} of the file
     * name (the whole name when it contains no dot). It is normalised to
     * "first letter upper case, rest lower case", so {@code .XLS}, {@code .Xls}
     * and {@code .xls} all resolve to the same reader class.
     *
     * @param file the file a reader is needed for
     * @return a new reader instance, never {@code null}
     * @throws IllegalClassException if the extension is empty or no reader
     *         class exists for it
     * @throws Exception if the reader class cannot be instantiated
     */
    public static Reader createReader(File file) throws Exception {
        String fileName = file.getName();
        String suffix = fileName.substring(fileName.lastIndexOf('.') + 1);
        if (suffix.isEmpty()) {
            // A name ending in '.' has no extension to map to a reader class.
            throw new IllegalClassException("No reader for file without extension: " + fileName);
        }

        // Locale.ROOT keeps the mapping stable regardless of the default locale.
        String normalized = suffix.toLowerCase(Locale.ROOT);
        String convert = Character.toUpperCase(normalized.charAt(0)) + normalized.substring(1);
        String className = READER_PACKAGE + convert + "Reader";

        try {
            return (Reader) Class.forName(className).getDeclaredConstructor().newInstance();
        } catch (ClassNotFoundException e) {
            // Unsupported extension: report it, keeping the original cause.
            IllegalClassException failure = new IllegalClassException(className + " does not exists!");
            failure.initCause(e);
            throw failure;
        }
    }
}
ReaderUtil.java
package com.cgtax.file.reader;

import java.io.File;
import java.io.IOException;
import java.util.List;

import com.cgtax.collect.service.utils.ExcelSaveAs;

/**
 * One-call entry point: pick a reader for a file, parse it and return the rows.
 */
public class ReaderUtil {

    /**
     * Reads {@code file} with the reader chosen by {@link ReaderFactory}.
     *
     * <p>If parsing fails with an {@link IOException} whose message starts with
     * {@code "Invalid header signature;"}, the file is converted with
     * {@code ExcelSaveAs.saveAs2007} and the converted copy is parsed instead.
     * Once parsing has been attempted, both the converted copy (if any) and the
     * input file itself are deleted, whether or not parsing succeeded.
     *
     * @param file the file to read; it is deleted by this call
     * @return the rows collected by the reader
     * @throws IOException if parsing fails with any other I/O error
     * @throws RuntimeException wrapping any non-I/O failure of the first parse
     * @throws Exception if no reader can be created or the retry fails
     */
    public static List<List<String>> read(File file) throws Exception {
        Reader reader = ReaderFactory.createReader(file);
        File target = null;
        try {
            reader.process(file);
        } catch (IOException e) {
            String message = e.getMessage();
            boolean badSignature = message != null && message.startsWith("Invalid header signature;");
            if (!badSignature) {
                throw e;
            }
            // The content is not what the extension promised: re-save the file
            // (via JACOB) and parse the converted copy.
            target = ExcelSaveAs.saveAs2007(file);
            reader = ReaderFactory.createReader(target);
            reader.process(target);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            removeIfPresent(target);
            removeIfPresent(file);
        }
        return reader.getData();
    }

    /** Deletes {@code candidate} when it is non-null and exists on disk. */
    private static void removeIfPresent(File candidate) {
        if (candidate != null && candidate.exists()) {
            candidate.delete();
        }
    }
}
XlsReader.java
package com.cgtax.file.reader;

import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.log4j.Logger;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.DimensionsRecord;
import org.apache.poi.hssf.record.EOFRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import com.cgtax.collect.service.utils.CgtaxCollectUtil;
import com.sdp.core.dt.Dto;
import com.sdp.utils.Str;

// Adapted from https://blog.csdn.net/lipinganq/article/details/77678443
/**
 * Event-driven reader for {@code .xls} workbooks.
 *
 * <p>Records are received through {@link #processRecord(Record)}; the cells of
 * the current row are accumulated in {@code rowlist}, and at the end of each
 * row the row is validated and/or appended to the result returned by
 * {@link #getData()}. Only rows of the first sheet are collected.
 *
 * <p>The behaviour is driven by values read from {@code Dto.getParam()}:
 * {@code start_row}, {@code end_row} and the comma-separated {@code labels}
 * list that the header row is compared against.
 */
public class XlsReader implements HSSFListener, Reader {

    private static Logger logger = Logger.getLogger(XlsReader.class);

    private POIFSFileSystem fs;
    private int lastRowNumber;
    private int lastColumnNumber;
    /** When true, formula cells contribute their cached value rather than the formula text. */
    private boolean outputFormulaValues = true;
    private SheetRecordCollectingListener workbookBuildingListener;
    private HSSFWorkbook stubWorkbook;
    private SSTRecord sstRecord;
    private FormatTrackingHSSFListener formatListener;
    private int sheetIndex = -1;
    /** Position of the formula cell whose string result is expected in the next StringRecord. */
    private int nextRow;
    private int nextColumn;
    private boolean outputNextStringRecord;
    private int curRow = 0;
    /** Cells of the row currently being read. */
    private List<String> rowlist = new LinkedList<String>();
    private AtomicLong rows = new AtomicLong(0);
    /** True until the header row of the first sheet has been validated successfully. */
    private boolean isValidateExcel;
    private List<List<String>> data = new ArrayList<>();
    private int totalCount = 0;
    /** Number of labels in the expected header; rows are truncated to this width. */
    private int tableHeaderLength = 0;

    /**
     * Parses the workbook and fills the row data.
     *
     * @param file the {@code .xls} file to read
     * @throws Exception if the file cannot be opened or is not a valid workbook;
     *         a header mismatch surfaces as a {@link RuntimeException} wrapping
     *         an {@link IllegalArgumentException}
     */
    @Override
    public void process(File file) throws Exception {
        String filename = file.getAbsolutePath();
        // Close the stream explicitly so no file handle outlives this call
        // (the caller deletes the file afterwards). POIFSFileSystem has read
        // the whole stream by the time its constructor returns.
        try (FileInputStream in = new FileInputStream(filename)) {
            this.fs = new POIFSFileSystem(in);
        }

        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
        formatListener = new FormatTrackingHSSFListener(listener);
        HSSFEventFactory factory = new HSSFEventFactory();
        HSSFRequest request = new HSSFRequest();
        if (outputFormulaValues) {
            request.addListenerForAllRecords(formatListener);
        } else {
            workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
            request.addListenerForAllRecords(workbookBuildingListener);
        }
        factory.processWorkbookEvents(request, fs);
    }

    /**
     * @return the collected rows (the internal list, not a copy)
     */
    @Override
    public List<List<String>> getData() {
        return data;
    }

    /**
     * Handles one workbook record: cell records append their value to the
     * current row, and the end-of-row marker triggers validation/collection.
     *
     * @param record the record delivered by the POI event API
     */
    @Override
    public void processRecord(Record record) {
        int thisRow = -1;
        int thisColumn = -1;
        String thisStr = null;
        String value = null;

        switch (record.getSid()) {
        case BOFRecord.sid:
            BOFRecord br = (BOFRecord) record;
            if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
                // Build the stub workbook if it is needed (formula-text mode only).
                if (workbookBuildingListener != null && stubWorkbook == null) {
                    stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
                }
                sheetIndex++;
                if (sheetIndex == 0) {
                    isValidateExcel = true;
                }
            }
            break;
        case DimensionsRecord.sid:
            // Used range of the sheet; only the first sheet is of interest.
            if (sheetIndex == 0) {
                DimensionsRecord dr = (DimensionsRecord) record;
                totalCount = dr.getLastRow();
            }
            break;
        case SSTRecord.sid:
            sstRecord = (SSTRecord) record;
            break;
        case BlankRecord.sid:
            BlankRecord brec = (BlankRecord) record;
            thisRow = brec.getRow();
            thisColumn = brec.getColumn();
            thisStr = "";
            rowlist.add(thisColumn, thisStr);
            break;
        case BoolErrRecord.sid:
            // Boolean cell.
            BoolErrRecord berec = (BoolErrRecord) record;
            thisRow = berec.getRow();
            thisColumn = berec.getColumn();
            thisStr = berec.getBooleanValue() + "";
            rowlist.add(thisColumn, thisStr);
            break;
        case FormulaRecord.sid:
            // Formula cell.
            FormulaRecord frec = (FormulaRecord) record;
            thisRow = frec.getRow();
            thisColumn = frec.getColumn();
            if (outputFormulaValues) {
                if (Double.isNaN(frec.getValue())) {
                    // The result is a string, stored in the next record.
                    // thisStr stays null and is added below as a placeholder
                    // that the StringRecord case replaces.
                    outputNextStringRecord = true;
                    nextRow = frec.getRow();
                    nextColumn = frec.getColumn();
                } else {
                    thisStr = formatListener.formatNumberDateCell(frec);
                }
            } else {
                thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
            }
            rowlist.add(thisColumn, thisStr);
            break;
        case StringRecord.sid:
            // Cached string result of the preceding formula cell.
            if (outputNextStringRecord) {
                StringRecord srec = (StringRecord) record;
                thisStr = srec.getString();
                thisRow = nextRow;
                thisColumn = nextColumn;
                outputNextStringRecord = false;
                // Overwrite the null placeholder left by the formula cell.
                // Without this the string result was silently dropped.
                if (thisColumn >= 0 && thisColumn < rowlist.size()) {
                    rowlist.set(thisColumn, thisStr);
                }
            }
            break;
        case LabelRecord.sid:
            LabelRecord lrec = (LabelRecord) record;
            curRow = thisRow = lrec.getRow();
            thisColumn = lrec.getColumn();
            value = lrec.getValue().trim();
            value = value.equals("") ? " " : value;
            this.rowlist.add(thisColumn, value);
            break;
        case LabelSSTRecord.sid:
            // String cell; the text lives in the shared string table.
            LabelSSTRecord lsrec = (LabelSSTRecord) record;
            curRow = thisRow = lsrec.getRow();
            thisColumn = lsrec.getColumn();
            if (sstRecord == null) {
                rowlist.add(thisColumn, " ");
            } else {
                value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
                value = value.equals("") ? " " : value;
                rowlist.add(thisColumn, value);
            }
            break;
        case NumberRecord.sid:
            // Numeric cell.
            NumberRecord numrec = (NumberRecord) record;
            curRow = thisRow = numrec.getRow();
            thisColumn = numrec.getColumn();
            value = formatListener.formatNumberDateCell(numrec).trim();
            value = value.equals("") ? " " : value;
            rowlist.add(thisColumn, value);
            break;
        case EOFRecord.sid:
            // End of a workbook or worksheet stream: nothing to do.
            break;
        default:
            break;
        }

        // A new row has started.
        if (thisRow != -1 && thisRow != lastRowNumber) {
            lastColumnNumber = -1;
        }

        // A cell that is absent from the file: keep the column position with a blank.
        if (record instanceof MissingCellDummyRecord) {
            MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
            curRow = thisRow = mc.getRow();
            thisColumn = mc.getColumn();
            rowlist.add(thisColumn, " ");
        }

        // Remember the last row/column seen.
        if (thisRow > -1) {
            lastRowNumber = thisRow;
        }
        if (thisColumn > -1) {
            lastColumnNumber = thisColumn;
        }

        if (record instanceof LastCellOfRowDummyRecord) {
            handleEndOfRow();
        }
    }

    /** Validates the header row if due, collects the finished row if in range, then resets the row buffer. */
    private void handleEndOfRow() {
        lastColumnNumber = -1;

        int start_row = Integer.parseInt(Dto.getParam().getString("start_row"));
        if ((curRow - start_row + 2 == 0) && isValidateExcel) {
            validateHeaderRow();
        }

        rows.incrementAndGet();

        int end_row = 0;
        String s_end_row = Dto.getParam().getString("end_row");
        if (Str.isNotAnyEmpty(s_end_row)) {
            end_row = Integer.parseInt(s_end_row);
        }

        boolean inDataRange = (sheetIndex == 0)
                && ((curRow - start_row + 2) > 0)
                && (curRow < totalCount + end_row);
        if (inDataRange) {
            // Pad rows whose trailing cells are empty so they match the label count.
            String labels = Dto.getParam().getString("labels");
            int labelSize = labels.split(",").length;
            for (int i = rowlist.size(); i < labelSize; i++) {
                rowlist.add(" ");
            }

            // Skip rows in which no cell passes Str.isNotAnyEmpty.
            boolean hasContent = false;
            for (String cell : rowlist) {
                if (Str.isNotAnyEmpty(cell)) {
                    hasContent = true;
                    break;
                }
            }
            if (hasContent) {
                // Drop cells beyond the header width.
                while (rowlist.size() > tableHeaderLength) {
                    rowlist.remove(tableHeaderLength);
                }
                data.add(new ArrayList<>(rowlist));
            }
        }

        rowlist.clear();
    }

    /**
     * Compares the current row with the expected {@code labels}; on mismatch
     * both lists are published through {@code Dto.getParam()}, logged, and a
     * {@link RuntimeException} is thrown. Does nothing when no labels are set.
     */
    private void validateHeaderRow() {
        String labels = Dto.getParam().getString("labels");
        if (!Str.isNotAnyEmpty(labels)) {
            return;
        }
        List<String> labelList = Arrays.asList(labels.split(","));
        tableHeaderLength = labelList.size();
        if (CgtaxCollectUtil.equals(labelList, rowlist)) {
            isValidateExcel = false;
            return;
        }
        Dto.getParam().set("model_title", Arrays.toString(labelList.toArray()));
        Dto.getParam().set("excel_title", Arrays.toString(rowlist.toArray()));
        logger.error("模型屬性:" + Arrays.toString(labelList.toArray()));
        logger.error("excel表頭:" + Arrays.toString(rowlist.toArray()));
        throw new RuntimeException(new IllegalArgumentException("excel列與模板屬性不一致!"));
    }
}
XlsxReader.java
package com.cgtax.file.reader;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import com.cgtax.collect.service.utils.CgtaxCollectUtil;
import com.jfinal.kit.StrKit;
import com.sdp.core.dt.Dto;
import com.sdp.utils.Str;

/**
 * SAX-based reader for {@code .xlsx} workbooks.
 *
 * <p>Each sheet's XML is streamed through this handler; the cells of the
 * current row are accumulated in {@code rowlist}, and when a {@code row}
 * element ends the row is validated and/or appended to the result returned by
 * {@link #getData()}. Only rows of the first sheet are collected.
 *
 * <p>The behaviour is driven by values read from {@code Dto.getParam()}:
 * {@code start_row}, {@code end_row} and the comma-separated {@code labels}
 * list that the header row is compared against.
 */
public class XlsxReader extends DefaultHandler implements Reader {

    private static Logger logger = Logger.getLogger(XlsxReader.class);

    /** Compiled once; matches plain non-negative decimal numbers. */
    private static final Pattern DECIMAL = Pattern.compile("([1-9]+[0-9]*|0)(\\.[\\d]+)?");

    private OPCPackage pkg;
    private SharedStringsTable sst;
    private String lastContents;
    private boolean nextIsString;
    private int sheetIndex = -1;
    /** Cells of the row currently being read. */
    private List<String> rowlist = new LinkedList<String>();
    /** Count of {@code row} elements seen in the current sheet. */
    private int curRow = 0;
    /** Row position that also accounts for row numbers skipped in the sheet XML. */
    private int actualRowIndex = 0;
    private List<Integer> curRowList = new ArrayList<Integer>();
    private int curCol = 0;
    private boolean isTElement;
    private CellDataType nextDataType = CellDataType.SSTINDEX;
    private final DataFormatter formatter = new DataFormatter();
    private short formatIndex;
    private String formatString;
    /** Reference (e.g. {@code B3}) of the previous and of the current cell. */
    private String preRef = null;
    private String ref = null;
    /** Reference of the last cell of the row used as width template for later rows. */
    private String maxRef = null;
    private StylesTable stylesTable;
    private AtomicLong rows = new AtomicLong(0);
    /** True until the header row of the first sheet has been validated successfully. */
    private boolean isValidateExcel;
    private List<List<String>> data = new ArrayList<>();
    private int totalCount = 0;
    /** Number of labels in the expected header; rows are truncated to this width. */
    private int tableHeaderLength = 0;

    /**
     * Parses every sheet of the workbook and fills the row data.
     *
     * <p>The package and each sheet stream are closed even when parsing fails.
     *
     * @param file the {@code .xlsx} file to read
     * @throws IllegalArgumentException if the header row does not match the
     *         expected labels
     * @throws Exception if the file cannot be opened or parsed
     */
    @Override
    public void process(File file) throws Exception {
        String filename = file.getAbsolutePath();
        pkg = OPCPackage.open(filename);
        try {
            XSSFReader xssfReader = new XSSFReader(pkg);
            stylesTable = xssfReader.getStylesTable();
            SharedStringsTable sharedStrings = xssfReader.getSharedStringsTable();
            XMLReader parser = this.fetchSheetParser(sharedStrings);
            Iterator<InputStream> sheets = xssfReader.getSheetsData();
            while (sheets.hasNext()) {
                curRow = 0;
                curRowList.clear();
                actualRowIndex = 0;
                sheetIndex++;
                if (sheetIndex == 0) {
                    isValidateExcel = true;
                }
                try (InputStream sheet = sheets.next()) {
                    parser.parse(new InputSource(sheet));
                }
            }
        } finally {
            // Release the file even when a sheet fails to parse.
            closePackage();
        }
    }

    /**
     * @return the collected rows (the internal list, not a copy)
     */
    @Override
    public List<List<String>> getData() {
        return data;
    }

    /**
     * Creates the SAX parser for the sheet XML and registers this object as
     * its content handler.
     *
     * @param sst the workbook's shared string table, remembered for cell lookups
     * @return the configured parser
     * @throws SAXException if the parser cannot be created
     */
    public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        try {
            // Sheet XML never needs a DOCTYPE; refusing it blocks XXE payloads
            // in crafted workbooks. Best effort: a parser that does not know
            // the feature is used unchanged.
            parser.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        } catch (SAXException e) {
            logger.warn("Could not disable DOCTYPE declarations on the sheet parser", e);
        }
        this.sst = sst;
        parser.setContentHandler(this);
        return parser;
    }

    @Override
    public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
        if ("dimension".equals(name)) {
            // Row number of the last cell of the used range.
            String d = attributes.getValue("ref");
            totalCount = getNumber(d.substring(d.indexOf(":") + 1, d.length()));
        }
        if (ref != null) {
            isRowNull(ref);
        }
        // c => cell
        if ("c".equals(name)) {
            // Position of the previous cell (the first cell of a row is its own predecessor).
            if (preRef == null) {
                preRef = attributes.getValue("r");
            } else {
                preRef = ref;
            }
            // Position of the current cell.
            ref = attributes.getValue("r");
            this.setNextDataType(attributes);
            // Is the value an index into the shared string table?
            String cellType = attributes.getValue("t");
            nextIsString = cellType != null && cellType.equals("s");
        }
        // t => inline text
        isTElement = "t".equals(name);
        lastContents = "";
    }

    /** Data types a cell value can have. */
    enum CellDataType {
        BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, DATE, NULL
    }

    /**
     * Derives the data type and number format of the cell that is about to be
     * read from its {@code t} (type) and {@code s} (style) attributes.
     *
     * @param attributes attributes of the {@code c} element
     */
    public void setNextDataType(Attributes attributes) {
        nextDataType = CellDataType.NUMBER;
        formatIndex = -1;
        formatString = null;
        String cellType = attributes.getValue("t");
        String cellStyleStr = attributes.getValue("s");

        if ("b".equals(cellType)) {
            nextDataType = CellDataType.BOOL;
        } else if ("e".equals(cellType)) {
            nextDataType = CellDataType.ERROR;
        } else if ("inlineStr".equals(cellType)) {
            nextDataType = CellDataType.INLINESTR;
        } else if ("s".equals(cellType)) {
            nextDataType = CellDataType.SSTINDEX;
        } else if ("str".equals(cellType)) {
            nextDataType = CellDataType.FORMULA;
        }

        if (cellStyleStr != null) {
            int styleIndex = Integer.parseInt(cellStyleStr);
            XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
            formatIndex = style.getDataFormat();
            formatString = style.getDataFormatString();
            // Compare by value: the previous identity comparison (==) only
            // matched when both strings happened to be the same interned instance.
            if ("m/d/yy".equals(formatString)) {
                nextDataType = CellDataType.DATE;
                formatString = "yyyy-MM-dd hh:mm:ss.SSS";
            }
            if (formatString == null) {
                nextDataType = CellDataType.NULL;
                formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
            }
        }
    }

    /**
     * Converts the raw text of a {@code v} element according to the data type
     * chosen by {@link #setNextDataType(Attributes)}.
     *
     * @param value raw cell content
     * @param thisStr ignored on input; only used as the result variable
     * @return the converted value; a single space for the {@code NULL} type
     */
    public String getDataValue(String value, String thisStr) {
        switch (nextDataType) {
        case BOOL:
            char first = value.charAt(0);
            thisStr = first == '0' ? "FALSE" : "TRUE";
            break;
        case ERROR:
            thisStr = "\"ERROR:" + value + '"';
            break;
        case FORMULA:
            thisStr = '"' + value + '"';
            break;
        case INLINESTR:
            thisStr = new XSSFRichTextString(value).toString();
            break;
        case SSTINDEX:
            try {
                int idx = Integer.parseInt(value);
                thisStr = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
            } catch (NumberFormatException ex) {
                // Not an index: use the content as it is.
                thisStr = value;
            }
            break;
        case NUMBER:
            if (formatString != null) {
                thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString).trim();
            } else {
                thisStr = value;
            }
            thisStr = thisStr.replace("_", "").trim();
            break;
        case DATE:
            thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString);
            // Join date and time with 'T'.
            thisStr = thisStr.replace(" ", "T");
            break;
        default:
            thisStr = " ";
            break;
        }
        return thisStr;
    }

    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {
        // NOTE(review): StrKit.isBlank and isDecimal look mutually exclusive, so
        // this lookup presumably never runs; kept unchanged because shared
        // strings are already resolved in getDataValue. Confirm before removing.
        if (nextIsString && StrKit.isBlank(lastContents) && isDecimal(lastContents)) {
            int idx = Integer.parseInt(lastContents);
            lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
        }

        if (isTElement) {
            // Inline text: add it to the row with surrounding whitespace removed.
            String value = lastContents.trim();
            rowlist.add(curCol, value);
            curCol++;
            isTElement = false;
        } else if ("v".equals(name)) {
            // v => cell value (for shared strings: the index into the table).
            String value = this.getDataValue(lastContents.trim(), "");
            if (!ref.equals(preRef)) {
                // Fill the gap between the previous cell and this one.
                padEmptyCells(countNullCell(ref, preRef));
            } else {
                // First cell of the row: fill the columns before it when the
                // row does not start in column A.
                String firstColRef = preRef.replaceAll("\\d+", "");
                String rowNum = preRef.replaceAll("[A-Z]+", "");
                if (!firstColRef.equals("A")) {
                    preRef = "A" + rowNum;
                    padEmptyCells(countNullCell(ref, preRef) + 1);
                }
            }
            rowlist.add(curCol, value);
            curCol++;
        } else if ("c".equals(name) && countNullCell(ref, preRef) >= 0) {
            if (lastContents.trim().equals("")) {
                padEmptyCells(countNullCell(ref, preRef) + 1);
            }
        } else {
            if (name.equals("c") && preRef.equals(ref) && Str.isNotAnyEmpty(preRef) && Str.isNotAnyEmpty(ref)
                    && lastContents.trim().equals("")) {
                rowlist.add(curCol, "");
                curCol++;
            }
            if (name.equals("row")) {
                handleEndOfRow();
            }
        }
    }

    /** Appends {@code count} empty cells to the current row; no-op when {@code count <= 0}. */
    private void padEmptyCells(int count) {
        for (int i = 0; i < count; i++) {
            rowlist.add(curCol, "");
            curCol++;
        }
    }

    /** Pads, validates and collects the row that just ended, then resets the per-row state. */
    private void handleEndOfRow() {
        // The last cell of the row seen when curRow == 1 defines the width later rows are padded to.
        if (curRow == 1) {
            maxRef = ref;
        }
        // Pad missing trailing cells. A row element without any cell leaves
        // ref null; there is nothing to measure then, and the empty row is
        // filtered out below anyway.
        if (maxRef != null && ref != null) {
            padEmptyCells(countNullCell(maxRef, ref) + 1);
        }

        int start_row = Integer.parseInt(Dto.getParam().getString("start_row"));
        if ((curRow - start_row + 2 == 0) && isValidateExcel) {
            validateHeaderRow();
        }

        rows.incrementAndGet();

        int end_row = 0;
        String s_end_row = Dto.getParam().getString("end_row");
        if (Str.isNotAnyEmpty(s_end_row)) {
            end_row = Integer.parseInt(s_end_row);
        }

        boolean inDataRange = (sheetIndex == 0)
                && ((actualRowIndex - start_row + 2) > 0)
                && (actualRowIndex < totalCount + end_row);
        if (inDataRange) {
            // Skip rows in which no cell passes Str.isNotAnyEmpty.
            boolean hasContent = false;
            for (String cell : rowlist) {
                if (Str.isNotAnyEmpty(cell)) {
                    hasContent = true;
                    break;
                }
            }
            if (hasContent) {
                // Drop cells beyond the header width.
                while (rowlist.size() > tableHeaderLength) {
                    rowlist.remove(tableHeaderLength);
                }
                data.add(new ArrayList<>(rowlist));
            }
        }

        rowlist.clear();
        curRow++;
        actualRowIndex++;
        curCol = 0;
        preRef = null;
        ref = null;
    }

    /**
     * Compares the current row with the expected {@code labels}; on mismatch
     * the package is closed, both lists are published through
     * {@code Dto.getParam()} and logged, and an
     * {@link IllegalArgumentException} is thrown. Does nothing when no labels
     * are set.
     */
    private void validateHeaderRow() {
        String labels = Dto.getParam().getString("labels");
        if (!Str.isNotAnyEmpty(labels)) {
            return;
        }
        List<String> labelList = Arrays.asList(labels.split(","));
        tableHeaderLength = labelList.size();
        if (CgtaxCollectUtil.equals(labelList, rowlist)) {
            isValidateExcel = false;
            return;
        }
        try {
            closePackage();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        Dto.getParam().set("model_title", Arrays.toString(labelList.toArray()));
        Dto.getParam().set("excel_title", Arrays.toString(rowlist.toArray()));
        logger.error("模型屬性:" + Arrays.toString(labelList.toArray()));
        logger.error("excel表頭:" + Arrays.toString(rowlist.toArray()));
        throw new IllegalArgumentException("excel列與模板屬性不一致!");
    }

    /** Closes the package once; later calls are no-ops because the field is cleared first. */
    private void closePackage() throws IOException {
        OPCPackage opened = pkg;
        pkg = null;
        if (opened != null) {
            opened.close();
        }
    }

    @Override
    public void endDocument() throws SAXException {
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        // May be called several times per element, so append.
        lastContents += new String(ch, start, length);
    }

    private boolean isDecimal(String str) {
        return DECIMAL.matcher(str).matches();
    }

    /**
     * Number of columns strictly between two cell references, computed from
     * their column letters only (e.g. {@code D5} vs {@code A5} gives 2).
     * Negative when {@code ref} is not to the right of {@code preRef}.
     */
    private int countNullCell(String ref, String preRef) {
        // Column names have at most three letters (last column: XFD);
        // left-pad with '@', the character just before 'A'.
        String xfd = fillChar(ref.replaceAll("\\d+", ""), 3, '@', true);
        String xfd_1 = fillChar(preRef.replaceAll("\\d+", ""), 3, '@', true);
        char[] letter = xfd.toCharArray();
        char[] letter_1 = xfd_1.toCharArray();
        int res = (letter[0] - letter_1[0]) * 26 * 26 + (letter[1] - letter_1[1]) * 26 + (letter[2] - letter_1[2]);
        return res - 1;
    }

    /** Pads {@code str} with {@code let} up to {@code len} characters, in front when {@code isPre} is true. */
    private String fillChar(String str, int len, char let, boolean isPre) {
        int missing = len - str.length();
        for (int i = 0; i < missing; i++) {
            str = isPre ? let + str : str + let;
        }
        return str;
    }

    /** Extracts the row number from a cell reference such as {@code B12}. */
    private static int getNumber(String column) {
        String c = column.toUpperCase().replaceAll("[A-Z]", "");
        return Integer.parseInt(c);
    }

    /**
     * Tracks the row numbers seen so far and advances {@code actualRowIndex}
     * by the size of any gap between the previously recorded row and this one.
     */
    private void isRowNull(String column) {
        int rowIndex = getNumber(column);
        if (!curRowList.contains(rowIndex)) {
            if (curRowList.size() > 0) {
                actualRowIndex += (rowIndex - 1) - curRowList.get(curRowList.size() - 1);
            }
            curRowList.add(rowIndex);
        }
    }
}