1、添加所需依賴:
<dependency>
    <groupId>com.monitorjbl</groupId>
    <artifactId>xlsx-streamer</artifactId>
    <version>1.2.0</version>
</dependency>
<dependency>
    <groupId>org.eclipse.birt.runtime.3_7_1</groupId>
    <artifactId>org.apache.xerces</artifactId>
    <version>2.9.0</version>
</dependency>
2、新建工具類
package com.cy.exceldata.util;
import java.io.InputStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.junit.jupiter.api.Test;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**apache
public abstract class BigDataParseExcel extends DefaultHandler {api
private SharedStringsTable sst; private String lastContents; private boolean nextIsString; private int sheetIndex = -1; private List<String> rowlist = new ArrayList<String>(); private int curRow = 0; //當前行 private int curCol = 0; //當前列索引 private int preCol = 0; //上一列列索引 private int titleRow = 0; //標題行,通常狀況下爲0 private int rowsize = 0; //列數 //excel記錄行操做方法,以sheet索引,行索引和行元素列表爲參數,對sheet的一行元素進行操做,元素爲String類型 public abstract void optRows(int sheetIndex, int curRow, List<String> rowlist) throws SQLException; //只遍歷一個sheet,其中sheetId爲要遍歷的sheet索引,從1開始,1-3 /** * @param filename * @param sheetId sheetId爲要遍歷的sheet索引,從1開始,1-3 * @throws Exception */ public void processOneSheet(String filename, int sheetId) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); // rId2 found by processing the Workbook // 根據 rId# 或 rSheet# 查找sheet InputStream sheet2 = r.getSheet("rId" + sheetId); sheetIndex++; InputSource sheetSource = new InputSource(sheet2); parser.parse(sheetSource); sheet2.close(); } /** * 遍歷 excel 文件 */ public void process(String filename) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); Iterator<InputStream> sheets = r.getSheetsData(); while (sheets.hasNext()) { curRow = 0; sheetIndex++; InputStream sheet = sheets.next(); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); } } public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException { XMLReader parser = XMLReaderFactory.createXMLReader(); //.createXMLReader("org.apache.xerces.parsers.SAXParser"); this.sst = sst; parser.setContentHandler(this); return parser; } public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { // c => 單元格 if 
(name.equals("c")) { // 若是下一個元素是 SST 的索引,則將nextIsString標記爲true String cellType = attributes.getValue("t"); String rowStr = attributes.getValue("r"); curCol = this.getRowIndex(rowStr); if (cellType != null && cellType.equals("s")) { nextIsString = true; } else { nextIsString = false; } } // 置空 lastContents = ""; } public void endElement(String uri, String localName, String name) throws SAXException { // 根據SST的索引值的到單元格的真正要存儲的字符串 // 這時characters()方法可能會被調用屢次 if (nextIsString) { try { int idx = Integer.parseInt(lastContents); lastContents = new XSSFRichTextString(sst.getEntryAt(idx)) .toString(); } catch (Exception e) { } } // v => 單元格的值,若是單元格是字符串則v標籤的值爲該字符串在SST中的索引 // 將單元格內容加入rowlist中,在這以前先去掉字符串先後的空白符 if (name.equals("v")) { String value = lastContents.trim(); value = value.equals("") ? " " : value; int cols = curCol - preCol; if (cols > 1) { for (int i = 0; i < cols - 1; i++) { rowlist.add(preCol, ""); } } preCol = curCol; rowlist.add(curCol - 1, value); } else { //若是標籤名稱爲 row ,這說明已到行尾,調用 optRows() 方法 if (name.equals("row")) { int tmpCols = rowlist.size(); if (curRow > this.titleRow && tmpCols < this.rowsize) { for (int i = 0; i < this.rowsize - tmpCols; i++) { rowlist.add(rowlist.size(), ""); } } try { optRows(sheetIndex, curRow, rowlist); } catch (SQLException e) { e.printStackTrace(); } if (curRow == this.titleRow) { this.rowsize = rowlist.size(); } rowlist.clear(); curRow++; curCol = 0; preCol = 0; } } } public void characters(char[] ch, int start, int length) throws SAXException { //獲得單元格內容的值 lastContents += new String(ch, start, length); } //獲得列索引,每一列c元素的r屬性構成爲字母加數字的形式,字母組合爲列索引,數字組合爲行索引, //如AB45,表示爲第(A-A+1)*26+(B-A+1)*26列,45行 public int getRowIndex(String rowStr) { rowStr = rowStr.replaceAll("[^A-Z]", ""); byte[] rowAbc = rowStr.getBytes(); int len = rowAbc.length; float num = 0; for (int i = 0; i < len; i++) { num += (rowAbc[i] - 'A' + 1) * Math.pow(26, len - i - 1); } return (int) num; } public int getTitleRow() { return titleRow; } public void setTitleRow(int 
titleRow) { this.titleRow = titleRow; }
}eclipse
3、測試(這裏實現的是提取列表中符合條件的手機號信息,可根據需求改變)
public class POITest {xss
@SneakyThrows public static void main(String[] args) { long start = System.currentTimeMillis(); BigDataParseExcel xlx = new BigDataParseExcel(){ @Override public void optRows(int sheetIndex, int curRow, List<String> rowlist) throws SQLException { for (int i = 0; i < rowlist.size(); i++) { if(isMobile( rowlist.get(i))){ System.out.println(rowlist.get(i)); } } } }; xlx.process("C:/Users/Administrator/Desktop/data/test/DATA552410.xlsx"); long end = System.currentTimeMillis(); System.out.println((end-start)/1000); } public static boolean isMobile(final String str) { Pattern p = null; Matcher m = null; boolean b = false; p = Pattern.compile("^[1][3,4,5,7,8][0-9]{9}$"); // 驗證手機號 m = p.matcher(str); b = m.matches(); return b; }
}ide