Java實現經過POI讀取Excel文件提取手機號碼並去重

添加所需依賴:
<!-- apache poi 操做Microsoft Document -->java

<dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi</artifactId>
        <version>3.17</version>
    </dependency>
    <!-- Apache POI - Java API To Access Microsoft Format Files -->
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>3.17</version>
    </dependency>
    <!-- Apache POI - Java API To Access Microsoft Format Files -->
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml-schemas</artifactId>
        <version>3.17</version>
    </dependency>
    <!-- poi eventmodel方式 依賴包 -->
    <dependency>
        <groupId>xerces</groupId>
        <artifactId>xercesImpl</artifactId>
        <version>2.11.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi</artifactId>
        <version>3.14</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>3.14</version>
    </dependency>

代碼以下:
package com.cy.exceldata.util;apache

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;api

import com.cy.exceldata.util.SheetHandler;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.junit.jupiter.api.Test;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;xss

/**測試

  • 數據量比較大(8萬條以上)的excel文件解析,將excel文件解析爲 行列座標-值的形式存入map中,此方式速度快,內存耗損小 但只能讀取excle文件
  • 提供處理單個sheet方法 processOneSheet(String filename) 以及處理多個sheet方法 processAllSheets(String filename)
    *
    */

public class LargeExcelFileReadUtil {fetch

private  LinkedHashMap<String, String>rowContents=new LinkedHashMap<String, String>();
private  SheetHandler sheetHandler;

public LinkedHashMap<String, String> getRowContents() {
    return rowContents;
}
public void setRowContents(LinkedHashMap<String, String> rowContents) {
    this.rowContents = rowContents;
}

public SheetHandler getSheetHandler() {
    return sheetHandler;
}
public void setSheetHandler(SheetHandler sheetHandler) {
    this.sheetHandler = sheetHandler;
}
//處理一個sheet
public void processOneSheet(String filename) throws Exception {
    InputStream sheet2=null;
    OPCPackage pkg =null;
    try {
        pkg = OPCPackage.open(filename);
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();
        XMLReader parser = fetchSheetParser(sst);
        sheet2 = r.getSheet("rId1");
        InputSource sheetSource = new InputSource(sheet2);
        parser.parse(sheetSource);
        setRowContents(sheetHandler.getRowContents());
    }catch (Exception e) {
        e.printStackTrace();
        throw e;
    }finally{
        if(pkg!=null){
            pkg.close();
        }
        if(sheet2!=null){
            sheet2.close();
        }
    }
}
//處理多個sheet
public void processAllSheets(String filename) throws Exception {
    OPCPackage pkg =null;
    InputStream sheet=null;
    try{
        pkg=OPCPackage.open(filename);
        XSSFReader r = new XSSFReader( pkg );
        SharedStringsTable sst = r.getSharedStringsTable();
        XMLReader parser = fetchSheetParser(sst);
        Iterator<InputStream> sheets = r.getSheetsData();
        while(sheets.hasNext()) {
            System.out.println("Processing new sheet:\n");
            sheet = sheets.next();
            InputSource sheetSource = new InputSource(sheet);
            parser.parse(sheetSource);
        }
    }catch (Exception e) {
        e.printStackTrace();
        throw e;
    }finally{
        if(pkg!=null){
            pkg.close();
        }
        if(sheet!=null){
            sheet.close();
        }
    }
}

public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
    XMLReader parser =
            XMLReaderFactory.createXMLReader(
                    "com.sun.org.apache.xerces.internal.parsers.SAXParser"
            );
    setSheetHandler(new SheetHandler(sst));
    ContentHandler handler = (ContentHandler) sheetHandler;
    parser.setContentHandler(handler);
    return parser;
}

/**
 * See org.xml.sax.helpers.DefaultHandler javadocs
 */
//測試
@Test
public  void test ()throws Exception {
    Long time=System.currentTimeMillis();
    LargeExcelFileReadUtil example = new LargeExcelFileReadUtil();

    example.processOneSheet("C:/Users/Administrator/Desktop/data/test/模擬數據406250.xlsx");
    Long endtime=System.currentTimeMillis();
    LinkedHashMap<String, String>  map=example.getRowContents();
    Iterator<Entry<String, String>> it= map.entrySet().iterator();
    final Pattern CHINA_PATTERN = Pattern.compile("^((13[0-9])|(14[0,1,4-9])|(15[0-3,5-9])|(16[2,5,6,7])|(17[0-8])|(18[0-9])|(19[0-3,5-9]))\\d{8}$");
    int count=0;
    int phoneNum = 0;
    String prePos="";
    Set<String> numSet = new HashSet<String>();
    while (it.hasNext()){
        Map.Entry<String, String> entry=(Map.Entry<String, String>)it.next();
        String pos=entry.getKey();
        if(!pos.substring(1).equals(prePos)){
            prePos=pos.substring(1);
            count++;
        }
        if(isMobile(entry.getValue())){
            numSet.add(entry.getValue());
        }
    }
    for (String s : numSet) {
        System.out.println(s);
        phoneNum = phoneNum + 1;
    }

    System.out.println("號碼數量有: "+phoneNum);
    System.out.println("解析數據"+count+"條;耗時"+(endtime-time)/1000+"秒");
}

public static boolean isMobile(final String str) {
    Pattern p = null;
    Matcher m = null;
    boolean b = false;
    p = Pattern.compile("^[1][3,4,5,7,8][0-9]{9}$"); // 驗證手機號
    m = p.matcher(str);
    b = m.matches();
    return b;
}

}this

相關文章
相關標籤/搜索