使用 Apache POI 獲取各種文檔內容。

POI 版本：3.9

操作之前，先獲取文件輸入流對象。

 

// Open the document to be parsed as a byte stream; `file` is defined outside this snippet.
// NOTE(review): nothing in this fragment closes `fis` — confirm the caller does.
FileInputStream fis;
try {
    fis = new FileInputStream(file);
} catch (FileNotFoundException fnfe) {
    // The file could not be opened: leave the enclosing method without reporting anything.
    return;
}

 

1. 獲取 Word 2003 及之前版本（.doc）內容。

 

// Wrap the input stream in a WordExtractor and keep the text it extracts.
String result = new WordExtractor(fis).getText();

 

2. 獲取 Word 2007（.docx）內容。

 

// Load the stream as an XWPFDocument, hand it to an XWPFWordExtractor,
// and keep the text the extractor returns.
XWPFDocument docx = new XWPFDocument(fis);
XWPFWordExtractor docxExtractor = new XWPFWordExtractor(docx);
String result = docxExtractor.getText();

 

3. 獲取 Excel 2003 及之前版本（.xls）內容。

 

// Dump an HSSFWorkbook read from `fis` into `sb`: each non-empty cell is written
// followed by a tab, and each row that has cells is terminated by a newline.
// Numeric, boolean and string cells contribute their value; formula cells
// contribute the formula text; every other cell type is skipped.
POIFSFileSystem poifsFileSystem = new POIFSFileSystem(fis);
StringBuffer sb = new StringBuffer();
HSSFWorkbook wb = new HSSFWorkbook(poifsFileSystem);
for (int sheetNum = 0; sheetNum < wb.getNumberOfSheets(); sheetNum++) {
    HSSFSheet sheet = wb.getSheetAt(sheetNum);
    if (sheet == null) {
        continue;
    }
    // getLastRowNum() is the 0-based index of the last row, so the bound must be
    // inclusive; the previous `<` silently dropped the final row of every sheet.
    for (int sheetRow = 0; sheetRow <= sheet.getLastRowNum(); sheetRow++) {
        HSSFRow row = sheet.getRow(sheetRow);
        if (row == null) {
            continue;
        }
        // getLastCellNum() is the last cell index PLUS ONE (or -1 for a row
        // without cells), so this bound stays exclusive.
        int lastCellNum = row.getLastCellNum();
        for (int sheetCol = 0; sheetCol < lastCellNum; sheetCol++) {
            HSSFCell aCell = row.getCell(sheetCol);
            if (aCell == null) {
                continue;
            }
            switch (aCell.getCellType()) {
                case HSSFCell.CELL_TYPE_NUMERIC:
                    sb.append(aCell.getNumericCellValue()).append("\t");
                    break;
                case HSSFCell.CELL_TYPE_BOOLEAN:
                    sb.append(aCell.getBooleanCellValue()).append("\t");
                    break;
                case HSSFCell.CELL_TYPE_STRING:
                    sb.append(aCell.getStringCellValue()).append("\t");
                    break;
                case HSSFCell.CELL_TYPE_FORMULA:
                    sb.append(aCell.getCellFormula()).append("\t");
                    break;
                default:
                    // Other cell types are intentionally not emitted.
                    break;
            }
        }
        // End the line only for rows that actually had a cell range to walk.
        if (lastCellNum > 0) {
            sb.append("\n");
        }
    }
}

4. 獲取 Excel 2007（.xlsx）內容。

 

// Dump an XSSFWorkbook read from `fis` into `sb`: each non-empty cell is written
// followed by a tab, and each row that has cells is terminated by a newline.
// Numeric, boolean and string cells contribute their value; formula cells
// contribute the formula text; every other cell type is skipped.
XSSFWorkbook wb = new XSSFWorkbook(fis);
StringBuffer sb = new StringBuffer();
for (int sheetNum = 0; sheetNum < wb.getNumberOfSheets(); sheetNum++) {
    XSSFSheet sheet = wb.getSheetAt(sheetNum);
    if (sheet == null) {
        continue;
    }
    // getLastRowNum() is the 0-based index of the last row, so the bound must be
    // inclusive; the previous `<` silently dropped the final row of every sheet.
    for (int sheetRow = 0; sheetRow <= sheet.getLastRowNum(); sheetRow++) {
        XSSFRow row = sheet.getRow(sheetRow);
        if (row == null) {
            continue;
        }
        // getLastCellNum() is the last cell index PLUS ONE (or -1 for a row
        // without cells), so this bound stays exclusive.
        int lastCellNum = row.getLastCellNum();
        for (int sheetCol = 0; sheetCol < lastCellNum; sheetCol++) {
            XSSFCell aCell = row.getCell(sheetCol);
            if (aCell == null) {
                continue;
            }
            // Compare against the constants as seen through XSSFCell rather than
            // HSSFCell, so this snippet does not depend on the HSSF classes.
            switch (aCell.getCellType()) {
                case XSSFCell.CELL_TYPE_NUMERIC:
                    sb.append(aCell.getNumericCellValue()).append("\t");
                    break;
                case XSSFCell.CELL_TYPE_BOOLEAN:
                    sb.append(aCell.getBooleanCellValue()).append("\t");
                    break;
                case XSSFCell.CELL_TYPE_STRING:
                    sb.append(aCell.getStringCellValue()).append("\t");
                    break;
                case XSSFCell.CELL_TYPE_FORMULA:
                    sb.append(aCell.getCellFormula()).append("\t");
                    break;
                default:
                    // Other cell types are intentionally not emitted.
                    break;
            }
        }
        // End the line only for rows that actually had a cell range to walk.
        if (lastCellNum > 0) {
            sb.append("\n");
        }
    }
}

 

5. 獲取 PPT 文件內容。

 

// Collect slide text into `sb`: for every slide, its title, then each text run
// followed by a tab, then a newline closing the slide's line.
// NOTE(review): the title is appended as returned; if getTitle() can yield null,
// the literal "null" ends up in the output — confirm.
StringBuffer sb = new StringBuffer();
SlideShow ss = new SlideShow(new HSLFSlideShow(fis));
for (Slide slide : ss.getSlides()) {
    sb.append(slide.getTitle());
    for (TextRun run : slide.getTextRuns()) {
        sb.append(run.getText()).append("\t");
    }
    sb.append("\n");
}

 

6. 獲取 PDF 文件內容（此處使用的是 Apache PDFBox，而非 POI）。

 

// Parse the stream as a PDF and extract its text into `result`.
PDFParser parser = new PDFParser(fis);
parser.parse();
PDDocument pdDocument = parser.getPDDocument();
String result;
try {
    PDFTextStripper stripper = new PDFTextStripper();
    result = stripper.getText(pdDocument);
} finally {
    // The parsed document holds resources until closed; release them even when
    // text extraction throws. `pdDocument` must not be used after this point.
    pdDocument.close();
}
相關文章
相關標籤/搜索