POI 版本:3.9
操作之前,先獲取文件輸入流對象:
// Open the source file as a byte stream; the extraction snippets read from `fis`.
FileInputStream fis;
try {
    fis = new FileInputStream(file);
} catch (FileNotFoundException missingFile) {
    // The file could not be opened, so there is nothing to extract:
    // leave the enclosing method without doing any work.
    return;
}
1. 獲取 Word 2003 及之前版本內容。
// Read the whole text of a legacy binary Word document from the already-open stream.
WordExtractor docExtractor = new WordExtractor(fis);
String result = docExtractor.getText();
2. 獲取 Word 2007 內容。
// Parse the stream as an OOXML Word document, then pull its text out through
// the matching extractor.
XWPFDocument docx = new XWPFDocument(fis);
XWPFWordExtractor docxExtractor = new XWPFWordExtractor(docx);
String result = docxExtractor.getText();
3. 獲取 Excel 2003 及之前版本內容。
// Dump every sheet of a legacy .xls workbook into `sb`: each numeric, boolean,
// string or formula cell is followed by a tab, and each row that has cells is
// terminated by a newline. Cells of any other type contribute nothing.
POIFSFileSystem poifsFileSystem = new POIFSFileSystem(fis);
StringBuffer sb = new StringBuffer();
HSSFWorkbook wb = new HSSFWorkbook(poifsFileSystem);
for (int sheetNum = 0; sheetNum < wb.getNumberOfSheets(); sheetNum++) {
    HSSFSheet sheet = wb.getSheetAt(sheetNum);
    if (sheet == null) {
        continue;
    }
    // getLastRowNum() is the 0-based index of the last row, so the bound must be
    // inclusive; a strict '<' silently drops the final row of every sheet.
    for (int sheetRow = 0; sheetRow <= sheet.getLastRowNum(); sheetRow++) {
        HSSFRow row = sheet.getRow(sheetRow);
        if (row == null) {
            // Rows that were never created are simply absent.
            continue;
        }
        // getLastCellNum() is the last cell index PLUS ONE (or -1 for a row
        // without cells), so an exclusive bound is correct here.
        int cellCount = row.getLastCellNum();
        for (int sheetCol = 0; sheetCol < cellCount; sheetCol++) {
            HSSFCell aCell = row.getCell(sheetCol);
            if (aCell == null) {
                continue;
            }
            switch (aCell.getCellType()) {
                case HSSFCell.CELL_TYPE_NUMERIC:
                    sb.append(aCell.getNumericCellValue()).append("\t");
                    break;
                case HSSFCell.CELL_TYPE_BOOLEAN:
                    sb.append(aCell.getBooleanCellValue()).append("\t");
                    break;
                case HSSFCell.CELL_TYPE_STRING:
                    sb.append(aCell.getStringCellValue()).append("\t");
                    break;
                case HSSFCell.CELL_TYPE_FORMULA:
                    // The formula text itself is emitted, not its evaluated value.
                    sb.append(aCell.getCellFormula()).append("\t");
                    break;
                default:
                    break;
            }
        }
        if (cellCount > 0) {
            sb.append("\n");
        }
    }
}
4. 獲取 Excel 2007 內容。
// Dump every sheet of an OOXML .xlsx workbook into `sb`: each numeric, boolean,
// string or formula cell is followed by a tab, and each row that has cells is
// terminated by a newline. Cells of any other type contribute nothing.
XSSFWorkbook wb = new XSSFWorkbook(fis);
StringBuffer sb = new StringBuffer();
for (int sheetNum = 0; sheetNum < wb.getNumberOfSheets(); sheetNum++) {
    XSSFSheet sheet = wb.getSheetAt(sheetNum);
    if (sheet == null) {
        continue;
    }
    // getLastRowNum() is the 0-based index of the last row, so the bound must be
    // inclusive; a strict '<' silently drops the final row of every sheet.
    for (int sheetRow = 0; sheetRow <= sheet.getLastRowNum(); sheetRow++) {
        XSSFRow row = sheet.getRow(sheetRow);
        if (row == null) {
            // Rows that were never created are simply absent.
            continue;
        }
        // getLastCellNum() is the last cell index PLUS ONE (or -1 for a row
        // without cells), so an exclusive bound is correct here.
        int cellCount = row.getLastCellNum();
        for (int sheetCol = 0; sheetCol < cellCount; sheetCol++) {
            XSSFCell aCell = row.getCell(sheetCol);
            if (aCell == null) {
                continue;
            }
            // The type constants are referenced through XSSFCell rather than
            // HSSFCell so this snippet does not lean on the HSSF classes.
            switch (aCell.getCellType()) {
                case XSSFCell.CELL_TYPE_NUMERIC:
                    sb.append(aCell.getNumericCellValue()).append("\t");
                    break;
                case XSSFCell.CELL_TYPE_BOOLEAN:
                    sb.append(aCell.getBooleanCellValue()).append("\t");
                    break;
                case XSSFCell.CELL_TYPE_STRING:
                    sb.append(aCell.getStringCellValue()).append("\t");
                    break;
                case XSSFCell.CELL_TYPE_FORMULA:
                    // The formula text itself is emitted, not its evaluated value.
                    sb.append(aCell.getCellFormula()).append("\t");
                    break;
                default:
                    break;
            }
        }
        if (cellCount > 0) {
            sb.append("\n");
        }
    }
}
5. 獲取 PPT 文件內容。
// Collect the text of a binary PowerPoint file into `sb`: for each slide, its
// title (when it has one) followed by every text run, each run terminated by a
// tab, and the slide terminated by a newline.
StringBuffer sb = new StringBuffer();
SlideShow ss = new SlideShow(new HSLFSlideShow(fis));
Slide[] s = ss.getSlides();
for (Slide slide : s) {
    // getTitle() yields null for a slide without a title placeholder; appending
    // it unchecked would put the literal text "null" into the output.
    String title = slide.getTitle();
    if (title != null) {
        sb.append(title);
    }
    for (TextRun run : slide.getTextRuns()) {
        sb.append(run.getText()).append("\t");
    }
    sb.append("\n");
}
6. 獲取 PDF 文件內容。
// Parse the stream as a PDF and extract its text into `result`.
PDFParser parser = new PDFParser(fis);
parser.parse();
PDDocument pdDocument = parser.getPDDocument();
PDFTextStripper stripper = new PDFTextStripper();
String result;
try {
    result = stripper.getText(pdDocument);
} finally {
    // A parsed PDDocument holds resources until it is closed, so release it
    // even when text extraction throws.
    pdDocument.close();
}