因爲項目需求,需要對上傳的 Office 文件生成在線預覽。經過一番痛苦的研究,目前已經成功將 DOC、DOCX、PPTX 無亂碼地轉換爲 HTML;PPT 在字體不是宋體時會出現亂碼,至今無法解決,留待以後再研究。而 EXCEL 轉換尚未解決圖片問題。能力有限,先上代碼吧。
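POI jar 的 Maven 依賴如下。需注意的是:即使各個 POI jar 統一使用同一個版本,仍可能出現內部某些方法不存在的情況,原因不明;經測試,最終選擇了 3.15。另外,下面代碼中 DOCX 轉換用到的 org.apache.poi.xwpf.converter.* 類並不屬於 POI 本身,而是來自 xdocreport(fr.opensagres.xdocreport:org.apache.poi.xwpf.converter.xhtml,例如 1.0.x 版本),需要額外引入;代碼還依賴 commons-io 與 slf4j。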
<!-- Single version property shared by every org.apache.poi artifact below. -->
<properties>
    <poi.version>3.15</poi.version>
</properties>

<!-- POI core module. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>${poi.version}</version>
</dependency>

<!-- OOXML formats (docx / pptx / xlsx). -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>${poi.version}</version>
</dependency>

<!-- Scratchpad module: provides the HWPF (doc) and HSLF (ppt) packages imported by the Java code. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>${poi.version}</version>
</dependency>

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-excelant</artifactId>
    <version>${poi.version}</version>
</dependency>

<!-- Schema classes are pinned separately (not via poi.version); stax-api and xmlbeans are excluded from its transitive dependencies. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.3</version>
    <exclusions>
        <exclusion>
            <groupId>stax</groupId>
            <artifactId>stax-api</artifactId>
        </exclusion>
        <exclusion>
            <groupId>org.apache.xmlbeans</groupId>
            <artifactId>xmlbeans</artifactId>
        </exclusion>
    </exclusions>
</dependency>
Java 代碼如下:
package com.boco.investment.common; import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.poi.hslf.usermodel.HSLFAutoShape; import org.apache.poi.hslf.usermodel.HSLFTable; import org.apache.poi.hslf.usermodel.HSLFTextParagraph; import org.apache.poi.hslf.usermodel.HSLFTextRun; import org.apache.poi.hssf.converter.ExcelToHtmlConverter; import org.apache.poi.hssf.usermodel.HSSFPictureData; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.converter.WordToHtmlConverter; import org.apache.poi.hwpf.usermodel.Picture; import org.apache.poi.sl.usermodel.Slide; import org.apache.poi.sl.usermodel.SlideShow; import org.apache.poi.sl.usermodel.SlideShowFactory; import org.apache.poi.xslf.usermodel.*; import org.apache.poi.xwpf.converter.core.BasicURIResolver; import org.apache.poi.xwpf.converter.core.FileImageExtractor; import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter; import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import java.awt.*; import java.awt.geom.Rectangle2D; import java.awt.image.BufferedImage; import java.io.*; import java.util.List; /** * Created by wdj on 2017/11/14. 
* * 經過poi將doc、docx、ppt、pptx轉爲html * 保存圖片的位置在文件所在目錄,以文件名建立文件夾下 */ public class PoiUtils { private static Logger logger = LoggerFactory.getLogger(PoiUtils.class); private static String rootPath = ConfigUtils.getProperty("upload.rootPath"); private static final String IMAGE_SERVER = ConfigUtils.getProperty("file.server"); /** * poi office文件轉html,支持doc,docx,ppt,pptx * 根據源文件在同一目錄下生成相同名稱的html文件 */ public static boolean officeToHtml(String filePath) { filePath = rootPath + filePath; String htmlFilePath = FileUtils.getFileNameWithoutExtension(filePath)+".html"; try { if(checkFile(filePath,"doc")){ return wordToHtml03(filePath,htmlFilePath); }else if(checkFile(filePath,"docx")){ return wordToHtml07(filePath,htmlFilePath); }else if(checkFile(filePath,"ppt")){ return pptToHtml03(filePath,htmlFilePath); }else if(checkFile(filePath,"pptx")){ return pptToHtml07(filePath,htmlFilePath); }else { logger.error("poi OfficeToHtml出錯,不支持的文件格式"); return false; } } catch (Exception e) { logger.error("poi OfficeToHtml出錯:",e); return false; } } /** * Word03 轉爲 HTML * * @param fileName * @param outputFile */ public static boolean wordToHtml03(String fileName, String outputFile){ if (!(checkFile(fileName,"doc")||checkFile(fileName,"docx"))) { logger.error("word03文件轉html出錯,不支持類型爲:"+fileName.substring(fileName.lastIndexOf("."))+" 的文件"); return false; } HWPFDocument wordDoc = null; WordToHtmlConverter wthc = null; try { wordDoc = new HWPFDocument(new FileInputStream(fileName)); wthc = new WordToHtmlConverter( DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()); } catch (Exception e) { logger.error("word03轉html失敗",e); return false; } //html引用圖片位置 wthc.setPicturesManager((bytes,pt,string,f,f1) ->getImageUrl(fileName)+string); wthc.processDocument(wordDoc); List<Picture> pics = wordDoc.getPicturesTable().getAllPictures(); fileExists(getImageSavePath(fileName)); if (null != pics && pics.size() > 0) { for (Picture pic : pics) { try { //生成圖片位置 pic.writeImageContent(new 
FileOutputStream(getImageSavePath(fileName)+pic.suggestFullFileName())); } catch (IOException e) { logger.error("word03轉html失敗",e); return false; } } } Document htmlDocument = wthc.getDocument(); ByteArrayOutputStream out = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(out); try { TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); } catch (TransformerException e) { logger.error("word03轉html失敗",e); return false; } finally { try { out.close(); } catch (IOException e) { logger.error("word03轉html文件流關閉失敗",e); } } String htmlStr = new String(out.toByteArray()); return writeFile(htmlStr, outputFile); } /** * Word07 轉爲 HTML * * @param fileName * @param outputFile */ public static boolean wordToHtml07(String fileName, String outputFile){ if (!checkFile(fileName,"docx")) { logger.error("word07文件轉html出錯,不支持類型爲:"+fileName.substring(fileName.lastIndexOf("."))+" 的文件"); return false; } //讀取文檔內容 XWPFDocument document = null; try { InputStream in = new FileInputStream(fileName); document = new XWPFDocument(in); } catch (IOException e) { logger.error("word07轉html失敗",e); return false; } //加載html頁面時圖片路徑 XHTMLOptions options = XHTMLOptions.create().URIResolver( new BasicURIResolver(getImageUrl(fileName))); //圖片保存文件夾路徑 fileExists(getImageSavePath(fileName)); options.setExtractor(new FileImageExtractor(new File(getImageSavePath(fileName)))); OutputStream out = null; try { out = new FileOutputStream(new File(outputFile)); XHTMLConverter.getInstance().convert(document, out, options); return true; } catch (IOException e) { logger.error("word07轉html失敗",e); return false; } finally { try { out.close(); } catch (IOException e) { 
logger.error("word07轉html文件流關閉失敗",e); } } } /** * excel to html * @param path * @param file */ // todo 待完成 public static void testExcel(String path,String file) { HSSFWorkbook excelBook= null; ExcelToHtmlConverter excelToHtmlConverter = null; try { InputStream input=new FileInputStream(path+file); excelBook = new HSSFWorkbook(input); excelToHtmlConverter = new ExcelToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument() ); } catch (IOException e) { e.printStackTrace(); } catch (ParserConfigurationException e) { e.printStackTrace(); } //加載html頁面時圖片路徑 XHTMLOptions options = XHTMLOptions.create().URIResolver( new BasicURIResolver(getImageUrl(path))); //圖片保存文件夾路徑 options.setExtractor(new FileImageExtractor(new File(getImageSavePath(path)))); excelToHtmlConverter.setOutputRowNumbers(false); excelToHtmlConverter.setOutputHiddenRows(false); excelToHtmlConverter.setOutputColumnHeaders(false); excelToHtmlConverter.setOutputHiddenColumns(true); excelToHtmlConverter.processWorkbook(excelBook); List pics = excelBook.getAllPictures(); if (pics != null) { for (int i = 0; i < pics.size(); i++) { HSSFPictureData pic = (HSSFPictureData) pics.get (i); try { // pic.writeImageContent (new FileOutputStream (path + pic.suggestFullFileName() ) ); new FileOutputStream (path + "11" ).write(pic.getData()); } catch (IOException e) { e.printStackTrace(); } } } Document htmlDocument =excelToHtmlConverter.getDocument(); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource (htmlDocument); StreamResult streamResult = new StreamResult (outStream); TransformerFactory tf = TransformerFactory.newInstance(); try { Transformer serializer = tf.newTransformer(); serializer.setOutputProperty (OutputKeys.ENCODING, "utf-8"); serializer.setOutputProperty (OutputKeys.INDENT, "yes"); serializer.setOutputProperty (OutputKeys.METHOD, "html"); serializer.transform (domSource, streamResult); } catch (TransformerException e) { 
e.printStackTrace(); } finally { try { outStream.close(); } catch (IOException e) { e.printStackTrace(); } } String content = new String (outStream.toByteArray() ); try { FileUtils.writeStringToFile(new File (path, "exportExcel.html"), content, "utf-8"); } catch (IOException e) { e.printStackTrace(); } } /** * ppt03轉html * filepath:源文件 * htmlname:生成html名稱 * */ public static boolean pptToHtml03(String filepath, String outputFile){ File file = new File(filepath); // 讀入PPT文件 if (!checkFile(filepath,"ppt")) { logger.error("ppt03文件轉html出錯,不支持類型爲:"+FileUtils.getFileExtension(filepath)+" 的文件"); return false; } FileInputStream is = null; SlideShow ppt; try { is = new FileInputStream(file); ppt = SlideShowFactory.create(is); } catch (IOException e) { logger.error("ppt03文件轉html出錯:",e); return false; } finally { try { is.close(); } catch (IOException e) { logger.error("ppt03文件轉html關閉文件流失敗:",e); } } Dimension pgsize = ppt.getPageSize(); List<Slide> slide = ppt.getSlides(); FileOutputStream out =null; String imghtml=""; //保存圖片位置 fileExists(getImageSavePath(filepath)); for (int i = 0; i < slide.size(); i++) { for (Object o : slide.get(i).getShapes()) { if(o instanceof HSLFAutoShape) { HSLFAutoShape shapes = (HSLFAutoShape)o; List<HSLFTextParagraph> list = shapes.getTextParagraphs(); for (HSLFTextParagraph hslfTextRuns : list) { for (HSLFTextRun hslfTextRun : hslfTextRuns.getTextRuns()) { hslfTextRun.setFontFamily("宋體"); } } }else if(o instanceof HSLFTable){ HSLFTable hslfTable = (HSLFTable) o; int rowSize = hslfTable.getNumberOfRows(); int columnSize = hslfTable.getNumberOfColumns(); for (int j = 0; j < rowSize; j++) { for (int k = 0; k < columnSize; k++) { for (int l =0;l < hslfTable.getCell(j, k).getTextParagraphs().size();l++){ HSLFTextParagraph hslfTextRuns = hslfTable.getCell(j, k).getTextParagraphs().get(l); for (int m = 0;m < hslfTextRuns.getTextRuns().size();m++){ HSLFTextRun textRun = hslfTextRuns.getTextRuns().get(m); //todo 設置字體失敗,輸出html依舊會亂碼 
textRun.setFontFamily("宋體"); } } } } } } BufferedImage img = new BufferedImage(pgsize.width,pgsize.height, BufferedImage.TYPE_INT_RGB); Graphics2D graphics = img.createGraphics(); graphics.setPaint(Color.BLUE); graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height)); slide.get(i).draw(graphics); // 這裏設置圖片的存放路徑和圖片的格式(jpeg,png,bmp等等),注意生成文件路徑與源文件同一個目錄 try { out= new FileOutputStream(getImageSavePath(filepath)+(i + 1) + ".jpeg"); javax.imageio.ImageIO.write(img, "jpeg", out); } catch (IOException e) { logger.error("ppt03文件轉html出錯:",e); try { out.close(); } catch (IOException e1) { logger.error("ppt03文件轉html關閉文件流失敗:",e); } return false; } //圖片在html加載路徑 String imgs=getImageUrl(filepath)+(i + 1) + ".jpeg"; imghtml+="<img src=\'"+imgs+"\' style=\'width:1200px;height:830px;vertical-align:text-bottom;\'><br><br><br><br>"; } DOMSource domSource = new DOMSource(); StreamResult streamResult = new StreamResult(out); TransformerFactory tf = TransformerFactory.newInstance(); try { Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); } catch (TransformerException e) { logger.error("ppt03文件轉html出錯:",e); return false; } finally { try { out.close(); } catch (IOException e) { logger.error("ppt03文件轉html關閉文件流失敗:",e); } } String ppthtml="<html><head><META http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"></head><body>"+imghtml+"</body></html>"; try { FileUtils.writeStringToFile(new File(outputFile), ppthtml, "utf-8"); } catch (IOException e) { logger.error("ppt03文件轉html出錯:",e); return false; } return true; } /** * ppt07轉html * filepath:源文件 * outputFile:生成html名稱 * */ public static boolean pptToHtml07(String filepath,String outputFile) { File file = new File(filepath); // 讀入PPT文件 if (!checkFile(filepath,"pptx")) { 
logger.error("ppt07文件轉html出錯,不支持類型爲:"+FileUtils.getFileExtension(filepath)+" 的文件"); return false; } FileInputStream is = null; SlideShow ppt; try { is = new FileInputStream(file); ppt = SlideShowFactory.create(is); } catch (IOException e) { logger.error("ppt07文件轉html出錯:",e); return false; } finally { try { is.close(); } catch (IOException e) { logger.error("ppt07文件轉html關閉文件流失敗:",e); } } Dimension pgsize = ppt.getPageSize(); List<XSLFSlide> pptPageXSLFSLiseList=ppt.getSlides(); FileOutputStream out=null; String imghtml=""; //保存圖片位置 fileExists(getImageSavePath(filepath)); for (int i = 0; i < pptPageXSLFSLiseList.size(); i++) { for(XSLFShape shape : pptPageXSLFSLiseList.get(i).getShapes()){ //設置文字字體 if(shape instanceof XSLFTextShape) { XSLFTextShape tsh = (XSLFTextShape)shape; for(XSLFTextParagraph p : tsh){ for(XSLFTextRun r : p){ r.setFontFamily("宋體"); } } //設置表格字體 }else if(shape instanceof XSLFTable){ XSLFTable table = (XSLFTable)shape; int rowSize = table.getNumberOfRows(); int columnSize = table.getNumberOfColumns(); for (int j = 0; j < rowSize; j++) { for (int k = 0; k < columnSize; k++) { for (int l =0;l < table.getCell(j, k).getTextParagraphs().size();l++){ XSLFTextParagraph xslfTextRuns = table.getCell(j, k).getTextParagraphs().get(l); for (int m = 0;m < xslfTextRuns.getTextRuns().size();m++){ xslfTextRuns.getTextRuns().get(m).setFontFamily("宋體"); } } } } } } BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB); Graphics2D graphics = img.createGraphics(); graphics.setPaint(Color.white); graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height)); pptPageXSLFSLiseList.get(i).draw(graphics); //設置圖片存放位置 String Imgname = getImageSavePath(filepath) + (i+1) + ".jpg"; try { out = new FileOutputStream(Imgname); javax.imageio.ImageIO.write(img, "jpg", out); } catch (java.io.IOException e) { logger.error("ppt07文件轉html出錯:",e); try { out.close(); } catch (IOException e1) { logger.error("ppt07文件轉html關閉流出錯:",e); } 
return false; } //圖片在html加載路徑 String imgs=getImageUrl(filepath)+(i + 1) + ".jpg"; imghtml+="<img src=\'"+imgs+"\' style=\'width:1200px;height:830px;vertical-align:text-bottom;\'><br><br><br><br>"; } DOMSource domSource = new DOMSource(); StreamResult streamResult = new StreamResult(out); TransformerFactory tf = TransformerFactory.newInstance(); try { Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); } catch (TransformerException e) { logger.error("ppt07文件轉html出錯:",e); return false; }finally { try { out.close(); } catch (java.io.IOException e) { logger.error("ppt07文件轉html關閉流出錯:",e); return false; } } String ppthtml="<html><head><META http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"></head><body>"+imghtml+"</body></html>"; try { FileUtils.writeStringToFile(new File(outputFile), ppthtml, "utf-8"); return true; } catch (Exception e) { logger.error("ppt07文件轉html關閉流出錯:",e); return false; } } /** * 輸出文件流 * @param content * @param path * @return */ public static boolean writeFile(String content, String path){ FileOutputStream fos = null; BufferedWriter bw = null; File file = new File(path); try { fos = new FileOutputStream(file); bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8")); bw.write(content); return true; } catch (java.io.IOException e) { logger.error("輸出文件出錯",e); return false; }finally { try { if (null != bw) { bw.close(); } if (null != fos) { fos.close(); } } catch (java.io.IOException e) { logger.error("輸出文件關閉流出錯",e); } } } /** * 判斷文件夾是否存在,不存在則新建 * @param path */ private static void fileExists(String path) { File file = new File(path); if (!file.exists()){ file.mkdirs(); } } /** * 檢查文件類型 * @param fileName * @return */ public static boolean checkFile(String fileName,String type) { boolean flag = false; String suffixname = 
FileUtils.getFileExtension(fileName); if (suffixname != null && suffixname.equalsIgnoreCase(type)) { flag = true; } return flag; } /** * 根據文件全路徑獲取文件所在路徑 * @param fileFullName * @return */ public static String getFilePath(String fileFullName) { File file = new File(fileFullName); String filePath = fileFullName.replace(file.getName(),""); return filePath; } private static String getImageUrl(String filePath){ filePath = filePath.replace(rootPath,""); //圖片引用地址須要去掉 rootpath return IMAGE_SERVER+FileUtils.getFileNameWithoutExtension(filePath)+"/"; } private static String getImageSavePath(String filePath){ return FileUtils.getFileNameWithoutExtension(filePath)+File.separator; } }