package com.feiruo.officeConvert;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import org.apache.poi.hwpf.usermodel.Picture;
/**
 * Base class for office-document converters.
 *
 * <p>Holds the output directory, the image directory, the converted content and
 * the output encoding, and writes the converted content to disk. Subclasses
 * supply the actual conversion in {@link #convert(String)}.
 */
public abstract class OfficeConvert {
    // Directory in which pictures are stored.
    private String imgPath = null;
    // Directory in which converted files are stored.
    private String parentPath = null;
    // Converted file content.
    private String fileContent = null;
    // Character encoding used when the content is written to disk.
    private String encode = "UTF-8";

    /**
     * Converts the given doc document to the target format.
     *
     * @param docPath
     *            path of the *.doc document
     *
     * @throws FileNotFoundException
     * @throws IOException
     * @throws ParserConfigurationException
     * @throws TransformerException
     */
    public abstract void convert(String docPath) throws FileNotFoundException,
            IOException, ParserConfigurationException, TransformerException;

    /**
     * Writes the file content to disk using the configured encoding.
     *
     * <p>The configured parent directory (when one is set) and the directory
     * that contains {@code filepath} are created if they do not exist yet.
     * I/O failures are reported on standard error and not rethrown, because
     * this method declares no checked exceptions.
     *
     * @param filepath
     *            path of the file that receives the converted content
     * @throws IllegalStateException
     *             if no file content has been set
     */
    public void writeFile(String filepath) {
        // Fail before touching the file system so that a failed or skipped
        // conversion does not leave an empty file behind.
        if (fileContent == null) {
            throw new IllegalStateException(
                    "No file content to write to " + filepath);
        }
        if (parentPath != null) {
            File parentDir = new File(parentPath);
            if (!parentDir.exists()) {
                parentDir.mkdirs();
            }
        }
        File target = new File(filepath);
        // The target may live outside parentPath, so make sure its own
        // directory exists as well.
        File targetDir = target.getParentFile();
        if (targetDir != null && !targetDir.exists()) {
            targetDir.mkdirs();
        }
        // Two resources: the stream is still closed when the writer cannot be
        // created (for example because the encoding name is unsupported).
        try (FileOutputStream fos = new FileOutputStream(target);
                BufferedWriter bw = new BufferedWriter(
                        new OutputStreamWriter(fos, encode))) {
            bw.write(fileContent);
        } catch (IOException ioe) {
            // Covers FileNotFoundException and failures while closing.
            ioe.printStackTrace();
        }
    }

    /**
     * Normalises a directory path: removes double quotes, trims surrounding
     * whitespace and appends a trailing {@code /} to a non-empty path that
     * does not already end with one.
     *
     * @param path
     *            the raw path; may be {@code null}
     * @return the normalised path, or {@code null} if {@code path} is
     *         {@code null}
     */
    public String checkSetPath(String path) {
        if (path == null) {
            return null;
        }
        // Strip quotes before looking at the last character; otherwise a
        // quoted path such as "C:/test" ends up as C:/test"/ or keeps its
        // leading quote.
        String cleaned = path.replace("\"", "").trim();
        if (!cleaned.isEmpty() && !cleaned.endsWith("/")) {
            cleaned += "/";
        }
        // The earlier code also replaced '>' with '>' and '<' with '<', which
        // changed nothing; those calls were dropped.
        // NOTE(review): they may have been meant as HTML-entity handling - confirm.
        // TODO decide whether '*' should be removed from the path as well.
        return cleaned;
    }

    /**
     * Returns the encoding used when writing the file.
     *
     * @return the encoding name
     */
    public String getEncode() {
        return encode;
    }

    /**
     * Sets the encoding used when writing the file.
     *
     * @param encode
     *            the encoding name
     */
    public void setEncode(String encode) {
        this.encode = encode;
    }

    /**
     * Returns the directory in which pictures are stored.
     *
     * @return the picture directory
     */
    public String getImgPath() {
        return imgPath;
    }

    /**
     * Sets the directory in which pictures are stored. The value is
     * normalised with {@link #checkSetPath(String)}.
     *
     * @param imgPath
     *            name of the picture directory
     */
    public void setImgPath(String imgPath) {
        this.imgPath = checkSetPath(imgPath);
    }

    /**
     * Returns the directory in which files are stored.
     *
     * @return the file directory
     */
    public String getParentPath() {
        return parentPath;
    }

    /**
     * Sets the directory in which files are stored. The value is normalised
     * with {@link #checkSetPath(String)}.
     *
     * @param parentPath
     *            the file directory
     */
    public void setParentPath(String parentPath) {
        this.parentPath = checkSetPath(parentPath);
    }

    /**
     * Returns the file content.
     *
     * @return the file content, or {@code null} if none has been set
     */
    public String getFileContent() {
        return fileContent;
    }

    /**
     * Sets the file content that {@link #writeFile(String)} writes.
     *
     * @param content
     *            the file content
     */
    public void setFileContent(String content) {
        this.fileContent = content;
    }
}
package com.feiruo.officeConvert;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;
/**
 * Converts a *.doc document into an *.html file.
 *
 * @author Jdk.feiruo.
 * @since JDK 1.7 POI 3.8
 * @version 1.0
 */
public class DocToHtml extends OfficeConvert implements IOfficeConvert {
    // Pictures of the most recently converted document; null until convert() ran.
    private List<Picture> pics = null;

    /**
     * @param parentPath
     *            directory in which the html file is stored
     * @param imageppth
     *            directory in which the html pictures are stored
     * @param encoding
     *            encoding of the html output
     */
    public DocToHtml(String parentPath, String imageppth, String encoding) {
        // The setters already normalise through checkSetPath, so the paths
        // are passed in as given.
        setParentPath(parentPath);
        setImgPath(imageppth);
        this.setEncode(encoding);
    }

    /**
     * Creates a converter without output directory or picture directory.
     */
    public DocToHtml() {
    }

    /**
     * Converts a *.doc document to html and keeps the result as the file
     * content; the pictures of the document are remembered for
     * {@link #writeWithName(String)}.
     *
     * @param docPath
     *            path of the *.doc document
     *
     * @throws FileNotFoundException
     * @throws IOException
     * @throws ParserConfigurationException
     * @throws TransformerException
     */
    @Override
    public void convert(String docPath) throws FileNotFoundException,
            IOException, ParserConfigurationException, TransformerException {
        HWPFDocument wordDocument;
        // Close the input stream as soon as the document has been constructed.
        // NOTE(review): assumes HWPFDocument reads the whole stream in its
        // constructor - confirm against the POI version in use.
        try (FileInputStream docStream = new FileInputStream(docPath)) {
            wordDocument = new HWPFDocument(docStream);
        }
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                // Only the name is handed back here; the picture bytes are
                // written later by writeWithName.
                return suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);
        pics = wordDocument.getPicturesTable().getAllPictures();
        Document htmlDocument = wordToHtmlConverter.getDocument();

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, this.getEncode());
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

        // Decode with the same charset the serializer encoded with, not the
        // platform default.
        String htmlContent = out.toString(this.getEncode());
        String imgPath = getImgPath();
        if (imgPath != null && !imgPath.isEmpty()) {
            // Literal replace: the path must not be interpreted as a regex
            // replacement (a '$' or '\' in it would break replaceAll).
            htmlContent = htmlContent.replace("<img src=\"",
                    "<img src=\"" + imgPath);
        }
        setFileContent(htmlContent);
    }

    /**
     * Writes the pictures of the converted document into the picture
     * directory below the parent directory and then writes the html source as
     * {@code <fileName>.html} into the parent directory.
     *
     * @param fileName
     *            name of the html file without extension
     */
    @Override
    public void writeWithName(String fileName) {
        // Unset paths count as empty so they do not show up as "null" in names.
        String parentDir = getParentPath() == null ? "" : getParentPath();
        // Save the pictures of the document first.
        if (pics != null) {
            String imgDirName = getImgPath() == null ? "" : getImgPath();
            String imgDirPath = parentDir + imgDirName;
            // With neither path set, fall back to the working directory
            // instead of resolving against the file-system root.
            File imgDir = imgDirPath.isEmpty() ? new File(".") : new File(imgDirPath);
            // Create the directory if it does not exist yet.
            if (!imgDir.exists()) {
                imgDir.mkdirs();
            }
            for (Picture pic : pics) {
                File imgFile = new File(imgDir, pic.suggestFullFileName());
                try (FileOutputStream imgOut = new FileOutputStream(imgFile)) {
                    pic.writeImageContent(imgOut);
                } catch (IOException e) {
                    // A picture that cannot be written does not stop the
                    // remaining pictures or the html file.
                    e.printStackTrace();
                }
            }
        }
        // Save the html source file.
        this.writeFile(parentDir + fileName + ".html");
    }
}
package com.feiruo.Test;
import java.io.FileNotFoundException;
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import com.yinhai.officeConvert.DocToHtml;
/**
 * Manual check that converts a fixed *.doc document to html with
 * {@code DocToHtml}.
 *
 * <p>NOTE(review): the import above names com.yinhai.officeConvert.DocToHtml,
 * while DocToHtml is declared in package com.feiruo.officeConvert - confirm
 * which package is intended.
 */
public class Test {
    public static void main(String[] args) {
        new Test();
    }

    /**
     * Converts {@code D://test//test.doc} and writes the result as
     * {@code feiruo.html} below {@code C://test}, with pictures in the
     * {@code f} sub-directory.
     */
    public Test() {
        DocToHtml dth = new DocToHtml("C://test", "f", "UTF-8");
        try {
            dth.convert("D://test//test.doc");
            // Write only after a successful conversion; after a failure there
            // is no content to write.
            dth.writeWithName("feiruo");
        } catch (IOException | ParserConfigurationException | TransformerException e) {
            // IOException also covers FileNotFoundException.
            e.printStackTrace();
        }
    }
}
package com.feiruo.officeConvert;
public interface IOfficeConvert {

    /**
     * Writes the file to disk.
     *
     * @param fileName
     *            name of the file to write
     */
    void writeWithName(String fileName);
}