c#把word文檔轉換爲html頁面

首先找到一個引用,VS 2015 及以上版本都自帶這個引用,喏,就是這個!

using Microsoft.Office.Interop.Word;

首先準備好你的 Word 文檔。這裏作測試,就在項目裏面建立一個文件夾,把你要轉換的 Word 文檔放到裏面。

其次,把下面這段方法複製到你的項目裏面:

 1         private string GetPathByDocToHTML(string strFile)
 2         {
 3             if (string.IsNullOrEmpty(strFile))
 4             {
 5                 return "0";//沒有文件
 6             }
 7 
 8             Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
 9             Type wordType = word.GetType();
10             Microsoft.Office.Interop.Word.Documents docs = word.Documents;
11 
12             // 打開文件  
13             Type docsType = docs.GetType();
14 
15             object fileName = strFile;
16 
17             Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docsType.InvokeMember("Open",
18             System.Reflection.BindingFlags.InvokeMethod, null, docs, new Object[] { fileName, true, true });
19 
20             // 轉換格式,另存爲html  
21             Type docType = doc.GetType();
22             //給文件從新起名
23             string filename = System.DateTime.Now.Year.ToString() + System.DateTime.Now.Month.ToString() + System.DateTime.Now.Day.ToString() +
24             System.DateTime.Now.Hour.ToString() + System.DateTime.Now.Minute.ToString() + System.DateTime.Now.Second.ToString();
25 
26             string strFileFolder = "/html/";
27             DateTime dt = DateTime.Now;
28             //以yyyymmdd形式生成子文件夾名
29             string strFileSubFolder = dt.Year.ToString();
30             strFileSubFolder += (dt.Month < 10) ? ("0" + dt.Month.ToString()) : dt.Month.ToString();
31             strFileSubFolder += (dt.Day < 10) ? ("0" + dt.Day.ToString()) : dt.Day.ToString();
32             string strFilePath = strFileFolder + strFileSubFolder + "/";
33             // 判斷指定目錄下是否存在文件夾,若是不存在,則建立 
34             if (!Directory.Exists(Server.MapPath(strFilePath)))
35             {
36                 // 建立up文件夾 
37                 Directory.CreateDirectory(Server.MapPath(strFilePath));
38             }
39 
40             //被轉換的html文檔保存的位置 
41             // HttpContext.Current.Server.MapPath("html" + strFileSubFolder + filename + ".html")
42             string ConfigPath = Server.MapPath(strFilePath + filename + ".html");
43             object saveFileName = ConfigPath;
44 
45             /*下面是Microsoft Word 9 Object Library的寫法,若是是10,可能寫成: 
46               * docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod, 
47               * null, doc, new object[]{saveFileName, Word.WdSaveFormat.wdFormatFilteredHTML}); 
48               * 其它格式: 
49               * wdFormatHTML 
50               * wdFormatDocument 
51               * wdFormatDOSText 
52               * wdFormatDOSTextLineBreaks 
53               * wdFormatEncodedText 
54               * wdFormatRTF 
55               * wdFormatTemplate 
56               * wdFormatText 
57               * wdFormatTextLineBreaks 
58               * wdFormatUnicodeText 
59             */
60             docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
61             null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
62 
63             //docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
64             //  null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML }); 
65 
66             //關閉文檔  
67             docType.InvokeMember("Close", System.Reflection.BindingFlags.InvokeMethod,
68             null, doc, new object[] { null, null, null });
69 
70             // 退出 Word  
71             wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
72             //轉到新生成的頁面  
73             //return ("/" + filename + ".html");
74 
75             //轉化HTML頁面統一編碼格式
76             TransHTMLEncoding(ConfigPath);
77 
78             return (strFilePath + filename + ".html");
79         }
80         private void TransHTMLEncoding(string strFilePath)
81         {
82             try
83             {
84                 System.IO.StreamReader sr = new System.IO.StreamReader(strFilePath, Encoding.GetEncoding(0));
85                 string html = sr.ReadToEnd();
86                 sr.Close();
87                 html = System.Text.RegularExpressions.Regex.Replace(html, @"<meta[^>]*>", "<meta http-equiv=Content-Type content='text/html; charset=gb2312'>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
88                 System.IO.StreamWriter sw = new System.IO.StreamWriter(strFilePath, false, Encoding.Default);
89 
90                 sw.Write(html);
91                 sw.Close();
92             }
93             catch (Exception ex)
94             {
95                 Page.ClientScript.RegisterStartupScript(Page.ClientScript.GetType(), "myscript", "<script>alert('" + ex.Message + "')</script>");
96             }
97         }

實際上是兩個方法。

爲了測試,你需要隨便找個地方調用這個方法:

string strWord = Server.MapPath("/wordpath/thisisword.doc"); GetPathByDocToHTML(strWord); 就是這樣,需要把你的 Word 文檔路徑傳進去。當然,測試時你可以把它寫在 Page_Load 裏面;運行以後打開你的項目,找到保存的位置就可以了。
相關文章
相關標籤/搜索