// 10 - 利用 COM 組件讀取 Office 文檔

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;

namespace _04查找文件中指定字符串的位置
{
    /// <summary>
    /// Console sample that extracts plain text from Word, PDF, Excel and PowerPoint files.
    /// The Office formats are read through the Office COM interop assemblies; PDF files are
    /// read with the iTextSharp library.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: extracts the text of one hard-coded Word, PDF and Excel file.
        /// The extracted strings are only held in locals; nothing is printed or saved.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Word document
            string docFileName = @"F:\人力資源行業用語.docx";
            string docText = Doc2Text(docFileName);

            // PDF document
            string pdfFileName = @"F:\2級1課備課.pdf";
            string pdfText = Pdf2Text(pdfFileName);

            // Excel workbook
            string xlsFileName = @"F:\學生信息導入表.xls";
            string xlsText = Xls2Text(xlsFileName);
        }

        #region 利用com組件讀取office

        /// <summary>
        /// Verifies that a file exists at the given path.
        /// </summary>
        /// <param name="pFileName">Path of the file to check.</param>
        /// <exception cref="ApplicationException">No file exists at <paramref name="pFileName"/>.</exception>
        private static void IsExists(string pFileName)
        {
            if (!File.Exists(pFileName))
            {
                throw new ApplicationException("指定目錄下的無該文件");
            }
        }

        /// <summary>
        /// Returns the text content of a Word document by automating Word through COM.
        /// </summary>
        /// <param name="docFileName">Path of the Word document.</param>
        /// <returns>The value of the document's <c>Content.Text</c>.</returns>
        /// <exception cref="ApplicationException">The file does not exist.</exception>
        public static string Doc2Text(string docFileName)
        {
            IsExists(docFileName);

            object fileobj = docFileName;
            object nullobj = System.Reflection.Missing.Value;

            Microsoft.Office.Interop.Word.Application wordApp = new Microsoft.Office.Interop.Word.Application();
            Microsoft.Office.Interop.Word.Document doc = null;
            try
            {
                // Only the file name is supplied; every optional argument is passed as Missing.Value.
                doc = wordApp.Documents.Open(
                    ref fileobj,
                    ref nullobj, ref nullobj, ref nullobj, ref nullobj, ref nullobj,
                    ref nullobj, ref nullobj, ref nullobj, ref nullobj, ref nullobj,
                    ref nullobj, ref nullobj, ref nullobj, ref nullobj, ref nullobj);

                return doc.Content.Text;
            }
            finally
            {
                // Always shut Word down, even when opening or reading failed; otherwise the
                // automation instance created above is left running.
                try
                {
                    if (doc != null)
                    {
                        doc.Close(ref nullobj, ref nullobj, ref nullobj);
                    }
                }
                finally
                {
                    wordApp.Quit(ref nullobj, ref nullobj, ref nullobj);
                    GC.Collect();
                }
            }
        }

        /// <summary>
        /// Returns the text content of a PDF file, page by page, using iTextSharp.
        /// </summary>
        /// <param name="pdfFileName">Path of the PDF file.</param>
        /// <returns>The text of all pages concatenated in page order with no separator.</returns>
        public static string Pdf2Text(string pdfFileName)
        {
            PdfReader reader = new PdfReader(pdfFileName);
            try
            {
                StringBuilder data = new StringBuilder();
                int pageCount = reader.NumberOfPages;

                // Page numbers are 1-based.
                for (int page = 1; page <= pageCount; page++)
                {
                    data.Append(PdfTextExtractor.GetTextFromPage(reader, page));
                }

                return data.ToString();
            }
            finally
            {
                // Close the reader once extraction is done (the original never closed it).
                reader.Close();
            }
        }

        /// <summary>
        /// Returns the cell values of every worksheet in an Excel workbook by automating Excel through COM.
        /// </summary>
        /// <param name="xlsFileName">Path of the Excel workbook.</param>
        /// <returns>
        /// Every non-null cell value of each sheet's used range, in row-major order,
        /// each followed by a <c>|</c> character.
        /// </returns>
        /// <exception cref="ApplicationException">The file does not exist.</exception>
        public static string Xls2Text(string xlsFileName)
        {
            IsExists(xlsFileName);

            object nullobj = System.Reflection.Missing.Value;
            StringBuilder builder = new StringBuilder();

            Microsoft.Office.Interop.Excel.Application xlsApp = new Microsoft.Office.Interop.Excel.Application();
            Microsoft.Office.Interop.Excel.Workbook excel = null;
            try
            {
                // Only the file name is supplied; every optional argument is passed as Missing.Value.
                excel = xlsApp.Workbooks.Open(
                    xlsFileName,
                    nullobj, nullobj, nullobj, nullobj, nullobj, nullobj, nullobj,
                    nullobj, nullobj, nullobj, nullobj, nullobj, nullobj, nullobj);

                int sheetCount = excel.Worksheets.Count;

                // Worksheet indexes are 1-based.
                for (int k = 1; k <= sheetCount; k++)
                {
                    Microsoft.Office.Interop.Excel.Worksheet ews =
                        (Microsoft.Office.Interop.Excel.Worksheet)excel.Worksheets[k];

                    // Read the used range once per sheet instead of once per cell:
                    // every access to Value2 is a separate call into Excel.
                    AppendRangeValue(builder, ews.UsedRange.Value2);
                }
            }
            finally
            {
                // Always shut Excel down, even when opening or reading failed.
                try
                {
                    if (excel != null)
                    {
                        excel.Close(nullobj, nullobj, nullobj);
                    }
                }
                finally
                {
                    xlsApp.Quit();
                    GC.Collect();
                }
            }

            return builder.ToString();
        }

        /// <summary>
        /// Appends the non-null cell values held in a range's <c>Value2</c> result to <paramref name="builder"/>,
        /// each followed by <c>|</c>.
        /// </summary>
        /// <param name="builder">Destination buffer.</param>
        /// <param name="rangeValue">
        /// The value read from <c>Range.Value2</c>: null, a two-dimensional array of cell values,
        /// or a single scalar when the range is not returned as an array.
        /// </param>
        private static void AppendRangeValue(StringBuilder builder, object rangeValue)
        {
            if (rangeValue == null)
            {
                return;
            }

            object[,] cells = rangeValue as object[,];
            if (cells == null)
            {
                // Not an array: treat the value as the single cell of the range
                // rather than failing on the array cast.
                builder.Append(rangeValue).Append("|");
                return;
            }

            // Use the array's own bounds so the loop matches whatever base index it was created with.
            for (int i = cells.GetLowerBound(0); i <= cells.GetUpperBound(0); i++)
            {
                for (int j = cells.GetLowerBound(1); j <= cells.GetUpperBound(1); j++)
                {
                    object cell = cells[i, j];
                    if (cell != null)
                    {
                        builder.Append(cell).Append("|");
                    }
                }
            }
        }

        /// <summary>
        /// Returns the text of every shape on every slide of a PowerPoint presentation
        /// by automating PowerPoint through COM.
        /// </summary>
        /// <param name="pptFileName">Path of the presentation.</param>
        /// <returns>The text of all text-bearing shapes concatenated with no separator.</returns>
        /// <exception cref="ApplicationException">The file does not exist.</exception>
        public static string Ppt2Text(string pptFileName)
        {
            IsExists(pptFileName);

            StringBuilder builder = new StringBuilder();

            Microsoft.Office.Interop.PowerPoint.Application pptApp = new Microsoft.Office.Interop.PowerPoint.Application();
            Microsoft.Office.Interop.PowerPoint.Presentation ppt = null;
            try
            {
                ppt = pptApp.Presentations.Open(
                    pptFileName,
                    Microsoft.Office.Core.MsoTriState.msoTrue,
                    Microsoft.Office.Core.MsoTriState.msoFalse,
                    Microsoft.Office.Core.MsoTriState.msoFalse);

                foreach (Microsoft.Office.Interop.PowerPoint.Slide slide in ppt.Slides)
                {
                    foreach (Microsoft.Office.Interop.PowerPoint.Shape shape in slide.Shapes)
                    {
                        // Skip shapes that report no text frame before touching TextFrame.
                        if (shape.HasTextFrame != Microsoft.Office.Core.MsoTriState.msoTrue)
                        {
                            continue;
                        }

                        if (shape.TextFrame.HasText == Microsoft.Office.Core.MsoTriState.msoTrue)
                        {
                            builder.Append(shape.TextFrame.TextRange.Text);
                        }
                    }
                }
            }
            finally
            {
                // Always shut PowerPoint down, even when opening or reading failed.
                try
                {
                    if (ppt != null)
                    {
                        ppt.Close();
                    }
                }
                finally
                {
                    pptApp.Quit();
                    GC.Collect();
                }
            }

            return builder.ToString();
        }

        #endregion
    }
}
相關文章
相關標籤/搜索