C#讀取word,ppt,excel,txt,pdf文件內容

1、讀取文件內容

(1)Word

 /// <summary>ide

        /// 讀取doc、docxui

        /// </summary>url

        /// <param name="filepath">文件路徑</param>spa

        /// <returns>字符串</returns>excel

        protected string contentdoc(string filepath)code

        {htm

            Microsoft.Office.Interop.Word.Application app = new Microsoft.Office.Interop.Word.Application();blog

            Document doc = null;

            object unknow = Type.Missing;

            app.Visible = true;

            string str = filepath;

            object file = str;

            doc = app.Documents.Open(ref file,

                ref unknow, ref unknow, ref unknow, ref unknow,

                ref unknow, ref unknow, ref unknow, ref unknow,

                ref unknow, ref unknow, ref unknow, ref unknow,

                ref unknow, ref unknow, ref unknow);

            //string temp = doc.Paragraphs[1].Range.Text.Trim();//分段讀取

            string temp = doc.Content.Text;

            return temp;

        }

說明:1: 對項目添加引用,Microsoft Word 11.0 Object Library
2: 在程序中添加 using Word = Microsoft.Office.Interop.Word;
3: 程序中添加
Word.Application app = new Microsoft.Office.Interop.Word.Application(); //能夠打開word程序
Word.Document doc = null; //一會要記錄word打開的文檔

參考網址:http://www.cnblogs.com/no7dw/archive/2009/08/14/1546367.html

(2)Ppt

    /// <summary>

        /// 讀取ppt內容

        /// </summary>

        /// <param name="filepath"></param>

        /// <returns></returns>

        protected string contentppt(string filepath)

        {

            Microsoft.Office.Interop.PowerPoint.Application pa = new Microsoft.Office.Interop.PowerPoint.Application();

            Microsoft.Office.Interop.PowerPoint.Presentation pp = pa.Presentations.Open(filepath,

                            Microsoft.Office.Core.MsoTriState.msoTrue,

                            Microsoft.Office.Core.MsoTriState.msoFalse,

                            Microsoft.Office.Core.MsoTriState.msoFalse);

            string pps = "";

            foreach (Microsoft.Office.Interop.PowerPoint.Slide slide in pp.Slides)

            {

                foreach (Microsoft.Office.Interop.PowerPoint.Shape shape in slide.Shapes)

                 pps += shape.TextFrame.TextRange.Text.ToString();

            }

            return pps;

        }

說明:1: 對項目添加引用,Microsoft PowerPoint 11.0 Object Library 及 Microsoft Office 11.0 Object Library(提供 Microsoft.Office.Core.MsoTriState)
2: 在程序中添加 using PowerPoint = Microsoft.Office.Interop.PowerPoint;
3: 程序中添加
PowerPoint.Application app = new Microsoft.Office.Interop.PowerPoint.Application(); //能夠打開ppt程序

參考網址:http://blog.sina.com.cn/s/blog_651ff6920100oi9u.html

(3)Pdf

/// <summary>
        /// Extracts the text of every page of a text-based PDF using iTextSharp.
        /// </summary>
        /// <param name="filepath">Path of the PDF file.</param>
        /// <returns>
        /// The concatenated text of all pages, or an empty string when the file does not exist.
        /// </returns>
        protected string contentpdf(string filepath)
        {
            StringBuilder text = new StringBuilder();

            if (!System.IO.File.Exists(filepath))
            {
                return text.ToString();
            }

            PdfReader pdfReader = new PdfReader(filepath);
            try
            {
                // iText page numbers are 1-based.
                for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    // A fresh strategy per page so text from one page is not carried into the next.
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    // GetTextFromPage already returns a decoded .NET string. Pushing it through
                    // Encoding.Default and back would replace every character outside the system
                    // ANSI code page with '?', so the text is appended as returned.
                    text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
                }
            }
            finally
            {
                // Close the reader even when extraction throws.
                pdfReader.Close();
            }

            return text.ToString();
        }

說明:引用iTextSharp.dll.等三個dll.

using iTextSharp.text.pdf;

using iTextSharp.text.pdf.parser;

參考網址:http://www.codeproject.com/Tips/387327/Convert-PDF-file-content-into-string-using-Csharp

(4)txt

/// <summary>
        /// Reads a text file and decodes its bytes as GB2312.
        /// </summary>
        /// <param name="filepath">Path of the text file.</param>
        /// <returns>The decoded content of the file.</returns>
        protected string contenttxt(string filepath)
        {
            // File.ReadAllBytes opens the file with read-only access and keeps reading until
            // the whole file is in memory. A single Stream.Read call may return fewer bytes
            // than requested, and a FileStream opened with only FileMode.Open asks for
            // read/write access, which fails on read-only files.
            byte[] bytes = File.ReadAllBytes(filepath);

            return Encoding.GetEncoding("gb2312").GetString(bytes);
        }

(5)excel

詳細參考:http://www.cnblogs.com/Tsong/archive/2013/02/21/2920941.html

相關文章
相關標籤/搜索