Lucene.net 全文檢索文件

時間 2019-12-13
標籤 lucene.net lucene 全文檢索文件 简体版
原文 原文鏈接
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace TestApp
{
    /// <summary>
    /// Console sample that queries a Lucene.Net full-text index with the PanGu
    /// analyzer and contains a helper for building such an index from text files.
    /// </summary>
    class Program
    {
        /// <summary>Maximum number of hits requested from the searcher.</summary>
        private const int MaxHits = 1000;

        /// <summary>
        /// Searches the "body" field of the index stored in "IndexDir" for the
        /// tokenized keyword and prints the stored "number" and "body" values
        /// of every hit to the console.
        /// </summary>
        static void Main()
        {
            // NOTE(review): this method reads the index from "IndexDir" while
            // CreateIndex writes to "../ItemIndexDir", and CreateIndex is never
            // called from here - confirm which index location is intended.

            // The using statements dispose in reverse order: searcher, reader,
            // analyzer, then the directory. The reader must be disposed
            // explicitly because a searcher built from an existing reader does
            // not close it.
            using (Lucene.Net.Store.Directory dir_search = FSDirectory.Open(new System.IO.DirectoryInfo("IndexDir"), new NoLockFactory()))
            using (Analyzer analyzer = new PanGuAnalyzer())
            using (IndexReader reader = IndexReader.Open(dir_search, true))
            using (IndexSearcher search = new IndexSearcher(reader))
            {
                QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "body", analyzer);
                Query query = parser.Parse(LuceneHelper.GetKeyWordSplid("文章"));

                // Run the search and collect the result set.
                TopDocs ts = search.Search(query, null, MaxHits);

                // Print the stored fields of every document that was hit.
                foreach (ScoreDoc hit in ts.ScoreDocs)
                {
                    Document doc = search.Doc(hit.Doc);
                    Console.WriteLine(doc.Get("number"));
                    Console.WriteLine(doc.Get("body"));
                }
            }
        }

        /// <summary>
        /// Helper that tokenizes search keywords before they are handed to the
        /// query parser.
        /// </summary>
        public static class LuceneHelper
        {
            /// <summary>
            /// Splits <paramref name="keywords"/> into terms with the PanGu analyzer.
            /// </summary>
            /// <param name="keywords">Raw search text; may be null or empty.</param>
            /// <returns>
            /// The terms produced by the analyzer, each followed by a single
            /// space (so a non-empty result ends with a space), or an empty
            /// string when <paramref name="keywords"/> is null or empty.
            /// </returns>
            public static string GetKeyWordSplid(string keywords)
            {
                // StringReader rejects null; treat null like empty input,
                // which yields no terms.
                if (string.IsNullOrEmpty(keywords))
                {
                    return string.Empty;
                }

                StringBuilder sb = new StringBuilder();

                // NOTE(review): the first argument of Analyzer.TokenStream is a
                // field name; the keyword text is passed there as in the original
                // code - presumably PanGuAnalyzer ignores it, confirm.
                using (Analyzer analyzer = new PanGuAnalyzer())
                using (TokenStream stream = analyzer.TokenStream(keywords, new StringReader(keywords)))
                {
                    while (stream.IncrementToken())
                    {
                        ITermAttribute ita = stream.GetAttribute<ITermAttribute>();
                        sb.Append(ita.Term).Append(' ');
                    }
                }

                return sb.ToString();
            }
        }

        /// <summary>
        /// Builds (or appends to) the index in "../ItemIndexDir" from the files
        /// 1.txt .. 5.txt located in the Data\Test folder two levels above the
        /// current working directory.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The current working directory has fewer than two parent directories.
        /// </exception>
        private static void CreateIndex()
        {
            // Resolve the data folder up front so a missing parent directory is
            // reported clearly instead of as a NullReferenceException.
            DirectoryInfo parent = System.IO.Directory.GetParent(System.IO.Directory.GetCurrentDirectory());
            if (parent == null || parent.Parent == null)
            {
                throw new InvalidOperationException("The current directory has no grandparent directory to locate Data\\Test in.");
            }
            string dataDir = Path.Combine(parent.Parent.FullName, "Data", "Test");

            using (Lucene.Net.Store.Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("../ItemIndexDir")))
            using (Analyzer analyzer = new PanGuAnalyzer())
            {
                // Create a new index only when none exists in the directory yet.
                // NOTE(review): when an index already exists the documents are
                // appended, so repeated runs add the same five files again -
                // confirm that is intended.
                bool isCreate = !IndexReader.IndexExists(dir);

                using (IndexWriter writer = new IndexWriter(dir, analyzer, isCreate, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    for (int i = 1; i <= 5; i++)
                    {
                        string path = Path.Combine(dataDir, i + ".txt");
                        string text = File.ReadAllText(path, Encoding.Default);

                        Document doc = new Document();
                        // Field.Store.YES keeps the original value so that
                        // doc.Get("number") can return it; NOT_ANALYZED indexes
                        // the value without tokenizing it.
                        doc.Add(new Field("number", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                        // WITH_POSITIONS_OFFSETS stores term vectors together
                        // with token positions and offsets.
                        doc.Add(new Field("body", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                        writer.AddDocument(doc);
                    }

                    writer.Optimize();
                }
            }
        }
    }
}
相關標籤/搜索
每日一句
每一个你不满意的现在，都有一个你没有努力的曾经。