using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace TestApp
{
    /// <summary>
    /// Console sample that queries a Lucene index on the "body" field and prints the
    /// stored "number" and "body" values of every hit. Also contains a helper that
    /// builds such an index from numbered text files.
    /// </summary>
    /// <remarks>
    /// NOTE(review): <c>PanGuAnalyzer</c> is not declared in this file and none of the
    /// using directives above obviously names its namespace; presumably it is resolved
    /// elsewhere in the project - confirm.
    /// </remarks>
    class Program
    {
        /// <summary>
        /// Opens the index in "IndexDir", searches the "body" field for the tokenized
        /// keyword and writes each hit's "number" and "body" fields to the console.
        /// </summary>
        static void Main()
        {
            #region Search
            // Upper bound on the number of hits requested from the searcher.
            const int maxHits = 1000;

            // Index directory.
            // NOTE(review): this reads "IndexDir" while CreateIndex writes to
            // "../ItemIndexDir", and CreateIndex is not called from here - confirm
            // that the two locations are meant to differ.
            //
            // Stacked using statements dispose in reverse order (searcher, reader,
            // analyzer, directory) and also cover a failure while opening the reader.
            // The reader is disposed explicitly because it is opened by this method;
            // per the Lucene IndexSearcher documentation a searcher built on a supplied
            // reader leaves closing that reader to the caller.
            using (Lucene.Net.Store.Directory dir_search = FSDirectory.Open(new System.IO.DirectoryInfo("IndexDir"), new NoLockFactory()))
            using (Analyzer analyzer = new PanGuAnalyzer())
            using (IndexReader reader = IndexReader.Open(dir_search, true))
            using (IndexSearcher searcher = new IndexSearcher(reader))
            {
                QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "body", analyzer);
                Query query = parser.Parse(LuceneHelper.GetKeyWordSplid("文章"));

                // Run the search and obtain the result set.
                TopDocs topDocs = searcher.Search(query, null, maxHits);

                // Walk the hits and print the stored fields of each matching document.
                foreach (ScoreDoc hit in topDocs.ScoreDocs)
                {
                    Document doc = searcher.Doc(hit.Doc);
                    Console.WriteLine(doc.Get("number"));
                    Console.WriteLine(doc.Get("body"));
                }
            }
            #endregion
        }

        /// <summary>
        /// Helper that tokenizes search keywords.
        /// </summary>
        public static class LuceneHelper
        {
            /// <summary>
            /// Runs <paramref name="keywords"/> through a <c>PanGuAnalyzer</c> and joins the
            /// resulting terms, each followed by a single space.
            /// </summary>
            /// <param name="keywords">Raw keyword text to tokenize.</param>
            /// <returns>
            /// The terms in stream order, each followed by one space (so a non-empty
            /// result ends with a trailing space); an empty string when no token is produced.
            /// </returns>
            /// <exception cref="ArgumentNullException"><paramref name="keywords"/> is null.</exception>
            public static string GetKeyWordSplid(string keywords)
            {
                // Fail before any analyzer is allocated; the original surfaced the same
                // exception type from the StringReader constructor.
                if (keywords == null)
                {
                    throw new ArgumentNullException("keywords");
                }

                StringBuilder terms = new StringBuilder();

                // Both the analyzer and the token stream are disposable and were
                // previously never released.
                // NOTE(review): the first TokenStream argument is a field name; the
                // keyword text itself is passed there, as in the original - confirm the
                // analyzer ignores it.
                using (Analyzer analyzer = new PanGuAnalyzer())
                using (TokenStream stream = analyzer.TokenStream(keywords, new StringReader(keywords)))
                {
                    while (stream.IncrementToken())
                    {
                        ITermAttribute term = stream.GetAttribute<ITermAttribute>();
                        terms.Append(term.Term).Append(' ');
                    }
                }

                return terms.ToString();
            }
        }

        /// <summary>
        /// Builds the index files: reads Data/Test/1.txt through 5.txt from the directory
        /// two levels above the current working directory and adds one document per file
        /// to the index in "../ItemIndexDir".
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The current working directory has fewer than two ancestor directories.
        /// </exception>
        private static void CreateIndex()
        {
            // Resolve the data directory once, before touching the index, instead of on
            // every loop iteration. The explicit check replaces a NullReferenceException
            // when the working directory is too close to the file-system root.
            DirectoryInfo workingParent = System.IO.Directory.GetParent(System.IO.Directory.GetCurrentDirectory());
            if (workingParent == null || workingParent.Parent == null)
            {
                throw new InvalidOperationException("The current directory has no grandparent directory to resolve Data/Test from.");
            }

            // Path.Combine inserts the platform's separator instead of a hard-coded '\'.
            string dataDirectory = System.IO.Path.Combine(workingParent.Parent.FullName, "Data", "Test");

            using (Lucene.Net.Store.Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("../ItemIndexDir")))
            {
                // IndexReader.IndexExists reports whether an index is already present in
                // the directory; a new index is created only when there is none.
                bool isCreate = !IndexReader.IndexExists(dir);

                // The writer is disposed before the analyzer, and both before the directory.
                using (Analyzer analyzer = new PanGuAnalyzer())
                using (IndexWriter writer = new IndexWriter(dir, analyzer, isCreate, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    // Add the documents to the index.
                    for (int i = 1; i <= 5; i++)
                    {
                        string path = System.IO.Path.Combine(dataDirectory, i + ".txt");
                        string text = File.ReadAllText(path, Encoding.Default);

                        Document doc = new Document();

                        // Field.Store.YES keeps the original value so that doc.Get("number")
                        // can return it later; Field.Index.NOT_ANALYZED indexes the value
                        // without tokenizing it.
                        doc.Add(new Field("number", i.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                        // TermVector.WITH_POSITIONS_OFFSETS stores term positions and
                        // offsets in addition to the terms themselves.
                        doc.Add(new Field("body", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                        writer.AddDocument(doc);
                    }

                    writer.Optimize();
                }
            }
        }
    }
}