Lucene.net 全文檢索 盤古分詞

lucene.net + 盤古分詞優化

引用:

1.Lucene.Net.dll

2.PanGu.Lucene.Analyzer.dll

3.PanGu.HighLight.dll

4.PanGu.dll

 1 using Lucene.Net.Search;
 2 using Lucene.Net.Store;
 3 using Lucene.Net.QueryParsers;
 4 using Lucene.Net.Documents;
 5 using Lucene.Net.Index;
 6 using Lucene.Net.Analysis.Standard;
 7 using Lucene.Net.Analysis;
 8 using Lucene.Net.Analysis.PanGu;
 9 using PanGu.HighLight;
10 using PanGu;

 

1.創建索引:

 1 static string path = @"G:\indextest";//索引文件儲存位置
 2 
 3 static void CreateIndex()
 4         {
 5             //建立索引庫目錄
 6             var directory = FSDirectory.Open(new DirectoryInfo(path));
 7             Analyzer analyzer = null;
 8             //analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
 9 
10             if (isPangu)
11             {
12                 analyzer = new PanGuAnalyzer();//盤古Analyzer
13             }
14             else
15             {
16                 analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
17             }
18 
19             //建立一個索引,採用StandardAnalyzer對句子進行分詞
20             IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
21             MySqlConnection conn = new MySqlConnection(@"server=localhost;User Id=root;password=123456;Database=ecshop");
22             conn.Open();
23             MySqlCommand cmd = new MySqlCommand("select goods_name,goods_brief from ecs_goods", conn);
24             MySqlDataReader reader = cmd.ExecuteReader();
25             while (reader.Read())
26             {
27                 //域的集合:文檔,相似於表的行
28                 Document doc = new Document();
29                 //要索引的字段
30                 doc.Add(new Field("goods_name", reader["goods_name"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
31                 doc.Add(new Field("goods_brief", reader["goods_brief"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
32                 indexWriter.AddDocument(doc);
33             }
34             reader.Close();
35             //對索引文件進行優化
36             indexWriter.Optimize();
37             indexWriter.Close();
38         }

 

2.搜索:

 1      protected void Page_Load(object sender, EventArgs e)
 2         {
 3             keyword = Request.Form["q"];
 4             if (keyword != null && keyword != "")
 5             {
 6                 var watch = Stopwatch.StartNew();
 7                 Analyzer analyzer = null;
 8                 analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
 9 
10                 //搜索
11                 IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(new DirectoryInfo(path)), true);
12 
13                 //查詢表達式
14                 QueryParser queryP = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "goods_name", analyzer);
15 
16                 //query.parse:注入查詢條件
17                 Query query = queryP.Parse(keyword);
18                 var hits = searcher.Search(query, 200);
19 
20                 //create highlighter
21                 //IFormatter formatter = new SimpleHTMLFormatter("<span style=\"font-weight:bold;color: red;\">", "</span>");
22                 //SimpleFragmenter fragmenter = new SimpleFragmenter(80);
23                 //var scorer = new QueryScorer(query);
24                 //Highlighter highlighter = new Highlighter(formatter, scorer);
25                 //highlighter.TextFragmenter = fragmenter;
26 
27                 //PanGu create highlighter
28                 PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter =
29                    new PanGu.HighLight.SimpleHTMLFormatter("<span style=\"font-weight:bold;color: red;\">", "</span>");
30                 PanGu.HighLight.Highlighter highlighter =
31                     new PanGu.HighLight.Highlighter(simpleHTMLFormatter,
32                     new Segment());
33                 highlighter.FragmentSize = 50;
34 
35                 for (int i = 0; i < hits.totalHits; i++)
36                 {
37                     Document doc = searcher.Doc(hits.scoreDocs[i].doc);
38                     //TokenStream stream = analyzer.TokenStream("goods_name", new StringReader(doc.Get("goods_name")));
39                     //String sample = highlighter.GetBestFragments(stream, doc.Get("goods_name"), 2, "...");
40                     goods g = new goods();
41                     g.goods_name = highlighter.GetBestFragment(keyword, doc.Get("goods_name"));
42                     g.goods_brief = highlighter.GetBestFragment(keyword, doc.Get("goods_brief"));
43                     gs.Add(g);
44                 }
45 
46                 watch.Stop();
47 
48                 tasktime = "搜索耗費時間:" + watch.ElapsedMilliseconds + "毫秒";
49             }
50         }

 多字段搜索索引

// Multi-field search: build one query over every field named in `fields`.
string[] fields = new string[] { "Title", "Content" };
MultiFieldQueryParser mq = new MultiFieldQueryParser(
    Lucene.Net.Util.Version.LUCENE_29,
    fields,
    analyzer);

// Alternative form: the static
// MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29, new string[] { keyword }, fields, analyzer).
Query multiquery = mq.Parse(keyword);

// Fetch up to 200 hits for the multi-field query.
var hits1 = searcher.Search(multiquery, 200);
相關文章
相關標籤/搜索