lucene.net + 盤古分詞優化
引用:
1.Lucene.Net.dll
2.PanGu.Lucene.Analyzer.dll
3.PanGu.HighLight.dll
4.PanGu.dll
1 using Lucene.Net.Search; 2 using Lucene.Net.Store; 3 using Lucene.Net.QueryParsers; 4 using Lucene.Net.Documents; 5 using Lucene.Net.Index; 6 using Lucene.Net.Analysis.Standard; 7 using Lucene.Net.Analysis; 8 using Lucene.Net.Analysis.PanGu; 9 using PanGu.HighLight; 10 using PanGu;
1.創建索引:
// Directory on disk where the Lucene index files are stored.
static string path = @"G:\indextest";

/// <summary>
/// Rebuilds the Lucene index under <see cref="path"/> from the
/// <c>goods_name</c> and <c>goods_brief</c> columns of the <c>ecs_goods</c> table.
/// </summary>
/// <remarks>
/// The analyzer is chosen by the <c>isPangu</c> flag, which is declared outside this
/// snippet: PanGuAnalyzer when it is true, StandardAnalyzer otherwise.
/// NOTE(review): the connection string (including the password) is hard-coded;
/// it should be moved to configuration before this is used outside a demo.
/// </remarks>
static void CreateIndex()
{
    // Open the directory that holds the index files.
    var directory = FSDirectory.Open(new DirectoryInfo(path));

    Analyzer analyzer;
    if (isPangu)
    {
        analyzer = new PanGuAnalyzer();
    }
    else
    {
        analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
    }

    // The third argument (create = true) starts a fresh index instead of appending.
    IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    try
    {
        // using-blocks guarantee the connection, command and reader are released
        // even when the query or the indexing throws.
        using (MySqlConnection conn = new MySqlConnection(@"server=localhost;User Id=root;password=123456;Database=ecshop"))
        {
            conn.Open();
            using (MySqlCommand cmd = new MySqlCommand("select goods_name,goods_brief from ecs_goods", conn))
            using (MySqlDataReader reader = cmd.ExecuteReader())
            {
                while (reader.Read())
                {
                    // One Document per database row; both columns are stored and analyzed.
                    Document doc = new Document();
                    doc.Add(new Field("goods_name", reader["goods_name"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("goods_brief", reader["goods_brief"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    indexWriter.AddDocument(doc);
                }
            }
        }

        // Optimize the index files once all rows have been added.
        indexWriter.Optimize();
    }
    finally
    {
        // Always close the writer so it is not left open when the import fails.
        indexWriter.Close();
    }
}
2.搜索:
/// <summary>
/// Handles the page load: reads the posted keyword "q", searches the
/// <c>goods_name</c> field of the index at <c>path</c>, and fills <c>gs</c> with
/// highlighted results. The elapsed time is written to <c>tasktime</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
/// <remarks>
/// NOTE(review): the query is parsed with StandardAnalyzer, while CreateIndex may
/// build the index with PanGuAnalyzer; confirm that both sides use the same analyzer.
/// </remarks>
protected void Page_Load(object sender, EventArgs e)
{
    keyword = Request.Form["q"];
    if (string.IsNullOrEmpty(keyword))
    {
        return;
    }

    var watch = Stopwatch.StartNew();
    Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);

    // Read-only searcher over the index directory.
    IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(new DirectoryInfo(path)), true);
    try
    {
        // Parse the user's keyword into a query against goods_name.
        QueryParser queryP = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "goods_name", analyzer);
        Query query = queryP.Parse(keyword);
        var hits = searcher.Search(query, 200);

        // PanGu highlighter: wraps matched terms in a bold red span.
        PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter =
            new PanGu.HighLight.SimpleHTMLFormatter("<span style=\"font-weight:bold;color: red;\">", "</span>");
        PanGu.HighLight.Highlighter highlighter =
            new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
        highlighter.FragmentSize = 50;

        // totalHits counts every match, but only the top 200 were requested above,
        // so iterate over the documents actually returned to stay inside the array.
        int returnedCount = hits.scoreDocs.Length;
        for (int i = 0; i < returnedCount; i++)
        {
            Document doc = searcher.Doc(hits.scoreDocs[i].doc);
            goods g = new goods();
            g.goods_name = highlighter.GetBestFragment(keyword, doc.Get("goods_name"));
            g.goods_brief = highlighter.GetBestFragment(keyword, doc.Get("goods_brief"));
            gs.Add(g);
        }
    }
    finally
    {
        // Close the searcher on every path, including parse or search failures.
        searcher.Close();
    }

    watch.Stop();

    tasktime = "搜索耗費時間:" + watch.ElapsedMilliseconds + "毫秒";
}
多字段搜索索引
// Multi-field search: parse the keyword against several fields at once.
// The field names must match the ones written by CreateIndex
// ("goods_name" and "goods_brief"); names absent from the index match nothing.
string[] fields = { "goods_name", "goods_brief" };
MultiFieldQueryParser mq = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, fields, analyzer);
Query multiquery = mq.Parse(keyword);
// Fetch at most the 200 best-scoring documents.
var hits1 = searcher.Search(multiquery, 200);