一、加入盤古分詞方法
/// <summary>
/// Splits the search text entered by the user into terms using the PanGu analyzer.
/// </summary>
/// <param name="str">Raw search text; may be null or empty.</param>
/// <returns>
/// The terms in the order the analyzer produced them, or an empty list when
/// <paramref name="str"/> is null or empty.
/// </returns>
public static List<string> GetPanGuWord(string str)
{
    List<string> list = new List<string>();
    if (string.IsNullOrEmpty(str))
    {
        // StringReader rejects null, and there is nothing to tokenize anyway.
        return list;
    }

    Analyzer analyzer = new PanGuAnalyzer();
    TokenStream tokenStream = analyzer.TokenStream("", new StringReader(str));
    try
    {
        Lucene.Net.Analysis.Token token;
        while ((token = tokenStream.Next()) != null)
        {
            list.Add(token.TermText());
        }
    }
    finally
    {
        // Release the stream even if tokenizing throws.
        tokenStream.Close();
    }

    return list;
}
二、建立視圖顯示的MODEL(ViewModel)
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;

namespace CZBK.ItcastOA.WebApp.Models
{
    /// <summary>
    /// View model carrying one search hit to the search result view.
    /// </summary>
    public class ViewSarchContentModel
    {
        /// <summary>Identifier of the hit, kept as text.</summary>
        public string Id { get; set; }

        /// <summary>Title shown for the hit.</summary>
        public string Title { get; set; }

        /// <summary>Content text shown under the title.</summary>
        public string Content { get; set; }
    }
}
三、根據數據庫表中字段建立索引
/// <summary>
/// Writes every book returned by <c>BookService</c> into the Lucene index,
/// replacing any document previously indexed under the same Id.
/// </summary>
private void CreateSearchIndex()
{
    // Must match the on-disk folder name, including case, otherwise an error is raised.
    // The index files are written to this directory.
    string indexPath = @"C:\lucenedir";

    // Load the rows before opening the writer so the index write lock is not
    // held while waiting on the database.
    List<Books> list = BookService.LoadEntities(c => true).ToList();

    // Open the index directory ("FS" stands for FileSystem).
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        // True when the index folder and its index files already exist.
        bool isUpdate = IndexReader.IndexExists(directory);
        if (isUpdate)
        {
            // Only one writer may modify the index at a time; opening an IndexWriter locks it.
            // If the directory is still locked (e.g. a previous run crashed mid-write), unlock first.
            // NOTE(review): this also unlocks an index that another request is writing right now;
            // the original author notes this is addressed later.
            if (IndexWriter.IsLocked(directory))
            {
                IndexWriter.Unlock(directory);
            }
        }

        // Opening the writer takes the index lock; create a new index only when none exists yet.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            foreach (Books bookModel in list)
            {
                string id = bookModel.Id.ToString();

                // Remove the old copy first so re-indexing does not create duplicate hits.
                writer.DeleteDocuments(new Term("Id", id));

                Document document = new Document();

                // Store.YES keeps the original value so it can be read back with doc.Get(...).
                // NOT_ANALYZED stores the value without tokenizing it.
                document.Add(new Field("Id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));

                // ANALYZED tokenizes the value, which full-text searched fields need.
                // WITH_POSITIONS_OFFSETS also stores term positions and offsets.
                // Field rejects a null value, so a missing column is indexed as empty text.
                document.Add(new Field("Title", bookModel.Title ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                document.Add(new Field("Content", bookModel.ContentDescription ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));

                writer.AddDocument(document);
            }
        }
        finally
        {
            // Closing the writer releases the index lock, even when indexing failed.
            writer.Close();
        }
    }
    finally
    {
        // Must be closed, otherwise the indexed results cannot be found by searches.
        directory.Close();
    }
}
四、搜索
/// <summary>
/// Searches the "Content" field of the Lucene index for the text posted as
/// <c>txtContent</c> and returns the hits with the keywords highlighted.
/// </summary>
/// <returns>
/// One view model per hit; an empty list when no search text was posted or
/// the text yields no terms.
/// </returns>
private List<ViewSarchContentModel> SearchBookContent()
{
    List<ViewSarchContentModel> list = new List<ViewSarchContentModel>();

    string searchText = Request["txtContent"];
    if (string.IsNullOrEmpty(searchText))
    {
        // Nothing to search for; avoid passing null to the tokenizer.
        return list;
    }

    List<string> kw = Common.WebCommon.GetPanGuWord(searchText);
    if (kw.Count == 0)
    {
        // No terms to search for, so skip opening the index.
        return list;
    }

    string indexPath = @"C:\lucenedir";
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try
    {
        reader = IndexReader.Open(directory, true);
        searcher = new IndexSearcher(reader);

        // Search condition: every term must occur in "Content".
        PhraseQuery query = new PhraseQuery();
        foreach (string word in kw)
        {
            query.Add(new Term("Content", word));
        }

        // Maximum distance allowed between the terms; terms that are far apart
        // in the text are not a meaningful match.
        query.SetSlop(100);

        // TopScoreDocCollector is the container that receives the hits.
        TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
        searcher.Search(query, null, collector);

        // Take all collected hits. TopDocs(start, count) can be used for paging,
        // e.g. TopDocs(300, 20) returns hits 300 to 320.
        ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

        foreach (ScoreDoc hit in docs)
        {
            // A ScoreDoc carries only Lucene's internal document id, so the full
            // documents are not all loaded at once; each one is fetched on demand.
            Document doc = searcher.Doc(hit.doc);

            ViewSarchContentModel viewModel = new ViewSarchContentModel();
            viewModel.Id = doc.Get("Id");
            viewModel.Title = doc.Get("Title");
            // Highlight the search keywords inside the content.
            viewModel.Content = Common.WebCommon.CreateHightLight(searchText, doc.Get("Content"));
            list.Add(viewModel);
        }
    }
    finally
    {
        // Release index file handles whether or not the search succeeded.
        if (searcher != null)
        {
            searcher.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
        directory.Close();
    }

    return list;
}
五、返回給VIEW
/// <summary>
/// Runs a search when the request carries a non-empty <c>btnSearch</c> value;
/// otherwise rebuilds the search index.
/// </summary>
/// <returns>The "Index" view with the search results, or the plain text "ok" after indexing.</returns>
public ActionResult SearchContent()
{
    bool searchRequested = !string.IsNullOrEmpty(Request["btnSearch"]);
    if (!searchRequested)
    {
        CreateSearchIndex();
        return Content("ok");
    }

    ViewData["searchList"] = SearchBookContent();
    // Echo the search text back so the view can refill the input box.
    ViewData["searchWhere"] = Request["txtContent"];
    return View("Index");
}
六、視圖表現
@* Search page: a search form plus the list of hits the controller stored in ViewData["searchList"]. *@
@{ Layout = null; }
@using CZBK.ItcastOA.WebApp.Models
<!DOCTYPE html>
<html>
<head>
    <meta name="viewport" content="width=device-width" />
    <title>文檔搜索</title>
    <script src="~/Scripts/jquery-1.7.1.min.js"></script>
    <style type="text/css">
        .search-text2{ display:block; width:528px; height:26px; line-height:26px; float:left; margin:3px 5px; border:1px solid; font-family:'Microsoft Yahei'; font-size:14px;}
        .search-btn2{width:102px; height:32px; line-height:32px; cursor:pointer; border:0px; background-color:#d6000f;font-family:'Microsoft Yahei'; font-size:16px;color:#f3f3f3;}
        .search-list{width:600px; overflow:hidden; margin:10px 20px 0px 20px;}
        .search-list dt{font-family:'Microsoft Yahei'; font-size:16px; line-height:20px; margin-bottom:7px; font-weight:normal;}
        .search-list .search-detail{font-size:12px; color:#666666;margin-bottom:5px; font-family:Arial;line-height:16px;}
        .search-list dt a{color:#2981a9;}
    </style>
</head>
<body>
    <!-- JiaThis Button BEGIN -->
    <script type="text/javascript" >
        var jiathis_config = { data_track_clickback: true, showClose: true, hideMore: false }
    </script>
    <script type="text/javascript" src="http://v3.jiathis.com/code/jiathis_r.js?uid=1986459&type=left&btn=l.gif&move=0" charset="utf-8"></script>
    <!-- JiaThis Button END -->
    <div>
        @* Both submit buttons send a GET to the same action; the action checks for the btnSearch value to decide between searching and building the index. *@
        <form method="get" action="/Search/SearchContent">
            <input type="text" value="@ViewData["searchWhere"]" name="txtContent" autocomplete="off" class="search-text2"/>
            <input type="submit" value="搜一搜" name="btnSearch" class="search-btn2" />
            <input type="submit" value="建立索引庫" name="btnCreate" />
        </form>
        <dl class="search-list">
            @* The result list is rendered only after a search has filled ViewData["searchList"]. *@
            @if (ViewData["searchList"] != null)
            {
                foreach (ViewSarchContentModel viewModel in (List<ViewSarchContentModel>)ViewData["searchList"])
                {
                    <dt><a href="/Book/ShowDetail/?id=@viewModel.Id"> @viewModel.Title</a></dt>
                    @* MvcHtmlString.Create writes the content without HTML-encoding so the highlight markup renders as tags. *@
                    <dd class="search-detail">@MvcHtmlString.Create(viewModel.Content)</dd>
                }
            }
        </dl>
    </div>
</body>
</html>
改變輸入框、按鈕樣式,高亮顯示
<style type="text/css">
    /* Search text box */
    .search-text2 {
        display: block;
        float: left;
        width: 528px;
        height: 26px;
        line-height: 26px;
        margin: 3px 5px;
        border: 1px solid;
        font-family: 'Microsoft Yahei';
        font-size: 14px;
    }

    /* Search button */
    .search-btn2 {
        width: 102px;
        height: 32px;
        line-height: 32px;
        cursor: pointer;
        border: 0;
        background-color: #d6000f;
        font-family: 'Microsoft Yahei';
        font-size: 16px;
        color: #f3f3f3;
    }

    /* Result list container */
    .search-list {
        width: 600px;
        overflow: hidden;
        margin: 10px 20px 0;
    }

    /* Result title */
    .search-list dt {
        font-family: 'Microsoft Yahei';
        font-size: 16px;
        line-height: 20px;
        margin-bottom: 7px;
        font-weight: normal;
    }

    /* Result excerpt */
    .search-list .search-detail {
        font-size: 12px;
        color: #666;
        margin-bottom: 5px;
        font-family: Arial;
        line-height: 16px;
    }

    /* Result title link */
    .search-list dt a {
        color: #2981a9;
    }
</style>
盤古分詞的高亮組件PanGu.HighLight.dll,引用高亮顯示組件
/// <summary>
/// Produces an excerpt of <paramref name="Content"/> in which the search
/// keywords are wrapped in red <c>font</c> tags.
/// </summary>
/// <param name="keywords">The search text whose terms are highlighted.</param>
/// <param name="Content">The text to take the excerpt from.</param>
/// <returns>
/// The highlighted excerpt. <paramref name="Content"/> is returned unchanged when
/// either argument is null or empty, or when the highlighter produces no excerpt.
/// </returns>
public static string CreateHightLight(string keywords, string Content)
{
    if (string.IsNullOrEmpty(keywords) || string.IsNullOrEmpty(Content))
    {
        // Nothing to highlight.
        return Content;
    }

    // The formatter arguments are the prefix and suffix placed around each highlighted word.
    PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter =
        new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");

    // The highlighter needs the formatter and a PanGu Segment instance.
    PanGu.HighLight.Highlighter highlighter =
        new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());

    // Number of characters per excerpt fragment.
    highlighter.FragmentSize = 150;

    // NOTE(review): Content is not HTML-encoded here and the view writes the result
    // unencoded, so markup stored in the content reaches the page as-is. Confirm
    // that the indexed content is trusted.
    string fragment = highlighter.GetBestFragment(keywords, Content);

    // Fall back to the original text so a hit never renders as a blank entry
    // (presumably the highlighter returns an empty string when nothing matches; verify).
    return string.IsNullOrEmpty(fragment) ? Content : fragment;
}
keywords搜索關鍵詞,Content搜索結果
viewModel.Content =Common.WebCommon.CreateHightLight(Request["txtContent"], doc.Get("Content"));// highlight the search keywords in the content
視圖中
@* MvcHtmlString.Create writes the content without HTML-encoding so the highlight markup renders as tags. *@
<dd class="search-detail">@MvcHtmlString.Create(viewModel.Content)</dd>
Razor 的 @ 輸出會進行 HTML 編碼;要讓高亮用的 HTML 標籤原樣輸出,需使用 @MvcHtmlString.Create(...)
Lucene每次生成索引不會刪除、覆蓋之前生成的,會形成搜索時搜索到重複的記錄,因此生成前先要刪除一次(實質沒有刪除文件,只是給文件個刪除標記)
// Delete the document already indexed under this Id before adding it again, so re-indexing does not produce duplicate hits.
writer.DeleteDocuments(new Term("Id",bookModel.Id.ToString()));
搜索頁面採用<form method="get" .............>,並採用靜態頁面:有利於網站推廣
分享到
加<script src="~/Scripts/jquery-1.7.1.min.js"></script>
並將如下代碼放入body <!-- JiaThis Button BEGIN --><script type="text/javascript" > var jiathis_config = { data_track_clickback: true, showClose: true, hideMore: false }</script><script type="text/javascript" src="http://v3.jiathis.com/code/jiathis_r.js?uid=1986459&type=left&btn=l.gif&move=0" charset="utf-8"></script><!-- JiaThis Button END -->