Lucene.net應用

一、加入盤古分詞方法

  /// <summary>
       /// Splits the user's search text into terms using the PanGu analyzer.
       /// </summary>
       /// <param name="str">Raw search text; may be null or empty.</param>
       /// <returns>
       /// The terms in the order the analyzer emits them, or an empty list when
       /// <paramref name="str"/> is null or empty.
       /// </returns>
       public static List<string> GetPanGuWord(string str)
       {
           List<string> list = new List<string>();

           // StringReader rejects null, and an empty string has nothing to tokenize.
           if (string.IsNullOrEmpty(str))
           {
               return list;
           }

           Analyzer analyzer = new PanGuAnalyzer();
           TokenStream tokenStream = analyzer.TokenStream("", new StringReader(str));
           try
           {
               Lucene.Net.Analysis.Token token = null;
               while ((token = tokenStream.Next()) != null)
               {
                   list.Add(token.TermText());
               }
           }
           finally
           {
               // Release the token stream even if tokenizing throws.
               tokenStream.Close();
           }
           return list;
       }
View Code

二、建立視圖顯示的MODEL(ViewModel)

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;

namespace CZBK.ItcastOA.WebApp.Models
{
    /// <summary>
    /// View model carrying one search hit to the search results view.
    /// </summary>
    public class ViewSarchContentModel
    {
        /// <summary>Identifier of the hit, kept as a string.</summary>
        public string Id { get; set; }
        /// <summary>Title of the hit.</summary>
        public string Title { get; set; }
        /// <summary>Content text of the hit.</summary>
        public string Content { get; set; }
    }
}
View Code

三、根據數據庫表中字段建立索引

       /// <summary>
        /// Writes every book returned by <c>BookService</c> into the Lucene index under
        /// <c>C:\lucenedir</c>, replacing any document previously indexed with the same Id.
        /// </summary>
        /// <remarks>
        /// The writer and the directory are closed in <c>finally</c> blocks so that a failure
        /// while indexing does not leave the index write-locked.
        /// </remarks>
        private void CreateSearchIndex()
        {
            // The path must match the casing of the folder on disk; the index files are stored here.
            string indexPath = @"C:\lucenedir";

            // Open the index directory ("FS" stands for file system).
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
            try
            {
                // True when the index folder and its index files already exist.
                bool isUpdate = IndexReader.IndexExists(directory);

                // Only one writer may modify the index at a time; opening an IndexWriter locks it.
                // A stale lock (for example after a crash while indexing) is cleared first.
                // NOTE(review): this also removes the lock of a writer that is still running in
                // another request; concurrent rebuilds must be serialized by the caller.
                if (isUpdate && IndexWriter.IsLocked(directory))
                {
                    IndexWriter.Unlock(directory);
                }

                // Create a new index when none exists, otherwise append to it. The lock is taken here.
                IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
                try
                {
                    List<Books> list = BookService.LoadEntities(c => true).ToList();

                    foreach (Books bookModel in list)
                    {
                        string bookId = bookModel.Id.ToString();

                        // Lucene never overwrites on add, so drop the previous copy to avoid duplicate hits.
                        writer.DeleteDocuments(new Term("Id", bookId));

                        Document document = new Document();

                        // Store.YES keeps the original value so it can be read back with doc.Get(...);
                        // NOT_ANALYZED stores the Id as a single, unsegmented term.
                        document.Add(new Field("Id", bookId, Field.Store.YES, Field.Index.NOT_ANALYZED));

                        // ANALYZED segments the text for full-text search; WITH_POSITIONS_OFFSETS also
                        // records term positions and offsets. Null column values are indexed as empty
                        // strings because Field does not accept a null value.
                        document.Add(new Field("Title", bookModel.Title ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
                        document.Add(new Field("Content", bookModel.ContentDescription ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));

                        writer.AddDocument(document);
                    }
                }
                finally
                {
                    // Closing the writer releases the index lock.
                    writer.Close();
                }
            }
            finally
            {
                // The directory must be closed, otherwise the indexed results cannot be found by searches.
                directory.Close();
            }
        }
View Code

四、搜索

       /// <summary>
        /// Searches the "Content" field of the index under <c>C:\lucenedir</c> for the text
        /// posted as <c>txtContent</c> and returns the hits with highlighted content.
        /// </summary>
        /// <returns>
        /// One view model per hit (at most 1000); an empty list when no search text was supplied
        /// or it produced no terms.
        /// </returns>
        private List<ViewSarchContentModel> SearchBookContent()
        {
            List<ViewSarchContentModel> list = new List<ViewSarchContentModel>();

            string searchText = Request["txtContent"];
            if (string.IsNullOrEmpty(searchText))
            {
                // Nothing to search for; avoid passing null to the tokenizer.
                return list;
            }

            List<string> kw = Common.WebCommon.GetPanGuWord(searchText);
            if (kw.Count == 0)
            {
                // A phrase query without terms cannot match anything.
                return list;
            }

            string indexPath = @"C:\lucenedir";
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            try
            {
                IndexReader reader = IndexReader.Open(directory, true);
                try
                {
                    IndexSearcher searcher = new IndexSearcher(reader);

                    // Build the search condition: every segmented word must occur in "Content".
                    PhraseQuery query = new PhraseQuery();
                    foreach (string word in kw)
                    {
                        query.Add(new Term("Content", word));
                    }
                    // Maximum distance allowed between the query words; words that are too far
                    // apart in the text are not considered a meaningful match.
                    query.SetSlop(100);

                    // TopScoreDocCollector is the container that receives the query results.
                    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
                    searcher.Search(query, null, collector);

                    // Take all collected hits. TopDocs(start, count) can be used for paging,
                    // e.g. TopDocs(300, 20) returns hits 300 to 320.
                    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

                    for (int i = 0; i < docs.Length; i++)
                    {
                        // ScoreDoc only carries Lucene's internal document id, so documents are
                        // loaded one at a time instead of all being held in memory at once.
                        int docId = docs[i].doc;
                        Document doc = searcher.Doc(docId);

                        ViewSarchContentModel viewModel = new ViewSarchContentModel();
                        viewModel.Id = doc.Get("Id");
                        viewModel.Title = doc.Get("Title");
                        // Highlight the search keywords inside the stored content.
                        viewModel.Content = Common.WebCommon.CreateHightLight(searchText, doc.Get("Content"));
                        list.Add(viewModel);
                    }
                }
                finally
                {
                    // The reader was opened here, so it is closed here; this releases the index files.
                    reader.Close();
                }
            }
            finally
            {
                directory.Close();
            }

            return list;
        }
View Code

五、返回給VIEW

 /// <summary>
        /// Handles the search form: runs a search when the search button was pressed and
        /// rebuilds the index only when the "create index" button was pressed.
        /// </summary>
        /// <returns>
        /// The "Index" view for a search or a plain page request; the text "ok" after the
        /// index has been rebuilt.
        /// </returns>
        public ActionResult SearchContent()
        {
            if (!string.IsNullOrEmpty(Request["btnSearch"]))
            {
                List<ViewSarchContentModel> list = SearchBookContent();
                ViewData["searchList"] = list;
                ViewData["searchWhere"] = Request["txtContent"];
                return View("Index");
            }

            // Rebuilding the index is expensive and takes the index write lock, so it must be
            // requested explicitly through the form's btnCreate button rather than triggered
            // by every request that is not a search.
            if (!string.IsNullOrEmpty(Request["btnCreate"]))
            {
                CreateSearchIndex();
                return Content("ok");
            }

            // Neither button was submitted: show the search page without results.
            return View("Index");
        }
View Code

六、視圖表現

@* Standalone search page (no layout). Shows the search form and, when ViewData["searchList"]
   is set, the list of hits. ViewData["searchWhere"] refills the search box. *@
@{
    Layout = null;
}
@using CZBK.ItcastOA.WebApp.Models
<!DOCTYPE html>
<html>
<head>
    <meta name="viewport" content="width=device-width" />
    <title>文檔搜索</title>
    <script src="~/Scripts/jquery-1.7.1.min.js"></script>
    <style type="text/css">
      .search-text2{ display:block; width:528px; height:26px; line-height:26px; float:left; margin:3px 5px; border:1px solid; font-family:'Microsoft Yahei'; font-size:14px;}
      .search-btn2{width:102px; height:32px; line-height:32px; cursor:pointer; border:0px; background-color:#d6000f;font-family:'Microsoft Yahei'; font-size:16px;color:#f3f3f3;}
      .search-list{width:600px; overflow:hidden; margin:10px 20px 0px 20px;}
       .search-list dt{font-family:'Microsoft Yahei'; font-size:16px; line-height:20px; margin-bottom:7px; font-weight:normal;}
       .search-list .search-detail{font-size:12px; color:#666666;margin-bottom:5px; font-family:Arial;line-height:16px;}
       .search-list dt a{color:#2981a9;}
    </style>
 
</head>
<body>

      <!-- JiaThis Button BEGIN -->
<script type="text/javascript" >
    var jiathis_config = {
        data_track_clickback: true,
        showClose: true,
        hideMore: false
    }
</script>
<script type="text/javascript" src="http://v3.jiathis.com/code/jiathis_r.js?uid=1986459&type=left&btn=l.gif&move=0" charset="utf-8"></script>
<!-- JiaThis Button END -->
    <div>
        @* GET form: both submit buttons post to the same action, which can tell them apart by name. *@
        <form method="get" action="/Search/SearchContent">
            <input type="text" value="@ViewData["searchWhere"]" name="txtContent" autocomplete="off" class="search-text2"/>
            <input type="submit" value="搜一搜" name="btnSearch" class="search-btn2" />
            <input type="submit" value="建立索引庫" name="btnCreate" />
        </form>

        @* One dt/dd pair per hit. Content is written without HTML encoding via
           MvcHtmlString.Create so that the highlight markup it contains is rendered.
           NOTE(review): this also renders any other markup present in the content - confirm the source is trusted. *@
        <dl class="search-list">
        @if (ViewData["searchList"] != null)
        {
            foreach (ViewSarchContentModel viewModel in (List<ViewSarchContentModel>)ViewData["searchList"])
            {
                <dt><a href="/Book/ShowDetail/?id=@viewModel.Id"> @viewModel.Title</a></dt>
             <dd class="search-detail">@MvcHtmlString.Create(viewModel.Content)</dd>
            }
        }
            </dl>

    </div>
</body>
</html>
View Code

 

改變輸入框、按鈕樣式,高亮顯示

 <style type="text/css">
      /* Search text box */
      .search-text2{ display:block; width:528px; height:26px; line-height:26px; float:left; margin:3px 5px; border:1px solid; font-family:'Microsoft Yahei'; font-size:14px;}
      /* Search button */
      .search-btn2{width:102px; height:32px; line-height:32px; cursor:pointer; border:0px; background-color:#d6000f;font-family:'Microsoft Yahei'; font-size:16px;color:#f3f3f3;}
      /* Result list container, result title (dt), result summary (.search-detail) and title link */
      .search-list{width:600px; overflow:hidden; margin:10px 20px 0px 20px;}
       .search-list dt{font-family:'Microsoft Yahei'; font-size:16px; line-height:20px; margin-bottom:7px; font-weight:normal;}
       .search-list .search-detail{font-size:12px; color:#666666;margin-bottom:5px; font-family:Arial;line-height:16px;}
       .search-list dt a{color:#2981a9;}
    </style>

盤古分詞的高亮組件PanGu.HighLight.dll,引用高亮顯示組件

    /// <summary>
       /// Returns the fragment of <paramref name="Content"/> that best matches
       /// <paramref name="keywords"/>, with each matched word wrapped in a red font tag.
       /// </summary>
       /// <param name="keywords">The search text entered by the user.</param>
       /// <param name="Content">The text of the search hit to highlight.</param>
       /// <returns>
       /// The highlighted fragment; an empty string when <paramref name="Content"/> is null;
       /// <paramref name="Content"/> unchanged when <paramref name="keywords"/> is null.
       /// </returns>
       public static string CreateHightLight(string keywords, string Content)
       {
           // Do not hand null to the highlighter: keywords come straight from the request
           // and may be missing, and a stored field may be absent.
           if (Content == null)
           {
               return string.Empty;
           }
           if (keywords == null)
           {
               return Content;
           }

           // The formatter's arguments are the prefix and suffix put around each highlighted word.
           PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter =
            new PanGu.HighLight.SimpleHTMLFormatter("<font color=\"red\">", "</font>");
           // The highlighter takes the formatter and a PanGu Segment object.
           PanGu.HighLight.Highlighter highlighter =
           new PanGu.HighLight.Highlighter(simpleHTMLFormatter,
           new Segment());
           // Number of characters in each summary fragment.
           highlighter.FragmentSize = 150;
           // Return the best-matching summary fragment.
           return highlighter.GetBestFragment(keywords, Content);

       }

keywords搜索關鍵詞,Content搜索結果

 viewModel.Content =Common.WebCommon.CreateHightLight(Request["txtContent"], doc.Get("Content"));//搜索內容關鍵字高亮顯示this

視圖中

<dd class="search-detail">@MvcHtmlString.Create(viewModel.Content)</dd>

Razor 的 @ 輸出預設會進行 HTML 編碼;要原樣輸出高亮用的 HTML 標籤,須使用 @MvcHtmlString.Create(...)。

Lucene每次生成索引不會刪除、覆蓋之前生成的,會造成搜索時搜索到重複的記錄,因此生成前先要刪除一次(實質沒有刪除文件,只是給文件加上刪除標記)

writer.DeleteDocuments(new Term("Id",bookModel.Id.ToString()));

 

搜索頁面採用<form method="get" .............>,並採用靜態頁面:有利於網站推廣

 

分享到

加<script src="~/Scripts/jquery-1.7.1.min.js"></script>

並將如下代碼放入body <!-- JiaThis Button BEGIN --><script type="text/javascript" > var jiathis_config = { data_track_clickback: true, showClose: true, hideMore: false }</script><script type="text/javascript" src="http://v3.jiathis.com/code/jiathis_r.js?uid=1986459&type=left&btn=l.gif&move=0" charset="utf-8"></script><!-- JiaThis Button END -->

相關文章
相關標籤/搜索