PS:當在搜索框檢索「是」時,Lucene 會根據存入的記錄去查一張詞彙表(詞彙表會根據不同的分詞原則進行分詞,一個詞彙可能對應一個或者多個記錄,詞彙表右邊保存的是這些記錄的編號)
建立索引庫,document是原始記錄表
/** * 建立索引庫 * 將Aritcle對象放入索引庫中的原始記錄表中,從而造成詞彙表 */ @Test public void createIndexDB() throws Exception{ //建立Article對象 Article article = new Article(1,"培訓","傳智是一家IT培訓機構"); //建立Document對象 Document document = new Document(); //將Article對象中的三個屬性值分別綁定到Document對象中 /* *參數一:document對象中的屬性名叫xid,article對象中的屬性名叫id,項目中提倡相同 *參數二:document對象中的屬性xid的值,與article對象中相同 *參數三:是否將xid屬性值存入由原始記錄表中轉存入詞彙表 * Store.YES表示該屬性值會存入詞彙表 * Store.NO表示該屬性值不會存入詞彙表 * 項目中提倡非id值都存入詞彙表 *參數四:是否將xid屬性值進行分詞算法 * Index.ANALYZED表示該屬性值會進行詞彙拆分 * Index.NOT_ANALYZED表示該屬性值不會進行詞彙拆分 * 項目中提倡非id值都進行詞彙拆分 * 目前將分詞理解爲分匯拆分,目前認爲一個漢字一個分詞拆分 */ document.add(new Field("xid",article.getId().toString(),Store.YES,Index.ANALYZED)); document.add(new Field("xtitle",article.getTitle(),Store.YES,Index.ANALYZED)); document.add(new Field("xcontent",article.getContent(),Store.YES,Index.ANALYZED)); Directory directory = FSDirectory.open(new File("E:/IndexDBDBDB")); Version version = Version.LUCENE_30; Analyzer analyzer = new StandardAnalyzer(version); //這種分詞模型是按照單個字進行分詞的 MaxFieldLength maxFieldLength = MaxFieldLength.LIMITED; //若是有多餘2萬個字內容,只會之前1萬個爲準 //建立IndexWriter字符流對象 /* * 參數一:lucene索引庫最終應對於硬盤中的目錄,例如:E:/IndexDBDBDB * 參數二:採用什麼策略將文本拆分,一個策略就是一個具體的實現類 * 參數三:最多將文本拆分出多少詞彙,LIMITED表示1萬個,即只取前1萬個詞彙,若是不足1W個詞彙個,以實際爲準 */ IndexWriter indexWriter = new IndexWriter(directory,analyzer,maxFieldLength); //將document對象寫入lucene索引庫 indexWriter.addDocument(document); //關閉IndexWriter字符流對象 indexWriter.close(); }
PS:建立好的索引庫文件
/** * 根據關鍵字從索引庫中搜索符合條件的內容 */ @Test public void findIndexDB() throws Exception{ //準備工做 String keywords = "培訓"; List<Article> articleList = new ArrayList<Article>(); Directory directory = FSDirectory.open(new File("D:/All_Files/IndexDBDBDB")); Version version = Version.LUCENE_30; Analyzer analyzer = new StandardAnalyzer(version); MaxFieldLength maxFieldLength = MaxFieldLength.LIMITED; //建立IndexSearcher字符流對象 IndexSearcher indexSearcher = new IndexSearcher(directory); //建立查詢解析器對象 /* * 參數一:使用分詞器的版本,提倡使用該jar包中的最高版本 * 參數二:爭對document對象中的哪一個屬性進行搜索 */ QueryParser queryParser = new QueryParser(version,"xcontent",analyzer); //建立對象對象封裝查詢關鍵字 Query query = queryParser.parse(keywords); //根據關鍵字,去索引庫中的詞彙表搜索 /* * 參數一:表示封裝關鍵字查詢對象,其它QueryParser表示查詢解析器 * 參數二:MAX_RECORD表示若是根據關鍵字搜索出來的內容較多,只取前MAX_RECORD個內容 * 不足MAX_RECORD個數的話,以實際爲準 */ int MAX_RECORD = 100; TopDocs topDocs = indexSearcher.search(query,MAX_RECORD); //TopDocs就是分好詞右邊的記錄 //迭代詞彙表中符合條件的編號 for(int i=0;i<topDocs.scoreDocs.length;i++){ //取出封裝編號和分數的ScoreDoc對象 ScoreDoc scoreDoc = topDocs.scoreDocs[i]; //取出每個編號,例如:0,1,2 int no = scoreDoc.doc; //根據編號去索引庫中的原始記錄表中查詢對應的document對象 Document document = indexSearcher.doc(no); //獲取document對象中的三個屬性值 String xid = document.get("xid"); String xtitle = document.get("xtitle"); String xcontent = document.get("xcontent"); //封裝到artilce對象中 Article article = new Article(Integer.parseInt(xid),xtitle,xcontent); //將article對象加入到list集合中 articleList.add(article); } //迭代結果集 for(Article a:articleList){ System.out.println(a); } }
PS:輸出如下。目前標準分詞器對中文只能按單個字進行識別
編號:1
標題:培訓
內容:傳智是一家IT培訓機構
PS:LuceneUtil 的重構
package cn.itcast.javaee.lucene.util;

import java.io.File;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.util.Locale;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import cn.itcast.javaee.lucene.entity.Article;

/**
 * Utility class that holds the shared Lucene configuration (directory,
 * version, analyzer, field-length limit) and converts between JavaBeans and
 * Lucene {@link Document} objects.
 *
 * @author AdminTC
 */
public class LuceneUtil {

    private static Directory directory;
    private static Version version;
    private static Analyzer analyzer;
    private static MaxFieldLength maxFieldLength;

    static {
        try {
            directory = FSDirectory.open(new File("D:/All_Files/IndexDBDBDB"));
            version = Version.LUCENE_30;
            analyzer = new StandardAnalyzer(version);
            maxFieldLength = MaxFieldLength.LIMITED;
        } catch (Exception e) {
            // The cause is preserved in the rethrown exception, so it is not
            // printed separately here.
            throw new RuntimeException(e);
        }
    }

    /** Not instantiable: every member is static. */
    private LuceneUtil() {}

    public static Directory getDirectory() {
        return directory;
    }

    public static Version getVersion() {
        return version;
    }

    public static Analyzer getAnalyzer() {
        return analyzer;
    }

    public static MaxFieldLength getMaxFieldLength() {
        return maxFieldLength;
    }

    /**
     * Converts a JavaBean into a Lucene document.
     *
     * <p>Every declared instance field becomes a document field of the same
     * name, stored and analyzed, whose value is the {@code toString()} of the
     * matching public {@code getXxx()} result. Static and synthetic fields
     * are skipped, and properties whose getter returns {@code null} are
     * omitted, because a Lucene field cannot hold a null value.
     *
     * @param obj the bean to convert; must expose a public getter per field
     * @return a new document containing the bean's non-null properties
     * @throws Exception if a getter is missing or fails
     */
    public static Document javabean2document(Object obj) throws Exception {
        Document document = new Document();
        Class<?> clazz = obj.getClass();
        for (java.lang.reflect.Field reflectField : clazz.getDeclaredFields()) {
            if (isNotBeanProperty(reflectField)) {
                continue;
            }
            String name = reflectField.getName();
            // Locale.ENGLISH keeps "id" -> "getId" in every default locale
            // (the Turkish locale would otherwise upper-case 'i' to a dotted 'I').
            String methodName = "get" + name.substring(0,1).toUpperCase(Locale.ENGLISH) + name.substring(1);
            Method method = clazz.getMethod(methodName);
            Object value = method.invoke(obj);
            if (value == null) {
                continue;
            }
            document.add(new Field(name,value.toString(),Store.YES,Index.ANALYZED));
        }
        return document;
    }

    /**
     * Converts a Lucene document back into a JavaBean.
     *
     * <p>For each declared instance field of {@code clazz}, the stored
     * document value of the same name is written through the bean's setter
     * via {@code BeanUtils.setProperty}. Fields with no stored value in the
     * document keep the value assigned by the no-argument constructor.
     *
     * @param document the document to read stored values from
     * @param clazz the bean class; needs an accessible no-argument constructor
     * @return a new, populated instance of {@code clazz}
     * @throws Exception if the bean cannot be instantiated or populated
     */
    public static Object document2javabean(Document document,Class<?> clazz) throws Exception {
        Object obj = clazz.newInstance();
        for (java.lang.reflect.Field reflectField : clazz.getDeclaredFields()) {
            if (isNotBeanProperty(reflectField)) {
                continue;
            }
            String name = reflectField.getName();
            String value = document.get(name);
            if (value != null) {
                BeanUtils.setProperty(obj,name,value);
            }
        }
        return obj;
    }

    /** Returns true for static or compiler-generated fields, which are not bean properties. */
    private static boolean isNotBeanProperty(java.lang.reflect.Field reflectField) {
        return reflectField.isSynthetic() || Modifier.isStatic(reflectField.getModifiers());
    }

    /** Round-trip smoke test: bean to document and back to bean. */
    public static void main(String[] args) throws Exception {
        Article original = new Article(1,"培訓","傳智是一家培訓機構");
        Document document = LuceneUtil.javabean2document(original);
        System.out.println("---------------------------------------");
        Article restored = (Article) LuceneUtil.document2javabean(document,Article.class);
        System.out.println(restored);
    }
}
PS:12_Lucene 索引庫查詢的過程(寫代碼時參考)
PS:使用 LuceneUtil 對 Lucene 查詢代碼進行第二次改造
package cn.itcast.javaee.lucene.secondapp;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;

import cn.itcast.javaee.lucene.entity.Article;
import cn.itcast.javaee.lucene.util.LuceneUtil;

/**
 * Refactoring of FirstApp on top of {@link LuceneUtil}.
 *
 * @author AdminTC
 */
public class SecondApp {

    /**
     * Adds one article to the index.
     *
     * @throws Exception if the conversion or the index write fails
     */
    @Test
    public void createIndexDB() throws Exception {
        Article article = new Article(1,"培訓","傳智是一家it培訓機構");
        // Alternative sample records:
        //Article article = new Article(2,"培訓","北大是一家it培訓機構");
        //Article article = new Article(3,"培訓","中大是一家it培訓機構");
        Document document = LuceneUtil.javabean2document(article);

        IndexWriter indexWriter = new IndexWriter(
                LuceneUtil.getDirectory(),LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength());
        try {
            indexWriter.addDocument(document);
        } finally {
            // Release the write lock even if the add fails.
            indexWriter.close();
        }
    }

    /**
     * Searches the "content" field for a keyword and prints the matches.
     *
     * @throws Exception if the index cannot be read or the keyword cannot be parsed
     */
    @Test
    public void findIndexDB() throws Exception {
        String keywords = "培訓";
        List<Article> articleList = new ArrayList<Article>();

        QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer());
        Query query = queryParser.parse(keywords);

        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
        try {
            TopDocs topDocs = indexSearcher.search(query,100);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                articleList.add((Article) LuceneUtil.document2javabean(document,Article.class));
            }
        } finally {
            indexSearcher.close();
        }

        for (Article a : articleList) {
            System.out.println(a);
        }
    }
}
PS:每建立一次索引庫,就會生成一個 .cfs 文件;本次總共建立了 3 次,因此生成了 3 個
輸出:...................................
編號:1 標題:培訓 內容:傳智是一家it培訓機構 編號:1 標題:培訓 內容:傳智是一家it培訓機構 編號:1 標題:培訓 內容:傳智是一家it培訓機構
Lucene DAO 層的 CRUD
package cn.itcast.javaee.lucene.curd;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;

import cn.itcast.javaee.lucene.entity.Article;
import cn.itcast.javaee.lucene.util.LuceneUtil;

/**
 * Create / update / delete / query operations against the index.
 *
 * @author AdminTC
 */
public class ArticleDao {

    /** Opens a writer on the shared index using the shared analyzer and field limit. */
    private static IndexWriter openWriter() throws Exception {
        return new IndexWriter(
                LuceneUtil.getDirectory(),LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength());
    }

    /** Adds a single article to the index. */
    @Test
    public void add() throws Exception {
        Article article = new Article(1,"培訓","傳智是一家java培訓機構");
        Document document = LuceneUtil.javabean2document(article);
        IndexWriter indexWriter = openWriter();
        try {
            indexWriter.addDocument(document); // core call
        } finally {
            indexWriter.close();
        }
    }

    /** Adds seven articles (ids 1 to 7) through one writer. */
    @Test
    public void addAll() throws Exception {
        String[] contents = {
            "傳智是一家java培訓機構",
            "傳智是一家net培訓機構",
            "傳智是一家php培訓機構",
            "傳智是一家ios培訓機構",
            "傳智是一家ui培訓機構",
            "傳智是一家c++培訓機構",
            "傳智是一家seo培訓機構",
        };
        IndexWriter indexWriter = openWriter();
        try {
            for (int i = 0; i < contents.length; i++) {
                Article article = new Article(i + 1,"培訓",contents[i]);
                indexWriter.addDocument(LuceneUtil.javabean2document(article));
            }
        } finally {
            indexWriter.close();
        }
    }

    /**
     * Replaces the document whose id is 1.
     *
     * <p>The original author's note: an update appears to delete the matching
     * document and then add the new one.
     */
    @Test
    public void update() throws Exception {
        Article newArticle = new Article(1,"培訓","傳智是一家JAVA培訓機構");
        Document document = LuceneUtil.javabean2document(newArticle);
        IndexWriter indexWriter = openWriter();
        try {
            // Argument 1: the term selecting the document(s) to replace,
            //             here field "id" with value "1".
            // Argument 2: the replacement document.
            indexWriter.updateDocument(new Term("id","1"),document); // core call
        } finally {
            indexWriter.close();
        }
    }

    /** Deletes the document whose id is 2. */
    @Test
    public void delete() throws Exception {
        IndexWriter indexWriter = openWriter();
        try {
            indexWriter.deleteDocuments(new Term("id","2")); // core call
        } finally {
            indexWriter.close();
        }
    }

    /** Deletes every document in the index. */
    @Test
    public void deleteAll() throws Exception {
        IndexWriter indexWriter = openWriter();
        try {
            indexWriter.deleteAll(); // core call
        } finally {
            indexWriter.close();
        }
    }

    /** Searches the "content" field for a keyword and prints the matches. */
    @Test
    public void findAllByKeywords() throws Exception {
        String keywords = "培";
        List<Article> articleList = new ArrayList<Article>();

        QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer());
        Query query = queryParser.parse(keywords);

        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
        try {
            TopDocs topDocs = indexSearcher.search(query,100); // core call
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                articleList.add((Article) LuceneUtil.document2javabean(document,Article.class));
            }
        } finally {
            indexSearcher.close();
        }

        for (Article a : articleList) {
            System.out.println(a);
        }
    }
}
Lucene 分頁持久層,獲取總記錄數和每頁的內容
package cn.itcast.javaee.lucene.fy.dao;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;

import cn.itcast.javaee.lucene.fy.entity.Article;
import cn.itcast.javaee.lucene.util.LuceneUtil;

/**
 * Persistence layer for paged keyword search.
 *
 * @author AdminTC
 */
public class ArticleDao {

    /**
     * Counts the records whose "content" field matches the keywords.
     *
     * @param keywords the query text
     * @return the total number of matching records
     * @throws Exception if the index cannot be read or the keywords cannot be parsed
     */
    public int getAllRecord(String keywords) throws Exception {
        QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer());
        Query query = queryParser.parse(keywords);
        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
        try {
            TopDocs topDocs = indexSearcher.search(query,2);
            // totalHits is the real number of matches and is not capped by the
            // "2" above; topDocs.scoreDocs.length would be capped.
            return topDocs.totalHits;
        } finally {
            indexSearcher.close();
        }
    }

    /**
     * Returns one page of records matching the keywords.
     *
     * @param keywords the query text
     * @param start zero-based index of the first record to return
     * @param size maximum number of records to return; fewer are returned
     *             when fewer remain
     * @return the matching records in rank order; empty when {@code size} is
     *         not positive or {@code start} is past the last hit
     * @throws IllegalArgumentException if {@code start} is negative
     * @throws Exception if the index cannot be read or the keywords cannot be parsed
     */
    public List<Article> findAll(String keywords,int start,int size) throws Exception {
        if (start < 0) {
            throw new IllegalArgumentException("start must not be negative: " + start);
        }
        List<Article> articleList = new ArrayList<Article>();
        if (size <= 0) {
            return articleList;
        }

        QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer());
        Query query = queryParser.parse(keywords);
        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
        try {
            // Fetch exactly as many hits as the requested page needs. The
            // loop is bounded by the hits actually returned: totalHits can
            // be larger than scoreDocs.length.
            TopDocs topDocs = indexSearcher.search(query,start + size);
            int end = Math.min(start + size,topDocs.scoreDocs.length);
            for (int i = start; i < end; i++) {
                ScoreDoc scoreDoc = topDocs.scoreDocs[i];
                Document document = indexSearcher.doc(scoreDoc.doc);
                articleList.add((Article) LuceneUtil.document2javabean(document,Article.class));
            }
        } finally {
            indexSearcher.close();
        }
        return articleList;
    }

    /** Manual check: prints the total count and the first four pages of size 2. */
    public static void main(String[] args) throws Exception {
        ArticleDao dao = new ArticleDao();
        System.out.println(dao.getAllRecord("培訓"));
        System.out.println("------------------------------");

        String[] pageTitles = {"第一頁","第二頁","第三頁","第四頁"};
        for (int page = 0; page < pageTitles.length; page++) {
            System.out.println(pageTitles[page]);
            for (Article a : dao.findAll("培訓",page * 2,2)) {
                System.out.println(a);
            }
        }
    }
}
PageBean
package cn.itcast.javaee.lucene.fy.entity;

import java.util.ArrayList;
import java.util.List;

/**
 * Paging bean for {@code Article} search results.
 *
 * @author AdminTC
 */
public class Page {

    /** Current page number. */
    private Integer currPageNO;

    /** Records shown per page; defaults to 2. */
    private Integer perPageSize = 2;

    /** Total number of records. */
    private Integer allRecordNO;

    /** Total number of pages. */
    private Integer allPageNO;

    /** Records of the current page. */
    private List<Article> articleList = new ArrayList<Article>();

    public Page() {
    }

    // ---- page position ----

    public Integer getCurrPageNO() {
        return this.currPageNO;
    }

    public void setCurrPageNO(Integer currPageNO) {
        this.currPageNO = currPageNO;
    }

    public Integer getPerPageSize() {
        return this.perPageSize;
    }

    public void setPerPageSize(Integer perPageSize) {
        this.perPageSize = perPageSize;
    }

    // ---- totals ----

    public Integer getAllRecordNO() {
        return this.allRecordNO;
    }

    public void setAllRecordNO(Integer allRecordNO) {
        this.allRecordNO = allRecordNO;
    }

    public Integer getAllPageNO() {
        return this.allPageNO;
    }

    public void setAllPageNO(Integer allPageNO) {
        this.allPageNO = allPageNO;
    }

    // ---- content ----

    public List<Article> getArticleList() {
        return this.articleList;
    }

    public void setArticleList(List<Article> articleList) {
        this.articleList = articleList;
    }
}
Service
package cn.itcast.javaee.lucene.fy.service;

import java.util.List;

import cn.itcast.javaee.lucene.fy.dao.ArticleDao;
import cn.itcast.javaee.lucene.fy.entity.Article;
import cn.itcast.javaee.lucene.fy.entity.Page;

/**
 * Business layer for paged article search.
 *
 * @author AdminTC
 */
public class ArticleService {

    /** Persistence layer. */
    private ArticleDao articleDao = new ArticleDao();

    /**
     * Builds the page of results for a keyword and page number.
     *
     * @param keywords the query text
     * @param currPageNO one-based page number; values below 1 are treated as 1
     *                   so the computed start index can never be negative
     * @return a populated {@link Page}: current page number, total records,
     *         total pages and the records of the requested page
     * @throws Exception if the underlying search fails
     */
    public Page show(String keywords,int currPageNO) throws Exception {
        Page page = new Page();

        page.setCurrPageNO(Math.max(1,currPageNO));

        int allRecordNO = articleDao.getAllRecord(keywords);
        page.setAllRecordNO(allRecordNO);

        // Ceiling division: a partially filled last page still counts.
        int size = page.getPerPageSize();
        int allPageNO = (allRecordNO + size - 1) / size;
        page.setAllPageNO(allPageNO);

        int start = (page.getCurrPageNO() - 1) * size;
        List<Article> articleList = articleDao.findAll(keywords,start,size);
        page.setArticleList(articleList);

        return page;
    }

    /** Manual check: prints the paging data and records of the first page. */
    public static void main(String[] args) throws Exception {
        ArticleService test = new ArticleService();
        Page page = test.show("培訓",1);
        System.out.println(page.getCurrPageNO());
        System.out.println(page.getPerPageSize());
        System.out.println(page.getAllRecordNO());
        System.out.println(page.getAllPageNO());
        for (Article a : page.getArticleList()) {
            System.out.println(a);
        }
    }
}
Controller
package cn.itcast.javaee.lucene.fy.action;

import java.io.IOException;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import cn.itcast.javaee.lucene.fy.entity.Page;
import cn.itcast.javaee.lucene.fy.service.ArticleService;

/**
 * Controller for paged keyword search: reads the keyword and page number from
 * the request, asks the service for that page, and forwards to list.jsp.
 */
public class ArticleServlet extends HttpServlet {

    /** GET is handled exactly like POST. */
    @Override
    public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        this.doPost(request,response);
    }

    /**
     * Runs the search and forwards to the result page.
     *
     * <p>Request parameters: {@code keywords} (defaults to "培訓" when missing
     * or blank) and {@code currPageNO} (defaults to 1 when missing, blank,
     * non-numeric or below 1).
     * Request attributes set for the view: {@code PAGE} and {@code KEYWORDS}.
     *
     * @throws ServletException if the search fails
     */
    @Override
    public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        request.setCharacterEncoding("UTF-8");

        String keywords = request.getParameter("keywords");
        if (keywords == null || keywords.trim().length() == 0) {
            keywords = "培訓"; // default keyword
        }

        int currPageNO = parsePageNumber(request.getParameter("currPageNO"));

        Page page;
        try {
            ArticleService articleService = new ArticleService();
            page = articleService.show(keywords,currPageNO);
        } catch (Exception e) {
            // Report through the servlet contract, keeping the cause.
            throw new ServletException("Article search failed",e);
        }

        request.setAttribute("PAGE",page);
        request.setAttribute("KEYWORDS",keywords);
        request.getRequestDispatcher("/list.jsp").forward(request,response);
    }

    /**
     * Parses the client-supplied page number. The value comes straight from
     * the request, so anything that is not a positive integer falls back to 1
     * instead of failing the request.
     */
    private static int parsePageNumber(String raw) {
        if (raw == null) {
            return 1;
        }
        try {
            int pageNumber = Integer.parseInt(raw.trim());
            return pageNumber < 1 ? 1 : pageNumber;
        } catch (NumberFormatException e) {
            return 1;
        }
    }
}
list.jsp
<%@ page language="java" pageEncoding="UTF-8"%>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c" %>
<%-- Search form plus paged result table. Expects request attributes PAGE and KEYWORDS. --%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
  <head>
    <title>同步分頁</title>
  </head>
  <body>

    <!-- Input area -->
    <form action="${pageContext.request.contextPath}/ArticleServlet" method="POST">
        <input type="hidden" name="currPageNO" value="1"/>
        <table border="2" align="center">
            <tr>
                <th>輸入關鍵字</th>
                <%-- c:out escapes the echoed keyword so it cannot break out of the attribute. --%>
                <td><input type="text" name="keywords" value="<c:out value='${requestScope.KEYWORDS}'/>" maxlength="10"/></td>
                <td><input id="search" type="button" value="站內搜索"/></td>
            </tr>
        </table>
    </form>

    <script type="text/javascript">
        // Strip leading and trailing whitespace.
        function trim(str){
            str = str.replace(/^\s*/,"");
            str = str.replace(/\s*$/,"");
            return str;
        }
        // Submit the search only when a non-blank keyword was entered.
        document.getElementById("search").onclick = function(){
            var formElement = document.forms[0];
            var keywords = trim(formElement.keywords.value);
            if(keywords.length == 0){
                alert("你沒有填關鍵字!!!");
            }else{
                formElement.submit();
            }
        }
    </script>

    <!-- Result area -->
    <table border="2" align="center" width="70%">
        <tr>
            <th>編號</th>
            <th>標題</th>
            <th>內容</th>
        </tr>
        <c:forEach var="article" items="${requestScope.PAGE.articleList}">
            <tr>
                <%-- Indexed text is escaped before being written into the page. --%>
                <td><c:out value="${article.id}"/></td>
                <td><c:out value="${article.title}"/></td>
                <td><c:out value="${article.content}"/></td>
            </tr>
        </c:forEach>
        <tr>
            <th colspan="3" align="center">
                <a onclick="fy(1)" style="cursor:pointer;color:blue;text-decoration:underline">首頁</a>
                <c:choose>
                    <c:when test="${requestScope.PAGE.currPageNO+1<=requestScope.PAGE.allPageNO}">
                        <a onclick="fy(${requestScope.PAGE.currPageNO+1})" style="cursor:pointer;color:blue;text-decoration:underline">下一頁</a>
                    </c:when>
                    <c:otherwise>
                        下一頁
                    </c:otherwise>
                </c:choose>
                <%-- Previous page: linked only when one exists, and always with an explicit page number. --%>
                <c:choose>
                    <c:when test="${requestScope.PAGE.currPageNO-1>=1}">
                        <a onclick="fy(${requestScope.PAGE.currPageNO-1})" style="cursor:pointer;color:blue;text-decoration:underline">上一頁</a>
                    </c:when>
                    <c:otherwise>
                        上一頁
                    </c:otherwise>
                </c:choose>
                <%-- Last page: linked only when there is at least one page. --%>
                <c:choose>
                    <c:when test="${requestScope.PAGE.allPageNO>=1}">
                        <a onclick="fy(${requestScope.PAGE.allPageNO})" style="cursor:pointer;color:blue;text-decoration:underline">末頁</a>
                    </c:when>
                    <c:otherwise>
                        末頁
                    </c:otherwise>
                </c:choose>
            </th>
        </tr>
    </table>

    <script type="text/javascript">
        // Jump to the given page by resubmitting the search form.
        function fy(currPageNO){
            var formElement = document.forms[0];
            formElement.currPageNO.value = currPageNO;
            formElement.submit();
        }
    </script>

  </body>
</html>
PS :站內檢索
package cn.itcast.javaee.lucene.optimize; import java.io.File; import java.util.ArrayList; import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; import org.junit.Test; import cn.itcast.javaee.lucene.entity.Article; import cn.itcast.javaee.lucene.util.LuceneUtil; /** * 索引庫進行優化 * @author AdminTC */ public class ArticleDao { /** * 增長document對象索引庫中 * 問題的引入 */ @Test public void add() throws Exception{ Article article = new Article(1,"培訓","傳智是一家it培訓機構",10); Document document = LuceneUtil.javabean2document(article); IndexWriter indexWriter = new IndexWriter(LuceneUtil.getDirectory() ,LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength()); indexWriter.addDocument(document); indexWriter.close(); } /** * 合併cfs文件,合併後的cfs文件是二進制壓縮字符,能解決是的文件大小和數量的問題 * @throws Exception */ @Test public void type1() throws Exception{ Article article = new Article(1,"培訓","傳智是一家it培訓機構",10); Document document = LuceneUtil.javabean2document(article); IndexWriter indexWriter = new IndexWriter(LuceneUtil.getDirectory() ,LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength()); indexWriter.addDocument(document); //合併cfs文本 indexWriter.optimize(); //會合併成一個文件 indexWriter.close(); } /** * 設定合併因子,自動合併cfs文件 * @throws Exception */ @Test public void type2() throws Exception{ Article article = new Article(1,"培訓","傳智是一家it培訓機構",10); Document document = LuceneUtil.javabean2document(article); IndexWriter indexWriter = new IndexWriter(LuceneUtil.getDirectory() ,LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength()); indexWriter.addDocument(document); //設置合併因子,即知足3個cfs文本一合併 indexWriter.setMergeFactor(3); //每3個文件一合併 
indexWriter.close(); }
/** * 默認狀況下,每10個cfs文本一合併 * @throws Exception */ @Test public void type3() throws Exception{ Article article = new Article(1,"培訓","傳智是一家it培訓機構",10); Document document = LuceneUtil.javabean2document(article); IndexWriter indexWriter = new IndexWriter(LuceneUtil.getDirectory() ,LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength()); indexWriter.addDocument(document); //設置合併因子,即知足10個cfs文本一合併 //indexWriter.setMergeFactor(10); indexWriter.close(); }
![](http://static.javashuo.com/static/loading.gif)
PS:上述方式能解決速度的問題,過程是這樣的:首先把硬盤索引庫同步到內存索引庫;進行操作時是在內存索引庫中進行;操作完之後先清空硬盤索引庫,然後再把內存索引庫的內容存入硬盤索引庫
/** * 使用RAMDirectory,相似於內存索引庫,能解決是的讀取索引庫文件的速度問題 * @throws Exception */ @Test public void type4() throws Exception{ Article article = new Article(1,"培訓","傳智是一家it培訓機構",10); Document document = LuceneUtil.javabean2document(article); //硬盤索引庫 Directory fsDirectory = FSDirectory.open(new File("D:/All_Files/IndexDBDBDB")); //內存索引庫,由於硬盤索引庫的內容要同步到內存索引庫中 Directory ramDirectory = new RAMDirectory(fsDirectory); //指向硬盤索引庫的字符流,true表示若是內存索引庫中和硬盤索引庫中的相同的document對象時,先刪除硬盤索引庫中的document對象, //再將內存索引庫的document對象寫入硬盤索引庫中 //反之是false,默認爲false,這個boolean值寫在硬盤字符流的構造器 IndexWriter fsIndexWriter = new IndexWriter(fsDirectory,LuceneUtil.getAnalyzer(),true,LuceneUtil.getMaxFieldLength()); //指向內存索引庫的字符流 IndexWriter ramIndexWriter = new IndexWriter(ramDirectory,LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength()); //將document對象寫入內存索引庫 ramIndexWriter.addDocument(document); ramIndexWriter.close(); //將內存索引庫的全部document對象同步到硬盤索引庫中 fsIndexWriter.addIndexesNoOptimize(ramDirectory); fsIndexWriter.close(); } @Test public void findAll() throws Exception{ String keywords = "家"; List<Article> articleList = new ArrayList<Article>(); QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer()); Query query = queryParser.parse(keywords); IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory()); TopDocs topDocs = indexSearcher.search(query,100); for(int i=0;i<topDocs.scoreDocs.length;i++){ ScoreDoc scoreDoc = topDocs.scoreDocs[i]; int no = scoreDoc.doc; Document document = indexSearcher.doc(no); Article article = (Article) LuceneUtil.document2javabean(document,Article.class); articleList.add(article); } for(Article a : articleList){ System.out.println(a); } } }
package cn.itcast.javaee.lucene.analyzer;

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;

import cn.itcast.javaee.lucene.util.*;

/**
 * Compares how Lucene's built-in analyzers and third-party analyzers split
 * the same text into terms.
 *
 * @author AdminTC
 */
public class TestAnalyzer {

    /**
     * Prints the analyzer's class and every term it produces for the text.
     *
     * @param analyzer the analyzer under test
     * @param text the text to split
     * @throws Exception if tokenizing fails
     */
    private static void testAnalyzer(Analyzer analyzer, String text) throws Exception {
        System.out.println("當前使用的分詞器:" + analyzer.getClass());
        TokenStream tokenStream = analyzer.tokenStream("content",new StringReader(text));
        // The attribute instance is registered once and refreshed by every
        // incrementToken() call, so it does not need to be looked up per token.
        TermAttribute termAttribute = tokenStream.addAttribute(TermAttribute.class);
        try {
            while (tokenStream.incrementToken()) {
                System.out.println(termAttribute.term());
            }
        } finally {
            tokenStream.close();
        }
    }

    public static void main(String[] args) throws Exception {
        // Analyzers bundled with Lucene:
        //testAnalyzer(new StandardAnalyzer(LuceneUtil.getVersion()),"傳智播客說咱們的首都是北京呀it");
        //testAnalyzer(new FrenchAnalyzer(LuceneUtil.getVersion()),"傳智播客說咱們的首都是北京呀it");
        //testAnalyzer(new RussianAnalyzer(LuceneUtil.getVersion()),"傳智播客說咱們的首都是北京呀it");
        //testAnalyzer(new ChineseAnalyzer(),"傳智播客說咱們的首都是北京呀it");
        // CJKAnalyzer splits into overlapping pairs of characters:
        //testAnalyzer(new CJKAnalyzer(LuceneUtil.getVersion()),"傳智播客說咱們的首都是北京呀it");
        //testAnalyzer(new CJKAnalyzer(LuceneUtil.getVersion()),"傳智是一家IT培訓機構");
        //testAnalyzer(new FrenchAnalyzer(LuceneUtil.getVersion()),"傳智是一家how are you培訓機構");

        // Third-party IK analyzer:
        //testAnalyzer(new IKAnalyzer(),"傳智播客說咱們的首都是北京呀");
        testAnalyzer(new IKAnalyzer(),"上海自來水來自海上");
    }
}
<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd"> <properties> <comment>IK Analyzer 擴展配置</comment> <!-- Extension dictionary: list your own custom terms here so the analyzer treats them as words --> <entry key="ext_dict">/mydict.dic</entry> <!-- Extension stop-word dictionary: words that should not be searchable --> <entry key="ext_stopwords">/surname.dic</entry> </properties>
三)搜索結果高亮 3.1什麼是搜索結果高亮 在搜索結果中,將與關鍵字相同的字符用紅色顯示 String keywords = "培訓"; List<Article> articleList = new ArrayList<Article>(); QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer()); Query query = queryParser.parse(keywords); IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory()); TopDocs topDocs = indexSearcher.search(query,1000000); Formatter formatter = new SimpleHTMLFormatter("<font color='red'>","</font>"); Scorer scorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(formatter,scorer); for(int i=0;i<topDocs.scoreDocs.length;i++){ ScoreDoc scoreDoc = topDocs.scoreDocs[i]; int no = scoreDoc.doc; Document document = indexSearcher.doc(no); String highlighterContent = highlighter.getBestFragment(LuceneUtil.getAnalyzer(),"content",document.get("content")); document.getField("content").setValue(highlighterContent); Article article = (Article) LuceneUtil.document2javabean(document,Article.class); articleList.add(article); } for(Article article : articleList){ System.out.println(article); } } 四)搜索結果摘要 4.1什麼是搜索結果摘要 若是搜索結果內容太多,咱們只想顯示前幾個字符, 必須與高亮一塊兒使用 String keywords = "培訓"; List<Article> articleList = new ArrayList<Article>(); QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer()); Query query = queryParser.parse(keywords); IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory()); TopDocs topDocs = indexSearcher.search(query,1000000); Formatter formatter = new SimpleHTMLFormatter("<font color='red'>","</font>"); Scorer scorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(formatter,scorer); Fragmenter fragmenter = new SimpleFragmenter(4); highlighter.setTextFragmenter(fragmenter); for(int i=0;i<topDocs.scoreDocs.length;i++){ ScoreDoc scoreDoc = topDocs.scoreDocs[i]; int no = scoreDoc.doc; Document document = indexSearcher.doc(no); String highlighterContent = 
highlighter.getBestFragment(LuceneUtil.getAnalyzer(),"content",document.get("content")); document.getField("content").setValue(highlighterContent); Article article = (Article) LuceneUtil.document2javabean(document,Article.class); articleList.add(article); } for(Article article : articleList){ System.out.println(article); } }
package cn.itcast.javaee.lucene.highlighter;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.junit.Test;

import cn.itcast.javaee.lucene.entity.Article;
import cn.itcast.javaee.lucene.util.LuceneUtil;

/**
 * Highlights the keyword inside search results.
 *
 * @author AdminTC
 */
public class ArticleDao {

    /**
     * Adds one document to the index.
     *
     * @throws Exception if the conversion or the index write fails
     */
    @Test
    public void add() throws Exception {
        Article article = new Article(1,"培訓","傳智是一家it培訓機構",10);
        Document document = LuceneUtil.javabean2document(article);
        IndexWriter indexWriter = new IndexWriter(
                LuceneUtil.getDirectory(),LuceneUtil.getAnalyzer(),LuceneUtil.getMaxFieldLength());
        try {
            indexWriter.addDocument(document);
        } finally {
            // Release the write lock even if the add fails.
            indexWriter.close();
        }
    }

    /**
     * Searches the "content" field and prints the matching articles with the
     * keyword wrapped in red {@code <font>} tags in both title and content.
     *
     * @throws Exception if the index cannot be read or highlighting fails
     */
    @Test
    public void findAll() throws Exception {
        String keywords = "培訓";
        List<Article> articleList = new ArrayList<Article>();

        QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"content",LuceneUtil.getAnalyzer());
        Query query = queryParser.parse(keywords);

        // Formatter supplies the markup placed around each hit; the scorer
        // tells the highlighter which terms belong to the query.
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>","</font>");
        Scorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter,scorer);

        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
        try {
            TopDocs topDocs = indexSearcher.search(query,100);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Loaded document: keyword not highlighted yet.
                Document document = indexSearcher.doc(scoreDoc.doc);
                // Replace the stored text with its highlighted form.
                highlightField(document,highlighter,"title");
                highlightField(document,highlighter,"content");
                articleList.add((Article) LuceneUtil.document2javabean(document,Article.class));
            }
        } finally {
            indexSearcher.close();
        }

        for (Article a : articleList) {
            System.out.println(a);
        }
    }

    /**
     * Replaces a field's value with its highlighted fragment. The field is
     * left untouched when it is absent or when the highlighter finds nothing
     * to highlight: getBestFragment returns null in that case, and a null
     * field value is rejected by Lucene.
     */
    private static void highlightField(Document document,Highlighter highlighter,String fieldName) throws Exception {
        String text = document.get(fieldName);
        if (text == null) {
            return;
        }
        String fragment = highlighter.getBestFragment(LuceneUtil.getAnalyzer(),fieldName,text);
        if (fragment != null) {
            document.getField(fieldName).setValue(fragment);
        }
    }
}
package cn.itcast.javaee.lucene.search;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;

import cn.itcast.javaee.lucene.entity.Article;
import cn.itcast.javaee.lucene.util.LuceneUtil;

/**
 * Demonstrates searching across several fields at once in Lucene.
 *
 * @author AdminTC
 */
public class ArticleDao {

    /**
     * Adds one sample article to the index. Swap in one of the commented-out
     * articles and re-run to index further samples.
     *
     * @throws Exception if the document cannot be built or written
     */
    @Test
    public void add() throws Exception {
        Article article = new Article(1, "培訓", "傳智是一家it培訓機構", 10);
        //Article article = new Article(2,"培訓","北大是一家it培訓機構",20);
        //Article article = new Article(3,"培訓","中大是一家it培訓機構",20);
        //Article article = new Article(4,"培訓","小大是一家it培訓機構",30);
        Document document = LuceneUtil.javabean2document(article);
        IndexWriter indexWriter = new IndexWriter(
                LuceneUtil.getDirectory(),
                LuceneUtil.getAnalyzer(),
                LuceneUtil.getMaxFieldLength());
        try {
            indexWriter.addDocument(document);
        } finally {
            // Always close the writer so a failed add does not leave it (and its lock) open.
            indexWriter.close();
        }
    }

    /**
     * Searches the "content" and "title" fields for a keyword and prints every hit.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @Test
    public void findAll() throws Exception {
        String keywords = "機構";
        List<Article> articleList = new ArrayList<Article>();

        // Single-field alternative:
        //QueryParser queryParser = new QueryParser(LuceneUtil.getVersion(),"title",LuceneUtil.getAnalyzer());

        // Multi-field search widens the search scope: a hit in any listed field matches.
        QueryParser queryParser = new MultiFieldQueryParser(
                LuceneUtil.getVersion(),
                new String[]{"content", "title"},
                LuceneUtil.getAnalyzer());
        Query query = queryParser.parse(keywords);

        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
        try {
            // Keep at most the top 100 hits.
            TopDocs topDocs = indexSearcher.search(query, 100);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                Article article = (Article) LuceneUtil.document2javabean(document, Article.class);
                articleList.add(article);
            }
        } finally {
            // The searcher holds the index open; release it even if a step above fails.
            indexSearcher.close();
        }

        for (Article a : articleList) {
            System.out.println(a);
        }
    }
}
1.根據得分排序
package cn.itcast.javaee.lucene.sort;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.junit.Test;

import cn.itcast.javaee.lucene.entity.Article;
import cn.itcast.javaee.lucene.util.LuceneUtil;

/**
 * Demonstrates Lucene's default ordering: hits are sorted by relevance score.
 *
 * @author AdminTC
 */
public class ArticleDao1 {

    /**
     * Adds one sample article to the index. Swap in one of the commented-out
     * articles and re-run to index samples with differing keyword frequency.
     *
     * @throws Exception if the document cannot be built or written
     */
    @Test
    public void add() throws Exception {
        //Article article = new Article(1,"培訓","傳智是一家it培訓機構",10);
        //Article article = new Article(2,"培訓","北大是一家it培訓機構",20);
        Article article = new Article(3, "培訓", "中大是一家華南地區it培訓機構", 30);
        //Article article = new Article(4,"培訓","哈哈培訓機構是好的培訓",9);
        //Article article = new Article(5,"培訓","培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓",15);
        //Article article = new Article(6,"培訓","培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓培訓",35);
        Document document = LuceneUtil.javabean2document(article);
        IndexWriter indexWriter = new IndexWriter(
                LuceneUtil.getDirectory(),
                LuceneUtil.getAnalyzer(),
                LuceneUtil.getMaxFieldLength());
        try {
            // Uncomment to set this document's boost by hand:
            //document.setBoost(100F);
            indexWriter.addDocument(document);
        } finally {
            // Always close the writer so a failed add does not leave it (and its lock) open.
            indexWriter.close();
        }
    }

    /**
     * Searches the "content" field for a keyword, prints each hit's score in
     * result order, then prints the matching articles.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @Test
    public void findAll() throws Exception {
        String keywords = "培訓";
        List<Article> articleList = new ArrayList<Article>();

        QueryParser queryParser =
                new QueryParser(LuceneUtil.getVersion(), "content", LuceneUtil.getAnalyzer());
        Query query = queryParser.parse(keywords);

        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
        try {
            // Keep at most the top 100 hits.
            TopDocs topDocs = indexSearcher.search(query, 100);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Score Lucene assigned to this hit.
                float score = scoreDoc.score;
                System.out.println("score=" + score);

                Document document = indexSearcher.doc(scoreDoc.doc);
                Article article = (Article) LuceneUtil.document2javabean(document, Article.class);
                articleList.add(article);
            }
        } finally {
            // The searcher holds the index open; release it even if a step above fails.
            indexSearcher.close();
        }

        for (Article a : articleList) {
            System.out.println(a);
        }
    }
}
2.根據單個或多個字段排序
package cn.itcast.javaee.lucene.sort;

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;

import cn.itcast.javaee.lucene.entity.Article;
import cn.itcast.javaee.lucene.util.LuceneUtil;

/**
 * Demonstrates sorting Lucene search results by one or more fields.
 *
 * @author AdminTC
 */
public class ArticleDao2 {

    /**
     * Adds one sample article to the index. Swap in one of the commented-out
     * articles and re-run to index further samples.
     *
     * @throws Exception if the document cannot be built or written
     */
    @Test
    public void add() throws Exception {
        //Article article = new Article(1,"培訓","傳智是一家it培訓機構",10);
        //Article article = new Article(2,"培訓","北大是一家it培訓機構",20);
        //Article article = new Article(3,"培訓","中大是一家it培訓機構",20);
        Article article = new Article(4, "培訓", "小大是一家it培訓機構", 30);
        Document document = LuceneUtil.javabean2document(article);
        IndexWriter indexWriter = new IndexWriter(
                LuceneUtil.getDirectory(),
                LuceneUtil.getAnalyzer(),
                LuceneUtil.getMaxFieldLength());
        try {
            indexWriter.addDocument(document);
        } finally {
            // Always close the writer so a failed add does not leave it (and its lock) open.
            indexWriter.close();
        }
    }

    /**
     * Searches the "content" field for a keyword and prints the hits ordered by
     * "count" descending, with ties broken by "id" ascending.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @Test
    public void findAll() throws Exception {
        String keywords = "培訓";
        List<Article> articleList = new ArrayList<Article>();

        QueryParser queryParser =
                new QueryParser(LuceneUtil.getVersion(), "content", LuceneUtil.getAnalyzer());
        Query query = queryParser.parse(keywords);

        IndexSearcher indexSearcher = new IndexSearcher(LuceneUtil.getDirectory());
        try {
            // Default ordering by score would be:
            //TopDocs topDocs = indexSearcher.search(query,100);

            // SortField arguments:
            //   1. name of the document field to sort on, e.g. "id"
            //   2. type of that field, given as a SortField constant
            //   3. true = descending (like "order by id desc"),
            //      false = ascending (like "order by id asc")
            // Single-field alternative:
            //Sort sort = new Sort(new SortField("id",SortField.INT,false));

            // Sort by count descending; when counts are equal, by id ascending.
            Sort sort = new Sort(
                    new SortField("count", SortField.INT, true),
                    new SortField("id", SortField.INT, false));

            // No filter (null), at most 100 hits, ordered by the sort above.
            TopDocs topDocs = indexSearcher.search(query, null, 100, sort);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                Article article = (Article) LuceneUtil.document2javabean(document, Article.class);
                articleList.add(article);
            }
        } finally {
            // The searcher holds the index open; release it even if a step above fails.
            indexSearcher.close();
        }

        for (Article a : articleList) {
            System.out.println(a);
        }
    }
}
PS:Lucene 異步分頁結果