以前做去轉盤網的時候,我已經公開了非全文搜索的代碼,需要的朋友可以前去閱讀我的博客。本文主要討論如何進行全文搜索。由於本人花了很長時間設計了新作「觀點」,而觀點對全文搜索的要求還是很高的,所以我又花了不少時間研究全文搜索,你可以先體驗一下:點我搜索。廢話不多說了,直接上代碼:
public Map<String,Object> articleSearchAlgorithms(SearchCondition condition,IndexSearcher searcher) throws ParseException, IOException{ Map<String,Object> map =new HashMap<String,Object>(); String[] filedsList=condition.getFiledsList(); String keyWord=condition.getKeyWord(); int currentPage=condition.getCurrentPage(); int pageSize=condition.getPageSize(); String sortField=condition.getSortField(); boolean isASC=condition.isDESC(); String sDate=condition.getsDate(); String eDate=condition.geteDate(); String classify=condition.getClassify(); //過濾終結字符 keyWord=escapeExprSpecialWord(keyWord); BooleanQuery q1 = new BooleanQuery(); BooleanQuery q2 = new BooleanQuery(); BooleanQuery booleanQuery = new BooleanQuery(); //boolean查詢 if(classify!=null&&(classify.equals("guanzhi")||classify.equals("opinion")||classify.equals("write"))){ String typeId="1";//默認言論 if(classify.equals("guanzhi")){ typeId="2"; } if(classify.equals("opinion")){ typeId="3"; } Query termQuery = new TermQuery(new Term("typeId",typeId)); q1.add(termQuery,BooleanClause.Occur.MUST); } if(sDate!=null&&eDate!=null){//是否範圍查詢由這兩個參數決定 Query rangeQuery = new TermRangeQuery("writingTime", new BytesRef(sDate), new BytesRef(eDate),true, true); q1.add(rangeQuery,BooleanClause.Occur.MUST); } Sort sort = new Sort(); // 排序 sort.setSort(SortField.FIELD_SCORE); if(sortField!=null){ sort.setSort(new SortField(sortField, SortField.Type.STRING, isASC)); } int start = (currentPage - 1) * pageSize; int hm = start + pageSize; TopFieldCollector res = TopFieldCollector.create(sort,hm,false, false, false, false); //徹底匹配查詢 Term t0=new Term(filedsList[1],keyWord); TermQuery termQuery = new TermQuery(t0);//兩種高度匹配的查詢 q2.add(termQuery,BooleanClause.Occur.SHOULD); //前綴匹配 Term t1=new Term(filedsList[1],keyWord); PrefixQuery prefixQuery=new PrefixQuery(t1); q2.add(prefixQuery,BooleanClause.Occur.SHOULD); //短語,類似度匹配,適用於分詞的內容 for(int i=0;i<filedsList.length;i++){ //多字段term查詢算法 if(i!=1){ PhraseQuery phraseQuery=new PhraseQuery(); Term ts0=new 
Term(filedsList[i],keyWord); phraseQuery.add(ts0); FuzzyQuery fQuery=new FuzzyQuery(new Term(filedsList[i],keyWord),2);//最後類似度查詢 q2.add(phraseQuery,BooleanClause.Occur.SHOULD); q2.add(fQuery,BooleanClause.Occur.SHOULD);//後綴類似的拿出來 } } MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_47,filedsList,analyzer); queryParser.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = queryParser.parse(keyWord); q2.add(query,BooleanClause.Occur.SHOULD); //必須加邏輯判斷,不然結果是不一樣的 if(q1!=null && q1.toString().length()>0){ booleanQuery.add(q1,BooleanClause.Occur.MUST); } if(q2!=null && q2.toString().length()>0){ booleanQuery.add(q2,BooleanClause.Occur.MUST); } searcher.search(booleanQuery, res); long amount = res.getTotalHits(); TopDocs tds = res.topDocs(start, pageSize); map.put("amount",amount); map.put("tds",tds); map.put("query",booleanQuery); return map; }
注意:上面代碼的搜索條件(SearchCondition)是觀點網的具體需求,您可以按照自己的搜索條件做改動,這裡也很難適配所有讀者。
public Map<String, Object> searchArticle(SearchCondition condition) throws Exception{ Map<String,Object> map =new HashMap<String,Object>(); List<Write> list=new ArrayList<Write>(); DirectoryReader reader=condition.getReader(); String URL=condition.getURL(); boolean isHighligth=condition.isHighlight(); String keyWord=condition.getKeyWord(); IndexSearcher searcher=getSearcher(reader,URL); try{ Map<String,Object> output=articleSearchAlgorithms(condition,searcher); if(output==null){ map.put("amount",0L); map.put("source",null); return map; } map.put("amount", output.get("amount")); TopDocs tds = (TopDocs) output.get("tds"); ScoreDoc[] sd = tds.scoreDocs; Query query =(Query) output.get("query"); for (int i = 0; i < sd.length; i++) { Document doc = searcher.doc(sd[i].doc); String id = doc.get("id"); /**********************start*************************須要處理的放一起********************/ String temp=doc.get("title"); String title =temp; //默認不高亮 if(isHighligth){ //高亮文章標題 Highlighter highlighterTitle = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); highlighterTitle.setTextFragmenter(new SimpleFragmenter(40)); // 字長度 TokenStream ts = analyzer.tokenStream("title", new StringReader(temp)); title= highlighterTitle.getBestFragment(ts,temp); if(title==null){ title=temp.replace(keyWord,"<span style='color:red'>"+keyWord+"</span>");//高亮處理插件bug,加這句話避免 } } String temp1=HtmlEnDecode.htmlEncode(doc.get("content")); String content=temp1;//使用本身封裝的方法來轉義 if(isHighligth){ //作高亮處理,content Highlighter highlighterContent = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); highlighterContent.setTextFragmenter(new SimpleFragmenter(Constant.HIGHLIGHT_CONTENT_LENGTH)); // 字長度 //temp1=StringEscapeUtils.escapeHtml(temp1);//將漢字轉義致使高亮失效 TokenStream ts1 = analyzer.tokenStream("content", new StringReader(temp1)); content = highlighterContent.getBestFragment(ts1,temp1); if(content==null){ content=temp1.replace(keyWord,"<span style='color:red'>"+keyWord+"</span>");//高亮處理插件bug,加這句話避免 
//假設趕上這種狀況作處理,其餘的高亮器會自動截圖 content=subContent(content);//截取處理 content=HtmlEnDecode.htmldecode(content);//html解碼 content=SubStringHTML.sub(content,Constant.HIGHLIGHT_CONTENT_LENGTH); } } /*---------------------------------------不斷變更的數據放一起----------------------------*/ Write write=writeDao.getArticle(Long.parseLong(id)); if(write!=null){ write.setTitle(title); write.setContent(content); Date writingTime=write.getWritingTime(); String timeGap=DateUtil.dateGap(writingTime);//timeGap write.setTimeGap(timeGap); list.add(write); } } }catch(Exception e){ e.printStackTrace(); } map.put("source",list); return map; }
注意:上面是具體的搜索代碼,不同的應用場景有不同的需求,請您按照自己的需求封裝對象、查詢數據庫等。代碼毫無保留,絕對可用。
如果有什麼疑問,可以加QQ羣:284205104。如果羣滿了,就麻煩去一趟去轉盤網找最新的羣加入即可,謝謝您的閱讀。