# Lucene

Developers who have never touched Lucene may find it intimidating, for two reasons. First, Lucene evolves very quickly, and its API is constantly changing. Second, Lucene is not a complete full-text search engine; it is a framework for building one, and its goal is simply to give software developers an easy-to-use toolkit. That said, if you only want to use Lucene rather than build a dedicated search engine, development with it is quite straightforward. Here is the example from the official documentation (Lucene 6.1.0):
Creating an index:
```java
// Create a standard analyzer (this analyzer does not support Chinese)
Analyzer analyzer = new StandardAnalyzer();
// Create an in-memory directory
Directory directory = new RAMDirectory();
// Or create a directory on disk (most common)
//Directory directory = FSDirectory.open(Paths.get("/tmp/testindex"));
// Create an IndexWriter configuration
IndexWriterConfig config = new IndexWriterConfig(analyzer);
// Create an IndexWriter
IndexWriter iwriter = new IndexWriter(directory, config);
// Create a document
Document doc = new Document();
String text = "This is the text to be indexed.";
// Add a field to the document
doc.add(new Field("fieldname", text, TextField.TYPE_STORED));
// Add the document to the writer
iwriter.addDocument(doc);
// Close the writer
iwriter.close();
```
Searching the index:
```java
// Create a standard analyzer (this analyzer does not support Chinese)
Analyzer analyzer = new StandardAnalyzer();
// This must be the same directory the index was written to
// (here, the in-memory directory from the indexing step)
Directory directory = new RAMDirectory();
// Or open the directory on disk (most common)
//Directory directory = FSDirectory.open(Paths.get("/tmp/testindex"));
// Create a reader
DirectoryReader ireader = DirectoryReader.open(directory);
// Create an index searcher
IndexSearcher isearcher = new IndexSearcher(ireader);
// Create a query parser
QueryParser parser = new QueryParser("fieldname", analyzer);
// The keyword we search for is "text"
Query query = parser.parse("text");
// Run the query
ScoreDoc[] hits = isearcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
// Iterate over the results
for (int i = 0; i < hits.length; i++) {
    Document hitDoc = isearcher.doc(hits[i].doc);
    assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
}
ireader.close();
directory.close();
```
As the official example shows, searching with Lucene takes just two steps: build the index, then query it.
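Put together, the two steps look roughly like the sketch below. This is a minimal, self-contained version of the official snippets above, assuming Lucene 6.1.0; the class name `HelloLucene` is made up for illustration. Note that the searcher must open the same `Directory` the index was written to.

```java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class HelloLucene {
    public static void main(String[] args) throws Exception {
        Analyzer analyzer = new StandardAnalyzer();
        Directory directory = new RAMDirectory();

        // Step 1: write a document into the index
        IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));
        Document doc = new Document();
        doc.add(new Field("fieldname", "This is the text to be indexed.", TextField.TYPE_STORED));
        writer.addDocument(doc);
        writer.close();

        // Step 2: search the same directory
        DirectoryReader reader = DirectoryReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(reader);
        Query query = new QueryParser("fieldname", analyzer).parse("text");
        ScoreDoc[] hits = searcher.search(query, 10).scoreDocs;
        for (ScoreDoc hit : hits) {
            System.out.println(searcher.doc(hit.doc).get("fieldname"));
        }
        reader.close();
        directory.close();
    }
}
```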
A concrete example:
Add the Lucene dependencies (Maven):
```xml
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>5.3.1</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-common</artifactId>
    <version>5.3.1</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-smartcn</artifactId>
    <version>5.3.1</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>5.3.1</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>5.3.1</version>
</dependency>
```
The blog index class:
```java
package com.lucene;

import java.io.StringReader;
import java.nio.file.Paths;
import java.util.LinkedList;
import java.util.List;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Blog index class
 */
public class BlogIndex {

    private Directory dir = null;

    /**
     * Get an IndexWriter instance
     * @return
     * @throws Exception
     */
    private IndexWriter getWriter() throws Exception {
        dir = FSDirectory.open(Paths.get("C://lucene"));
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        IndexWriter writer = new IndexWriter(dir, iwc);
        return writer;
    }

    /**
     * Add a blog post to the index
     * @param blog
     */
    public void addIndex(Blog blog) throws Exception {
        IndexWriter writer = getWriter();
        Document doc = new Document();
        doc.add(new StringField("id", String.valueOf(blog.getId()), Field.Store.YES));
        doc.add(new TextField("title", blog.getTitle(), Field.Store.YES));
        doc.add(new StringField("releaseDate", DateUtil.formatDate(blog.getReleaseDate(), "yyyy-MM-dd"), Field.Store.YES));
        doc.add(new TextField("content", blog.getContentNoTag(), Field.Store.YES));
        writer.addDocument(doc);
        writer.close();
    }

    /**
     * Update the index entry for a blog post
     * @param blog
     * @throws Exception
     */
    public void updateIndex(Blog blog) throws Exception {
        IndexWriter writer = getWriter();
        Document doc = new Document();
        doc.add(new StringField("id", String.valueOf(blog.getId()), Field.Store.YES));
        doc.add(new TextField("title", blog.getTitle(), Field.Store.YES));
        doc.add(new StringField("releaseDate", DateUtil.formatDate(blog.getReleaseDate(), "yyyy-MM-dd"), Field.Store.YES));
        doc.add(new TextField("content", blog.getContentNoTag(), Field.Store.YES));
        writer.updateDocument(new Term("id", String.valueOf(blog.getId())), doc);
        writer.close();
    }

    /**
     * Delete the index entry for the given blog post
     * @param blogId
     * @throws Exception
     */
    public void deleteIndex(String blogId) throws Exception {
        IndexWriter writer = getWriter();
        writer.deleteDocuments(new Term("id", blogId));
        writer.forceMergeDeletes(); // force the deleted documents to be merged away
        writer.commit();
        writer.close();
    }

    /**
     * Search blog posts
     * @param q search keyword
     * @return
     * @throws Exception
     */
    public List<Blog> searchBlog(String q) throws Exception {
        dir = FSDirectory.open(Paths.get("C://lucene"));
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher is = new IndexSearcher(reader);
        BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
        // match the keyword against both the title and the content field
        QueryParser parser = new QueryParser("title", analyzer);
        Query query = parser.parse(q);
        QueryParser parser2 = new QueryParser("content", analyzer);
        Query query2 = parser2.parse(q);
        booleanQuery.add(query, BooleanClause.Occur.SHOULD);
        booleanQuery.add(query2, BooleanClause.Occur.SHOULD);
        TopDocs hits = is.search(booleanQuery.build(), 100);
        // highlight the matched terms with a red, bold tag
        QueryScorer scorer = new QueryScorer(query);
        Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
        highlighter.setTextFragmenter(fragmenter);
        List<Blog> blogList = new LinkedList<Blog>();
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            Document doc = is.doc(scoreDoc.doc);
            Blog blog = new Blog();
            blog.setId(Integer.parseInt(doc.get("id")));
            blog.setReleaseDateStr(doc.get("releaseDate"));
            String title = doc.get("title");
            String content = doc.get("content");
            if (title != null) {
                TokenStream tokenStream = analyzer.tokenStream("title", new StringReader(title));
                String hTitle = highlighter.getBestFragment(tokenStream, title);
                // fall back to the plain title when there is nothing to highlight
                if (StringUtil.isEmpty(hTitle)) {
                    blog.setTitle(title);
                } else {
                    blog.setTitle(hTitle);
                }
            }
            if (content != null) {
                TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(content));
                String hContent = highlighter.getBestFragment(tokenStream, content);
                if (StringUtil.isEmpty(hContent)) {
                    // no highlight fragment: show at most the first 200 characters
                    if (content.length() <= 200) {
                        blog.setContent(content);
                    } else {
                        blog.setContent(content.substring(0, 200));
                    }
                } else {
                    blog.setContent(hContent);
                }
            }
            blogList.add(blog);
        }
        reader.close();
        return blogList;
    }
}
```
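For context, here is a rough sketch of how the class above might be called from the blog application. The class name `BlogSearchDemo` and the blog id `"42"` are made up for illustration, a populated index under `C://lucene` is assumed, and `getContent()` is assumed to pair with the `setContent()` used in `searchBlog`.

```java
package com.lucene;

import java.util.List;

// a minimal usage sketch, not part of the original project
public class BlogSearchDemo {
    public static void main(String[] args) throws Exception {
        BlogIndex blogIndex = new BlogIndex();

        // remove a deleted post from the index; addIndex/updateIndex are called the
        // same way with the project's Blog bean after a post is saved or edited
        blogIndex.deleteIndex("42"); // hypothetical blog id

        // full-text search over title and content; matched terms come back wrapped
        // in <b><font color='red'>...</font></b>, ready to be rendered in the page
        List<Blog> results = blogIndex.searchBlog("lucene");
        for (Blog b : results) {
            System.out.println(b.getId() + " | " + b.getTitle());
            System.out.println(b.getContent()); // getContent() assumed on the Blog bean
        }
    }
}
```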