有了上一篇創建的索引,就能夠進行檢索了。
數據庫查詢使用SQL,lucene檢索使用Query。
lucene提供了一個IndexSearcher類,檢索的功能經過這個類完成,其構造方法須要一個IndexReader對象。IndexReader用於讀取索引庫Directory。
IndexSearcher有許多重載的search方法,其中返回值爲TopDocs類型的爲最簡單的。本文使用這個方法進行演示。TopDocs保存檢索結果,其中的scoreDocs屬性保存了記錄的docId及評分,根據docId就能夠取得對應的記錄。
上一篇中已經知道初始化Directory對象須要索引庫的路徑,咱們提供一個Searcher類,簡化檢索的操做。
僞代碼以下:
單元測試
public class Searcher {
    /**
     * Searches the index (pseudo-code outline; the statements in the body
     * are placeholders for the steps, not compilable Java).
     *
     * @param indexDir
     *            directory where the index is stored
     * @param query
     *            search criteria
     * @param n
     *            number of results to return
     * @return the documents found by the search
     * @throws Exception
     */
    public List<Document> search(String indexDir, Query query, int n) throws Exception {
        建立Directory對象;
        建立IndexReader對象;
        建立IndexSearcher對象;
        使用IndexSearcher對象進行檢索;
    }
}
實現代碼以下:
package cn.lym.lucene.quickstart.search; import java.io.File; import java.util.ArrayList; import java.util.List; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; /** * 提供檢索的類 * * @author liuyimin * */ public class Searcher { /** * logger */ private static final Logger logger = LogManager.getLogger(Searcher.class); /** * 檢索 * * @param indexDir * 索引存放目錄 * @param query * 檢索條件 * @param n * 返回結果數量 * @return * @throws Exception */ public List<Document> search(String indexDir, Query query, int n) throws Exception { if (logger.isDebugEnabled()) { logger.debug("Search " + indexDir + " for " + n + " documents, with query: " + query); } Directory directory = FSDirectory.open(new File(indexDir)); IndexReader reader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(query, n); ScoreDoc[] scoreDocs = topDocs.scoreDocs; if (logger.isDebugEnabled()) { logger.debug("Totally " + scoreDocs.length + " documents hit."); } List<Document> documents = new ArrayList<>(scoreDocs.length); for (ScoreDoc scoreDoc : scoreDocs) { documents.add(searcher.doc(scoreDoc.doc)); } return documents; } }
按照上一篇的需求,編寫單元測試。
package cn.lym.lucene.quickstart.search; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.util.Date; import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.index.Term; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.junit.Before; import org.junit.Test; public class SearcherTest { /** * 索引存放目錄 */ private static final String indexDir = "E:\\Documents\\lucene-quickstart\\"; private Searcher searcher; @Before public void init() { this.searcher = new Searcher(); } /** * 按文件名搜索文件 */ @Test public void testSearchWithFileName() throws Exception { // 搜索文件名爲jdk-8u60-windows-x64.exe Query query = new TermQuery(new Term("filename", "jdk-8u60-windows-x64.exe")); int n = 10; List<Document> documents = this.searcher.search(indexDir, query, n); System.out.println(documents.size() + " documents hit."); for (Document document : documents) { System.out.println(document); } assertEquals(1, documents.size()); } /** * 按文件類型搜索文件 */ @Test public void testSearchWithFileType() throws Exception { // 搜索文件類型爲exe的文件 Query query = new TermQuery(new Term("type", "exe")); int n = Integer.MAX_VALUE; List<Document> documents = this.searcher.search(indexDir, query, n); System.out.println(documents.size() + " documents hit."); assertTrue(documents.size() > 0); } /** * 按文件類型搜索文件 */ @Test public void testSearchWithFileType2() throws Exception { // 搜索文件類型爲exe的文件 Query query = new TermQuery(new Term("type", "txt")); int n = Integer.MAX_VALUE; List<Document> documents = this.searcher.search(indexDir, query, n); System.out.println(documents.size() + " documents hit."); for (Document document : documents) { System.out.println(document.get("pathname")); } assertTrue(documents.size() > 0); } /** * 按文件大小搜索文件 */ @Test public void testSearchWithFileSize() throws Exception { // 搜索文件大小爲195,200,088字節的文件(jdk-8u60-windows-x64.exe) long 
size = 195_200_088L; Query query = NumericRangeQuery.newLongRange("size", size, size, true, true); int n = 10; List<Document> documents = this.searcher.search(indexDir, query, n); System.out.println(documents.size() + " documents hit."); for (Document document : documents) { System.out.println(document); } assertEquals(1, documents.size()); } /** * 按文件大小搜索文件 */ @Test public void testSearchWithFileSize2() throws Exception { // 搜索文件大小在1024~2048字節之間的文件 Long min = 1024L; Long max = 2048L; Query query = NumericRangeQuery.newLongRange("size", min, max, true, true); int n = 10; List<Document> documents = this.searcher.search(indexDir, query, n); System.out.println(documents.size() + " documents hit."); for (Document document : documents) { System.out.println(document); } assertTrue(documents.size() > 0); } /** * 按文件大小搜索文件 */ @Test public void testSearchWithFileSize3() throws Exception { // 搜索文件大小小於1024字節的文件 Long min = null; Long max = 1024L; Query query = NumericRangeQuery.newLongRange("size", min, max, true, true); int n = 10; List<Document> documents = this.searcher.search(indexDir, query, n); System.out.println(documents.size() + " documents hit."); for (Document document : documents) { System.out.println(document); } assertTrue(documents.size() > 0); } /** * 按文件大小搜索文件 */ @Test public void testSearchWithFileSize4() throws Exception { // 搜索文件大小大於1024 * 1024 * 1024字節的文件 Long min = 1024 * 1024 * 1024L; Long max = null; Query query = NumericRangeQuery.newLongRange("size", min, max, true, true); int n = 10; List<Document> documents = this.searcher.search(indexDir, query, n); System.out.println(documents.size() + " documents hit."); for (Document document : documents) { System.out.println(document); } assertTrue(documents.size() > 0); } /** * 按文件修改日期搜索文件 */ @Test public void testSearchWithModifiedTime() throws Exception { // 搜索最近一週修改的文件 Long max = new Date().getTime(); Long min = max - 7 * 24 * 3600 * 1000L; Query query = NumericRangeQuery.newLongRange("lastmodified", min, 
max, true, true); int n = 10; List<Document> documents = this.searcher.search(indexDir, query, n); System.out.println(documents.size() + " documents hit."); for (Document document : documents) { System.out.println(document); } assertTrue(documents.size() > 0); } /** * 按文件內容搜索文件 */ @Test public void testSearchWithContent() throws Exception { // 搜索內容中包含success的文件 Query query = new TermQuery(new Term("content", "success")); int n = Integer.MAX_VALUE; List<Document> documents = this.searcher.search(indexDir, query, n); System.out.println(documents.size() + " documents hit."); for (Document document : documents) { System.out.println(document.get("pathname")); } assertTrue(documents.size() > 0); } }
須要說明的幾點:
關於Query。Query有幾種子類:
等值查詢:TermQuery。文件名、文件路徑、文件類型、文件內容都屬於這種。
範圍查詢:NumericRangeQuery。文件大小、修改時間屬於這種。NumericRangeQuery經過靜態工廠方法建立,幾個參數分別爲:字段名,最小值,最大值,是否包含最小值,是否包含最大值。最大值、最小值某個爲null時,表示無窮大、無窮小。
關於檢索結果Document對象。咱們能夠從Document對象中,經過get(String name)方法,得到創建索引時,存儲選項設置爲存儲的字段。當試圖得到非存儲字段的值時,返回null。
本文代碼能夠從 https://git.oschina.net/coding4j/lucene-quickstart 獲取。