Lucene最初是由Doug Cutting開發的,2000年3月發佈第一個版本,是一個全文檢索引擎的架構,提供了完整的查詢引擎和索引引擎;Lucene得名於Doug妻子的中間名,同時這也是她外祖母的姓;目前是Apache基金會的一個頂級項目,同時也是學習搜索引擎入門的必知必會。
Lucene 是一個 Java 搜索類庫,它本身並非一個完整的解決方案,需要額外的開發工作。
優點:成熟的解決方案,有很多成功案例。Apache 頂級項目,正在持續快速地進步。龐大而活躍的開發社區,大量的開發人員。它只是一個類庫,有足夠的定製和優化空間:經過簡單定製,就可以滿足絕大部分常見的需求;經過優化,可以支持 10億+ 量級的搜索。
缺點:需要額外的開發工作。所有的擴展、分佈式、可靠性等都需要自己實現;非實時,從建索引到可以搜索中間有一個時間延遲,而當前的「近實時」(Lucene Near Real Time search)搜索方案的可擴展性有待進一步完善。
對於全文檢索,一般都由以下3個部分組成:
在接下來的一系列文章中會詳細介紹這三個部分,本文將簡單介紹Lucene環境搭建以及Lucene索引初步。
目前基於Lucene的產品有:
Solr,Nutch,Hbase,Katta,constellio,Summa,Compass,Bobo Search,Index Tank,Elastic Search,Hadoop contrib/index,LinkedIn,Eclipse,Cocoon
目前最新版的Lucene爲4.10.0版(截至2014-09-22),其官方主頁爲:http://lucene.apache.org/
或者點擊下載
如果你會使用Maven,那麼只需簡單地在pom.xml中加入以下內容即可:
<dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>4.10.0</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-common</artifactId> <version>4.10.0</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-queryparser</artifactId> <version>4.10.0</version> </dependency>
如果不會使用Maven,則需要手工下載相應的jar包進行開發。
1、創建Directory
2、創建IndexWriter
3、創建Document對象
4、爲Document添加Field
5、通過IndexWriter將文檔添加到索引中
package com.amos.lucene;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

/**
 * Created by amosli on 14-9-17.
 *
 * <p>Minimal Lucene indexing example: every regular file found in a fixed
 * source folder is added to an on-disk index stored under {@link #indexDir}.
 */
public class HelloLucene {
    /** Folder that holds the Lucene index files. */
    static String indexDir = "/home/amosli/developtest/lucene";

    /**
     * Builds the index.
     *
     * <p>For each regular file in {@code /home/amosli/developtest/testfile} one
     * document is written with three fields: {@code content} (read from the
     * file), {@code name} (indexed and stored) and {@code path} (stored only).
     * I/O failures are printed to standard error and end the run; the writer
     * and the directory are closed in every case.
     */
    public void index() {
        IndexWriter indexWriter = null;
        FSDirectory directory = null;
        try {
            // 1. Create the Directory. FSDirectory keeps the index on the local disk,
            //    which makes the generated files easy to inspect while testing
            //    (a RAMDirectory could be used instead for an in-memory index).
            directory = FSDirectory.open(new File(indexDir));

            // 2. Create the IndexWriter.
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_10_0, new StandardAnalyzer(Version.LUCENE_4_10_0));
            indexWriter = new IndexWriter(directory, indexWriterConfig);

            // Each field gets its own FieldType. A Field keeps a reference to the type
            // it is given, so re-using one instance and changing it afterwards would
            // also change the settings of fields that were already added.
            FieldType nameType = new FieldType();
            nameType.setIndexed(true);
            nameType.setStored(true);
            nameType.freeze();

            FieldType pathType = new FieldType();
            pathType.setIndexed(false);
            pathType.setStored(true);
            pathType.freeze();

            File file = new File("/home/amosli/developtest/testfile");
            File[] files = file.listFiles();
            if (files == null) {
                // listFiles() returns null when the path is not a readable directory.
                System.err.println("Cannot list files in " + file.getAbsolutePath());
                return;
            }

            for (File f : files) {
                if (!f.isFile()) {
                    // Sub-directories cannot be opened with FileReader; skip them
                    // instead of aborting the whole run.
                    continue;
                }
                FileReader contentReader = new FileReader(f);
                try {
                    // 3. Create the Document object.
                    Document document = new Document();

                    // 4. Add the fields to the Document.
                    document.add(new TextField("content", contentReader));
                    document.add(new Field("name", f.getName(), nameType));
                    document.add(new Field("path", f.getAbsolutePath(), pathType));

                    // 5. Add the document to the index through the IndexWriter.
                    indexWriter.addDocument(document);
                } finally {
                    // Release the file handle even if addDocument fails; closing an
                    // already closed reader has no effect.
                    contentReader.close();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
注:
1、這裏使用的是FSDirectory,是爲了方便進行測試,將生成的文件寫入到本地硬盤中;
2、Document相當於數據庫中的一條記錄,Field相當於數據庫中表的一列;
3、使用indexWriter將記錄添加到文檔索引中;
4、fieldType可以設置是否需要索引和是否需要存儲;
5、記得關閉indexWriter
生成的索引文件,如下圖所示:
1、創建Directory
2、創建IndexReader
3、根據IndexReader創建IndexSearcher
4、創建搜索的Query
5、根據Searcher搜索並且返回TopDocs
6、根據TopDocs獲取ScoreDoc對象
7、根據Searcher和ScoreDoc對象獲取具體的Document對象
8、根據Document對象獲取需要的值
public void search() { IndexReader indexReader = null; try { //一、建立Directory FSDirectory directory = FSDirectory.open(new File(indexDir)); //二、建立IndexReader indexReader = DirectoryReader.open(directory); //三、根據IndexReader建立IndexSearcher IndexSearcher indexSearcher = new IndexSearcher(indexReader); //四、建立搜索的Query //建立querypaser來肯定要搜索文件的內容,第二個參數表示搜索的域 QueryParser queryParser = new QueryParser("content", new StandardAnalyzer()); //建立query,表示搜索域爲content中包含java的文檔 Query query = queryParser.parse("java"); //五、根據Searcher搜索而且返回TopDocs TopDocs topDocs = indexSearcher.search(query, 100); //六、根據TopDocs獲取ScoreDoc對象 ScoreDoc[] sds = topDocs.scoreDocs; //七、根據Seacher和ScoreDoc對象獲取具體的Document對象 for (ScoreDoc sdc : sds) { Document doc = indexSearcher.doc(sdc.doc); //八、根據Document對象獲取須要的值 System.out.println("name:" + doc.get("name") + "-----> path:" + doc.get("path")); } } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); }finally{ if(indexReader!=null){ try { indexReader.close(); } catch (IOException e) { e.printStackTrace(); } } } }
輸出結果:
全部源碼:HelloLucene.java
package com.amos.lucene;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

/**
 * Created by amosli on 14-9-17.
 *
 * <p>Minimal Lucene example with two steps: {@link #index()} writes an
 * on-disk index for the files of a fixed source folder, and
 * {@link #search()} queries that index and prints the hits.
 */
public class HelloLucene {
    /** Folder that holds the Lucene index files. */
    static String indexDir = "/home/amosli/developtest/lucene";

    /**
     * Builds the index.
     *
     * <p>For each regular file in {@code /home/amosli/developtest/testfile} one
     * document is written with three fields: {@code content} (read from the
     * file), {@code name} (indexed and stored) and {@code path} (stored only).
     * I/O failures are printed to standard error and end the run; the writer
     * and the directory are closed in every case.
     */
    public void index() {
        IndexWriter indexWriter = null;
        FSDirectory directory = null;
        try {
            // 1. Create the Directory. FSDirectory keeps the index on the local disk,
            //    which makes the generated files easy to inspect while testing
            //    (a RAMDirectory could be used instead for an in-memory index).
            directory = FSDirectory.open(new File(indexDir));

            // 2. Create the IndexWriter.
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_10_0, new StandardAnalyzer(Version.LUCENE_4_10_0));
            indexWriter = new IndexWriter(directory, indexWriterConfig);

            // Each field gets its own FieldType. A Field keeps a reference to the type
            // it is given, so re-using one instance and changing it afterwards would
            // also change the settings of fields that were already added.
            FieldType nameType = new FieldType();
            nameType.setIndexed(true);
            nameType.setStored(true);
            nameType.freeze();

            FieldType pathType = new FieldType();
            pathType.setIndexed(false);
            pathType.setStored(true);
            pathType.freeze();

            File file = new File("/home/amosli/developtest/testfile");
            File[] files = file.listFiles();
            if (files == null) {
                // listFiles() returns null when the path is not a readable directory.
                System.err.println("Cannot list files in " + file.getAbsolutePath());
                return;
            }

            for (File f : files) {
                if (!f.isFile()) {
                    // Sub-directories cannot be opened with FileReader; skip them
                    // instead of aborting the whole run.
                    continue;
                }
                FileReader contentReader = new FileReader(f);
                try {
                    // 3. Create the Document object.
                    Document document = new Document();

                    // 4. Add the fields to the Document.
                    document.add(new TextField("content", contentReader));
                    document.add(new Field("name", f.getName(), nameType));
                    document.add(new Field("path", f.getAbsolutePath(), pathType));

                    // 5. Add the document to the index through the IndexWriter.
                    indexWriter.addDocument(document);
                } finally {
                    // Release the file handle even if addDocument fails; closing an
                    // already closed reader has no effect.
                    contentReader.close();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches the index under {@link #indexDir} for documents whose
     * {@code content} field matches the query text {@code java} and prints the
     * stored {@code name} and {@code path} of up to 100 hits to standard output.
     *
     * <p>I/O and query-parsing failures are printed to standard error. The
     * reader and the directory are closed in every case.
     */
    public void search() {
        FSDirectory directory = null;
        IndexReader indexReader = null;
        try {
            // 1. Create the Directory.
            directory = FSDirectory.open(new File(indexDir));

            // 2. Create the IndexReader.
            indexReader = DirectoryReader.open(directory);

            // 3. Create the IndexSearcher on top of the IndexReader.
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // 4. Create the Query. The parser's first argument is the default field
            //    to search; the query below looks for "java" in the content field.
            QueryParser queryParser = new QueryParser("content", new StandardAnalyzer());
            Query query = queryParser.parse("java");

            // 5. Run the search and get the TopDocs (at most 100 hits).
            TopDocs topDocs = indexSearcher.search(query, 100);

            // 6. Get the ScoreDoc entries from the TopDocs.
            ScoreDoc[] sds = topDocs.scoreDocs;

            // 7. Load the concrete Document for each ScoreDoc through the searcher.
            for (ScoreDoc sdc : sds) {
                Document doc = indexSearcher.doc(sdc.doc);
                // 8. Read the wanted stored values from the Document.
                System.out.println("name:" + doc.get("name") + "-----> path:" + doc.get("path"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            // The directory was opened by this method, so it is closed here too,
            // after the reader that was using it.
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
TestHelloLucene.java
package com.amos.lucene;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.junit.Test;

/**
 * Created by amosli on 14-9-17.
 *
 * <p>JUnit entry points that run the two steps of {@code HelloLucene}.
 * Neither test asserts anything; each one only invokes the step.
 */
public class TestHelloLucene {

    /** Runs the indexing step on a fresh {@code HelloLucene} instance. */
    @Test
    public void testIndex() {
        new HelloLucene().index();
    }

    /** Runs the search step on a fresh {@code HelloLucene} instance. */
    @Test
    public void testSearch() {
        new HelloLucene().search();
    }
}