Lucene是Apache軟件基金會Jakarta項目組的一個子項目,是一個開放源代碼的全文檢索引擎工具包,即它不是一個完整的全文檢索引擎,而是一個全文檢索引擎的架構,提供了完整的查詢引擎和索引引擎,部分文本分析引擎(英文與德文兩種西方語言)。Lucene的目的是爲軟件開發人員提供一個簡單易用的工具包,以方便地在目標系統中實現全文檢索的功能,或者是以此爲基礎創建起完整的全文檢索引擎。
以上介紹來自百度百科。
在全文索引工具中,都是由三部分組成的:
1,索引部分
2,分詞部分
3,搜索部分
下面進入Lucene的學習,由Lucene建立索引
建立工程,引入jar包lucene-core-3.6.2.jar
HelloLucene.java
- import java.io.File;
- import java.io.FileReader;
- import java.io.IOException;
- import java.io.Reader;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.TokenStream;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.CorruptIndexException;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriterConfig;
- import org.apache.lucene.queryParser.ParseException;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TopDocs;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.store.LockObtainFailedException;
- import org.apache.lucene.store.RAMDirectory;
- import org.apache.lucene.util.Version;
- public class HelloLucene {
- /**
- * 創建索引
- */
- public void index(){
- IndexWriter writer = null;
- try {
- //1,建立詞典
- // Directory directory = new RAMDirectory();//存儲到內存
- Directory directory = FSDirectory.open(new File("D:\\lucene\\index"));
- //2,建立IndexWriter索引筆
- IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
- writer = new IndexWriter(directory, config);
- //3,建立Document對象
- Document doc = null;
- File file = new File("d:/lucene/file");
- for(File f : file.listFiles()){
- doc = new Document();
- //4,爲Document添加Field
- doc.add(new Field("content",new FileReader(f)));
- doc.add(new Field("filename",f.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED));
- doc.add(new Field("path",f.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED));
- //5,經過IndexWriter添加文檔到索引中
- writer.addDocument(doc);
- }
- } catch (CorruptIndexException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (LockObtainFailedException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }finally{
- if(writer!=null){
- try {
- writer.close();
- } catch (CorruptIndexException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- }
- }
- /**
- * 搜索
- */
- public void search(){
- IndexReader reader = null;
- try {
- //1,建立Directory
- Directory directory = FSDirectory.open(new File("d:/lucene/index"));
- //2,建立IndexReader
- reader = IndexReader.open(directory);
- //3,根據IndexReader建立IndexSearcher
- IndexSearcher searcher = new IndexSearcher(reader);
- //4,建立搜索的Query
- QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));
- Query query = parser.parse("Apache License");
- //5,根據searcher搜索並返回TopDocs
- TopDocs tds = searcher.search(query, 10);
- //6,根據TopDocs獲取ScoreDoc對象
- ScoreDoc[] sds = tds.scoreDocs;
- //7,根據searcher和ScoreDoc對象獲取具體的Document對象
- for(ScoreDoc sd:sds){
- Document doc = searcher.doc(sd.doc);
- //8,根據Document對象獲取須要的值
- System.out.println(doc.get("filename")+":"+doc.get("path"));
- }
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- } catch (ParseException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }finally{
- //9,關閉reader
- if(reader!=null){
- try {
- reader.close();
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- }
- }
- }
測試
- import static org.junit.Assert.*;
- import org.junit.BeforeClass;
- import org.junit.Test;
- public class TestCase {
- @BeforeClass
- public static void setUpBeforeClass() throws Exception {
- }
- @Test
- public void testIndex() {
- new HelloLucene().index();
- }
- @Test
- public void testSearch(){
- new HelloLucene().search();
- }
- }