lucene中FSDirectory、RAMDirectory的用法

時間 2019-12-04

標籤 lucene fsdirectory ramdirectory 用法

原文鏈接

package com.ljq.one;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.junit.Test;

public class DirectoryTest {
// Path of the source text file that gets indexed
String dspath = "E:/workspace/mylucene/lucenes/IndexWriter addDocument's a javadoc .txt";
// Location where the index files are stored, i.e. the index store
String indexpath = "E:/workspace/mylucene/luceneIndex";
// Analyzer (tokenizer) shared by the index writers and the query parser in this class
Analyzer analyzer = new StandardAnalyzer();

/**
* 建立索引，會拋異常，由於沒對索引庫進行保存
*
* IndexWriter 用來操做（增、刪、改）索引庫的
*/
@Test
public void createIndex() throws Exception {
//Directory dir=FSDirectory.getDirectory(indexpath);
//內存存儲：優勢速度快，缺點程序退出數據就沒了，因此記得程序退出時保存索引庫，已FSDirectory結合使用
//因爲此處只暫時保存在內存中，程序退出時沒進行索引庫保存，所以在搜索時程序會報錯
Directory dir=new RAMDirectory();
File file = new File(dspath);
//Document存放通過組織後的數據源，只有轉換爲Document對象才能夠被索引和搜索到
Document doc = new Document();
//文件名稱
doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
//檢索到的內容
doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));
//文件大小
doc.add(new Field("size", NumberTools.longToString(file.length()),
Store.YES, Index.NOT_ANALYZED));
//檢索到的文件位置
doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));優化

// 創建索引
//第一種方式
//IndexWriter indexWriter = new IndexWriter(indexpath, analyzer, MaxFieldLength.LIMITED);
//第二種方式
IndexWriter indexWriter = new IndexWriter(dir, analyzer, MaxFieldLength.LIMITED);
indexWriter.addDocument(doc);
indexWriter.close();
}

/**
 * Builds the index in memory and then saves it to disk (recommended approach).
 *
 * <p>Steps performed:
 * <ol>
 *   <li>open the on-disk directory at {@code indexpath} and copy it into a
 *       {@link RAMDirectory};</li>
 *   <li>add a document for the source file to the in-memory index;</li>
 *   <li>open a writer on the on-disk directory, merge the in-memory index
 *       into it, then commit and optimize.</li>
 * </ol>
 *
 * <p>{@link IndexWriter} is the class used to add, delete and update entries
 * in an index store.
 *
 * @throws Exception if either index cannot be opened or written
 */
@Test
public void createIndex2() throws Exception {
    Directory fsDir = FSDirectory.getDirectory(indexpath);
    // 1. On start-up: load the on-disk index into memory.
    Directory ramDir = new RAMDirectory(fsDir);

    // While the program runs, all writes go to ramDir.
    IndexWriter ramIndexWriter = new IndexWriter(ramDir, analyzer, MaxFieldLength.LIMITED);
    try {
        // Data source
        File file = new File(dspath);
        // Build the Document to add
        Document doc = new Document();
        // File name
        doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        // Searchable file content
        doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));
        // File size, encoded as a string by NumberTools
        doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));
        // Location of the file
        doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));
        ramIndexWriter.addDocument(doc);
    } finally {
        // Close the in-memory writer even when reading or indexing fails.
        ramIndexWriter.close();
    }

    // 2. On exit: save the in-memory index to disk.
    // The boolean argument is the writer's "create" flag; with true the on-disk
    // index is rebuilt from the in-memory copy, which already holds the old content.
    IndexWriter fsIndexWriter = new IndexWriter(fsDir, analyzer, true, MaxFieldLength.LIMITED);
    try {
        fsIndexWriter.addIndexesNoOptimize(new Directory[] {ramDir});

        // Optimization step
        fsIndexWriter.commit();
        fsIndexWriter.optimize();
    } finally {
        // Close the on-disk writer even when merging or optimizing fails.
        fsIndexWriter.close();
    }
}

/**
 * Optimizes the on-disk index at {@code indexpath} without adding documents.
 *
 * @throws Exception if the index cannot be opened or optimized
 */
@Test
public void createIndex3() throws Exception {
    Directory fsDir = FSDirectory.getDirectory(indexpath);
    IndexWriter fsIndexWriter = new IndexWriter(fsDir, analyzer, MaxFieldLength.LIMITED);
    try {
        fsIndexWriter.optimize();
    } finally {
        // Close the writer even when optimize() fails.
        fsIndexWriter.close();
    }
}

/**
* 搜索
*
* IndexSearcher 用來在索引庫中進行查詢
*/
@Test
public void search() throws Exception {
//請求字段
//String queryString = "document";
String queryString = "adddocument";spa

// 1，把要搜索的文本解析爲 Query
String[] fields = { "name", "content" };
QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);
Query query = queryParser.parse(queryString);對象

// 2，進行查詢，從索引庫中查找
IndexSearcher indexSearcher = new IndexSearcher(indexpath);
Filter filter = null;
TopDocs topDocs = indexSearcher.search(query, filter, 10000);
System.out.println("總共有【" + topDocs.totalHits + "】條匹配結果");索引

// 3，打印結果
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
// 文檔內部編號
int index = scoreDoc.doc;
// 根據編號取出相應的文檔
Document doc = indexSearcher.doc(index);
System.out.println("------------------------------");
System.out.println("name = " + doc.get("name"));
System.out.println("content = " + doc.get("content"));
System.out.println("size = " + NumberTools.stringToLong(doc.get("size")));
System.out.println("path = " + doc.get("path"));
}
}內存

/**
 * Reads a whole file as text.
 *
 * <p>The file is read line by line and each line is appended to the result
 * followed by a single {@code "\n"}, so any original line terminator is
 * replaced and non-empty content always ends with a newline. Bytes are
 * decoded with the platform default charset because no charset is passed to
 * the {@link InputStreamReader}.
 *
 * @param file the file to read
 * @return the file content; an empty string when the file has no lines
 * @throws RuntimeException wrapping the {@link IOException} raised when the
 *         file cannot be opened or read
 */
public static String readFileContent(File file) {
    try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        try {
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append("\n");
            }
            return content.toString();
        } finally {
            // Release the file handle even when a read fails part-way through.
            reader.close();
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
}