Lucene4.3入門

時間 2020-08-09

標籤 lucene4.3 lucene 入門简体版

原文鏈接

辭職交接期間無聊看了一下搜索引擎，java社區比較火的當然是Lucene，想寫一個簡單的小例子，在網上找了些資料，不過都不是4.3的，自己看了一下。

下載地址：http://lucene.apache.org/core/

項目結構：

Constants.java 是常量類

LuceneIndex.java 創建索引類

LuceneSearch.java 搜索類

數據文件：

package com.xin;

/**
 * Shared file-system locations used by the indexing and search examples.
 */
public class Constants {
    /** Directory holding the source files that are to be indexed. */
    public static final String INDEX_FILE_PATH = "e:\\lucene\\test";

    /** Directory in which the generated index is stored. */
    public static final String INDEX_STORE_PATH = "e:\\lucene\\index";
}

package com.xin;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene index from the plain files found directly inside
 * {@link Constants#INDEX_FILE_PATH} and stores it under
 * {@link Constants#INDEX_STORE_PATH}.
 *
 * <p>Each file becomes one document with two fields: {@code contents}
 * (tokenized file text, not stored) and {@code path} (absolute path, stored).
 *
 * @author chongxin
 * @since 2013/6/19
 * @version Lucene 4.3.1
 * */
public class LuceneIndex {
	// Index writer; always non-null once the constructor has returned.
	private final IndexWriter writer;

	/**
	 * Opens an index writer on {@link Constants#INDEX_STORE_PATH}.
	 *
	 * @throws IllegalStateException if the index directory or writer cannot be
	 *         opened; the original failure is attached as the cause
	 */
	public LuceneIndex() {
		try {
			// Where the index files are kept.
			Directory dir = FSDirectory.open(new File(Constants.INDEX_STORE_PATH));
			// Analyzer used to tokenize the "contents" field; searches must use the same one.
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
			IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
			// CREATE builds a fresh index each run; OpenMode.CREATE_OR_APPEND would add to an existing one.
			iwc.setOpenMode(OpenMode.CREATE);

			writer = new IndexWriter(dir, iwc);
		} catch (Exception e) {
			// Fail fast: a half-built instance would only fail later with an unrelated NullPointerException.
			throw new IllegalStateException(
					"Cannot open index writer at " + Constants.INDEX_STORE_PATH, e);
		}
	}

	/**
	 * Builds the document for one file.
	 *
	 * @param f      file being indexed; its absolute path is stored in the "path" field
	 * @param reader source of the file text for the "contents" field; the caller owns and closes it
	 * @return the document, ready to be handed to the writer
	 */
	private Document getDocument(File f, Reader reader) {
		Document doc = new Document();
		// TextField(name, Reader): tokenized and indexed, but the text itself is not stored.
		doc.add(new TextField("contents", reader));
		// StringField: indexed as a single token and stored so that it can be printed with search hits.
		doc.add(new StringField("path", f.getAbsolutePath(), Field.Store.YES));
		return doc;
	}

	/**
	 * Indexes a single file and always releases its input stream.
	 *
	 * @param file regular file to add to the index
	 * @throws Exception if the file cannot be opened or the writer rejects the document
	 */
	private void indexFile(File file) throws Exception {
		FileInputStream is = new FileInputStream(file);
		try {
			// NOTE(review): the platform default charset is used to decode the file, as before —
			// confirm the data files match it, or pass an explicit charset name here.
			Reader reader = new BufferedReader(new InputStreamReader(is));
			System.out.println("正在創建索引 : " + file);
			writer.addDocument(getDocument(file, reader));
		} finally {
			// The reader is consumed inside addDocument, so the stream can be closed here
			// whether indexing succeeded or failed.
			is.close();
		}
	}

	/**
	 * Adds every regular file directly inside {@link Constants#INDEX_FILE_PATH}
	 * to the index. Sub-directories are skipped. Does nothing when the path is
	 * not a directory.
	 *
	 * @throws Exception if the directory cannot be listed or a file cannot be indexed
	 */
	public void writeToIndex() throws Exception {
		File folder = new File(Constants.INDEX_FILE_PATH);
		if (!folder.isDirectory()) {
			return;
		}

		File[] files = folder.listFiles();
		if (files == null) {
			// listFiles() returns null when an I/O error occurs while reading the directory.
			throw new java.io.IOException("Cannot list directory " + folder);
		}
		for (File file : files) {
			if (!file.isFile()) {
				// Opening a directory as a stream would throw; only plain files are indexed.
				continue;
			}
			indexFile(file);
		}
	}

	/**
	 * Closes the underlying index writer.
	 *
	 * @throws Exception if closing the writer fails
	 */
	public void close() throws Exception {
		writer.close();
	}

	/**
	 * Builds the index and prints the elapsed time in milliseconds.
	 *
	 * @param args ignored
	 * @throws Exception if indexing or closing the writer fails
	 */
	public static void main(String[] args) throws Exception {
		LuceneIndex indexer = new LuceneIndex();
		try {
			long start = System.currentTimeMillis();
			indexer.writeToIndex();
			long end = System.currentTimeMillis();

			System.out.println("創建索引用時" + (end - start) + "毫秒");
		} finally {
			// Close even when indexing fails so the writer's lock on the index is released.
			indexer.close();
		}
	}
}

執行結果：

正在創建索引 : e:\lucene\test\a.txt
正在創建索引 : e:\lucene\test\b.txt
正在創建索引 : e:\lucene\test\c.txt
正在創建索引 : e:\lucene\test\d.txt
創建索引用時109毫秒

生成的索引文件：

查找：

package com.xin;

import java.io.File;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Runs keyword searches against the index stored under
 * {@link Constants#INDEX_STORE_PATH} and prints the path of each hit.
 *
 * @author chongxin
 * @since 2013/6/19
 * @version Lucene 4.3.1
 * */
public class LuceneSearch {
	// Maximum number of hits requested per search.
	private static final int MAX_HITS = 10;

	// Reader backing the searcher; kept so that it can be closed.
	private final IndexReader reader;
	private final IndexSearcher searcher;
	// Name of the field that queries are parsed against.
	private String field = "contents";

	/**
	 * Opens the index at {@link Constants#INDEX_STORE_PATH} for searching.
	 *
	 * @throws IllegalStateException if the index cannot be opened; the original
	 *         failure is attached as the cause
	 */
	public LuceneSearch() {
		try {
			reader = DirectoryReader.open(FSDirectory.open(new File(Constants.INDEX_STORE_PATH)));
			searcher = new IndexSearcher(reader);
		} catch (Exception e) {
			// Fail fast: without a searcher every later call would fail with a NullPointerException.
			throw new IllegalStateException(
					"Cannot open index at " + Constants.INDEX_STORE_PATH, e);
		}
	}

	/**
	 * Parses {@code keyword} as a query on the "contents" field and returns
	 * up to {@value #MAX_HITS} hits. Prints the keyword and the elapsed time.
	 *
	 * @param keyword query text in Lucene query-parser syntax
	 * @return the top hits, or {@code null} if parsing or searching failed
	 *         (the failure is printed to standard error)
	 */
	public final TopDocs search(String keyword) {
		System.out.println("正在檢索關鍵字 : " + keyword);
		try {
			// Same analyzer type and version as used when the index was written.
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
			QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
			// Turn the keyword into a Query object.
			Query query = parser.parse(keyword);

			long start = System.currentTimeMillis();
			TopDocs results = searcher.search(query, MAX_HITS);
			long end = System.currentTimeMillis();
			System.out.println("檢索完成，用時" + (end - start) + "毫秒");
			return results;
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		}
	}

	/**
	 * Prints the stored "path" of every hit, or a "nothing found" message when
	 * there are no hits.
	 *
	 * @param results hits returned by {@link #search(String)}; {@code null}
	 *        (a failed search) is treated as an empty result
	 */
	public void printResult(TopDocs results) {
		// search() returns null on failure; treat that like an empty result instead of crashing.
		ScoreDoc[] h = (results == null) ? new ScoreDoc[0] : results.scoreDocs;
		if (h.length == 0) {
			System.out.println("對不起，沒有找到您要的結果。");
		} else {
			for (int i = 0; i < h.length; i++) {
				try {
					Document doc = searcher.doc(h[i].doc);
					System.out.print("這是第" + i + "個檢索到的結果，文件名爲：");
					System.out.println(doc.get("path"));
				} catch (Exception e) {
					// A single unreadable document should not stop the remaining hits from printing.
					e.printStackTrace();
				}
			}
		}
		System.out.println("--------------------------");
	}

	/**
	 * Closes the underlying index reader.
	 *
	 * @throws Exception if closing the reader fails
	 */
	public void close() throws Exception {
		reader.close();
	}

	/**
	 * Runs three sample searches and prints their results.
	 *
	 * @param args ignored
	 * @throws Exception if the index reader cannot be closed
	 */
	public static void main(String[] args) throws Exception {
		LuceneSearch test = new LuceneSearch();
		try {
			test.printResult(test.search("中國"));
			test.printResult(test.search("人民"));
			test.printResult(test.search("共和國"));
		} finally {
			// Release the index files even if a search or print step throws.
			test.close();
		}
	}

}

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。