辭職交接期間閒來無事,看了一下搜索引擎。Java 社區比較火的當然是 Lucene,想寫一個簡單的小例子;在網上找了些資料,不過都不是針對 4.3 版的,於是自己研究了一下。
下載地址:http://lucene.apache.org/core/
項目結構:
Constants.java 是常量類
LuceneIndex.java 是創建索引類
LuceneSearch.java 是搜索類
數據文件:
package com.xin;

/**
 * Filesystem locations shared by the indexing and search examples.
 */
public class Constants {

    /** Directory holding the files that are to be indexed. */
    public static final String INDEX_FILE_PATH = "e:\\lucene\\test";

    /** Directory in which the index itself is stored. */
    public static final String INDEX_STORE_PATH = "e:\\lucene\\index";
}
package com.xin; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; import java.io.Reader; import java.util.Date; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; /** * @author chongxin * @since 2013/6/19 * @version Lucene 4.3.1 * */ public class LuceneIndex { // 索引器 private IndexWriter writer = null; public LuceneIndex() { try { //索引文件的保存位置 Directory dir = FSDirectory.open(new File(Constants.INDEX_STORE_PATH)); //分析器 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); //配置類 IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40,analyzer); iwc.setOpenMode(OpenMode.CREATE);//建立模式 OpenMode.CREATE_OR_APPEND 添加模式 writer = new IndexWriter(dir, iwc); } catch (Exception e) { e.printStackTrace(); } } // 將要創建索引的文件構形成一個Document對象,並添加一個域"content" private Document getDocument(File f) throws Exception { Document doc = new Document(); FileInputStream is = new FileInputStream(f); Reader reader = new BufferedReader(new InputStreamReader(is)); //字符串 StringField LongField TextField Field pathField = new StringField("path", f.getAbsolutePath(),Field.Store.YES); Field contenField = new TextField("contents", reader); //添加字段 doc.add(contenField); doc.add(pathField); return doc; } public void writeToIndex() throws Exception { File folder = new File(Constants.INDEX_FILE_PATH); if (folder.isDirectory()) { String[] files = folder.list(); for (int i = 0; i < files.length; i++) { File file = new File(folder, 
files[i]); Document doc = getDocument(file); System.out.println("正在創建索引 : " + file + ""); writer.addDocument(doc); } } } public void close() throws Exception { writer.close(); } public static void main(String[] args) throws Exception { // 聲明一個對象 LuceneIndex indexer = new LuceneIndex(); // 創建索引 Date start = new Date(); indexer.writeToIndex(); Date end = new Date(); System.out.println("創建索引用時" + (end.getTime() - start.getTime()) + "毫秒"); indexer.close(); } }
執行結果:
正在創建索引 : e:\lucene\test\a.txt
正在創建索引 : e:\lucene\test\b.txt
正在創建索引 : e:\lucene\test\c.txt
正在創建索引 : e:\lucene\test\d.txt
創建索引用時109毫秒
生成的索引文件:
查找:
package com.xin; import java.io.File; import java.util.Date; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; /** * @author chongxin * @since 2013/6/19 * @version Lucene 4.3.1 * */ public class LuceneSearch { // 聲明一個IndexSearcher對象 private IndexSearcher searcher = null; // 聲明一個Query對象 private Query query = null; private String field = "contents"; public LuceneSearch() { try { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(Constants.INDEX_STORE_PATH))); searcher = new IndexSearcher(reader); } catch (Exception e) { e.printStackTrace(); } } //返回查詢結果 public final TopDocs search(String keyword) { System.out.println("正在檢索關鍵字 : " + keyword); try { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); QueryParser parser = new QueryParser(Version.LUCENE_40, field,analyzer); // 將關鍵字包裝成Query對象 query = parser.parse(keyword); Date start = new Date(); TopDocs results = searcher.search(query, 5 * 2); Date end = new Date(); System.out.println("檢索完成,用時" + (end.getTime() - start.getTime()) + "毫秒"); return results; } catch (Exception e) { e.printStackTrace(); return null; } } //打印結果 public void printResult(TopDocs results) { ScoreDoc[] h = results.scoreDocs; if (h.length == 0) { System.out.println("對不起,沒有找到您要的結果。"); } else { for (int i = 0; i < h.length; i++) { try { Document doc = searcher.doc(h[i].doc); System.out.print("這是第" + i + "個檢索到的結果,文件名爲:"); System.out.println(doc.get("path")); } catch (Exception e) { e.printStackTrace(); } } } 
System.out.println("--------------------------"); } public static void main(String[] args) throws Exception { LuceneSearch test = new LuceneSearch(); TopDocs h = null; h = test.search("中國"); test.printResult(h); h = test.search("人民"); test.printResult(h); h = test.search("共和國"); test.printResult(h); } }