前言
使用 Lucene 主要有以下幾個步驟:
準備(建立索引)
使用(查詢)
使用 Lucene 時,要把大部分的精力放在建立索引這一塊。
程式碼
建立索引:
import java.io.File; import java.io.FileReader; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public static void init() throws IOException { /** * step 1 * 建立Directory索引文件存放目錄 */ Path path = Paths.get("F:/lucene/index"); Directory directory = FSDirectory.open(path); /** * step 2 * 選用構建分詞解析器 */ CharArraySet stopword = new CharArraySet(200, true);//除去沒必要構建索引的詞 stopword.add("是,的,我,們,你,他,那,這,它".split(",")); Analyzer analyzer = new SmartChineseAnalyzer(stopword); /** * step3 * 構建寫索引器 */ IndexWriterConfig iwc = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(directory, iwc); writer.deleteAll();//清空索引 /** * step4 & stpe5 * 將數據庫的表記錄,文件系統的文本,或其它的數據封裝成一個Document, * 並經過寫索引器解析Document生成索引文件 */ File doc = new File("F:/lucene/doc");//源文件 for (File file : doc.listFiles()) { System.out.println(String.format("read file : %s", file.getName())); Document document = new Document(); document.add(new TextField("title", file.getName(), Field.Store.YES)); //文章的標題 document.add(new TextField("contents", new FileReader(file))); //文章的內容 document.add(new StringField("path", file.getAbsolutePath(), Field.Store.YES)); //文章的地址 writer.addDocument(document); } writer.close(); }
查詢:
import java.io.IOException; import java.nio.file.Paths; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public static ScoreDoc[] query(String key) throws IOException, ParseException{ /** * stpe1 * 構建讀索引器(指定索引目錄) */ Directory directory = FSDirectory.open(Paths.get("F:/lucene/index"));//索引文件存放目錄 IndexReader indexReader = DirectoryReader.open(directory); /** * stpe2 * 查詢器 */ IndexSearcher searcher = new IndexSearcher(indexReader); /** * stpe3 * 選用構建分詞解析器,要和建立索引時的分析器一致 */ Analyzer analyzer = new SmartChineseAnalyzer(); /** * step4 * 建立Query * key: 查詢關鍵字 * contents:只在文章內容中配置查詢(這是你建立索引時指定的域名). * 全部你也能夠指定「title」即按標題查詢,固然lunene提交多域查詢 */ QueryParser queryParser = new QueryParser("contents", analyzer); Query query = queryParser.parse(key); /** * step 5 * 查詢獲得查詢結果 */ TopDocs topDocs = searcher.search(query, 10); indexReader.close(); return topDocs.scoreDocs; }
根據業務需求來處理獲得的結果:
// Example of consuming the hits returned by query(...): print the stored title and path
// of every matching document.
ScoreDoc[] hits = query("lucene文章");
for (ScoreDoc scoreDoc : hits) {
    int docId = scoreDoc.doc;
    // NOTE(review): indexSearcher is not declared in this snippet. It must be an
    // IndexSearcher backed by a still-open reader on the same index; confirm where it
    // comes from.
    Document d = indexSearcher.doc(docId);
    // "\t" (tab) separates the two columns; the original "/t" printed a literal slash-t.
    System.out.println(String.format("文章名字:%s \t 路徑:%s", d.get("title"), d.get("path")));
}