前言
做檢索的人,多少都會了解一些 Lucene:它開源且容易上手,官方 API 足以編寫一些小 DEMO,並且基於倒排索引實現快速檢索。本文簡單實現增量添加索引、刪除索引、透過關鍵字查詢,以及更新索引等操作。
目前博豬用起來不太順手的地方是:讀取文件內容進行全文檢索時,需要自己編寫讀取過程(這一點 Solr 已經免費幫我們實現了)。而且建立索引的過程比較慢,還有很大的優化空間,這就需要靜下心來仔細研究了。
Lucene 建立索引的大致流程,在前一篇博客中已經講過,這裡再簡單說明一下:
1 Directory directory = FSDirectory.open("/tmp/testindex"); 2 IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer); 3 IndexWriter iwriter = new IndexWriter(directory, config); 4 Document doc = new Document(); 5 String text = "This is the text to be indexed."; 6 doc.add(new Field("fieldname", text, TextField.TYPE_STORED)); iwriter.close();
1 建立 Directory,獲取索引目錄
2 建立詞法分析器,建立 IndexWriter 對象
3 建立 Document 對象,存儲數據
4 關閉 IndexWriter,提交
1 /** 2 * 創建索引 3 * 4 * @param args 5 */ 6 public static void index() throws Exception { 7 8 String text1 = "hello,man!"; 9 String text2 = "goodbye,man!"; 10 String text3 = "hello,woman!"; 11 String text4 = "goodbye,woman!"; 12 13 Date date1 = new Date(); 14 analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); 15 directory = FSDirectory.open(new File(INDEX_DIR)); 16 17 IndexWriterConfig config = new IndexWriterConfig( 18 Version.LUCENE_CURRENT, analyzer); 19 indexWriter = new IndexWriter(directory, config); 20 21 Document doc1 = new Document(); 22 doc1.add(new TextField("filename", "text1", Store.YES)); 23 doc1.add(new TextField("content", text1, Store.YES)); 24 indexWriter.addDocument(doc1); 25 26 Document doc2 = new Document(); 27 doc2.add(new TextField("filename", "text2", Store.YES)); 28 doc2.add(new TextField("content", text2, Store.YES)); 29 indexWriter.addDocument(doc2); 30 31 Document doc3 = new Document(); 32 doc3.add(new TextField("filename", "text3", Store.YES)); 33 doc3.add(new TextField("content", text3, Store.YES)); 34 indexWriter.addDocument(doc3); 35 36 Document doc4 = new Document(); 37 doc4.add(new TextField("filename", "text4", Store.YES)); 38 doc4.add(new TextField("content", text4, Store.YES)); 39 indexWriter.addDocument(doc4); 40 41 indexWriter.commit(); 42 indexWriter.close(); 43 44 Date date2 = new Date(); 45 System.out.println("建立索引耗時:" + (date2.getTime() - date1.getTime()) + "ms\n"); 46 }
Lucene 擁有增量添加索引的功能,可以在不影響已有索引的情況下添加新的索引,並且會在適當的時機自動合併索引文件。
1 /** 2 * 增長索引 3 * 4 * @throws Exception 5 */ 6 public static void insert() throws Exception { 7 String text5 = "hello,goodbye,man,woman"; 8 Date date1 = new Date(); 9 analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); 10 directory = FSDirectory.open(new File(INDEX_DIR)); 11 12 IndexWriterConfig config = new IndexWriterConfig( 13 Version.LUCENE_CURRENT, analyzer); 14 indexWriter = new IndexWriter(directory, config); 15 16 Document doc1 = new Document(); 17 doc1.add(new TextField("filename", "text5", Store.YES)); 18 doc1.add(new TextField("content", text5, Store.YES)); 19 indexWriter.addDocument(doc1); 20 21 indexWriter.commit(); 22 indexWriter.close(); 23 24 Date date2 = new Date(); 25 System.out.println("增長索引耗時:" + (date2.getTime() - date1.getTime()) + "ms\n"); 26 }
Lucene 同樣是透過 IndexWriter 的 delete 方法來刪除索引。我們可以透過關鍵字,刪除與這個關鍵字相關的所有內容。如果只是想刪除某一個文檔,最好定義一個唯一的 ID 域,透過這個 ID 域來進行刪除操作。
1 /** 2 * 刪除索引 3 * 4 * @param str 刪除的關鍵字 5 * @throws Exception 6 */ 7 public static void delete(String str) throws Exception { 8 Date date1 = new Date(); 9 analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); 10 directory = FSDirectory.open(new File(INDEX_DIR)); 11 12 IndexWriterConfig config = new IndexWriterConfig( 13 Version.LUCENE_CURRENT, analyzer); 14 indexWriter = new IndexWriter(directory, config); 15 16 indexWriter.deleteDocuments(new Term("filename",str)); 17 18 indexWriter.close(); 19 20 Date date2 = new Date(); 21 System.out.println("刪除索引耗時:" + (date2.getTime() - date1.getTime()) + "ms\n"); 22 }
Lucene 沒有真正的更新操作。透過某個 fieldname 可以更新該域對應的索引,但實質上它是先刪除原有索引,再重新建立的。
1 /** 2 * 更新索引 3 * 4 * @throws Exception 5 */ 6 public static void update() throws Exception { 7 String text1 = "update,hello,man!"; 8 Date date1 = new Date(); 9 analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); 10 directory = FSDirectory.open(new File(INDEX_DIR)); 11 12 IndexWriterConfig config = new IndexWriterConfig( 13 Version.LUCENE_CURRENT, analyzer); 14 indexWriter = new IndexWriter(directory, config); 15 16 Document doc1 = new Document(); 17 doc1.add(new TextField("filename", "text1", Store.YES)); 18 doc1.add(new TextField("content", text1, Store.YES)); 19 20 indexWriter.updateDocument(new Term("filename","text1"), doc1); 21 22 indexWriter.close(); 23 24 Date date2 = new Date(); 25 System.out.println("更新索引耗時:" + (date2.getTime() - date1.getTime()) + "ms\n"); 26 }
Lucene 的查詢方式有很多種,這裡就不做詳細介紹了。查詢會返回一個 ScoreDoc 的集合,類似於 ResultSet,我們可以透過域名獲取想要的內容。
1 /** 2 * 關鍵字查詢 3 * 4 * @param str 5 * @throws Exception 6 */ 7 public static void search(String str) throws Exception { 8 directory = FSDirectory.open(new File(INDEX_DIR)); 9 analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); 10 DirectoryReader ireader = DirectoryReader.open(directory); 11 IndexSearcher isearcher = new IndexSearcher(ireader); 12 13 QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "content",analyzer); 14 Query query = parser.parse(str); 15 16 ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs; 17 for (int i = 0; i < hits.length; i++) { 18 Document hitDoc = isearcher.doc(hits[i].doc); 19 System.out.println(hitDoc.get("filename")); 20 System.out.println(hitDoc.get("content")); 21 } 22 ireader.close(); 23 directory.close(); 24 }
package test;

import java.io.File;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Demo of basic Lucene index operations: build, incremental insert, delete,
 * update and keyword search.
 *
 * <p>Every operation opens the index directory, does its work and closes
 * everything it opened, including when the operation fails.
 */
public class TestLucene {
    /** Filesystem path where the index is stored. */
    private static final String INDEX_DIR = "D:\\luceneIndex";

    /**
     * Entry point; enable the operation to run by (un)commenting the calls below.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            // index();
            search("man");
            // insert();
            // delete("text5");
            // update();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Replaces the document whose {@code filename} is {@code text1} with new content.
     *
     * @throws Exception if the index directory cannot be opened or written
     */
    public static void update() throws Exception {
        String text1 = "update,hello,man!";
        long start = System.currentTimeMillis();
        Directory directory = openIndexDirectory();
        try {
            IndexWriter writer = openWriter(directory);
            try {
                writer.updateDocument(new Term("filename", "text1"), newDocument("text1", text1));
            } finally {
                writer.close();
            }
        } finally {
            directory.close();
        }
        printElapsed("更新索引耗時:", start);
    }

    /**
     * Deletes every document whose {@code filename} field contains the given term.
     *
     * <p>NOTE(review): the term is handed to Lucene as-is, while {@code filename}
     * is indexed as a {@code TextField}; {@code str} presumably has to equal the
     * analyzed token (e.g. lower case) to match. Confirm against the Lucene
     * documentation.
     *
     * @param str term to delete by, matched against the {@code filename} field
     * @throws Exception if the index directory cannot be opened or written
     */
    public static void delete(String str) throws Exception {
        long start = System.currentTimeMillis();
        Directory directory = openIndexDirectory();
        try {
            IndexWriter writer = openWriter(directory);
            try {
                writer.deleteDocuments(new Term("filename", str));
            } finally {
                writer.close();
            }
        } finally {
            directory.close();
        }
        printElapsed("刪除索引耗時:", start);
    }

    /**
     * Adds one more sample document ({@code text5}) to the existing index.
     *
     * @throws Exception if the index directory cannot be opened or written
     */
    public static void insert() throws Exception {
        String text5 = "hello,goodbye,man,woman";
        long start = System.currentTimeMillis();
        Directory directory = openIndexDirectory();
        try {
            IndexWriter writer = openWriter(directory);
            try {
                writer.addDocument(newDocument("text5", text5));
                writer.commit();
            } finally {
                writer.close();
            }
        } finally {
            directory.close();
        }
        printElapsed("增長索引耗時:", start);
    }

    /**
     * Builds the index from four sample documents.
     *
     * @throws Exception if the index directory cannot be opened or written
     */
    public static void index() throws Exception {
        // { filename, content } pairs for the sample documents.
        String[][] samples = {
            {"text1", "hello,man!"},
            {"text2", "goodbye,man!"},
            {"text3", "hello,woman!"},
            {"text4", "goodbye,woman!"},
        };

        long start = System.currentTimeMillis();
        Directory directory = openIndexDirectory();
        try {
            IndexWriter writer = openWriter(directory);
            try {
                for (String[] sample : samples) {
                    writer.addDocument(newDocument(sample[0], sample[1]));
                }
                writer.commit();
            } finally {
                writer.close();
            }
        } finally {
            directory.close();
        }
        printElapsed("建立索引耗時:", start);
    }

    /**
     * Runs a keyword query against the {@code content} field and prints the
     * {@code filename} and {@code content} of each hit (at most 1000 hits).
     *
     * @param str query string, parsed with the classic {@code QueryParser}
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    public static void search(String str) throws Exception {
        Directory directory = openIndexDirectory();
        try {
            DirectoryReader ireader = DirectoryReader.open(directory);
            try {
                IndexSearcher isearcher = new IndexSearcher(ireader);
                QueryParser parser =
                        new QueryParser(Version.LUCENE_CURRENT, "content", newAnalyzer());
                Query query = parser.parse(str);

                ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
                for (ScoreDoc hit : hits) {
                    Document hitDoc = isearcher.doc(hit.doc);
                    System.out.println(hitDoc.get("filename"));
                    System.out.println(hitDoc.get("content"));
                }
            } finally {
                ireader.close();
            }
        } finally {
            directory.close();
        }
    }

    /** Creates the analyzer shared by indexing and query parsing. */
    private static Analyzer newAnalyzer() {
        return new StandardAnalyzer(Version.LUCENE_CURRENT);
    }

    /** Opens the directory at {@link #INDEX_DIR}; the caller must close it. */
    private static Directory openIndexDirectory() throws Exception {
        return FSDirectory.open(new File(INDEX_DIR));
    }

    /** Opens a writer on the given directory; the caller must close it. */
    private static IndexWriter openWriter(Directory directory) throws Exception {
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_CURRENT, newAnalyzer());
        return new IndexWriter(directory, config);
    }

    /** Builds a document with stored {@code filename} and {@code content} fields. */
    private static Document newDocument(String filename, String content) {
        Document doc = new Document();
        doc.add(new TextField("filename", filename, Store.YES));
        doc.add(new TextField("content", content, Store.YES));
        return doc;
    }

    /** Prints {@code label} followed by the milliseconds elapsed since {@code start}. */
    private static void printElapsed(String label, long start) {
        System.out.println(label + (System.currentTimeMillis() - start) + "ms\n");
    }
}
http://www.cnblogs.com/xing901022/p/3933675.html