/**
 * Demo: builds a Lucene index from the files found in a data directory.
 */
package demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Indexes the files of a single directory (non-recursively) into a file-system based
 * Lucene index. Each accepted file becomes one document with the fields
 * {@code content}, {@code filename} and {@code fullpath}.
 */
public class Indexer {

    /** Index location used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_INDEX_DIR = "/Users/apple/Documents/index";

    /** Data location used by {@link #main(String[])} when no second argument is given. */
    private static final String DEFAULT_DATA_DIR = "/Users/apple/Documents/data";

    private final IndexWriter writer;

    /**
     * Opens an index writer on the given directory.
     *
     * @param indexDir path of the directory that holds the index
     * @throws IOException if the directory or the index writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        // Keep the index in a file-system directory.
        Directory dir = FSDirectory.open(new File(indexDir));
        // Configure the writer to analyze text with the standard analyzer.
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_44,
                new StandardAnalyzer(Version.LUCENE_44));
        writer = new IndexWriter(dir, conf);
    }

    /**
     * Indexes every regular, visible, readable file directly inside {@code dataDir}
     * that is accepted by {@code filter}.
     *
     * @param dataDir path of the directory whose files are indexed
     * @param filter  selects the files to index; {@code null} accepts every file
     * @return the value of {@code writer.numDocs()} after indexing
     * @throws IOException if {@code dataDir} cannot be listed
     * @throws Exception   if reading or indexing a file fails
     */
    public int index(String dataDir, FileFilter filter) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() yields null when the path is not a directory or an I/O error
            // occurs; fail with a clear message instead of a NullPointerException.
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }
        for (File f : files) {
            if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
                    && (filter == null || filter.accept(f))) {
                // The file satisfies every condition: index it.
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /**
     * Logs the file's canonical path to standard output and adds its document to the index.
     *
     * @param f file to index
     * @throws Exception if the document cannot be built or added
     */
    private void indexFile(File f) throws Exception {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }

    /**
     * Builds the document for one file: the file text (read as UTF-8) in {@code content},
     * plus the stored fields {@code filename} and {@code fullpath}.
     *
     * @param f file to convert
     * @return the populated document
     * @throws Exception if the file cannot be opened or its canonical path resolved
     */
    protected Document getDocument(File f) throws Exception {
        Document doc = new Document();
        // The reader is handed to the field and is not closed in this method.
        // NOTE(review): presumably it is consumed when the document is added - confirm
        // that the indexing chain also closes it.
        doc.add(new TextField("content",
                new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"))));
        doc.add(new StringField("filename", f.getName(), Field.Store.YES));
        doc.add(new StringField("fullpath", f.getCanonicalPath(), Field.Store.YES));
        return doc;
    }

    /**
     * Closes the underlying index writer.
     *
     * @throws IOException if closing the writer fails
     */
    public void close() throws IOException {
        writer.close();
    }

    /** Accepts files whose lower-cased name ends with {@code .txt}. */
    private static class TextFilesFilter implements FileFilter {
        @Override
        public boolean accept(File path) {
            return path.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Indexes the {@code .txt} files of the data directory and prints how long it took.
     *
     * @param args optional: {@code args[0]} is the index directory and {@code args[1]}
     *             the data directory; missing values fall back to the built-in defaults
     * @throws Exception if indexing fails
     */
    public static void main(String[] args) throws Exception {
        // Where the index is stored.
        String indexDir = args.length > 0 ? args[0] : DEFAULT_INDEX_DIR;
        // Where the data files live.
        String dataDir = args.length > 1 ? args[1] : DEFAULT_DATA_DIR;

        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed;
        try {
            numIndexed = indexer.index(dataDir, new TextFilesFilter());
        } finally {
            // Always release the writer, even when indexing fails.
            indexer.close();
        }
        long end = System.currentTimeMillis();
        System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds");
    }
}
// Search sketch: opens the index under /tmp/index and asks for the top 10 hits of the
// term "lucene" in the "contents" field.
// NOTE(review): the Indexer class in this file writes the file text to a field named
// "content" and uses a different index path - confirm which index and field are intended.
Directory dir = FSDirectory.open(new File("/tmp/index"));
// In Lucene 4.x an IndexSearcher is built from an IndexReader, not from a Directory.
// The caller should close the reader once searching is finished.
org.apache.lucene.index.IndexReader reader = org.apache.lucene.index.DirectoryReader.open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
Query q = new TermQuery(new Term("contents", "lucene"));
TopDocs hits = searcher.search(q, 10);
java