Lucene 4.9 document的簡單應用

時間 2019-12-13
標籤 lucene 4.9 document 簡單應用简体版
原文鏈接
package com.merlin.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class LuceneIndexDemo {

	public static void main(String[] args) throws IOException, ParseException {

		LuceneIndexDemo demo = new LuceneIndexDemo();
		
//		demo.createIndex(); 建立索引
		demo.searcher("merlin");
		
		//刪除
		demo.delete();
		demo.query();
	}

	private void delete() {
		
		IndexWriter writer = null;
		 
        try {
        	
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_9,new StandardAnalyzer(Version.LUCENE_4_9));
            writer = new IndexWriter(FSDirectory.open(new File("D:\\index")), indexWriterConfig);
            
            //參數是一個選項,能夠是一個Query,也能夠是一個Term,Term是一個精確查找的值
            //此時刪除的文檔並不會徹底被刪除,而是存儲在一個回收站中,能夠恢復
            //使用Reader能夠有效的恢復取到的文檔數
            
            writer.deleteDocuments(new Term("path","E:\\wamp\\www\\meal\\Application\\Common\\Conf\\config.php"));
            
        } catch (Exception e) {
            e.printStackTrace();
        }finally{
            if(writer!=null){
                try {
                	writer.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }		
	}

	/**
	 *  被刪除的索引查詢
	 */
	public void query(){
        try {
            IndexReader indexReader = IndexReader.open(FSDirectory.open(new File("D:\\index")));
            System.out.println("存儲的文檔數:" + indexReader.numDocs());
            System.out.println("總存儲量:" + indexReader.maxDoc());
            System.out.println("被刪除的文檔：" + indexReader.numDeletedDocs());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
	
	/**
	 * 更新 索引
	 */

	public void update(){
        IndexWriter indexWriter = null;
        try {
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_9,new StandardAnalyzer(Version.LUCENE_4_9));
            indexWriter = new IndexWriter(FSDirectory.open(new File("D:\\index")), indexWriterConfig);
            //Luence並無提供更新,這裏的更新操做實際上是先刪除再添加的操做合集
            Document document = new Document();
            //更新path 爲 d:\test\test的數據
            indexWriter.updateDocument(new Term("path","D:\\test\\test"), document);
        } catch (Exception e) {
            e.printStackTrace();
        }finally{
            if(indexWriter!=null){
                try {
                    indexWriter.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
}
	
	/**
	 * 1.建立Directory 2.建立IndexWriter 3.建立Document對象 4.爲Document添加Field 爲本地文件夾建立
	 * 索引
	 */
	public void createIndex() {

		String indexPath = "D:\\index";// 索引存放路徑
		String docsPath = "E:\\wamp\\www\\meal";// 爲該文件夾下的全部文件創建索引
		boolean create = true; // 建立

		final File docDir = new File(docsPath);
		if (!docDir.exists() || !docDir.canRead()) {
			System.out
					.println("Document directory '"
							+ docDir.getAbsolutePath()
							+ "' does not exist or is not readable, please check the path");
			System.exit(1);
		}

		Date start = new Date();

		try {
			System.out.println("Indexing to directory '" + indexPath + "'...");

			Directory dir = FSDirectory.open(new File(indexPath));
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
			IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9,
					analyzer);

			if (create) {
				// Create a new index in the directory, removing any
				// previously indexed documents:
				iwc.setOpenMode(OpenMode.CREATE);
			} else {
				// Add new documents to an existing index:
				iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
			}

			IndexWriter writer = new IndexWriter(dir, iwc);
			indexDocs(writer, docDir);

			writer.close();

			Date end = new Date();
			System.out.println(end.getTime() - start.getTime()
					+ " total milliseconds");

		} catch (IOException e) {
			System.out.println(" caught a " + e.getClass()
					+ "\n with message: " + e.getMessage());
		}
	}

	/**
	 * 建立Directory 2.建立IndexReader 3.根據IndexReader建立IndexSearcher 4.建立搜索的Query
	 * 5.根據Searcher搜索而且返回TopDocs 6.根據TopDocs獲取ScoreDoc對象
	 * 7.根據Seacher和ScoreDoc對象獲取具體的Document對象 8.根據Document對象獲取須要的值
	 * 9.關閉IndexReader
	 * 
	 * @throws IOException
	 * @throws ParseException
	 */
	public void searcher( String querystring) throws IOException, ParseException {

		IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(
				"D:\\index")));

		IndexSearcher searcher = new IndexSearcher(reader);
		// :Post-Release-Update-Version.LUCENE_XY:

		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
		QueryParser parser = new QueryParser(Version.LUCENE_4_9, "contents",
				analyzer);

		// 搜索文件中含有querystring的文件列表
		Query query = parser.parse(querystring);

		TopDocs results = searcher.search(query, 10);
		ScoreDoc[] hits = results.scoreDocs;

		int numTotalHits = results.totalHits;
		System.out.println(numTotalHits + " total matching documents");

		for (int i = 0; i < hits.length; i++) {
			Document doc = searcher.doc(hits[i].doc);
			String path = doc.get("path");
			System.out.println(path);
		}

	}

	private void indexDocs(IndexWriter writer, File file) throws IOException {

		if (file.canRead()) {

			if (file.isDirectory()) {

				String[] files = file.list();
				if (files != null) {
					for (int i = 0; i < files.length; i++) {
						indexDocs(writer, new File(file, files[i]));
					}
				}
			} else {

				FileInputStream fis;
				try {
					fis = new FileInputStream(file);
				} catch (FileNotFoundException fnfe) {
					return;
				}

				try {

					Document doc = new Document();

					Field pathField = new StringField("path", file.getPath(),
							Field.Store.YES);
					doc.add(pathField);
					doc.add(new LongField("modified", file.lastModified(),
							Field.Store.NO));
					doc.add(new TextField("contents", new BufferedReader(
							new InputStreamReader(fis, StandardCharsets.UTF_8))));

					if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
						System.out.println("adding " + file);
						writer.addDocument(doc);
					} else {
						System.out.println("updating " + file);
						writer.updateDocument(new Term("path", file.getPath()),
								doc);
					}

				} finally {
					fis.close();
				}
			}
		}
	}
}
相關標籤/搜索
每日一句
每一个你不满意的现在，都有一个你没有努力的曾经。