Apache Lucene 6.3.0 Demo

時間 2019-11-06

標籤 apache lucene 6.3.0 demo 欄目 Apache 简体版

原文原文鏈接

準備工作

本著凡事都用新版本的原則，本人的 JDK 是 1.8 版本，完美運行。但為了驗證「Apache Lucene 6.3.0 需要 JDK 1.8」的傳說，於是換了 JDK 1.7，發現果然運行不了，收到來自虛擬機的報錯（見文末）。

依賴包

在 pom.xml 的 dependencies 中加入以下代碼後，會自動導入下列 jar 包：

lucene-queryparser-6.3.0.jar
lucene-core-6.3.0.jar
lucene-queries-6.3.0.jar
lucene-sandbox-6.3.0.jar

<!-- Lucene classic query parser; per the jar list above, Maven also pulls in
     lucene-core, lucene-queries and lucene-sandbox alongside it. -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>6.3.0</version>
</dependency>

開始碼字

package com.lucene.test;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Minimal Lucene 6.3.0 demo: writes a handful of documents into an on-disk
 * index and then runs a single query against that index.
 */
public class FSDirectoryDemo {
    /** Location on disk where the index files are stored. */
    private static final String INDEX_DIR = "D://LuceneTest//index";

    private Directory directory;
    private Analyzer analyzer;

    /**
     * Creates the analyzer and opens the index directory.
     *
     * @throws Exception if the index directory cannot be opened
     */
    public FSDirectoryDemo() throws Exception {
        analyzer = new StandardAnalyzer();
        directory = initLuceneDirctory();
    }

    /**
     * Lazily opens the directory that holds the index files.
     *
     * <p>An index can live in one of two places:
     * <ol>
     *   <li>on disk, via {@code FSDirectory.open(Path)} (used here);</li>
     *   <li>in memory, via {@code new RAMDirectory()}, in which case it is
     *       gone as soon as the process ends.</li>
     * </ol>
     *
     * @return the already-open directory, or a newly opened one on first call
     * @throws Exception if the directory cannot be opened
     */
    private Directory initLuceneDirctory() throws Exception {
        if (directory == null) {
            File indexDir = new File(INDEX_DIR);
            directory = FSDirectory.open(indexDir.toPath());
        }
        return directory;
    }

    /**
     * Builds the {@link Document} that will be indexed.
     *
     * <p>A document is a bag of {@link Field}s. Each field is created with
     * {@code Field(String name, String value, FieldType type)}. The two
     * {@link TextField} constants used here differ in whether the original
     * value is kept in the index: {@code TYPE_STORED} values can be read back
     * from a search hit, {@code TYPE_NOT_STORED} values cannot (see the
     * output of {@link #luceneDemo()}).
     *
     * @param title   value of the stored {@code title} field
     * @param content value of the stored {@code content} field
     * @return a document with {@code content}, {@code title} and a fixed,
     *         non-stored {@code author} field
     */
    public static Document createDocument(String title, String content) {
        Document doc = new Document();
        doc.add(new Field("content", content, TextField.TYPE_STORED));
        doc.add(new Field("title", title, TextField.TYPE_STORED));
        doc.add(new Field("author", "paul", TextField.TYPE_NOT_STORED));
        return doc;
    }

    /**
     * Adds the sample documents to the index. The data is hard-coded here; a
     * real application could read it from a database instead.
     *
     * <p>Each call appends the four documents again, so calling it twice on
     * the same index produces duplicates.
     *
     * @throws IOException if the index cannot be written
     */
    public void addDirectory() throws IOException {
        // Index with the same analyzer that luceneDemo() uses to parse the query.
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        // try-with-resources closes the writer (and so releases the index
        // write lock) even when adding a document fails.
        try (IndexWriter writer = new IndexWriter(directory, iwc)) {
            // The searcher below never touches this writer: the two sides are
            // connected only through the shared Directory.
            writer.addDocument(createDocument("FieldName_1", "FieldValue content one"));
            writer.addDocument(createDocument("FieldName title two test", "FieldValue two"));
            writer.addDocument(createDocument("FieldName title three test", "FieldValue three"));
            writer.addDocument(createDocument("FieldName test title fore test", "FieldValue fore"));

            writer.commit();
        }
    }

    /**
     * Runs one query against the index and prints the hit count, the raw
     * {@link ScoreDoc}s, and the {@code content} and {@code author} values of
     * every returned document.
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    public void luceneDemo() throws Exception {
        // The reader holds open index files, so it must be closed when done.
        try (IndexReader ir = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(ir);

            // First constructor argument: the default field for query terms
            // that do not name a field themselves.
            QueryParser qp = new QueryParser("title", analyzer);

            // "name:term" searches the named field; a bare term searches the
            // default field given above. Single quotes are NOT phrase
            // delimiters in the classic syntax (double quotes are), so this
            // text is read as two separate terms rather than one phrase.
            Query query = qp.parse("title:'fieldName test'");

            // search(Query, int): the int is the maximum number of hits to
            // return. TopDocs.totalHits still counts every matching document.
            TopDocs topdoc = searcher.search(query, 2);
            System.out.println("命中個數:" + topdoc.totalHits);

            // Each ScoreDoc carries:
            //   score      - relevance score of the document for this query
            //   doc        - the document number, see IndexSearcher#doc(int)
            //   shardIndex - only set by TopDocs#merge; unused with a single TopDocs
            ScoreDoc[] hits = topdoc.scoreDocs;
            System.out.println("hits: " + Arrays.toString(hits));

            if (hits == null) {
                return;
            }

            // hits.length can be smaller than totalHits: three documents match
            // this query in the sample index, but only two were requested.
            for (ScoreDoc hit : hits) {
                Document hitDoc = searcher.doc(hit.doc);
                // content is TYPE_STORED, so its value can be read back.
                System.out.println(hitDoc.get("content"));
                // author is TYPE_NOT_STORED, so this prints null.
                System.out.println(hitDoc.get("author"));
            }
        }
    }

    /**
     * Entry point: builds the index on the first run, then executes the query.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            FSDirectoryDemo fsdd = new FSDirectoryDemo();
            try {
                // Only index when no index exists yet; otherwise every run
                // would append the same four documents again.
                if (!DirectoryReader.indexExists(fsdd.directory)) {
                    fsdd.addDirectory();
                }
                fsdd.luceneDemo();
            } finally {
                fsdd.directory.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

輸出結果

命中個數:3
hits: [doc=3 score=0.7275808 shardIndex=0, doc=1 score=0.6739625 shardIndex=0]
FieldValue fore
null
FieldValue two
null

如果用 JDK 1.7，報錯如下：

Exception in thread "main" java.lang.UnsupportedClassVersionError: org/apache/lucene/analysis/Analyzer : Unsupported major.minor version 52.0
	at java.lang.ClassLoader.defineClass1(Native Method)
	at java.lang.ClassLoader.defineClass(ClassLoader.java:800)
	at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
	at java.net.URLClassLoader.defineClass(URLClassLoader.java:449)
	at java.net.URLClassLoader.access$100(URLClassLoader.java:71)
	at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
	at java.security.AccessController.doPrivileged(Native Method)
	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
	at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
	at java.lang.Class.getDeclaredMethods0(Native Method)
	at java.lang.Class.privateGetDeclaredMethods(Class.java:2531)
	at java.lang.Class.getMethod0(Class.java:2774)
	at java.lang.Class.getMethod(Class.java:1663)
	at sun.launcher.LauncherHelper.getMainMethod(LauncherHelper.java:494)
	at sun.launcher.LauncherHelper.checkAndLoadMain(LauncherHelper.java:486)

參考文檔：http://blog.csdn.net/lb521200200/article/details/53549660

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。