Lucene基礎

時間 2019-11-06

標籤 lucene 基礎简体版

原文鏈接

package com.snow.lucene;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.IOException;

public class MainTest {

    public static void main(String[] args) {

    }

    // 寫入索引的入門代碼
    @Test
    public void indexWriterTest01() throws Exception {
        //1. 建立索引寫入器對象
        // 參數1: Directory   : 指定索引庫的目錄位置   沒有中文和空格
        /**
         * 經常使用的實現類:
         *
         * FSDirectory: 用來指定文件系統的目錄, 將索引信息保存到磁盤上
         *      優勢: 索引能夠進行長期保存, 安全係數高
         *      缺點: 讀取略慢
         *
         * RAMDriectory: 內存目錄, 將索引庫信息存放到內存中
         *      優勢: 讀取速度快
         *      缺點: 不安全, 沒法長期保存, 關機後就消失了
         **/
        FSDirectory directory = FSDirectory.open(new File("F:\\idearWork\\test"));

        /**
         * 默認分詞器
         */
        //StandardAnalyzer analyzer = new StandardAnalyzer();
        /**
         * IK 分詞器
         **/
        IKAnalyzer analyzer = new IKAnalyzer();
        /**
         * 索引寫入器的配置類
         */
        // 參數2: IndexWriterConfig :  索引寫入器的配置對象
        //  Version.LATEST:  當前導入了那個版本的lucene, 就使用那個版本的, 若是導入了兩個版本的就使用最新的
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST,analyzer);

        //參數值: APPEND CREATE   CREATE_OR_APPEND
        /**
         * APPEND: 表示追加, 若是索引庫存在, 就會向索引庫中追加數據, 若是索引庫不存在, 直接報錯
         *
         * CREATE: 表示建立, 無論索引庫有沒有, 每一次都是從新建立一個新的索引庫
         *
         * CREATE_OR_APPEND: 若是索引庫有, 就會追加, 若是沒有 就會建立索引庫
         默認值也是 CREATE_OR_APPEND
         */
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        IndexWriter indexWriter = new IndexWriter(directory,config);

        //2. 添加原始文檔數據
        Document doc = new Document();  // 剛建立好的文檔對象, 沒有任何的數據

        doc.add(new LongField("id",1L, Field.Store.YES));
        doc.add(new TextField("title","2018.1.12傳智播客召開年會了", Field.Store.YES));
        doc.add(new TextField("content","今年我必定要中大獎, 要來特等獎, 去國外旅遊", Field.Store.YES));
        indexWriter.addDocument(doc);

        //2. 添加原始文檔數據
        Document doc2 = new Document();  // 剛建立好的文檔對象, 沒有任何的數據

        doc2.add(new LongField("id",2L, Field.Store.YES));
        doc2.add(new TextField("title","2019.2.12傳智播客召開年會了", Field.Store.YES));
        doc2.add(new TextField("content","今年我必定要中大獎, 去全世界旅遊", Field.Store.YES));
        indexWriter.addDocument(doc2);


        //3. 提交文檔數據
        indexWriter.commit();

        //4. 釋放資源
        indexWriter.close();

    }


    // 查詢索引的入門
    @Test
    public void indexSearcherTest01() throws Exception{
        //1. 建立查詢索引器對象
        DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("F:\\idearWork\\test")));
        IndexSearcher indexSearcher  = new IndexSearcher(reader);

        //2. 執行查詢, 添加查詢的條件
        // 注意事項:
//        QueryParser queryParser = new QueryParser("title",new IKAnalyzer());
//        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(new String[]{"年會", "世界"}, new IKAnalyzer());
//        Query query = queryParser.parse("大獎");//   其實就是用戶號輸入的關鍵詞
        FuzzyQuery query = new FuzzyQuery(new Term("title","年會"));

        // 建立排序對象,須要排序字段SortField，參數：字段的名稱、字段的類型、是否反轉若是是false，升序。true降序
        Sort sort = new Sort(new SortField("id", SortField.Type.LONG, false));
        // 搜索數據，查詢0~end條
        TopDocs topDocs = indexSearcher.search(query, 1,sort);    //結果集
        System.out.println("本次搜索共" + topDocs.totalHits + "條數據");

//        TopDocs topDocs = indexSearcher.search(query, Integer.MAX_VALUE); // 結果集

        //3. 獲取數據
        // TopDocs :  表示的結果集:  一共爲分兩部份內容  : 一個是總條數  文檔的數組
        int size = topDocs.totalHits;// 獲取總條數
        System.out.println("總條數是:"+size);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs; // 得分文檔的數據

        // 高亮的設置  --------------start----------------
        SimpleHTMLFormatter formatter  =new SimpleHTMLFormatter("<font color='red'>","</font>");
        QueryScorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter,scorer);

        // 高亮的設置  --------------end----------------


        //遍歷數組
        for (ScoreDoc scoreDoc : scoreDocs) {
            // scoreDoc : 得分文檔的對象  包含兩部分: 一部分是文檔的得分, 一部分文檔的id值
            float score = scoreDoc.score;//文檔的得分
            int docId = scoreDoc.doc;// 獲取文檔的id值 從0開始

            // 根據id獲取文檔的原始數據
            Document document = indexSearcher.doc(docId);

            String id = document.get("id"); // 是手動添加的id字段
            String title = document.get("title");
            String content =  document.get("content");


            // 高亮獲取 -----------start-------------
            title = highlighter.getBestFragment(new IKAnalyzer(), "title", title);

            // 高亮獲取 -----------end-------------

            System.out.println("文檔的id爲: "+id+";得分是:"+score +"; 文檔的標題是:"+title+"; 文檔的內容: " + content);
        }
    }

    // 索引修改:  索引的修改本質上就是先刪除, 後添加, 因此新修改後的數據,都是在最後面
    @Test
    public void indexWriterUpdateTest() throws  Exception {
        //1. 建立indexWriter對象
        IndexWriter indexWriter = CreateIndexWrite(new IKAnalyzer());

        //2. 添加須要進行修改的文檔對象
        Document doc = new Document(); // 替換後的數據
        doc.add(new StringField("id","5", Field.Store.YES));
        doc.add(new TextField("content","這是修改後的數據", Field.Store.YES));

        indexWriter.updateDocument(new Term("content","上海"),doc);

        //3. 提交
        indexWriter.commit();

        //4. 釋放資源
        indexWriter.close();

    }


    //索引刪除:
    @Test
    public void deleteIndexTest() throws Exception {
        //1. 建立 indexWriter對象
        IndexWriter indexWriter = CreateIndexWrite(new IKAnalyzer());

        //2. 刪除索引操做


        indexWriter.deleteAll(); //刪除所有數據
        // 想把 id爲5的這個文檔刪除, 能不能
        // 注意: 在進行使用id刪除的時候, 若是id是LongFiled此時不能直接使用id來刪除數據
        //indexWriter.deleteDocuments(new Term("id","2"));


        //3. 提交
        indexWriter.commit();

        //4. 釋放資源
        indexWriter.close();

    }



    /**
     * 建立寫入IndexWriter
     * @return
     * @throws IOException
     */
    private IndexWriter CreateIndexWrite(Analyzer analyzer) throws IOException {
        FSDirectory directory = FSDirectory.open(new File("F:\\idearWork\\test"));
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
        return new IndexWriter(directory,config);
    }
}

 <!-- Maven dependencies for the MainTest example above: Lucene modules (all at 4.10.2) plus the IK analyzer. -->
 <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>4.10.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queries</artifactId>
            <version>4.10.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-test-framework</artifactId>
            <version>4.10.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>4.10.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>4.10.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>4.10.2</version>
        </dependency>
        <!-- Pull in the IK analyzer (imported in the code as org.wltea.analyzer.lucene.IKAnalyzer) -->
        <dependency>
            <groupId>com.janeluo</groupId>
            <artifactId>ikanalyzer</artifactId>
            <version>2012_u6</version>
        </dependency>

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。