大型運輸行業實戰_day15_1_全文檢索之Lucene

時間 2020-05-29

標籤大型運輸行業實戰 day15 day 全文檢索 lucene 简体版

原文原文鏈接

1.引入

全文檢索簡介：非結構化數據又稱全文數據。從全文數據（文本）中進行檢索，就叫全文檢索。

2.數據庫搜索的弊端

    案例：
     select * from product where product like '%蘋果%';
一、使用like，會導致索引失效
    （沒有索引時）速度相對慢
二、搜索效果很差
三、沒有相關度排序

3.全文檢索實現原理

4.簡單使用

4.1.建立索引與搜索索引

首先導入jar包

代碼:

 1 package com.day02.lucene;  2 
 3 import org.apache.lucene.analysis.Analyzer;  4 import org.apache.lucene.analysis.standard.StandardAnalyzer;  5 import org.apache.lucene.document.Document;  6 import org.apache.lucene.document.Field;  7 import org.apache.lucene.document.FieldType;  8 import org.apache.lucene.index.DirectoryReader;  9 import org.apache.lucene.index.IndexReader;  10 import org.apache.lucene.index.IndexWriter;  11 import org.apache.lucene.index.IndexWriterConfig;  12 import org.apache.lucene.queryparser.classic.ParseException;  13 import org.apache.lucene.queryparser.classic.QueryParser;  14 import org.apache.lucene.search.IndexSearcher;  15 import org.apache.lucene.search.Query;  16 import org.apache.lucene.search.ScoreDoc;  17 import org.apache.lucene.search.TopDocs;  18 import org.apache.lucene.store.Directory;  19 import org.apache.lucene.store.FSDirectory;  20 import org.apache.lucene.util.Version;  21 import org.junit.Test;  22 
 23 import java.io.File;  24 import java.io.IOException;  25 
 26 /**
 27  * Created by Administrator on 2/10.  28  */
 29 public class HelloLucene {  30     //索引地址目錄
 31     private String file = "E:\\lucene\\indexOne";  32     //索引版本配置
 33     private Version matchVersion = Version.LUCENE_4_10_4;  34     //案例文檔
 35     private String doc1 = "Hello world Hello";  36     private String doc2 = "Hello java world Hello Hello";  37     private String doc3 = "Hello lucene world";  38 
 39     /**
 40  * 建立索引代碼  41  *  42  * @throws IOException  43      */
 44  @Test  45     public void testCreateIndex() throws IOException {  46         System.out.println("-----測試開始------");  47         //建立索引目錄地址對象
 48         Directory directory = FSDirectory.open(new File(file));  49         //指定分詞規則
 50         Analyzer analyzer = new StandardAnalyzer();  51         //建立索引配置對象
 52         IndexWriterConfig conf = new IndexWriterConfig(matchVersion, analyzer);  53         //建立索引對象
 54         IndexWriter indexWriter = new IndexWriter(directory, conf);  55         //建立文本屬性
 56         FieldType fieldType = new FieldType();  57         fieldType.setStored(true);//存儲數據
 58         fieldType.setIndexed(true);//添加索引  59 
 60         //建立要添加的文本對象
 61         Document document1 = new Document();  62         document1.add(new Field("doc", doc1, fieldType));  63         //添加索引
 64  indexWriter.addDocument(document1);  65 
 66         //建立要添加的文本對象
 67         Document document2 = new Document();  68         document2.add(new Field("doc", doc2, fieldType));  69         //添加索引
 70  indexWriter.addDocument(document2);  71 
 72         //建立要添加的文本對象
 73         Document document3 = new Document();  74         document3.add(new Field("doc", doc3, fieldType));  75         //添加索引
 76  indexWriter.addDocument(document3);  77 
 78         //關閉資源
 79  indexWriter.close();  80  }  81 
 82     /**
 83  *獲取索引  84  * 1.建立查詢分析器(QueryParser),使用查詢分析器獲得查詢對象  85  * 2.使用索引搜索器(IndexSearcher).search(查詢對象, 獲取的多少條數據),使用索引搜索器得到文檔結果集(TopDocs)  86  * 3.遍歷文檔結果集獲取文檔id  87  * 4.使用IndexSearcher經過文檔id獲取文檔對象,並獲取文檔具體字段值  88      */
 89     String key = "lucene";  90 
 91  @Test  92     public void testSearchIndex() throws IOException, ParseException {  93         System.out.println("-----測試開始------");  94         //1.建立索引目錄地址對象
 95         Directory directory = FSDirectory.open(new File(file));  96         //2.建立目錄閱讀器
 97         IndexReader indexReader = DirectoryReader.open(directory);  98         //3.建立索引搜索器
 99         IndexSearcher indexSearcher = new IndexSearcher(indexReader); 100         //須要查詢的字段
101         String query = "doc"; 102         //4.建立分詞器
103         StandardAnalyzer standardAnalyzer = new StandardAnalyzer(); 104         //5.建立查詢分析器
105         QueryParser queryParser = new QueryParser(query, standardAnalyzer); 106         //6.使用查詢分析器(查詢關鍵字)獲取對應的對象
107         Query parse = queryParser.parse(key); 108         //7.獲取查詢結果
109         int n = 1000;//最大返回對象數
110         TopDocs topDocs = indexSearcher.search(parse, n); 111         //8.獲取總天數
112         int totalHits = topDocs.totalHits; 113         System.out.println("totalHits=>" + totalHits); 114         //9.獲取查詢返回結果集
115         ScoreDoc[] scoreDocs = topDocs.scoreDocs; 116         //10.遍歷結果集
117         for (ScoreDoc scoreDoc : scoreDocs) { 118             //獲取文檔主鍵
119             int docId = scoreDoc.doc; 120             System.out.println("docId=" + docId); 121             //經過文檔Id獲取文檔對象
122             Document doc = indexSearcher.doc(docId); 123             //獲取文檔值
124             String docValue = doc.get("doc");//根據存放的key
125             System.out.println("docValue=" + docValue); 126  } 127  } 128 }