lucene

時間 2019-11-17

標籤 lucene 简体版

原文鏈接

1. 依賴的 jar 包(Java)

package cn.itheima.lucene;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * JUnit examples that maintain a Lucene index on disk: building it from a
 * directory of files, deleting documents by term and updating a document by term.
 *
 * <p>Every test opens the index stored at {@link #INDEX_PATH} with an
 * {@code IKAnalyzer} and closes the writer and the directory when it finishes,
 * whether it succeeds or fails.
 */
public class IndexManagerTest {

    /** Directory that holds the index and the stored documents. */
    private static final String INDEX_PATH = "E:\\dic";

    /**
     * Directory whose files are turned into index documents.
     * NOTE(review): the value looks like a placeholder ("location of the files
     * to index") that must be replaced with a real path before running.
     */
    private static final String SOURCE_PATH = "建立成索引的文件所在位置";

    /**
     * Indexes every regular file found directly inside {@link #SOURCE_PATH}:
     * one file becomes one {@link Document}.
     *
     * @throws IllegalStateException if the source path cannot be listed as a directory
     * @throws Exception if reading a file or writing the index fails
     */
    @Test
    public void testIndexCreate() throws Exception {
        File dir = new File(SOURCE_PATH);
        // listFiles() returns null when the path is not a directory or cannot be read;
        // fail with a clear message instead of a NullPointerException in the loop.
        File[] files = dir.listFiles();
        if (files == null) {
            throw new IllegalStateException("Not a readable directory: " + dir.getAbsolutePath());
        }

        // Build all documents first so the index is only opened once they are ready.
        List<Document> docList = new ArrayList<Document>();
        for (File file : files) {
            // Sub-directories have no content to read; only regular files are indexed.
            if (!file.isFile()) {
                continue;
            }
            docList.add(toDocument(file));
        }

        // Resources are closed in reverse order: the writer first, then the directory.
        try (Directory directory = FSDirectory.open(new File(INDEX_PATH));
             IndexWriter indexWriter = newIndexWriter(directory)) {
            for (Document doc : docList) {
                indexWriter.addDocument(doc);
            }
            indexWriter.commit();
        }
    }

    /**
     * Deletes every document whose {@code fileName} field contains the term
     * {@code apache}.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void testIndexDel() throws Exception {
        try (Directory directory = FSDirectory.open(new File(INDEX_PATH));
             IndexWriter indexWriter = newIndexWriter(directory)) {
            // To wipe the whole index instead, call indexWriter.deleteAll().

            // A Term is a single token: first argument is the field name, second is
            // the token that a document must contain in that field to be deleted.
            indexWriter.deleteDocuments(new Term("fileName", "apache"));

            indexWriter.commit();
        }
    }

    /**
     * Replaces the documents matching a term with a new document.
     *
     * <p>An update looks up the given term; matching documents are deleted and the
     * new document is added. When nothing matches, the new document is simply added.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void testIndexUpdate() throws Exception {
        try (Directory directory = FSDirectory.open(new File(INDEX_PATH));
             IndexWriter indexWriter = newIndexWriter(directory)) {
            // Documents are selected by file name.
            Term term = new Term("fileName", "web");

            // Replacement document.
            Document doc = new Document();
            doc.add(new TextField("fileName", "xxxxxx", Store.YES));
            doc.add(new TextField("fileContext", "think in java xxxxxxx", Store.NO));
            doc.add(new LongField("fileSize", 100L, Store.YES));

            indexWriter.updateDocument(term, doc);

            indexWriter.commit();
        }
    }

    /**
     * Creates a writer for the given directory configured with an {@code IKAnalyzer}.
     *
     * <p>The code uses IKAnalyzer rather than StandardAnalyzer; the original author's
     * note says StandardAnalyzer works well for English but splits Chinese text into
     * single characters.
     *
     * @param directory the opened index directory
     * @return a new writer; the caller is responsible for closing it
     * @throws Exception if the writer cannot be created
     */
    private static IndexWriter newIndexWriter(Directory directory) throws Exception {
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
        return new IndexWriter(directory, config);
    }

    /**
     * Converts one file into a document with the fields {@code fileName},
     * {@code fileContext} and {@code fileSize}.
     *
     * @param file a regular file
     * @return the document describing {@code file}
     * @throws Exception if the file cannot be read
     */
    private static Document toDocument(File file) throws Exception {
        String fileName = file.getName();
        // NOTE(review): no charset is passed, so the file is presumably decoded with
        // the platform default encoding — confirm this matches the source files.
        String fileContext = FileUtils.readFileToString(file);
        long fileSize = FileUtils.sizeOf(file);

        Document doc = new Document();

        // Field arguments: field name, field value, whether the value is stored.

        // fileName: tokenized and indexed because it is searched by word,
        // stored because it is shown in the results.
        doc.add(new TextField("fileName", fileName, Store.YES));

        // fileContext: tokenized and indexed because it is searched by content;
        // not stored, so the content cannot be read back from a search hit.
        doc.add(new TextField("fileContext", fileContext, Store.NO));

        // fileSize: indexed as a number so documents can be searched by size,
        // stored so the size can be displayed.
        doc.add(new LongField("fileSize", fileSize, Store.YES));

        return doc;
    }
}

2.web

package cn.itheima.lucene;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * JUnit examples that query the Lucene index stored at {@link #INDEX_PATH} with
 * different query types and print the matching documents.
 *
 * <p>Tests that parse query text use an {@code IKAnalyzer}; the analyzer used for
 * searching must be the same one that was used to build the index.
 */
public class IndexSearchTest {

    /** Directory that holds the index and the stored documents. */
    private static final String INDEX_PATH = "E:\\dic";

    /** Maximum number of hits fetched and printed per search. */
    private static final int MAX_HITS = 5;

    /**
     * Searches with a query parsed from the query syntax {@code field:keyword}.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @Test
    public void testIndexSearch() throws Exception {
        Analyzer analyzer = new IKAnalyzer();
        // First argument is the default field: it is searched when the query text
        // names no field; a "field:keyword" query searches the named field instead.
        QueryParser queryParser = new QueryParser("fileContext", analyzer);
        Query query = queryParser.parse("fileName:web");

        searchAndPrint(query);
    }

    /**
     * Searches for documents whose {@code fileName} field contains the term
     * {@code apache}.
     *
     * @throws Exception if the index cannot be read
     */
    @Test
    public void testIndexTermQuery() throws Exception {
        // A Term is a single token in a given field.
        Term term = new Term("fileName", "apache");
        TermQuery termQuery = new TermQuery(term);

        searchAndPrint(termQuery);
    }

    /**
     * Searches for documents whose {@code fileSize} lies between 100 and 1000,
     * both bounds included.
     *
     * @throws Exception if the index cannot be read
     */
    @Test
    public void testNumericRangeQuery() throws Exception {
        // Arguments: field name, minimum, maximum, include minimum, include maximum.
        Query query = NumericRangeQuery.newLongRange("fileSize", 100L, 1000L, true, true);

        searchAndPrint(query);
    }

    /**
     * Combines two conditions: the file name contains {@code apache} and the file
     * size lies between 100 and 1000 bytes, both bounds included.
     *
     * @throws Exception if the index cannot be read
     */
    @Test
    public void testBooleanQuery() throws Exception {
        BooleanQuery query = new BooleanQuery();

        // Arguments: field name, minimum, maximum, include minimum, include maximum.
        Query numericQuery = NumericRangeQuery.newLongRange("fileSize", 100L, 1000L, true, true);

        Term term = new Term("fileName", "apache");
        TermQuery termQuery = new TermQuery(term);

        // Occur is the logical operator applied to each clause:
        //   MUST     behaves like AND,
        //   SHOULD   behaves like OR,
        //   MUST_NOT behaves like NOT.
        // Note: a query made only of MUST_NOT clauses is meaningless.
        query.add(termQuery, Occur.MUST);
        query.add(numericQuery, Occur.MUST);

        searchAndPrint(query);
    }

    /**
     * Searches with a query that matches every document in the index.
     *
     * @throws Exception if the index cannot be read
     */
    @Test
    public void testMathAllQuery() throws Exception {
        MatchAllDocsQuery query = new MatchAllDocsQuery();

        searchAndPrint(query);
    }

    /**
     * Searches the keyword {@code apache} in both the file name and the file
     * content fields; a match in either field is enough.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @Test
    public void testMultiFieldQueryParser() throws Exception {
        Analyzer analyzer = new IKAnalyzer();

        String[] fields = {"fileName", "fileContext"};
        MultiFieldQueryParser multiQuery = new MultiFieldQueryParser(fields, analyzer);
        Query query = multiQuery.parse("apache");

        searchAndPrint(query);
    }

    /**
     * Runs {@code query} against the index, prints the total hit count and then the
     * {@code fileName} and {@code fileSize} of at most {@link #MAX_HITS} hits.
     *
     * <p>The reader and the directory are closed before returning, also when the
     * search fails.
     *
     * @param query the query to execute
     * @throws Exception if the index cannot be opened or read
     */
    private static void searchAndPrint(Query query) throws Exception {
        // Resources are closed in reverse order: the reader first, then the directory.
        try (Directory dir = FSDirectory.open(new File(INDEX_PATH));
             IndexReader indexReader = IndexReader.open(dir)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            TopDocs topdocs = indexSearcher.search(query, MAX_HITS);
            // Total number of matching documents, which may exceed the number printed.
            System.out.println("=====count=====" + topdocs.totalHits);

            for (ScoreDoc scoreDoc : topdocs.scoreDocs) {
                // Load the stored fields of the hit by its document id.
                Document document = indexReader.document(scoreDoc.doc);
                System.out.println("fileName:" + document.get("fileName"));
                System.out.println("fileSize:" + document.get("fileSize"));
                System.out.println("============================================================");
            }
        }
    }
}

相關標籤/搜索

springmvc+mybatis+shiro+lucene+rest+webservice+maven

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。