Lucene整合項目

      1:導入Lucene相關的jar包

其中:

  • lucene-core-3.6.2.jar核心包
  • lucene-analyzers-3.6.2.jar(分詞器)
  • lucene-highlighter-3.6.2.jar(高亮)
  • lucene-memory-3.6.2.jar(高亮)
  • IKAnalyzer2012_u6.jar(中文分詞器)

    2:lucene原理圖

    (1)索引庫操作原理

    (2)索引庫中存放數據原理

  • 3:lucene開發原理(數據庫與索引庫同步)

  •  

  •  

    數據庫與索引庫中存放相同的數據,可以使用數據庫中存放的ID來標識和區分同一條數據。

    • 數據庫中用來存放數據

    • 索引庫中用來查詢、檢索

    索引庫支持多種查詢檢索方式,

    特點:

    1:由於是索引查詢(通過索引查詢數據),檢索速度快,搜索的結果更加準確

    2:生成文本摘要,摘要截取搜索的文字出現最多的地方

    3:顯示查詢的文字高亮

    4:分詞查詢等  

  • 4:配置文件

  • (1)導入以下3個配置文件,放置到項目的資源路徑下(類路徑)

  •   IKAnalyzer.cfg.xml

    <?xml version="1.0" encoding="UTF-8"?>

    <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd"> 

    <properties> 

        <comment>IK Analyzer 擴展配置</comment>

        <!-- Users can configure their own extension dictionaries here -->

        <entry key="ext_dict">ext.dic;</entry>

       

        <!-- Users can configure their own extension stop-word dictionaries here -->

        <entry key="ext_stopwords">stopword.dic;</entry>

       

    </properties>

     

    以及ext.dic(擴展詞庫)

  • stopword.dic(停用詞庫)
  • 5:索引庫基本代碼

    (1)導入以下3個文件,放置到項目的util包下
  • Configuration.java

    public class Configuration {

     

        //索引庫的目錄位置

        private static Directory directory;

        //分詞器

        private static Analyzer analyzer;

       

        static{

            try {

                /**索引庫目錄爲D盤indexDir*/

                directory = FSDirectory.open(new File("D:/indexDir/"));

                /**詞庫分詞*/

                analyzer = new IKAnalyzer();

            } catch (Exception e) {

                e.printStackTrace();

            }

        }

       

        public static Directory getDirectory() {

            return directory;

        }

        public static Analyzer getAnalyzer() {

            return analyzer;

        }

       

    }

    分析:

    /**索引庫目錄爲D盤indexDir*/

                directory = FSDirectory.open(new File("D:/indexDir/"));

    表示指定索引庫的位置,在D盤的indexDir文件夾下,索引庫存放的數據將採用二進制的方式

                /**詞庫分詞*/

                analyzer = new IKAnalyzer();

    表示分詞器,對索引庫新增數據和對索引庫查詢數據,都需要對操作的數據進行分詞,將分詞後的數據存放,查詢的時候也要通過你的條件,分詞查詢和檢索

     

    • FileUploadDocument.java

    public class FileUploadDocument {

     

        /**將ElecFileUpload對象轉換成Document對象*/

        public static Document FileUploadToDocument(ElecFileUpload elecFileUpload){

            Document document = new Document();

            String seqId = NumericUtils.intToPrefixCoded(elecFileUpload.getSeqId());

            //主鍵ID

            document.add(new Field("seqId",seqId,Store.YES,Index.NOT_ANALYZED));

            //文件名

            document.add(new Field("fileName", elecFileUpload.getFileName(), Store.YES, Index.ANALYZED));

            //文件描述

            document.add(new Field("comment", elecFileUpload.getComment(), Store.YES, Index.ANALYZED));

            //所屬單位

            document.add(new Field("projId",elecFileUpload.getProjId(),Store.YES,Index.NOT_ANALYZED));

            //圖紙類別

            document.add(new Field("belongTo",elecFileUpload.getBelongTo(),Store.YES,Index.NOT_ANALYZED));

            return document;

        }

       

        /**將Document對象轉換成ElecFileUpload對象*/

        public static ElecFileUpload documentToFileUpload(Document document){

            ElecFileUpload elecFileUpload = new ElecFileUpload();

            Integer seqId = NumericUtils.prefixCodedToInt(document.get("seqId"));

            //主鍵ID

            elecFileUpload.setSeqId(seqId);

            //文件名

            elecFileUpload.setFileName(document.get("fileName"));

            //文件描述

            elecFileUpload.setComment(document.get("comment"));

            //所屬單位

            elecFileUpload.setProjId(document.get("projId"));

            //圖紙類別

            elecFileUpload.setBelongTo(document.get("belongTo"));

            return elecFileUpload;

        }

    }

  •  

    這裏要注意:索引庫中存放的數據要轉換成Document對象(每條數據就是一個Document對象),並向Document對象中存放Field對象(每條數據對應的字段,例如主鍵ID、所屬單位、圖紙類別、文件名稱、備註等),將每一個字段中的值都存放到Field對象中

     

    • LuceneUtils.java

    public class LuceneUtils {

     

        /**向索引庫中新增數據*/

        public void saveFileUpload(ElecFileUpload elecFileUpload) {

            Document document = FileUploadDocument.FileUploadToDocument(elecFileUpload);

            try {

                IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_36,Configuration.getAnalyzer());

                IndexWriter indexWriter = new IndexWriter(Configuration.getDirectory(),indexWriterConfig);

                indexWriter.addDocument(document);

                indexWriter.close();

            } catch (Exception e) {

                throw new RuntimeException();

            }

        }

       

        /**索引庫中刪除數據*/

        public void deleteFileUploadByID(Integer seqId) {

            //指定詞條的最小單位,至關於id=1

            String id = NumericUtils.intToPrefixCoded(seqId);

            Term term = new Term("seqId", id);

            try {

                IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_36,Configuration.getAnalyzer());

                IndexWriter indexWriter = new IndexWriter(Configuration.getDirectory(),indexWriterConfig);

                indexWriter.deleteDocuments(term);

                indexWriter.close();

            } catch (Exception e) {

                e.printStackTrace();

            }

           

        }

     

        /**使用搜索條件,從索引庫中搜索出對應的結果*/

        public List<ElecFileUpload> searchFileUploadByCondition(String queryString,String projId,String belongTo) {

            List<ElecFileUpload> list = new ArrayList<ElecFileUpload>();

            try {

                IndexSearcher indexSearcher = new IndexSearcher(IndexReader.open(Configuration.getDirectory()));

               

                /**使用lucene的多條件查詢,即boolean查詢,即必須知足3個條件*/

                BooleanQuery booleanQuery = new BooleanQuery();

                //【按文件名稱和描述搜素】搜素的條件

                if(StringUtils.isNotBlank(queryString)){

                    //指定查詢條件在文件名稱和文件描述、所屬單位、圖紙類別的字段上進行搜索

                    QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36,new String[]{"fileName","comment"},Configuration.getAnalyzer());

                    Query query1 = queryParser.parse(queryString);

                    booleanQuery.add(query1,Occur.MUST);

                }

                //【所屬單位】搜素的條件

                if(StringUtils.isNotBlank(projId)){

                    Query query2 = new TermQuery(new Term("projId", projId));

                    booleanQuery.add(query2, Occur.MUST);

                }

                //【圖紙類別】搜素的條件

                if(StringUtils.isNotBlank(belongTo)){

                    Query query3 = new TermQuery(new Term("belongTo", belongTo));

                    booleanQuery.add(query3, Occur.MUST);

                }

                //返回前100條數據

                TopDocs topDocs = indexSearcher.search(booleanQuery, 100);

                //返回結果集

                ScoreDoc [] scoreDocs = topDocs.scoreDocs;

                /**設置高亮效果 begin*/

                Formatter formatter = new SimpleHTMLFormatter("<font color='red'>","</font>");

                Scorer scorer = new QueryScorer(booleanQuery);

                Highlighter highlighter = new Highlighter(formatter,scorer);

                //摘要大小(設置大點,最比如文件名大,由於文件名最好不要截取)

                int fragmentSize = 50;

                Fragmenter fragmenter = new SimpleFragmenter(fragmentSize);

                highlighter.setTextFragmenter(fragmenter);

                /**設置高亮效果 end*/

                if(scoreDocs!=null && scoreDocs.length>0){

                    for(int i=0;i<scoreDocs.length;i++){

                        ScoreDoc scoreDoc = scoreDocs[i];

                        //使用內部唯一編號,獲取對應的數據,編號從0開始

                        Document document = indexSearcher.doc(scoreDoc.doc);

                        /**獲取高亮效果begin*/

                        /**返回文件名的高亮效果*/

                       String fileNameText = highlighter.getBestFragment(Configuration.getAnalyzer(), "fileName", document.get("fileName"));

                        //沒有高亮的效果

                        if(fileNameText==null){

                            fileNameText = document.get("fileName");

                            if(fileNameText!=null && fileNameText.length()>fragmentSize){

                                fileNameText = fileNameText.substring(0, fragmentSize);

                            }

                        }

                        document.getField("fileName").setValue(fileNameText);

                        /**返回文件描述的高亮效果*/

                        String commentText = highlighter.getBestFragment(Configuration.getAnalyzer(), "comment", document.get("comment"));

                        //沒有高亮的效果

                        if(commentText==null){

                            commentText = document.get("comment");

                            if(commentText!=null && commentText.length()>fragmentSize){

                                commentText = commentText.substring(0, fragmentSize);

                            }

                        }

                        document.getField("comment").setValue(commentText);

                        /**獲取高亮效果end*/

                        //將Document轉換成ElecFileUpload

                        ElecFileUpload elecFileUpload = FileUploadDocument.documentToFileUpload(document);

                        list.add(elecFileUpload);

                    }

                }

            } catch (Exception e) {

                throw new RuntimeException();

            }

           

            return list;

        }

    }

     

     

    查看索引庫中的數據,使用lukeall-3.5.0.jar:

  • java -jar lukeall-3.5.0.jar
相關文章
相關標籤/搜索