其中:
(1)索引庫操作原理
(2)索引庫中存放數據原理
數據庫與索引庫中存放相同的數據,可以使用數據庫中存放的ID來表示和區分同一條數據。
- 數據庫中用來存放數據
- 索引庫中用來查詢、檢索
檢索庫支持查詢檢索多種方式,
特點:
1:由於是索引查詢(通過索引查詢數據),檢索速度快,搜索的結果更加準確
2:生成文本摘要,摘要截取搜索的文字出現最多的地方
3:顯示查詢的文字高亮
4:分詞查詢等
(1)導入以下3個配置文件,放置到項目的資源路徑下(類路徑)
IKAnalyzer.cfg.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
<comment>IK Analyzer 擴展配置</comment>
<!-- Users can configure their own extension dictionaries here -->
<entry key="ext_dict">ext.dic;</entry>
<!-- Users can configure their own extension stop-word dictionaries here -->
<entry key="ext_stopwords">stopword.dic;</entry>
</properties>
以及ext.dic(擴展詞庫)和stopword.dic(擴展停用詞庫)
- Configuration.java
/**
 * Holds the Lucene index directory and the analyzer shared by the indexing
 * and search code.
 *
 * <p>Both objects are created once, in the static initializer. If either one
 * cannot be created the class fails to initialize (with the original failure
 * attached as the cause) instead of leaving the getters returning
 * {@code null}.
 */
public class Configuration {

    /** Location of the index on disk. */
    private static final Directory directory;

    /** Analyzer used to tokenize text for indexing and for searching. */
    private static final Analyzer analyzer;

    static {
        try {
            // The index is kept in the indexDir folder on drive D.
            directory = FSDirectory.open(new File("D:/indexDir/"));
            // Dictionary-based word segmentation.
            analyzer = new IKAnalyzer();
        } catch (Exception e) {
            // Fail fast: a half-initialized configuration would only surface
            // later as an unrelated NullPointerException in the callers.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Returns the shared index directory.
     *
     * @return the directory opened in the static initializer, never {@code null}
     */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * Returns the shared analyzer.
     *
     * @return the analyzer created in the static initializer, never {@code null}
     */
    public static Analyzer getAnalyzer() {
        return analyzer;
    }
}
分析:
/**索引庫目錄爲D盤indexDir*/
directory = FSDirectory.open(new File("D:/indexDir/"));
表示指定索引庫的位置,在D盤的indexDir文件夾下,索引庫存放的數據將採用二進制的方式存儲
/**詞庫分詞*/
analyzer = new IKAnalyzer();
表示分詞器,對索引庫新增數據和對索引庫查詢數據,都需要對操作的數據進行分詞,將分詞後的數據存放,查詢的時候也要通過你的條件,分詞查詢和檢索
- FileUploadDocument.java
/**
 * Converts between {@code ElecFileUpload} entities and Lucene
 * {@code Document} objects.
 *
 * <p>Each entity maps to one document with five stored fields:
 * {@code seqId}, {@code fileName}, {@code comment}, {@code projId} and
 * {@code belongTo}.
 */
public class FileUploadDocument {

    /**
     * Converts an {@code ElecFileUpload} into a Lucene {@code Document}.
     *
     * <p>{@code fileName} and {@code comment} are analyzed (tokenized);
     * {@code seqId}, {@code projId} and {@code belongTo} are indexed as single
     * un-analyzed terms. A {@code null} string property is stored as the empty
     * string, because {@code Field} rejects {@code null} values.
     *
     * @param elecFileUpload the entity to convert
     * @return a new document holding the entity's values
     */
    public static Document FileUploadToDocument(ElecFileUpload elecFileUpload){
        Document document = new Document();
        // The numeric ID is stored in Lucene's prefix-coded string form.
        String seqId = NumericUtils.intToPrefixCoded(elecFileUpload.getSeqId());
        // Primary key
        document.add(new Field("seqId", seqId, Store.YES, Index.NOT_ANALYZED));
        // File name
        document.add(new Field("fileName", nullToEmpty(elecFileUpload.getFileName()), Store.YES, Index.ANALYZED));
        // File description
        document.add(new Field("comment", nullToEmpty(elecFileUpload.getComment()), Store.YES, Index.ANALYZED));
        // Owning unit
        document.add(new Field("projId", nullToEmpty(elecFileUpload.getProjId()), Store.YES, Index.NOT_ANALYZED));
        // Drawing category
        document.add(new Field("belongTo", nullToEmpty(elecFileUpload.getBelongTo()), Store.YES, Index.NOT_ANALYZED));
        return document;
    }

    /**
     * Converts a Lucene {@code Document} back into an {@code ElecFileUpload}.
     *
     * @param document a document carrying the fields written by
     *                 {@link #FileUploadToDocument(ElecFileUpload)}
     * @return a new entity populated from the document's stored values
     */
    public static ElecFileUpload documentToFileUpload(Document document){
        ElecFileUpload elecFileUpload = new ElecFileUpload();
        // Decode the prefix-coded ID back to its integer value.
        Integer seqId = NumericUtils.prefixCodedToInt(document.get("seqId"));
        // Primary key
        elecFileUpload.setSeqId(seqId);
        // File name
        elecFileUpload.setFileName(document.get("fileName"));
        // File description
        elecFileUpload.setComment(document.get("comment"));
        // Owning unit
        elecFileUpload.setProjId(document.get("projId"));
        // Drawing category
        elecFileUpload.setBelongTo(document.get("belongTo"));
        return elecFileUpload;
    }

    /** Returns {@code value}, or the empty string when {@code value} is {@code null}. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }
}
這裏要注意:索引庫中存放的數據要轉換成Document對象(每條數據就是一個Document對象),並向Document對象中存放Field對象(每條數據對應的字段,例如主鍵ID、所屬單位、圖紙類別、文件名稱、備註等),將每個字段中的值都存放到Field對象中
- LuceneUtils.java
/**
 * Adds, deletes and searches {@code ElecFileUpload} entries in the Lucene
 * index described by {@code Configuration}.
 *
 * <p>Every operation opens its own writer or reader and always closes it
 * again, also when the operation fails. All failures are reported as a
 * {@code RuntimeException} that carries the original exception as its cause.
 */
public class LuceneUtils {

    /** Maximum number of hits returned by a search. */
    private static final int MAX_HITS = 100;

    /**
     * Size of a highlighted fragment, also used to truncate un-highlighted
     * text. Ideally larger than a typical file name so that file names are
     * not cut off.
     */
    private static final int FRAGMENT_SIZE = 50;

    /**
     * Adds one entry to the index.
     *
     * @param elecFileUpload the entity to index
     * @throws RuntimeException if the index cannot be opened or written
     */
    public void saveFileUpload(ElecFileUpload elecFileUpload) {
        Document document = FileUploadDocument.FileUploadToDocument(elecFileUpload);
        try {
            IndexWriter indexWriter = openIndexWriter();
            try {
                indexWriter.addDocument(document);
            } finally {
                // Always close: an unclosed writer keeps holding the index write lock.
                indexWriter.close();
            }
        } catch (Exception e) {
            throw new RuntimeException("Failed to add the document to the index", e);
        }
    }

    /**
     * Deletes from the index every document whose {@code seqId} term equals
     * the given ID.
     *
     * @param seqId the primary key of the entry to remove
     * @throws RuntimeException if the index cannot be opened or written
     */
    public void deleteFileUploadByID(Integer seqId) {
        // A term is the smallest unit of the index; this one is the equivalent of "seqId = <id>".
        String id = NumericUtils.intToPrefixCoded(seqId);
        Term term = new Term("seqId", id);
        try {
            IndexWriter indexWriter = openIndexWriter();
            try {
                indexWriter.deleteDocuments(term);
            } finally {
                // Always close: an unclosed writer keeps holding the index write lock.
                indexWriter.close();
            }
        } catch (Exception e) {
            // Reported the same way as the other operations instead of being swallowed.
            throw new RuntimeException("Failed to delete seqId " + seqId + " from the index", e);
        }
    }

    /**
     * Searches the index and returns the matching entries with the matched
     * words in {@code fileName} and {@code comment} wrapped in
     * {@code <font color='red'>} tags.
     *
     * <p>Every non-blank criterion is added as a mandatory clause. When all
     * three criteria are blank the Boolean query has no clauses at all
     * (an empty Boolean query is expected to match no documents).
     *
     * <p>NOTE(review): the stored text is placed in the returned markup
     * without HTML escaping; confirm that callers only render trusted content.
     *
     * @param queryString free-text query parsed against {@code fileName} and
     *                    {@code comment}; ignored when blank
     * @param projId      exact value required in {@code projId}; ignored when blank
     * @param belongTo    exact value required in {@code belongTo}; ignored when blank
     * @return at most 100 matching entries, never {@code null}
     * @throws RuntimeException if the query cannot be parsed or the index
     *                          cannot be read
     */
    public List<ElecFileUpload> searchFileUploadByCondition(String queryString,String projId,String belongTo) {
        List<ElecFileUpload> list = new ArrayList<ElecFileUpload>();
        try {
            IndexReader indexReader = IndexReader.open(Configuration.getDirectory());
            try {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
                try {
                    Query query = buildQuery(queryString, projId, belongTo);
                    // Return the top 100 hits.
                    TopDocs topDocs = indexSearcher.search(query, MAX_HITS);
                    Highlighter highlighter = createHighlighter(query);
                    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                        // Load the stored document through its internal number.
                        Document document = indexSearcher.doc(scoreDoc.doc);
                        applyHighlight(highlighter, document, "fileName");
                        applyHighlight(highlighter, document, "comment");
                        list.add(FileUploadDocument.documentToFileUpload(document));
                    }
                } finally {
                    indexSearcher.close();
                }
            } finally {
                // A searcher built from a reader does not close that reader itself.
                indexReader.close();
            }
        } catch (Exception e) {
            throw new RuntimeException("Failed to search the index", e);
        }
        return list;
    }

    /** Opens a writer on the configured directory using the configured analyzer. */
    private IndexWriter openIndexWriter() throws Exception {
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_36, Configuration.getAnalyzer());
        return new IndexWriter(Configuration.getDirectory(), indexWriterConfig);
    }

    /** Builds the Boolean query in which every non-blank criterion is a mandatory clause. */
    private Query buildQuery(String queryString, String projId, String belongTo) throws Exception {
        BooleanQuery booleanQuery = new BooleanQuery();
        // Free-text criterion: parsed against the file name and the description only.
        if (StringUtils.isNotBlank(queryString)) {
            QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36, new String[]{"fileName","comment"}, Configuration.getAnalyzer());
            booleanQuery.add(queryParser.parse(queryString), Occur.MUST);
        }
        // Owning unit: exact term match.
        if (StringUtils.isNotBlank(projId)) {
            booleanQuery.add(new TermQuery(new Term("projId", projId)), Occur.MUST);
        }
        // Drawing category: exact term match.
        if (StringUtils.isNotBlank(belongTo)) {
            booleanQuery.add(new TermQuery(new Term("belongTo", belongTo)), Occur.MUST);
        }
        return booleanQuery;
    }

    /** Creates the highlighter that wraps terms matching the query in red font tags. */
    private Highlighter createHighlighter(Query query) {
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>","</font>");
        Scorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_SIZE);
        highlighter.setTextFragmenter(fragmenter);
        return highlighter;
    }

    /**
     * Replaces the stored value of one field with its best highlighted
     * fragment, or with the value truncated to the fragment size when nothing
     * in it matches. A document without the field is left untouched.
     */
    private void applyHighlight(Highlighter highlighter, Document document, String fieldName) throws Exception {
        String stored = document.get(fieldName);
        if (stored == null) {
            // Nothing stored for this field, so there is nothing to highlight or truncate.
            return;
        }
        String text = highlighter.getBestFragment(Configuration.getAnalyzer(), fieldName, stored);
        if (text == null) {
            // No highlighted fragment: fall back to the plain, possibly truncated, text.
            text = stored.length() > FRAGMENT_SIZE ? stored.substring(0, FRAGMENT_SIZE) : stored;
        }
        document.getField(fieldName).setValue(text);
    }
}
查看索引庫中的數據,使用lukeall-3.5.0.jar: