Lucene是一個全文檢索工具,Lucene的出現能夠對關鍵詞和部分數據創建索引,優化查詢效率
這裏需要引入Lucene的jar(version 3.5)和JUnit的jar(version 4.0)
學習Lucene最重要的是以下幾個部分
索引部分學習
分詞器測試
搜索部分優化
v1.下面先了解索引部分
v2.創建索引
首先創建一個IndexUtil類
//建立索引
public void index() {對象
IndexWriter writer=null;
try {
// 1.建立Directory 將索引創建在什麼地方(是內存中仍是硬盤),這裏保存到硬盤上
//Directory directory=new RAMDirectory();//創建在內存中
Directory directroy=FSDirectory.open(new File("F:/lunece"));
// 2.建立IndexWriter 寫入索引
IndexWriterConfig iwc=new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
writer=new IndexWriter(directroy,iwc);
// 3.建立Document對象 至關於數據庫中的表,或者硬盤的某個文件
Document doc=null;
File f=new File("F:/lunecetest1");
for(File file:f.listFiles()){
doc=new Document();
// 4.爲Document添加Filed 至關於表中的字段,爲那些字段創建索引
doc.add(new Field("content", new FileReader(file)));
/**
* Field.Store.YES或者NO(存儲域選項)
設置爲YES表示或把這個域中的內容徹底存儲到文件中,方便進行文本的還原
設置爲NO表示把這個域的內容不存儲到文件中,可是能夠被索引,此時內容沒法徹底還原(doc.get)
*/
doc.add(new Field("filename", file.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED));
/***
* Field.Index(索引選項)
Index.ANALYZED:進行分詞和索引,適用於標題、內容等
Index.NOT_ANALYZED:進行索引,可是不進行分詞,若是身份證號,姓名,ID等,適用於精確搜索
Index.ANALYZED_NOT_NORMS:進行分詞可是不存儲norms信息,這個norms中包括了建立索引的時間和權值等信息
Index.NOT_ANALYZED_NOT_NORMS:即不進行分詞也不存儲norms信息
*/
doc.add(new Field("path", file.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
// 5.經過IndexWriter添加文檔到索引中
writer.addDocument(doc);
}
} catch (CorruptIndexException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (LockObtainFailedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally {
try {
if(writer!=null)writer.close();//6.這裏要關閉writer,否則寫入索引可能會有問題
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
v3.搜索索引
//搜索索引
/**
 * Searches the on-disk index in {@code f:/lunece} for documents whose
 * {@code content} field matches the query string {@code "java"} and prints
 * {@code filename[path]} for up to 10 hits.
 *
 * <p>Failures are reported with {@code printStackTrace()} and are not rethrown.
 */
public void search() {
    IndexReader reader = null;
    try {
        // 1. Open the index directory.
        Directory directory = FSDirectory.open(new File("f:/lunece"));
        // 2. Read the index.
        reader = IndexReader.open(directory);
        // 3. Create an IndexSearcher on top of the reader.
        IndexSearcher searcher = new IndexSearcher(reader);
        // 4. Build the Query. The parser's second argument is the default field to search;
        //    the analyzer is the same StandardAnalyzer that is used to parse the query text.
        QueryParser parse = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));
        // Matches documents whose default field contains "java".
        Query query = parse.parse("java");
        // 5. Run the search and get the top 10 hits.
        TopDocs tds = searcher.search(query, 10);
        // 6. Get the ScoreDoc array from the TopDocs.
        ScoreDoc[] sds = tds.scoreDocs;
        for (ScoreDoc sd : sds) {
            // 7. Load the concrete Document through the searcher and the hit's doc id.
            Document d = searcher.doc(sd.doc);
            // 8. Read the stored values needed from the Document.
            System.out.println(d.get("filename")+"["+d.get("path")+"]");
        }
    } catch (Exception e) {
        // Previously swallowed silently; report it the same way the other methods do.
        e.printStackTrace();
    } finally {
        // Close the reader on every path so it is not leaked when the search fails.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
//查詢索引
/**
 * Prints document statistics of the index held in the {@code directory} field:
 * {@code numDocs}, {@code maxDoc} and {@code numDeletedDocs} as reported by the reader.
 *
 * <p>I/O failures are reported with {@code printStackTrace()} and are not rethrown.
 */
public void query() {
    IndexReader reader = null;
    try {
        reader = IndexReader.open(directory);
        // The reader gives cheap access to the document counts.
        System.out.println("numDocs:"+reader.numDocs());
        System.out.println("maxDocs:"+reader.maxDoc());
        System.out.println("deleteDocs:"+reader.numDeletedDocs());
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close in finally so the reader is released even if one of the calls above throws.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
//刪除索引
例如我有如下數據
// Sample data used to build the index. The arrays are parallel: element i of each
// array describes document i.
private String[] ids = {"1","2","3","4","5","6"};
private String[] emails = {"aa@itat.org","bb@itat.org","cc@cc.org","dd@sina.org","ee@zttc.edu","ff@itat.org"};
private String[] contents = {
    "welcome to visited the space,I like book",
    "hello boy, I like pingpeng ball",
    "my name is cc I like game",
    "I like football",
    "I like football and I like basketball too",
    "I like movie and swim"
};
// Not populated here; presumably assigned elsewhere before indexing - TODO confirm.
private Date[] dates = null;
private int[] attachs = {2,3,1,4,5,5};
private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};
// In-memory index used by the methods that reference this field.
// (The original initializer assigned the field twice: "directory =directory = ...".)
private Directory directory = new RAMDirectory();
/**
 * Rebuilds the index in the {@code directory} field from the sample arrays.
 *
 * <p>All existing documents are removed first. Each document gets the fields
 * {@code id}, {@code email} (added twice: the sample address and a generated
 * {@code test<i>@test.com}), {@code content} (indexed but not stored),
 * {@code name}, the numeric {@code attach} and, when available, the numeric
 * {@code date}. The document boost is looked up in {@code scores} by the
 * e-mail domain and falls back to {@code 0.5f}.
 *
 * <p>I/O failures are reported with {@code printStackTrace()} and are not rethrown.
 */
public void index() {
    IndexWriter writer = null;
    try {
        writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
        // Start from an empty index so repeated runs do not accumulate duplicates.
        writer.deleteAll();
        for (int i = 0; i < ids.length; i++) {
            Document doc = new Document();
            doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
            doc.add(new Field("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));
            doc.add(new Field("email","test"+i+"@test.com",Field.Store.YES,Field.Index.NOT_ANALYZED));
            doc.add(new Field("content",contents[i],Field.Store.NO,Field.Index.ANALYZED));
            doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
            // Store a number.
            doc.add(new NumericField("attach",Field.Store.YES,true).setIntValue(attachs[i]));
            // Store a date as its epoch-millisecond value. The dates field is declared as
            // null, so guard against it being unset or too short instead of failing with a
            // NullPointerException right after deleteAll() has emptied the index.
            if (dates != null && i < dates.length && dates[i] != null) {
                doc.add(new NumericField("date",Field.Store.YES,true).setLongValue(dates[i].getTime()));
            }
            // Everything after the last '@' is the e-mail domain, used as the boost key.
            String et = emails[i].substring(emails[i].lastIndexOf("@")+1);
            System.out.println(et);
            if (scores.containsKey(et)) {
                doc.setBoost(scores.get(et));
            } else {
                doc.setBoost(0.5f);
            }
            writer.addDocument(doc);
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
/**
 * Deletes every document whose {@code id} term equals {@code "1"} from the
 * index in the {@code directory} field and commits the change.
 *
 * <p>I/O failures are reported with {@code printStackTrace()} and are not rethrown.
 */
public void delete() {
    IndexWriter indexWriter = null;
    try {
        IndexWriterConfig config =
            new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
        indexWriter = new IndexWriter(directory, config);
        // deleteDocuments accepts either a Query or a Term; a Term is an exact-match value.
        // Per the original author's note, documents deleted this way are not purged
        // immediately: they are kept in a kind of "recycle bin" and can be restored.
        Term idTerm = new Term("id","1");
        indexWriter.deleteDocuments(idTerm);
        indexWriter.commit();
    } catch (IOException e) {
        // CorruptIndexException and LockObtainFailedException are IOException subclasses,
        // so this single handler covers the same cases as the separate ones did.
        e.printStackTrace();
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
然後創建一個TestLunece測試類
/** Runs {@code IndexUtil.index()} on a fresh {@code IndexUtil} instance. */
@Test
public void testIndex() {
    new IndexUtil().index();
}
/** Runs {@code IndexUtil.delete()} on a fresh {@code IndexUtil} instance. */
@Test
public void testDelete() {
    new IndexUtil().delete();
}
//恢復索引
/**
 * Restores all documents currently marked as deleted in the index held in the
 * {@code directory} field.
 *
 * <p>I/O failures are reported with {@code printStackTrace()} and are not rethrown.
 */
public void undelete() {
    // Restoring is done through an IndexReader.
    IndexReader reader = null;
    try {
        // The reader must be opened with readOnly = false to be allowed to undelete.
        reader = IndexReader.open(directory,false);
        reader.undeleteAll();
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (StaleReaderException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close in finally so the writable reader is released even when undeleteAll() fails.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
v4.搜索部分
創建SearcherUtil