Lucene學習記錄
———三種實現,超全超細緻
下載lucene3.6.0.zip http://download.csdn.net/detail/leilovegege/6800405 ,解壓,將裏邊的lucene-core-3.6.0.jar等包拷貝到工程lib中。還須要中文分詞器IKAnalyzer3.2.8.jar和Oracle的JDBC驅動ojdbc14.jar。
至此環境搭配完成。
下面開始實現,只用java類,沒有鏈接web頁面,因此在測試時執行java文件,在控制檯進行測試。
工程源文件lucene36: http://download.csdn.net/detail/leilovegege/6804669
Test1檢索絕對路徑上的文件
package test1;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene index under {@code .\index\test1} from the regular files found
 * directly inside a fixed source folder.
 *
 * <p>Each file becomes one document with three fields: {@code path} (stored, not
 * indexed), {@code body} (stored, analyzed, with term vectors) and
 * {@code modified} (the file's own last-modified timestamp as a numeric field).
 */
public class TestFileIndexer {

    /**
     * Indexes the source folder and prints how long the indexing took.
     *
     * @param args ignored
     * @throws Exception if the source folder cannot be listed, a file cannot be
     *         read, or the index cannot be written
     */
    public static void main(String[] args) throws Exception {
        /* Folder whose files are indexed (an absolute path on the E: drive). */
        File fileDir = new File("E:\\Documents and Settings\\Administrator\\Workspaces\\MyEclipse 8.6\\lucene36\\source");
        /* Folder that receives the index files. */
        File indexDir = new File(".\\index\\test1");

        // listFiles() returns null when the path does not exist or is not a directory;
        // check before the index is opened so a bad source path leaves it untouched.
        File[] textFiles = fileDir.listFiles();
        if (textFiles == null) {
            throw new IOException("Cannot list files in " + fileDir.getPath());
        }

        Directory dir = FSDirectory.open(indexDir); // keep the index on disk
        try {
            Analyzer lucenAnalyzer = new StandardAnalyzer(Version.LUCENE_36);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, lucenAnalyzer);
            // CREATE starts a brand-new index and replaces an existing one
            // (it does NOT append; that would be CREATE_OR_APPEND).
            iwc.setOpenMode(OpenMode.CREATE);
            IndexWriter indexWriter = new IndexWriter(dir, iwc);

            long startTime = System.currentTimeMillis();
            boolean success = false;
            try {
                for (int i = 0; i < textFiles.length; i++) {
                    File textFile = textFiles[i];
                    // Sub-directories cannot be opened as a stream; skip them.
                    if (!textFile.isFile()) {
                        continue;
                    }
                    System.out.println(":;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;");
                    System.out.println("File" + textFile.getCanonicalPath() + "正在被索引...");
                    String temp = FileReaderAll(textFile.getCanonicalPath(), "GBK");
                    System.out.println(temp);

                    Document document = new Document();
                    document.add(new Field("path", textFile.getPath(), Field.Store.YES, Field.Index.NO));
                    document.add(new Field("body", temp, Field.Store.YES, Field.Index.ANALYZED,
                            Field.TermVector.WITH_POSITIONS_OFFSETS));
                    // The field key is "modified"; it holds this file's timestamp,
                    // not the timestamp of the containing folder.
                    NumericField modifiField = new NumericField("modified");
                    modifiField.setLongValue(textFile.lastModified());
                    document.add(modifiField);
                    indexWriter.addDocument(document);
                }
                success = true;
            } finally {
                // Commit only a complete run; otherwise discard the partial index
                // and release the write lock.
                if (success) {
                    indexWriter.close();
                } else {
                    indexWriter.rollback();
                }
            }

            // Report the time spent indexing.
            long endTime = System.currentTimeMillis();
            System.out.println("花了" + (endTime - startTime) + "毫秒把文檔添加到索引裏面去" + fileDir.getPath());
        } finally {
            dir.close();
        }
    }

    /**
     * Reads a whole text file into a string.
     *
     * <p>Every line is followed by a single {@code '\n'} so that the last word of
     * one line and the first word of the next are not fused into one token.
     *
     * @param FileName path of the file to read
     * @param charset name of the character set used to decode the file
     * @return the decoded file content
     * @throws IOException if the file cannot be opened or read, or the charset
     *         name is not supported
     */
    public static String FileReaderAll(String FileName, String charset) throws IOException {
        FileInputStream in = new FileInputStream(FileName);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset));
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append('\n');
            }
            return content.toString();
        } finally {
            // Closing the underlying stream also covers the case where the
            // reader could not be constructed (unsupported charset).
            in.close();
        }
    }
}
package test1;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Scanner;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Runs a fixed keyword query against the {@code body} field of the index stored
 * in {@code ./index/test1} and prints the stored body and path of the top hits.
 */
public class TestQuery {

    /**
     * Parses the query, searches the index and prints at most ten results.
     *
     * @param args ignored
     * @throws ParseException if the query string cannot be parsed
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws ParseException, IOException {
        String index = "./index/test1"; // location of the index to search
        String queryString = "測試";

        // Parse first and let a ParseException propagate: searching with a null
        // query (the result of swallowing the exception) would only fail later
        // with a NullPointerException.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        QueryParser qp = new QueryParser(Version.LUCENE_36, "body", analyzer);
        Query query = qp.parse(queryString);

        FSDirectory directory = FSDirectory.open(new File(index));
        try {
            if (!IndexReader.indexExists(directory)) {
                System.out.println("沒查找到索引");
                return;
            }
            IndexReader reader = IndexReader.open(directory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    // Only the ten best-scoring hits are fetched.
                    TopDocs results = searcher.search(query, 10);
                    ScoreDoc[] hits = results.scoreDocs;
                    if (hits.length > 0) {
                        System.out.println("找到" + hits.length + "條結果");
                        for (int i = 0; i < hits.length; i++) {
                            Document document = searcher.doc(hits[i].doc);
                            String body = document.get("body");
                            String path = document.get("path");
                            System.out.print(body + " ");
                            System.out.println(path);
                        }
                    } else {
                        System.out.println("沒查到結果");
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            directory.close();
        }
    }
}
Test2檢索相對路徑上的文件
package test2;
import java.io.*;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
/**
 * Indexes the lines of a text file with the IK Chinese analyzer and searches
 * that index.
 *
 * <p>Running {@link #main(String[])} (re)builds the index; {@link #search(String)}
 * queries it and prints the hits to standard output.
 */
public class MyLucene {
    /** Index location: the {@code index\test2} folder below the working directory. */
    private static final File INDEX_PATH = new File(".\\index\\test2");
    /** Data source: {@code luceneDataSource\test.txt} below the working directory. */
    private static final String filePath = ".\\luceneDataSource\\test.txt";
    /** Chinese word-segmenting analyzer used when writing the index. */
    private static final Analyzer ANALYZER = new IKAnalyzer();

    /**
     * Builds the index from the data source file, one document per distinct line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        HashMap<String, String> words = readFile(new File(filePath));
        if (words == null) {
            System.out.println("文件讀取錯誤");
            return;
        }
        FSDirectory directory = null;
        try {
            directory = FSDirectory.open(INDEX_PATH);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, ANALYZER);
            // CREATE replaces any existing index, so re-running does not
            // accumulate duplicate documents.
            iwc.setOpenMode(OpenMode.CREATE);
            IndexWriter writer = new IndexWriter(directory, iwc);
            boolean success = false;
            try {
                for (String key : words.keySet()) {
                    Document doc = new Document();
                    doc.add(new Field("index", key, Field.Store.YES, Field.Index.ANALYZED,
                            Field.TermVector.WITH_POSITIONS_OFFSETS));
                    doc.add(new Field("contents", words.get(key), Field.Store.YES, Field.Index.NO));
                    writer.addDocument(doc);
                }
                success = true;
            } finally {
                // The writer must be closed for the index to be committed; on
                // failure roll back so the write lock is released and no
                // half-written index is published.
                if (success) {
                    writer.close();
                } else {
                    writer.rollback();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Tells whether the index has not been built yet.
     *
     * @return {@code true} if the index folder is missing, is not a directory or
     *         contains no files; {@code false} otherwise
     */
    public boolean noIndex() {
        // listFiles() returns null for a missing folder, which also means "no index".
        File[] indexs = INDEX_PATH.listFiles();
        return indexs == null || indexs.length == 0;
    }

    /**
     * Reads a GBK-encoded text file and echoes every line to standard output.
     *
     * @param file the file to read
     * @return a map whose keys and values are both the trimmed lines of the file,
     *         or {@code null} if the file could not be read
     */
    public static HashMap<String, String> readFile(File file) {
        HashMap<String, String> wordsMap = new HashMap<String, String>();
        InputStream in = null;
        try {
            in = new FileInputStream(file);
            BufferedReader br = new BufferedReader(new InputStreamReader(in, "GBK"));
            String line;
            while ((line = br.readLine()) != null) {
                System.out.println(line);
                String word = line.trim();
                wordsMap.put(word, word);
            }
            return wordsMap;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            // No return statement here: returning from finally would replace the
            // successfully read map (or hide the original failure).
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches the {@code index} field and prints up to 100 hits.
     *
     * <p>Failures are reported on the console; nothing is thrown to the caller.
     *
     * @param queryStr the text to search for
     */
    public void search(String queryStr) {
        FSDirectory directory = null;
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            directory = FSDirectory.open(INDEX_PATH);
            reader = IndexReader.open(directory);
            searcher = new IndexSearcher(reader);
            Query query = IKQueryParser.parse("index", queryStr);
            TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            if (hits.length > 0) {
                System.out.println("檢索詞:" + queryStr + "\t共找到 " + hits.length + "條記錄");
                for (int i = 0; i < hits.length; i++) {
                    Document result = searcher.doc(hits[i].doc);
                    System.out.println((i + 1) + ")" + "\n index:" + result.get("index")
                            + "\n contents:" + result.get("contents"));
                }
            } else {
                System.out.println("未找到結果");
            }
        } catch (Exception e) {
            System.out.println("Exception");
            // Keep the cause visible; the bare message above says nothing about it.
            e.printStackTrace();
        } finally {
            // Release the index files in reverse order of acquisition.
            try {
                if (searcher != null) {
                    searcher.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            try {
                if (reader != null) {
                    reader.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            try {
                if (directory != null) {
                    directory.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
package test2;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
/**
 * Console driver for {@code MyLucene}: reads one line from standard input and
 * searches the index for it.
 */
public class TestMyLucene {

    /**
     * Searches the index for the keyword typed on the console, provided the
     * index has already been built.
     *
     * @param args ignored
     * @throws IOException if standard input cannot be read
     */
    public static void main(String[] args) throws IOException {
        MyLucene myLucene = new MyLucene();
        // Nothing can be searched until the index has been built.
        if (myLucene.noIndex()) {
            System.out.println("索引庫尚未建立");
            return;
        }
        BufferedReader reader1 = new BufferedReader(new InputStreamReader(System.in));
        String queryString = reader1.readLine(); // the search keyword
        // readLine() returns null when standard input is already at end of stream.
        if (queryString == null) {
            System.out.println("沒有輸入搜索關鍵字");
            return;
        }
        myLucene.search(queryString);
    }
}
Test3檢索數據庫中的數據(本例爲oracle)
package test3;
import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Builds the Lucene index in {@code .\index\test3} from the rows of the
 * {@code t_newsitem} database table.
 */
public class IndexCreateUtill {
    /** News rows loaded by the most recent successful database read. */
    private List<NewsItem> list;

    /**
     * Reads every news row from the database and writes one Lucene document per
     * row, replacing any existing index.
     *
     * <p>The database is read before the index is opened, so a database failure
     * (reported by printing the stack trace) leaves an existing index untouched.
     *
     * @throws IOException if the index cannot be written
     * @throws ClassNotFoundException declared for callers; not thrown by this
     *         implementation
     */
    public void createIndexForMynews() throws IOException, ClassNotFoundException {
        try {
            list = loadNewsItems();
        } catch (SQLException e) {
            e.printStackTrace();
            return;
        }

        // Folder that holds the index.
        File indxeFile = new File(".\\index\\test3");
        Directory directory = FSDirectory.open(indxeFile);
        try {
            // Segment text with the IK analyzer.
            Analyzer analyzer = new IKAnalyzer();
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);
            // CREATE replaces an existing index instead of appending to it.
            indexWriterConfig.setOpenMode(OpenMode.CREATE);
            IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
            boolean success = false;
            try {
                DateFormat dateFormat = new SimpleDateFormat("yyyy年MM月dd日 HH時mm分ss秒");
                for (int i = 0; i < list.size(); i++) {
                    indexWriter.addDocument(toDocument(list.get(i), dateFormat));
                }
                success = true;
            } finally {
                // Commit a complete run; otherwise discard it and release the lock.
                if (success) {
                    indexWriter.close();
                } else {
                    indexWriter.rollback();
                }
            }
        } finally {
            directory.close();
        }
    }

    /** Loads all rows of {@code t_newsitem}, always releasing the JDBC resources. */
    private static List<NewsItem> loadNewsItems() throws SQLException {
        Connection conn = Utils.getConnection();
        if (conn == null) {
            throw new SQLException("Could not open a database connection");
        }
        Statement stmt = null;
        ResultSet rs = null;
        try {
            stmt = conn.createStatement();
            rs = stmt.executeQuery("select * from t_newsitem");
            List<NewsItem> items = new ArrayList<NewsItem>();
            while (rs.next()) {
                NewsItem newsItem = new NewsItem();
                newsItem.setId(rs.getInt("id"));
                newsItem.setNewsTitle(rs.getString("newsTitle"));
                newsItem.setNewsContent(rs.getString("newsContent"));
                newsItem.setPublishTime(rs.getTimestamp("publishTime"));
                // NOTE(review): column is read as "resourcer" while the bean
                // property is "resource" - confirm the real column name.
                newsItem.setResource(rs.getString("resourcer"));
                newsItem.setT_newsType_id(rs.getInt("t_newsType_id"));
                newsItem.setEditor(rs.getString("editor"));
                items.add(newsItem);
            }
            return items;
        } finally {
            Utils.closeAll(rs, stmt, conn);
        }
    }

    /** Converts one news row into a Lucene document (title, content, date, id). */
    private static Document toDocument(NewsItem item, DateFormat dateFormat) {
        Document doc = new Document();
        // Field values must not be null, so NULL columns are indexed as "".
        String newsTitle = nullToEmpty(item.getNewsTitle());
        String newsContent = nullToEmpty(item.getNewsContent());
        String publishDate = item.getPublishTime() == null ? "" : dateFormat.format(item.getPublishTime());
        String id = item.getId() + "";

        // Title and content are searched and highlighted: analyzed, with
        // positions and offsets in the term vector.
        doc.add(new Field("title", newsTitle, Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        doc.add(new Field("content", newsContent, Field.Store.YES, Field.Index.ANALYZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        // The date is meant for sorting, which needs exactly one untokenized
        // term per document; it is not highlighted, so no term vector.
        doc.add(new Field("date", publishDate, Field.Store.YES, Field.Index.NOT_ANALYZED,
                Field.TermVector.NO));
        // The primary key is only stored: not indexed, not highlighted.
        doc.add(new Field("id", id, Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
        return doc;
    }

    /** Returns the given string, or the empty string when it is null. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /**
     * Rebuilds the news index.
     *
     * @param args ignored
     * @throws Exception if the index cannot be written
     */
    public static void main(String[] args) throws Exception {
        IndexCreateUtill util = new IndexCreateUtill();
        util.createIndexForMynews();
    }
}
package test3;
import java.io.Serializable;
import java.util.Date;
/**
 * Serializable bean describing one news record: id, title, content, publish
 * time, resource, news-type id and editor.
 */
public class NewsItem implements Serializable {

    private static final long serialVersionUID = 1L;

    private Integer id;
    private String newsTitle;
    private String newsContent;
    private Date publishTime;
    private String resource;
    private Integer t_newsType_id;
    private String editor;

    /** Creates an item with every property unset ({@code null}). */
    public NewsItem() {
    }

    /**
     * Creates an item with every property set from the given arguments.
     *
     * @param id value for the id property
     * @param newsTitle value for the title property
     * @param newsContent value for the content property
     * @param publishTime value for the publish-time property (stored as given)
     * @param resource value for the resource property
     * @param t_newsType_id value for the news-type id property
     * @param editor value for the editor property
     */
    public NewsItem(Integer id, String newsTitle, String newsContent,
            Date publishTime, String resource, Integer t_newsType_id, String editor) {
        this.id = id;
        this.newsTitle = newsTitle;
        this.newsContent = newsContent;
        this.publishTime = publishTime;
        this.resource = resource;
        this.t_newsType_id = t_newsType_id;
        this.editor = editor;
    }

    // ---- getters ----

    /** @return the id, possibly {@code null} */
    public Integer getId() {
        return this.id;
    }

    /** @return the title, possibly {@code null} */
    public String getNewsTitle() {
        return this.newsTitle;
    }

    /** @return the content, possibly {@code null} */
    public String getNewsContent() {
        return this.newsContent;
    }

    /** @return the publish time exactly as it was set, possibly {@code null} */
    public Date getPublishTime() {
        return this.publishTime;
    }

    /** @return the resource, possibly {@code null} */
    public String getResource() {
        return this.resource;
    }

    /** @return the news-type id, possibly {@code null} */
    public Integer getT_newsType_id() {
        return this.t_newsType_id;
    }

    /** @return the editor, possibly {@code null} */
    public String getEditor() {
        return this.editor;
    }

    // ---- setters ----

    /** @param id the new id */
    public void setId(Integer id) {
        this.id = id;
    }

    /** @param newsTitle the new title */
    public void setNewsTitle(String newsTitle) {
        this.newsTitle = newsTitle;
    }

    /** @param newsContent the new content */
    public void setNewsContent(String newsContent) {
        this.newsContent = newsContent;
    }

    /** @param publishTime the new publish time (stored as given) */
    public void setPublishTime(Date publishTime) {
        this.publishTime = publishTime;
    }

    /** @param resource the new resource */
    public void setResource(String resource) {
        this.resource = resource;
    }

    /** @param t_newsType_id the new news-type id */
    public void setT_newsType_id(Integer t_newsType_id) {
        this.t_newsType_id = t_newsType_id;
    }

    /** @param editor the new editor */
    public void setEditor(String editor) {
        this.editor = editor;
    }
}
package test3;
import java.io.File;
import java.io.IOException;
import java.util.Scanner;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Reads one keyword from the console, searches the {@code content} field of the
 * news index in {@code .\index\test3} and prints the stored fields of the top
 * hits.
 */
public class TestQuery {

    /**
     * Reads the keyword, runs the query and prints at most ten results.
     *
     * @param args ignored
     * @throws ParseException if the keyword cannot be parsed as a query
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws ParseException, IOException {
        String index = ".\\index\\test3"; // location of the index to search

        Scanner sca = new Scanner(System.in);
        // next() would throw NoSuchElementException on an empty input stream.
        if (!sca.hasNext()) {
            System.out.println("沒查到結果");
            return;
        }
        String queryString = sca.next();
        System.out.print("搜索關鍵詞爲" + queryString + ",");

        // The query must be tokenized the same way as the index: the news index
        // is written with IKAnalyzer, so the query is analyzed with it as well.
        Analyzer analyzer = new IKAnalyzer();
        QueryParser qp = new QueryParser(Version.LUCENE_36, "content", analyzer);
        // A ParseException propagates to the caller instead of being swallowed,
        // which previously left the query null and crashed the search.
        Query query = qp.parse(queryString);

        FSDirectory directory = FSDirectory.open(new File(index));
        try {
            if (!IndexReader.indexExists(directory)) {
                System.out.println("沒查找到索引");
                return;
            }
            IndexReader reader = IndexReader.open(directory);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    // Only the ten best-scoring hits are fetched.
                    TopDocs results = searcher.search(query, 10);
                    ScoreDoc[] hits = results.scoreDocs;
                    if (hits.length > 0) {
                        System.out.println("找到" + hits.length + "條結果");
                        for (int i = 0; i < hits.length; i++) {
                            Document document = searcher.doc(hits[i].doc);
                            System.out.println("標題:" + document.get("title"));
                            System.out.println("內容:" + document.get("content"));
                            System.out.println("日期:" + document.get("date"));
                            System.out.println("ID:" + document.get("id"));
                        }
                    } else {
                        System.out.println("沒查到結果");
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            directory.close();
        }
    }
}
package test3;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
/**
 * Small JDBC helper: opens the Oracle connection used by the indexer and closes
 * JDBC resources without propagating close failures.
 */
public class Utils {

    /**
     * Opens a connection to the Oracle database on localhost.
     *
     * @return an open connection, or {@code null} if the driver class is not on
     *         the class path or the connection attempt fails (the stack trace is
     *         printed in both cases)
     */
    public static Connection getConnection() {
        Connection con = null;
        try {
            Class.forName("oracle.jdbc.driver.OracleDriver");
            // The thin URL has the form jdbc:oracle:thin:@host:port:SID; a blank
            // after the host name is not part of that syntax.
            // NOTE(review): user name and password are hard-coded here - consider
            // moving them to configuration.
            con = DriverManager.getConnection("jdbc:oracle:thin:@localhost:1521:orcl", "hr", "orcl");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return con;
    }

    /**
     * Closes a result set, a statement and a connection, in that order. Any of
     * the arguments may be {@code null}.
     *
     * @param rs result set to close, or {@code null}
     * @param ps statement to close, or {@code null}
     * @param conn connection to close, or {@code null}
     * @throws SQLException declared for callers; the individual close helpers
     *         already catch and print close failures
     */
    public static void closeAll(ResultSet rs, Statement ps, Connection conn) throws SQLException {
        closeResultSet(rs);
        closeStatement(ps);
        closeConnection(conn);
    }

    /**
     * Closes the connection if it is not {@code null}; a failure is printed, not thrown.
     *
     * @param con connection to close, or {@code null}
     */
    public static void closeConnection(Connection con) {
        if (con == null) {
            return;
        }
        try {
            con.close();
        } catch (SQLException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Closes the statement if it is not {@code null}; a failure is printed, not thrown.
     *
     * @param st statement to close, or {@code null}
     */
    public static void closeStatement(Statement st) {
        if (st == null) {
            return;
        }
        try {
            st.close();
        } catch (SQLException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Closes the result set if it is not {@code null}; a failure is printed, not thrown.
     *
     * @param rs result set to close, or {@code null}
     */
    public static void closeResultSet(ResultSet rs) {
        if (rs == null) {
            return;
        }
        try {
            rs.close();
        } catch (SQLException ex) {
            ex.printStackTrace();
        }
    }
}