轉自 http://mybar.iteye.com/blog/1933351
今天看了下Lucene的更新,已經到4.4.0的版本了,而且新的API變化也比較多,對於老版本的Lucene想要升級到最新的版本,不是簡單地更新jar包就可以的。
下面寫了個簡單的小例子,可以看一下。
1. 建立maven工程
在eclipse裏面建立個maven項目,一個簡單的項目即可,下面是對應的pom.xml文件,需要將Lucene的jar包引入進來。
- <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <!-- Minimal POM for the Lucene example project; it only declares coordinates and the Lucene dependencies. -->
- <modelVersion>4.0.0</modelVersion>
- <groupId>com.jacksoft</groupId>
- <artifactId>Lucene-test</artifactId>
- <version>0.0.1-SNAPSHOT</version>
-
- <properties>
- <lucene.version>4.4.0</lucene.version> <!-- Single place to change the Lucene version; all three dependencies below reference it. -->
- </properties>
-
- <dependencies>
- <dependency>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-core</artifactId> <!-- Core indexing and search classes (IndexWriter, IndexSearcher, FSDirectory, ...). -->
- <version>${lucene.version}</version>
- </dependency>
-
- <dependency>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-analyzers-common</artifactId> <!-- Supplies the StandardAnalyzer imported by the example code. -->
- <version>${lucene.version}</version>
- </dependency>
-
- <dependency>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-queryparser</artifactId> <!-- Supplies the classic QueryParser imported by the example code. -->
- <version>${lucene.version}</version>
- </dependency>
-
-
-
- </dependencies>
-
- </project>
這裏只是簡單的添加依賴關係,下載好jar包之後,我在本地建立了3個txt文件,如下:

準備工作就差不多了,下面就開始進行編碼工作。
2. 編碼
由於是多個文件的搜索,這裏建立一個工具類來遞歸目錄,找到這三個txt文件,我將這三個txt文件放在本地:D:\lucene\luceneData目錄中。
- package com.jacksoft.lucene.util;
-
- import java.io.File;
- import java.util.List;
-
/**
 * File-system helper used by the indexing example to collect the files
 * found under a starting location.
 */
public class FileUtils {

    /**
     * Recursively walks {@code f} and appends the absolute path of every
     * non-directory entry to {@code fileList}.
     *
     * <p>If {@code f} itself is not a directory, its own absolute path is
     * appended. Directories whose contents cannot be listed are skipped
     * instead of aborting the whole walk.
     *
     * @param f        starting file or directory; must not be {@code null}
     * @param fileList accumulator that receives the absolute paths; must not
     *                 be {@code null} and must be modifiable
     */
    public static void listFile(File f, List<String> fileList) {
        if (!f.isDirectory()) {
            fileList.add(f.getAbsolutePath());
            return;
        }

        File[] children = f.listFiles();
        if (children == null) {
            // listFiles() returns null on an I/O error or when the directory
            // is not readable; treat that as "nothing to collect here".
            return;
        }

        for (File child : children) {
            listFile(child, fileList);
        }
    }
}
這樣返回的fileList中就包含了我們需要的txt文件,當然這裏還可以設置過濾器來指定後綴名。
接下來就是建立索引和查詢的過程:
- package com.jacksoft.lucene;
-
- import java.io.BufferedReader;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.InputStreamReader;
- import java.util.ArrayList;
- import java.util.List;
-
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.TextField;
- import org.apache.lucene.document.Field.Store;
- import org.apache.lucene.index.DirectoryReader;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriterConfig;
- import org.apache.lucene.index.IndexWriterConfig.OpenMode;
- import org.apache.lucene.queryparser.classic.QueryParser;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TopDocs;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.util.Version;
-
- import com.jacksoft.lucene.util.FileUtils;
-
-
-
-
-
-
-
-
-
-
-
-
- public class LuceneTest {
-
- private static final String QUERY_STR = "四川";
-
- private static final String FILE_TARGET = "D:\\lucene\\luceneData";
-
- private static final String FILE_INDEX = "D:\\lucene\\luceneIndex";
-
-
- public static void main(String[] args) {
- try {
- LuceneTest t = new LuceneTest();
- t.createIndex();
- t.searchByKeyWords(QUERY_STR);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
-
-
-
-
- private void createIndex() throws Exception{
- Long startTime = System.currentTimeMillis();
- File indexDir = new File(FILE_INDEX);
- Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_44);
- IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, luceneAnalyzer);
- config.setOpenMode(OpenMode.CREATE);
- Directory directory = FSDirectory.open(indexDir);
- IndexWriter indexWriter = new IndexWriter(directory, config);
- List<String> fileList = new ArrayList<String>();
- FileUtils.listFile(new File(FILE_TARGET), fileList);
- for(String filePath : fileList){
- System.out.println("文件:" + filePath + "正在被索引....");
- String content = readFile(filePath);
- Document doc = new Document();
- doc.add(new TextField("content", content.toString(), Store.YES));
- doc.add(new TextField("path", filePath, Store.YES));
- indexWriter.addDocument(doc);
- }
- indexWriter.close();
- Long endTime = System.currentTimeMillis();
- System.out.println("花費了" + (endTime - startTime) + "毫秒來建立索引文件");
-
- }
-
-
-
-
-
-
- private String readFile(String filePath) throws Exception{
- @SuppressWarnings("resource")
- BufferedReader bufferedReader = new BufferedReader(
- new InputStreamReader(new FileInputStream(filePath)));
- StringBuffer content = new StringBuffer();
- String str = null;
- while ((str = bufferedReader.readLine()) != null) {
- content.append(str).append("\n");
- }
- return content.toString();
- }
-
-
- private void searchByKeyWords(String keyWords) throws Exception{
- IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(FILE_INDEX)));
- IndexSearcher searcher = new IndexSearcher(reader);
- Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
- QueryParser parser = new QueryParser(Version.LUCENE_44, "content",analyzer);
- Query query = parser.parse(keyWords);
- TopDocs results = searcher.search(query,1000);
- ScoreDoc[] score = results.scoreDocs;
- if (score.length == 0) {
- System.out.println("對不起,沒有找到您要的結果。");
- } else {
- System.out.println("查找["+QUERY_STR+"]有" + score.length + "個結果");
- for (int i = 0; i < score.length; i++) {
- try {
- Document doc = searcher.doc(score[i].doc);
- System.out.print("這是第" + i + "個檢索到的結果,文件名爲:");
- System.out.println(doc.get("path"));
- System.out.println("內容:\n" + doc.get("content"));
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
- }
- }
- }
代碼中的常量可以通過文檔來查看,運行後就可以看到搜索的結果了。
當然這裏只是對txt進行搜索,你還可以對word、excel、pdf等文檔進行搜索,前提是要將其內容讀取出來,建立索引即可。