下載地址:http://archive.apache.org/dist/lucene/java/
Lucene 不同版本之間有不小的差異,這裏下載的是 Lucene 4.3。
打開 Eclipse,新建 Dynamic Web Project。解壓下載的 Lucene 壓縮包,依次找到下面幾個 jar 包,加到 /WebContent/WEB-INF/lib 目錄下,而後 Add to Build Path:
| 包名 | 位置 |
| --- | --- |
| lucene-analyzers-common-4.3.0.jar | lucene-4.3.0/analysis/common |
| lucene-analyzers-smartcn-4.3.0.jar | lucene-4.3.0/analysis/smartcn |
| lucene-core-4.3.0.jar | lucene-4.3.0/core |
| lucene-highlighter-4.3.0.jar | lucene-4.3.0/highlighter |
| lucene-queries-4.3.0.jar | lucene-4.3.0/queries |
| lucene-queryparser-4.3.0.jar | lucene-4.3.0/queryparser |
package ac.ucas.lucene;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Minimal Lucene 4.3 indexing example: opens a filesystem index in
 * create-or-append mode, adds two small documents and commits them.
 */
public class IndexCreate {

    /** Directory on disk in which the index files are stored. */
    private static final String INDEX_PATH = "/Users/yaopan/Documents/eclipseworkspace/test";

    /**
     * Builds the sample index.
     *
     * <p>Any I/O failure is reported on standard error and aborts the run; the
     * completion message is printed only after a successful commit. The writer
     * and the directory are always closed, even when indexing fails.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Standard analyzer used to tokenize the analyzed text fields.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
        // Writer configuration.
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
        // Open mode: reuse the index if it exists, otherwise create it.
        indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);

        // Storage location of the index.
        Directory directory = null;
        // All additions, deletions and updates go through the IndexWriter.
        IndexWriter indexWriter = null;
        try {
            directory = FSDirectory.open(new File(INDEX_PATH));
            // isLocked/unlock are static methods, so call them on the class
            // rather than through a (still null) instance reference.
            // NOTE(review): forcibly unlocking assumes no other writer is
            // currently using this index - confirm for real deployments.
            if (IndexWriter.isLocked(directory)) {
                IndexWriter.unlock(directory);
            }
            indexWriter = new IndexWriter(directory, indexWriterConfig);

            // Write the two sample documents to the index.
            indexWriter.addDocument(newDocument("abcde", "極客學院", 1));
            indexWriter.addDocument(newDocument("addff", "LUCENE案例", 2));
            indexWriter.commit();

            System.out.println("index ceate complete!");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the writer first, then the directory it was opened on.
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Creates a document with the three stored fields used by this example.
     *
     * @param id      value of the {@code id} field, added as a {@link StringField}
     * @param content value of the {@code content} field, added as a {@link TextField}
     * @param num     value of the {@code num} field, added as an {@link IntField}
     * @return the populated document
     */
    private static Document newDocument(String id, String content, int num) {
        Document doc = new Document();
        doc.add(new StringField("id", id, Store.YES));
        doc.add(new TextField("content", content, Store.YES));
        doc.add(new IntField("num", num, Store.YES));
        return doc;
    }
}
{% codeblock lang:java lucene分詞器 %}
package ac.ucas.lucene; import java.io.IOException; import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.cjk.CJKAnalyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.core.SimpleAnalyzer; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.Version; import sun.dc.pr.PRError; public class AnalyerStudy { private static String str = "lucene, 全文檢索框架"; public static void print(Analyzer analyzer){ StringReader stringReader=new StringReader(str); try { TokenStream tokenStream=analyzer.tokenStream(str, stringReader); tokenStream.reset(); CharTermAttribute term=tokenStream.getAttribute(CharTermAttribute.class); System.out.println("分詞技術:"+analyzer.getClass()); while(tokenStream.incrementToken()){ System.out.print(term.toString()+" | "); } System.out.println("\n"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public static void main(String[] args) { Analyzer analyzer=null; //標準分詞 analyzer=new StandardAnalyzer(Version.LUCENE_43); print(analyzer); //空格分詞 analyzer =new WhitespaceAnalyzer(Version.LUCENE_43); print(analyzer); //簡單分詞 analyzer=new SimpleAnalyzer(Version.LUCENE_43); print(analyzer); //二分法 analyzer=new CJKAnalyzer(Version.LUCENE_43); print(analyzer); //關鍵字 analyzer=new KeywordAnalyzer(); print(analyzer); // analyzer=new StopAnalyzer(Version.LUCENE_43); print(analyzer); } }
{% endcodeblock %}
分詞結果:
分詞技術:class org.apache.lucene.analysis.standard.StandardAnalyzer
lucene | 全 | 文 | 檢 | 索 | 框 | 架 |
分詞技術:class org.apache.lucene.analysis.core.WhitespaceAnalyzer
lucene, | 全文檢索框架 |
分詞技術:class org.apache.lucene.analysis.core.SimpleAnalyzer
lucene | 全文檢索框架 |
分詞技術:class org.apache.lucene.analysis.cjk.CJKAnalyzer
lucene | 全文 | 文檢 | 檢索 | 索框 | 框架 |
分詞技術:class org.apache.lucene.analysis.core.KeywordAnalyzer
lucene, 全文檢索框架 |
分詞技術:class org.apache.lucene.analysis.core.StopAnalyzer
lucene | 全文檢索框架 |
Luke 是一個用於 Lucene 搜索引擎、方便開發和診斷的第三方工具,它能夠訪問現有 Lucene 的索引。
Luke 下載地址:https://github.com/DmitryKey/luke/releases