環境:Jdk 1.7 Solr 5.3.0 Tomcat 7 mmseg4j-solr-2.3.0 java
一、Solr環境搭建 web
1.解壓solr 5.3.0 apache
2.新建solr_home,將解壓文件中的 server/solr 文件夾複製到solr_home
3.配置solr_home。在solr_home/solr中新建應用 mysolr tomcat
4.將solr_home/solr/configsets/sample_techproducts_configs中的conf文件夾複製到mysolr中 服務器
5.在mysolr目錄中新建core.properties,內容爲 name=mysolr(即solr中的mysolr應用)
6.以tomcat 7爲solr容器,解壓tomcat webapp
7.將solr-5.3.0\server中的solr-webapp複製到tomcat的webapps目錄,重命名爲solr maven
8.複製文件 測試
(1)將如下內容複製到tomcat/webapps/solr/WEB-INF/lib文件夾中
(2)將log4j.properties複製到tomcat/webapps/solr/WEB-INF/classes文件夾中(新建classes)
9.配置solr項目中的web.xml,打開 env-entry節點,配置solr_home
10.啓動tomcat,在瀏覽器輸入http://localhost:8080/solr
11.選擇剛在solr_home中新建的mysolr應用,測試分詞;默認分詞器對中文支持很差
二、配置中文分詞器(mmseg4j)
mmseg4j-solr-2.3.0支持solr5.3
1.將兩個jar包拷入tomcat中solr項目裏的lib文件夾內
2.配置solr_home中mysolr域的schema.xml
新增:
<!-- Three text field types, each tokenized by mmseg4j's MMSegTokenizerFactory in a different mode. -->

<!-- "complex" mode; dicPath="dic" is a relative path (NOTE(review): presumably resolved against the core's directory; confirm). -->
<fieldtype name="textComplex" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory" mode="complex" dicPath="dic"/>
  </analyzer>
</fieldtype>

<!-- "max-word" mode; no dicPath is given here. This is the type used by the content_test field. -->
<fieldtype name="textMaxWord" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory" mode="max-word" />
  </analyzer>
</fieldtype>

<!-- "simple" mode; the dicPath below is a placeholder absolute path and must be replaced with a real dictionary location. -->
<fieldtype name="textSimple" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory" mode="simple" dicPath="n:/custom/path/to/my_dic" />
  </analyzer>
</fieldtype>
3.重啓tomcat測試分詞:(選擇剛剛定義的textMaxWord)
修改solr_home中mysolr域的schema.xml,新增要用mmseg4j分詞並索引的字段 content_test,分詞器選擇定義好的textMaxWord:
<!-- Indexed, stored, multi-valued field analyzed with the textMaxWord field type. -->
<field name="content_test" type="textMaxWord" indexed="true" stored="true" multiValued="true"/>
三、Java調用Solr 5.3
package myjava.cn.dx.solr;

import org.apache.solr.client.solrj.*;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Minimal SolrJ 5.3.0 example: indexes a handful of sample sentences into the
 * {@code content_test} field of the {@code mysolr} core and queries them back.
 *
 * Created by daxiong on 2015/10/23.
 */
public class MySolr {

    /** Base URL of the Solr web application. */
    public static final String URL = "http://localhost:8080/solr";

    /** Name of the Solr core (application) to talk to. */
    public static final String SERVER = "mysolr";

    /** Sample sentences that are indexed and later searched. */
    public static String[] docs = {"Solr是一個獨立的企業級搜索應用服務器",
            "它對外提供相似於Web-service的API接口",
            "用戶能夠經過http請求",
            "向搜索引擎服務器提交必定格式的XML文件生成索引",
            "也能夠經過Http Get操做提出查找請求",
            "並獲得XML格式的返回結果"};

    /**
     * Creates a new HTTP client bound to {@code URL/SERVER}.
     *
     * @return a fresh client; the caller owns it and must close it when done
     */
    public static SolrClient getSolrClient() {
        return new HttpSolrClient(URL + "/" + SERVER);
    }

    /**
     * Indexes every entry of {@link #docs} as one document, using the array
     * position as {@code id} and the sentence as {@code content_test}, then
     * commits. Failures are printed to stderr and otherwise ignored.
     */
    public static void createIndex() {
        List<SolrInputDocument> docList = new ArrayList<SolrInputDocument>(docs.length);
        int id = 0;
        for (String text : docs) {
            SolrInputDocument doc = new SolrInputDocument();
            doc.addField("id", id++);
            doc.addField("content_test", text);
            docList.add(doc);
        }

        // try-with-resources: the client holds HTTP connections that must be
        // released even when add/commit fails.
        try (SolrClient client = getSolrClient()) {
            client.add(docList);
            client.commit();
        } catch (SolrServerException | IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the fixed query {@code content_test:搜索} and prints the raw
     * response, the hit count, the query time and each returned document.
     * Failures are printed to stderr and otherwise ignored.
     */
    public static void search() {
        SolrQuery query = new SolrQuery();
        query.setQuery("content_test:搜索");

        // Close the client after the query so its connections are not leaked.
        try (SolrClient client = getSolrClient()) {
            QueryResponse response = client.query(query);
            System.out.println(response.toString());
            System.out.println();

            SolrDocumentList results = response.getResults();
            System.out.println("文檔個數:" + results.getNumFound());
            System.out.println("查詢時間:" + response.getQTime());
            for (SolrDocument doc : results) {
                System.out.println("id: " + doc.getFieldValue("id")
                        + " content: " + doc.getFieldValue("content_test"));
            }
        } catch (SolrServerException | IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Entry point: runs the search. Indexing is disabled by default; enable
     * the call below once to populate the core before searching.
     */
    public static void main(String[] args) {
        //createIndex();
        search();
    }
}
maven配置 pom.xml
<!-- Maven build for the SolrJ example project (artifact myjava:cn.dx, packaged as a jar). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>myjava</groupId>
  <artifactId>cn.dx</artifactId>
  <version>1.0-SNAPSHOT</version>
  <packaging>jar</packaging>
  <name>cn.dx</name>
  <url>http://maven.apache.org</url>
  <properties>
    <!-- Sources are compiled as UTF-8. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <dependencies>
    <!-- Test-scope only. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>test</scope>
    </dependency>
    <!-- SolrJ client library; version matches the Solr 5.3.0 server. -->
    <dependency>
      <groupId>org.apache.solr</groupId>
      <artifactId>solr-solrj</artifactId>
      <version>5.3.0</version>
    </dependency>
    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.17</version>
    </dependency>
    <dependency>
      <groupId>commons-logging</groupId>
      <artifactId>commons-logging</artifactId>
      <version>1.1.1</version>
    </dependency>
    <!-- NOTE(review): slf4j-nop is the no-op SLF4J binding, so SLF4J log output is presumably discarded; confirm this is intended alongside log4j. -->
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-nop</artifactId>
      <version>1.7.7</version>
    </dependency>
  </dependencies>
</project>