分詞的話,咱們把「村村通工程」名詞化,分詞結果爲:
<!-- Field whose content needs to be tokenized; it uses the text_ik type declared below. -->
<field name="content"
       type="text_ik"
       indexed="true"
       stored="true"
       required="true"
       multiValued="false"/>

<!-- IK tokenization field type added for this setup: one IKAnalyzer declaration for
     index time and one for query time, differing only in isMaxWordLength. -->
<!-- NOTE(review): isMaxWordLength is set directly on the analyzer element that also
     carries class="..."; confirm the Solr / IK Analyzer versions in use actually
     hand this attribute to IKAnalyzer rather than ignoring it. -->
<fieldType name="text_ik" class="solr.TextField">
  <analyzer type="index"
            isMaxWordLength="false"
            class="org.wltea.analyzer.lucene.IKAnalyzer"/>
  <analyzer type="query"
            isMaxWordLength="true"
            class="org.wltea.analyzer.lucene.IKAnalyzer"/>
</fieldType>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
  <comment>IK Analyzer 擴展配置</comment>

  <!-- Extension dictionaries are configured here; for example, you could download
       the Sogou internet lexicon and list it in this entry. -->
  <entry key="ext_dict">ext.dic;</entry>

  <!-- Stop-word dictionary. -->
  <entry key="ext_stopwords">stopword.dic;</entry>
</properties>
<!-- textComplex: text field type tokenized by mmseg4j in "complex" mode, with
     separate filter chains for index time and query time. -->
<!-- NOTE(review): dicPath is an absolute, Windows-style path (E:/...); it must be
     adjusted to wherever the dictionaries live on the deployment machine. -->
<fieldtype name="textComplex" class="solr.TextField" positionIncrementGap="100">

  <!-- Index-time chain: stop words, word-delimiter splitting, lower-casing,
       n-grams of 1 to 20 characters, then the standard filter. -->
  <analyzer type="index">
    <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory"
               mode="complex"
               dicPath="E:/solr_home/connect/conf/dic/"/>
    <filter class="solr.StopFilterFactory" ignoreCase="false" words="stopwords.txt"/>
    <filter class="solr.WordDelimiterFilterFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.NGramFilterFactory" minGramSize="1" maxGramSize="20"/>
    <filter class="solr.StandardFilterFactory"/>
  </analyzer>

  <!-- Query-time chain: synonym expansion, stop words, word-delimiter splitting,
       lower-casing, then duplicate-token removal. -->
  <!-- NOTE(review): n-gram filtering is active only in the index chain (the edge
       n-gram filter below is commented out); confirm this asymmetry is intended. -->
  <analyzer type="query">
    <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory"
               mode="complex"
               dicPath="E:/solr_home/connect/conf/dic/"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <filter class="solr.StopFilterFactory" ignoreCase="false" words="stopwords.txt"/>
    <filter class="solr.WordDelimiterFilterFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="20"/> -->
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>

</fieldtype>
配置詞庫