wget http://mirrors.cnnic.cn/apache/maven/maven-3/3.3.9/binaries/apache-maven-3.3.9-bin.tar.gz
tar xvf apache-maven-3.3.9-bin.tar.gz
# mv maven to /usr/local/apache-maven
mv apache-maven-3.3.9 /usr/local/apache-maven
vim ~/.bashrc
# 添加以下內容
export M2_HOME=/usr/local/apache-maven
export M2=$M2_HOME/bin
export PATH=$M2:$PATH
source ~/.bashrc
mvn -version
IK version | ES version |
---|---|
master | 5.x -> master |
5.6.1 | 5.6.1 |
5.5.3 | 5.5.3 |
5.4.3 | 5.4.3 |
5.3.3 | 5.3.3 |
5.2.2 | 5.2.2 |
5.1.2 | 5.1.2 |
1.10.1 | 2.4.1 |
1.9.5 | 2.3.5 |
1.8.1 | 2.2.1 |
1.7.0 | 2.1.1 |
1.5.0 | 2.0.0 |
1.2.6 | 1.0.0 |
1.2.5 | 0.90.x |
1.1.3 | 0.20.x |
1.0.0 | 0.16.2 -> 0.19.0 |
目前使用的 elasticsearch 爲 2.4 版本,根據以上對應表得出,應該選擇 1.10.1 版本的 IK
https://github.com/medcl/elasticsearch-analysis-ik/tree/v1.10.1
下載 IK 源碼並上傳到服務器,進入到該目錄下
mvn package
打包需要從 maven 庫下載一些 jar 包,需要一些時間,靜等 ~
編譯完成後,可以在 target/releases 目錄下找到對應的 zip 包
cd target/releases
可以看到編譯後的 IK 包,直接使用即可
elasticsearch-analysis-ik-1.10.1.zip
將 IK 解壓後,放到 /data/components/elasticsearch/plugins/ik 目錄下
vim plugin-descriptor.properties
# 按照實際情況修改以下參數
elasticsearch.version=2.4.1
java.version=1.7
vim /etc/elasticsearch/elasticsearch.yml
# 添加以下內容
index.analysis.analyzer.default.type: ik
systemctl restart elasticsearch.service
配置完成,測試一下 ~
ik 帶有兩個分詞器
ik_max_word:會將文本做最細粒度的拆分,儘可能多地拆分出詞語
ik_smart:會做最粗粒度的拆分;已被分出的詞語將不會再次被其它詞語佔有
curl -XGET 'http://172.16.200.101:9200/_analyze?pretty&analyzer=ik_max_word' -d '掌通家園是全球最大的幼教管理平臺'
# 返回信息如下
{
  "tokens" : [
    { "token" : "掌", "start_offset" : 0, "end_offset" : 1, "type" : "CN_WORD", "position" : 0 },
    { "token" : "通", "start_offset" : 1, "end_offset" : 2, "type" : "CN_CHAR", "position" : 1 },
    { "token" : "家園", "start_offset" : 2, "end_offset" : 4, "type" : "CN_WORD", "position" : 2 },
    { "token" : "家", "start_offset" : 2, "end_offset" : 3, "type" : "CN_WORD", "position" : 3 },
    { "token" : "全球", "start_offset" : 5, "end_offset" : 7, "type" : "CN_WORD", "position" : 4 },
    { "token" : "最大", "start_offset" : 7, "end_offset" : 9, "type" : "CN_WORD", "position" : 5 },
    { "token" : "幼教", "start_offset" : 10, "end_offset" : 12, "type" : "CN_WORD", "position" : 6 },
    { "token" : "教管", "start_offset" : 11, "end_offset" : 13, "type" : "CN_WORD", "position" : 7 },
    { "token" : "管理", "start_offset" : 12, "end_offset" : 14, "type" : "CN_WORD", "position" : 8 },
    { "token" : "平臺", "start_offset" : 14, "end_offset" : 16, "type" : "CN_WORD", "position" : 9 },
    { "token" : "臺", "start_offset" : 15, "end_offset" : 16, "type" : "CN_WORD", "position" : 10 }
  ]
}
curl -XGET 'http://172.16.200.101:9200/_analyze?pretty&analyzer=ik_smart' -d '掌通家園是全球最大的幼教管理平臺'
# 返回信息如下
{
  "tokens" : [
    { "token" : "掌", "start_offset" : 0, "end_offset" : 1, "type" : "CN_WORD", "position" : 0 },
    { "token" : "通", "start_offset" : 1, "end_offset" : 2, "type" : "CN_CHAR", "position" : 1 },
    { "token" : "家園", "start_offset" : 2, "end_offset" : 4, "type" : "CN_WORD", "position" : 2 },
    { "token" : "全球", "start_offset" : 5, "end_offset" : 7, "type" : "CN_WORD", "position" : 3 },
    { "token" : "最大", "start_offset" : 7, "end_offset" : 9, "type" : "CN_WORD", "position" : 4 },
    { "token" : "幼教", "start_offset" : 10, "end_offset" : 12, "type" : "CN_WORD", "position" : 5 },
    { "token" : "管理", "start_offset" : 12, "end_offset" : 14, "type" : "CN_WORD", "position" : 6 },
    { "token" : "平臺", "start_offset" : 14, "end_offset" : 16, "type" : "CN_WORD", "position" : 7 }
  ]
}