https://github.com/hangxin1940/elasticsearch-cn-out-of-box
An out-of-the-box build of elasticsearch, with a set of practical plugins and configuration already integrated.
======
Open the bundled plugin in a browser: http://localhost:9200/_plugin/oob
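Before opening the browser, you can check that the node is serving the site plugin. A minimal sketch, assuming the default host and HTTP port from the configuration below:

```sh
# expect an HTTP 200 and the plugin's index page
curl -i 'http://localhost:9200/_plugin/oob/'
```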
The distribution's `elasticsearch.yml` wires everything together. First, the node, network, and HTTP (jetty) settings:

```yaml
# cluster name
cluster.name: "cn-out-of-box"
# node name
node.name: "node1"
# whether this node is eligible to become master
node.master: true
# whether this node stores index data
node.data: true
# default number of shards per index
index.number_of_shards: 3
# default number of replicas per index
index.number_of_replicas: 1
# temporary files path
#path.work: "/tmp/elasticsearch"
# log files path
#path.logs: "/var/log/elasticsearch/logs"
# TCP transport port
transport.tcp.port: 9300
# whether to compress TCP transport data
transport.tcp.compress: true
# HTTP port
http.port: 9200
# whether to enable the HTTP service
#http.enabled: true
# whether to use multicast to discover nodes
discovery.zen.ping.multicast.enabled: true

# slow query log thresholds
#index.search.slowlog.threshold.query.warn: 10s
#index.search.slowlog.threshold.query.info: 5s
#index.search.slowlog.threshold.query.debug: 2s
#index.search.slowlog.threshold.query.trace: 500ms
#index.search.slowlog.threshold.fetch.warn: 1s
#index.search.slowlog.threshold.fetch.info: 800ms
#index.search.slowlog.threshold.fetch.debug: 500ms
#index.search.slowlog.threshold.fetch.trace: 200ms

# serve HTTP through the jetty plugin
http.type: com.sonian.elasticsearch.http.jetty.JettyHttpServerTransport
#sonian.elasticsearch.http.jetty:
  # ==== enable HTTPS
  #ssl_port: 9443
  #config: jetty.xml,jetty-ssl.xml,jetty-gzip.xml
  #keystore_password: "OBF:1nc01vuz1w8f1w1c1rbu1rac1w261w9b1vub1ndq"
  # ==== enable user authentication
  #config: jetty.xml,jetty-hash-auth.xml,jetty-restrict-all.xml
```
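After starting the node, a quick way to confirm these settings took effect is to hit the HTTP root; a sketch assuming the defaults above:

```sh
# the response should report "cluster_name" : "cn-out-of-box"
curl 'http://localhost:9200/?pretty'
```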
The `index.analysis` section then registers the tokenizers, token filters, and analyzers contributed by the bundled plugins (ansj, ik, mmseg, pinyin, stconvert, combo, and others):

```yaml
# index configuration
index:
  # analysis configuration
  analysis:
    # tokenizer configuration
    tokenizer:
      index_ansj_token:
        type: ansj_index_token
        is_name: false
        is_num: false
        is_quantifier: false
      query_ansj_token:
        type: ansj_query_token
        is_name: false
        is_num: false
        is_quantifier: false

      # ======== analysis-pinyin ========
      # full pinyin
      my_pinyin:
        type: pinyin
        first_letter: prefix
        padding_char: ' '
      # pinyin first letters only
      pinyin_first_letter:
        type: pinyin
        first_letter: only

      # ======== analysis-mmseg ========
      # simple forward matching
      # example: 一個勁兒的說話
      #   一個 / 一個勁 / 一個勁兒 / 一個勁兒的
      mmseg_simple:
        type: mmseg
        seg_type: simple
      # complex: finds all three-word chunks, then disambiguates with four rules
      # (maximum matching, largest average word length, smallest variance of
      # word lengths, largest sum of natural logs of single-character word frequencies)
      # example: 研究生命起源
      #   研_究_生 / 研_究_生命 / 研究生_命_起源 / 研究_生命_起源
      mmseg_complex:
        type: mmseg
        seg_type: complex
      # max-word segmentation on top of complex
      # example: 中國人民銀行 -> 中國|人民|銀行
      mmseg_maxword:
        type: mmseg
        seg_type: max_word

      # ======== analysis-stconvert ========
      # simplified -> traditional, traditional output only
      s2t_convert:
        type: stconvert
        delimiter: ","
        convert_type: s2t
      # traditional -> simplified, simplified output only
      t2s_convert:
        type: stconvert
        delimiter: ","
        convert_type: t2s
      # simplified -> traditional, keeping both forms
      s2t_keep_both_convert:
        type: stconvert
        delimiter: ","
        keep_both: 'true'
        convert_type: s2t
      # traditional -> simplified, keeping both forms
      t2s_keep_both_convert:
        type: stconvert
        delimiter: ","
        keep_both: 'true'
        convert_type: t2s

      # ======== analysis-pattern ========
      # regex: split on semicolons
      semicolon_spliter:
        type: pattern
        pattern: ";"
      # regex: split on percent signs
      pct_spliter:
        type: pattern
        pattern: "[%]+"

      # ======== analysis-nGram ========
      # grams of 1-2 characters
      ngram_1_to_2:
        type: nGram
        min_gram: 1
        max_gram: 2
      # grams of 1-3 characters
      ngram_1_to_3:
        type: nGram
        min_gram: 1
        max_gram: 3

    # token filter configuration
    filter:
      # ======== ngram filters ========
      ngram_min_3:
        max_gram: 10
        min_gram: 3
        type: nGram
      ngram_min_2:
        max_gram: 10
        min_gram: 2
        type: nGram
      ngram_min_1:
        max_gram: 10
        min_gram: 1
        type: nGram

      # ======== length filters ========
      min2_length:
        min: 2
        max: 4
        type: length
      min3_length:
        min: 3
        max: 4
        type: length

      # ======== string2int filter ========
      #my_string2int:
      #  type: string2int
      #  redis_server: 127.0.0.1
      #  redis_port: 6379
      #  redis_key: index1_type2_name2

      # ======== pinyin filter ========
      pinyin_first_letter:
        type: pinyin
        first_letter: only

    # analyzer configuration
    analyzer:
      lowercase_keyword:
        type: custom
        filter:
          - lowercase
        tokenizer: standard
      lowercase_keyword_ngram_min_size1:
        type: custom
        filter:
          - lowercase
          - stop
          - trim
          - unique
        tokenizer: nGram
      lowercase_keyword_ngram_min_size2:
        type: custom
        filter:
          - lowercase
          - min2_length
          - stop
          - trim
          - unique
        tokenizer: nGram
      lowercase_keyword_ngram_min_size3:
        type: custom
        filter:
          - lowercase
          - min3_length
          - stop
          - trim
          - unique
        tokenizer: ngram_1_to_3
      lowercase_keyword_ngram:
        type: custom
        filter:
          - lowercase
          - stop
          - trim
          - unique
        tokenizer: ngram_1_to_3
      lowercase_keyword_without_standard:
        type: custom
        filter:
          - lowercase
        tokenizer: keyword
      lowercase_whitespace:
        type: custom
        filter:
          - lowercase
        tokenizer: whitespace

      # ======== ik ========
      # ik analyzer
      ik:
        alias:
          - ik_analyzer
        type: org.elasticsearch.index.analysis.IkAnalyzerProvider
      # ik, finest-grained segmentation
      ik_max_word:
        type: ik
        use_smart: false
      # ik, smart segmentation
      ik_smart:
        type: ik
        use_smart: true

      # ======== mmseg ========
      # mmseg analyzer
      mmseg:
        alias:
          - mmseg_analyzer
        type: org.elasticsearch.index.analysis.MMsegAnalyzerProvider
      mmseg_maxword:
        type: custom
        filter:
          - lowercase
        tokenizer: mmseg_maxword
      mmseg_complex:
        type: custom
        filter:
          - lowercase
        tokenizer: mmseg_complex
      mmseg_simple:
        type: custom
        filter:
          - lowercase
        tokenizer: mmseg_simple

      # ======== pattern ========
      comma_spliter:
        type: pattern
        pattern: "[,|\\s]+"
      pct_spliter:
        type: pattern
        pattern: "[%]+"

      custom_snowball_analyzer:
        type: snowball
        language: English
      simple_english_analyzer:
        type: custom
        tokenizer: whitespace
        filter:
          - standard
          - lowercase
          - snowball
      edge_ngram:
        type: custom
        tokenizer: edgeNGram
        filter:
          - lowercase

      # ======== pinyin analysis ========
      pinyin_ngram_analyzer:
        type: custom
        tokenizer: my_pinyin
        filter:
          - lowercase
          - nGram
          - trim
          - unique
      # pinyin first-letter segmentation
      pinyin_first_letter_analyzer:
        type: custom
        tokenizer: pinyin_first_letter
        filter:
          - standard
          - lowercase
      # pinyin first letters over the whole string, with filtering
      pinyin_first_letter_keyword_analyzer:
        alias:
          - pinyin_first_letter_analyzer_keyword
        type: custom
        tokenizer: keyword
        filter:
          - pinyin_first_letter
          - lowercase

      # ======== simplified/traditional conversion ========
      stconvert:
        alias:
          - st_analyzer
        type: org.elasticsearch.index.analysis.STConvertAnalyzerProvider
      s2t_convert:
        type: stconvert
        delimiter: ","
        convert_type: s2t
      t2s_convert:
        type: stconvert
        delimiter: ","
        convert_type: t2s
      s2t_keep_both_convert:
        type: stconvert
        delimiter: ","
        keep_both: 'true'
        convert_type: s2t
      t2s_keep_both_convert:
        type: stconvert
        delimiter: ","
        keep_both: 'true'
        convert_type: t2s

      #string2int:
      #  type: org.elasticsearch.index.analysis.String2IntAnalyzerProvider
      #  redis_server: 127.0.0.1
      #  redis_port: 6379
      #  redis_key: index1_type1_name1
      #custom_string2int:
      #  type: custom
      #  tokenizer: whitespace
      #  filter:
      #    - string2int
      #    - lowercase

      # path analysis
      path_analyzer:
        type: custom
        tokenizer: path_hierarchy

      # ======== ansj ========
      index_ansj:
        alias:
          - ansj_index_analyzer
        type: ansj_index
        user_path: ansj/user
        ambiguity: ansj/ambiguity.dic
        stop_path: ansj/stopLibrary.dic
        #is_name: false
        #is_num: true
        #is_quantifier: true
        redis: false
        #pool:
        #  maxactive: 20
        #  maxidle: 10
        #  maxwait: 100
        #  testonborrow: true
        #ip: 127.0.0.1:6379
        #channel: ansj_term
      query_ansj:
        alias:
          - ansj_query_analyzer
        type: ansj_query
        user_path: ansj/user
        ambiguity: ansj/ambiguity.dic
        stop_path: ansj/stopLibrary.dic
        #is_name: false
        #is_num: true
        #is_quantifier: true
        redis: false
        #pool:
        #  maxactive: 20
        #  maxidle: 10
        #  maxwait: 100
        #  testonborrow: true
        #ip: 127.0.0.1:6379
        #channel: ansj_term

      uax_url_email:
        tokenizer: uax_url_email
        filter: [standard, lowercase, stop]

      # ======== combo ========
      combo:
        type: combo
        sub_analyzers:
          - ansj_index
          - ik_smart
          - mmseg_complex
          - uax_url_email
          - s2t_convert
          - t2s_convert
          - smartcn
          - simple_english_analyzer

# default analyzer
index.analysis.analyzer.default.type: combo
```
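With the node running, any of the registered analyzers can be exercised through the `_analyze` API. A sketch assuming an ES 0.90/1.x-era node (matching this configuration) on localhost:9200, where the text to analyze is passed as the raw request body:

```sh
# ik smart segmentation
curl 'http://localhost:9200/_analyze?analyzer=ik_smart&pretty' -d '中華人民共和國'

# traditional -> simplified conversion
curl 'http://localhost:9200/_analyze?analyzer=t2s_convert&pretty' -d '中華人民共和國'

# the combo analyzer merges the token streams of all its sub-analyzers
curl 'http://localhost:9200/_analyze?analyzer=combo&pretty' -d 'elasticsearch 開箱即用'
```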
Finally, the thread pool settings pin the index pool to a fixed size, with rejected tasks running on the caller thread:

```yaml
# thread pool settings
threadpool:
  index:
    type: fixed
    size: 30
    queue: -1
    reject_policy: caller
```
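To use a specific analyzer per field instead of the combo default, reference the registered names in a mapping. A hypothetical example in the 0.90/1.x string-field syntax (the index, type, and field names are illustrative), letting ansj segment differently at index time and query time:

```sh
# "articles", "article", "title", and "title_pinyin" are placeholder names
curl -XPUT 'http://localhost:9200/articles' -d '{
  "mappings": {
    "article": {
      "properties": {
        "title": {
          "type": "string",
          "index_analyzer": "index_ansj",
          "search_analyzer": "query_ansj"
        },
        "title_pinyin": {
          "type": "string",
          "analyzer": "pinyin_first_letter_keyword_analyzer"
        }
      }
    }
  }
}'
```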