本篇介紹的是基於Elasticsearch實現搜索推薦詞,其中需要用到Elasticsearch的pinyin插件以及ik分詞插件,代碼的實現這裏提供了Java跟C#的版本方便大家參考。
1.實現的結果
①當搜索【qiy】的時候,能匹配企業、祈願等。
②當搜索【qi業】的時候,只能匹配到企業;如果沒有企業,將使用模糊查詢,匹配祈願。
③當搜索【q業】的時候結果同②。
④當搜索【企y】或【企ye】的時候結果同②。
⑤當搜索【qy】的時候,能匹配企業、祈願等。
2.實現的邏輯
中文匹配前綴==》全拼匹配前綴==》拼音首字母匹配前綴==》拼音模糊匹配前綴
優先級從左到右,當前面三個有結果的時候不建議用模糊匹配,這樣結果更加精確。比如需要獲取8個推薦詞,先獲取中文的,如果足夠8個將不再獲取之後的匹配結果。但是當模糊匹配之前已經存在匹配結果了,即使數量沒有達到8個,也不再繼續獲取模糊匹配結果。
3.插件準備
ik分詞插件安裝相對簡單,網上教程也多,這裏不做介紹。這裏講解下pinyin插件:官方版本的拼音插件不支持保留中文,處理結果只有拼音,這樣會出現同音字匹配,結果不準確。
這裏感謝小夥伴分享的拼音插件修改方法:http://www.javashuo.com/article/p-ehbmizvl-nx.html。
按照裏面的操作處理後的插件將實現:
企業畫報:{"qi","企","ye","業","hua","畫","bao","報"}
拼音插件的各項具體屬性參考:https://blog.csdn.net/a1148233614/article/details/80280024,裏面有詳細介紹。
4.Elasticsearch建立index
這裏使用的ES版本爲7.0.1,不再支持mapping type(映射類型),創建代碼如下:
PUT /suggest_tset { "settings": { "number_of_shards": 1, "number_of_replicas": 0, "analysis": { "analyzer": { "prefix_pinyin_analyzer": { "tokenizer": "standard", "filter": [ "lowercase", "prefix_pinyin" ] }, "full_pinyin_analyzer": { "tokenizer": "standard", "filter": [ "lowercase", "full_pinyin" ] }, "like_pinyin_analyzer": { "tokenizer": "standard", "filter": [ "lowercase", "like_pinyin" ] } }, "filter": { "_pattern": { "type": "pattern_capture", "preserve_original": true, "patterns": [ "([0-9])", "([a-z])" ] }, "prefix_pinyin": { "type": "pinyin", "keep_first_letter": "true", "keep_full_pinyin": "false", "none_chinese_pinyin_tokenize": "false", "keep_separate_chinese": "true", "keep_original": "false" }, "full_pinyin": { "type": "pinyin", "keep_first_letter": "false", "keep_full_pinyin": "true", "keep_original": "false", "keep_separate_chinese": "true", "keep_none_chinese_in_first_letter": "false" }, "like_pinyin": { "type": "pinyin", "keep_first_letter": "true", "keep_full_pinyin": "true", "keep_joined_full_pinyin": "false", "keep_original": "false", "keep_separate_chinese": "false", "keep_none_chinese_in_first_letter": "false" } } } }, "mappings": { "dynamic": "false", "properties": { "kwsuggest": { "fields": { "suggestText": { "type": "completion", "analyzer": "standard", "preserve_separators": "false", "preserve_position_increments": "true", "max_input_length": 50 }, "prefix_pinyin": { "type": "completion", "analyzer": "prefix_pinyin_analyzer", "search_analyzer": "standard", "preserve_separators": "false" }, "full_pinyin": { "type": "completion", "analyzer": "full_pinyin_analyzer", "search_analyzer": "standard", "preserve_separators": "false" }, "like_pinyin": { "type": "completion", "analyzer": "like_pinyin_analyzer", "preserve_separators": "false" } }, "type": "text" } } } }
這裏插入幾條測試數據
POST _bulk/?refresh=true { "index" : { "_index" : "suggest_tset", "_type" : "_doc" } } { "kwsuggest": "企業規劃"} { "index" : { "_index" : "suggest_tset", "_type" : "_doc" } } { "kwsuggest": "祈願設計 完美無瑕"} { "index" : { "_index" : "suggest_tset", "_type" : "_doc" } } { "kwsuggest": "懸崖的圖片 美景"} { "index" : { "_index" : "suggest_tset", "_type" : "_doc" } } { "kwsuggest": "縣衙地址 那裏呢"} { "index" : { "_index" : "suggest_tset", "_type" : "_doc" } } { "kwsuggest": "懸崖風景圖"} { "index" : { "_index" : "suggest_tset", "_type" : "_doc" } } { "kwsuggest": "起夜的風光 真的美"} { "index" : { "_index" : "suggest_tset", "_type" : "_doc" } } { "kwsuggest": "起夜第二個詞 測試使用"} { "index" : { "_index" : "suggest_tset", "_type" : "_doc" } } { "kwsuggest": "須要一半留下一半打一字謎"} { "index" : { "_index" : "suggest_tset", "_type" : "_doc" } } { "kwsuggest": "許亞爲"} { "index" : { "_index" : "suggest_tset", "_type" : "_doc" } } { "kwsuggest": "許雅非測試"} { "index" : { "_index" : "suggest_tset", "_type" : "_doc" } } { "kwsuggest": "徐楊是誰"}
下面爲測試的查詢語句
GET /suggest_tset/_search { "suggest": { "suggestText": { "prefix": "qi業", "completion": { "field": "kwsuggest.suggestText", "skip_duplicates": true } }, "full_pinyin": { "prefix": "qi業", "completion": { "field": "kwsuggest.full_pinyin", "skip_duplicates": true } }, "prefix_pinyin": { "prefix": "qi業", "completion": { "field": "kwsuggest.prefix_pinyin", "skip_duplicates": true } }, "like_pinyin": { "prefix": "qi業", "completion": { "field": "kwsuggest.like_pinyin", "skip_duplicates": true, "fuzzy": { "fuzziness": 1 } } } } }
當輸入查詢條件爲【qiy】的時候,結果爲:
{ "took" : 17, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 0, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "suggest" : { "full_pinyin" : [ { "text" : "qiy", "offset" : 0, "length" : 3, "options" : [ { "text" : "起夜的風光 真的美", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "-jgnlHMBSEyTxFiDO4lU", "_score" : 1.0, "_source" : { "kwsuggest" : "起夜的風光 真的美" } }, { "text" : "起夜第二個詞 測試使用", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "aDg3lHMBSEyTxFiDXprV", "_score" : 1.0, "_source" : { "kwsuggest" : "起夜第二個詞 測試使用" } } ] } ], "like_pinyin" : [ { "text" : "qiy", "offset" : 0, "length" : 3, "options" : [ { "text" : "企業規劃", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "9TgnlHMBSEyTxFiDO4lU", "_score" : 2.0, "_source" : { "kwsuggest" : "企業規劃" } }, { "text" : "祈願設計 這是啥呢", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "9jgnlHMBSEyTxFiDO4lU", "_score" : 2.0, "_source" : { "kwsuggest" : "祈願設計 這是啥呢" } }, { "text" : "起夜的風光 真的美", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "-jgnlHMBSEyTxFiDO4lU", "_score" : 2.0, "_source" : { "kwsuggest" : "起夜的風光 真的美" } }, { "text" : "起夜第二個詞 測試使用", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "aDg3lHMBSEyTxFiDXprV", "_score" : 2.0, "_source" : { "kwsuggest" : "起夜第二個詞 測試使用" } } ] } ], "prefix_pinyin" : [ { "text" : "qiy", "offset" : 0, "length" : 3, "options" : [ ] } ], "suggestText" : [ { "text" : "qiy", "offset" : 0, "length" : 3, "options" : [ ] } ] } }
輸入【qi業】的查詢結果爲
{ "took" : 2, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 0, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "suggest" : { "full_pinyin" : [ { "text" : "qi業", "offset" : 0, "length" : 3, "options" : [ { "text" : "企業規劃", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "9TgnlHMBSEyTxFiDO4lU", "_score" : 1.0, "_source" : { "kwsuggest" : "企業規劃" } } ] } ], "like_pinyin" : [ { "text" : "qi業", "offset" : 0, "length" : 3, "options" : [ { "text" : "企業規劃", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "9TgnlHMBSEyTxFiDO4lU", "_score" : 2.0, "_source" : { "kwsuggest" : "企業規劃" } }, { "text" : "祈願設計 這是啥呢", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "9jgnlHMBSEyTxFiDO4lU", "_score" : 2.0, "_source" : { "kwsuggest" : "祈願設計 這是啥呢" } }, { "text" : "起夜的風光 真的美", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "-jgnlHMBSEyTxFiDO4lU", "_score" : 2.0, "_source" : { "kwsuggest" : "起夜的風光 真的美" } }, { "text" : "起夜第二個詞 測試使用", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "aDg3lHMBSEyTxFiDXprV", "_score" : 2.0, "_source" : { "kwsuggest" : "起夜第二個詞 測試使用" } } ] } ], "prefix_pinyin" : [ { "text" : "qi業", "offset" : 0, "length" : 3, "options" : [ ] } ], "suggestText" : [ { "text" : "qi業", "offset" : 0, "length" : 3, "options" : [ ] } ] } }
輸入【qy】的結果爲
{ "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 0, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "suggest" : { "full_pinyin" : [ { "text" : "qy", "offset" : 0, "length" : 2, "options" : [ ] } ], "like_pinyin" : [ { "text" : "qy", "offset" : 0, "length" : 2, "options" : [ { "text" : "起夜的風光 真的美", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "-jgnlHMBSEyTxFiDO4lU", "_score" : 2.0, "_source" : { "kwsuggest" : "起夜的風光 真的美" } }, { "text" : "起夜第二個詞 測試使用", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "aDg3lHMBSEyTxFiDXprV", "_score" : 2.0, "_source" : { "kwsuggest" : "起夜第二個詞 測試使用" } } ] } ], "prefix_pinyin" : [ { "text" : "qy", "offset" : 0, "length" : 2, "options" : [ { "text" : "起夜的風光 真的美", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "-jgnlHMBSEyTxFiDO4lU", "_score" : 1.0, "_source" : { "kwsuggest" : "起夜的風光 真的美" } }, { "text" : "起夜第二個詞 測試使用", "_index" : "suggest_tset", "_type" : "_doc", "_id" : "aDg3lHMBSEyTxFiDXprV", "_score" : 1.0, "_source" : { "kwsuggest" : "起夜第二個詞 測試使用" } } ] } ], "suggestText" : [ { "text" : "qy", "offset" : 0, "length" : 2, "options" : [ ] } ] } }
5.java版本代碼
這裏使用elasticsearch-rest-high-level-client
application.yml添加配置
# ES配置
elasticsearch:
  ipAddress: [127.0.0.1:9200]
添加配置類
/**
 * Spring configuration that exposes the Elasticsearch REST client beans.
 *
 * <p>Node addresses are bound from the {@code elasticsearch.ipAddress}
 * property; every entry must have the form {@code host:port}.
 */
@Component
@Configuration
@ConfigurationProperties(prefix = "elasticsearch")
@Data
public class ElasticsearchRestClientConfig {
    private Logger logger = LoggerFactory.getLogger(getClass());

    /** Number of parts expected after splitting an address on ':' (host and port). */
    private static final int ADDRESS_LENGTH = 2;
    private static final String HTTP_SCHEME = "http";

    /**
     * Node addresses; host and port are separated by a colon.
     */
    public String[] ipAddress;

    /**
     * Builds the low-level client builder from the configured addresses.
     *
     * @return a builder targeting every well-formed configured address
     * @throws IllegalArgumentException if no usable address is configured
     */
    @Bean
    public RestClientBuilder restClientBuilder() {
        // Fail with an explicit message instead of an NPE inside Arrays.stream
        // when the property is missing altogether.
        if (ipAddress == null || ipAddress.length == 0) {
            throw new IllegalArgumentException(
                    "elasticsearch.ipAddress must contain at least one host:port entry");
        }

        HttpHost[] hosts = Arrays.stream(ipAddress)
                .map(this::makeHttpHost)
                .filter(Objects::nonNull)
                .toArray(HttpHost[]::new);

        if (hosts.length == 0) {
            throw new IllegalArgumentException(
                    "elasticsearch.ipAddress contains no valid host:port entry: "
                            + Arrays.toString(ipAddress));
        }

        logger.debug("hosts:{}", Arrays.toString(hosts));
        return RestClient.builder(hosts);
    }

    /**
     * Wraps the low-level builder into the high-level client used by services.
     *
     * @param restClientBuilder the builder produced by {@link #restClientBuilder()}
     * @return the high-level REST client
     */
    @Bean(name = "highLevelClient")
    public RestHighLevelClient highLevelClient(@Autowired RestClientBuilder restClientBuilder) {
        return new RestHighLevelClient(restClientBuilder);
    }

    /**
     * Parses a single {@code host:port} entry.
     *
     * @param s the raw configured address
     * @return the parsed host, or {@code null} when the entry is blank or does
     *         not consist of exactly a host and a port (such entries are
     *         filtered out by the caller)
     * @throws NumberFormatException if the port part is not an integer
     */
    private HttpHost makeHttpHost(String s) {
        // An `assert` is skipped unless the JVM runs with -ea, so the emptiness
        // check has to be a real condition to protect the split() below.
        if (s == null || s.trim().isEmpty()) {
            logger.warn("Ignoring empty Elasticsearch address entry");
            return null;
        }

        String[] address = s.trim().split(":");
        if (address.length != ADDRESS_LENGTH) {
            logger.warn("Ignoring malformed Elasticsearch address (expected host:port): {}", s);
            return null;
        }

        String ip = address[0];
        int port = Integer.parseInt(address[1].trim());
        return new HttpHost(ip, port, HTTP_SCHEME);
    }
}
實現的代碼:
@Service public class KwSuggestService implements IKwSuggest { @Autowired RestHighLevelClient highLevelClient; @Override public List<String> GetKwSuggestList(String kw){ SearchRequest searchRequest = new SearchRequest("suggest_tset"); SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); SuggestBuilder suggestBuilder=new SuggestBuilder(); suggestBuilder.addSuggestion("suggestText", SuggestBuilders.completionSuggestion("kwsuggest.suggestText").prefix(kw).skipDuplicates(true).size(5)); suggestBuilder.addSuggestion("full_pinyin", SuggestBuilders.completionSuggestion("kwsuggest.full_pinyin").prefix(kw).skipDuplicates(true).size(5)); suggestBuilder.addSuggestion("prefix_pinyin", SuggestBuilders.completionSuggestion("kwsuggest.prefix_pinyin").prefix(kw).skipDuplicates(true).size(5)); suggestBuilder.addSuggestion("like_pinyin", SuggestBuilders.completionSuggestion("kwsuggest.like_pinyin").prefix(kw, Fuzziness.fromEdits(1)).skipDuplicates(true).size(5)); sourceBuilder.suggest(suggestBuilder); sourceBuilder.timeout(new TimeValue(10, TimeUnit.SECONDS)); searchRequest.source(sourceBuilder); List<String> result = new ArrayList<>(); List<String> suggestionList= Arrays.asList("suggestText","full_pinyin","prefix_pinyin","like_pinyin"); try { SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT); Suggest suggestions = response.getSuggest(); Integer index = 1; for(String suggestionType : suggestionList){ CompletionSuggestion completionSuggestion = suggestions.getSuggestion(suggestionType); for (CompletionSuggestion.Entry entry : completionSuggestion.getEntries()) { for (CompletionSuggestion.Entry.Option option : entry) { String suggestText = option.getHit().getSourceAsMap().get("kwsuggest").toString(); result.add(suggestText); } } // 按照中文匹配、全拼匹配、拼音首字母匹配、模糊匹配的順序,結果大於5的時候返回結果,根據本身業務須要判斷這個返回的數量 if(result.size()>=5){ break; } // 中文匹配,全拼匹配以及拼音首字母匹配存在結果的,不須要模糊匹配 if(index==3 && result.size()>0){ break; } // 超過3個字模糊匹配不許確 if(kw.length()>3 && 
result.size()==0){ break; } } return result; } catch (IOException e) { e.printStackTrace(); return new ArrayList<>(); } } }
6.C#代碼實現
C#使用的是NEST
public partial class ElasticFactory
{
    /// <summary>
    /// Returns keyword suggestions for <c>request.q</c> using four completion
    /// suggesters on the <c>suggest_tset</c> index.
    /// </summary>
    /// <remarks>
    /// Suggesters are merged in priority order: Chinese prefix, full pinyin,
    /// pinyin first letters, then fuzzy pinyin. The fuzzy results are used only
    /// when the first three stages found nothing and the input is at most
    /// three characters long.
    /// </remarks>
    /// <param name="request">Request carrying the user input in <c>q</c>.</param>
    /// <returns>
    /// A response with <c>code</c> 1 and up to five items on success, or
    /// <c>code</c> 0 and a message on failure. An empty input returns the
    /// default response unchanged.
    /// </returns>
    public ExternalServiceResponse<KeywordsSuggestResponseDataEntity> GetKeywordsSuggest(ElasticKeywordsSuggestRequest request)
    {
        const int maxSuggestions = 5;
        const int lastExactStage = 3;
        const int maxFuzzyInputLength = 3;

        var result = new ExternalServiceResponse<KeywordsSuggestResponseDataEntity>();
        try
        {
            if (string.IsNullOrEmpty(request.q))
                return result;

            // The array has to be created with its element; allocating a
            // zero-length array and assigning index 0 throws on every call.
            var nodes = new[] { new Uri("http://127.0.0.1:9200") };
            var pool = new StaticConnectionPool(nodes);
            var settings = new ConnectionSettings(pool).DefaultIndex("suggest_tset");
            var client = new ElasticClient(settings);

            // Suggestion names, in merge priority order.
            string[] keys = new[] { "suggestText", "full_pinyin", "prefix_pinyin", "like_pinyin" };

            SearchDescriptor<object> search = new SearchDescriptor<object>();
            search
                // The source filter must include the field read back in
                // AddSuggestItems, otherwise the option sources come back empty.
                .Source(r => r.Includes(f => f.Fields("kwsuggest")))
                .Suggest(s => s
                    .Completion(keys[0], c => c.Field("kwsuggest.suggestText").Prefix(request.q).SkipDuplicates(true))
                    .Completion(keys[1], c => c.Field("kwsuggest.full_pinyin").Prefix(request.q).SkipDuplicates(true))
                    .Completion(keys[2], c => c.Field("kwsuggest.prefix_pinyin").Prefix(request.q).SkipDuplicates(true))
                    .Completion(keys[3], c => c.Field("kwsuggest.like_pinyin").Prefix(request.q).SkipDuplicates(true)
                        .Fuzzy(m => m.Fuzziness(Fuzziness.EditDistance(1)))));

            var esResult = client.Search<dynamic>(s => search);

            // The client returns a response object even for failed calls, so
            // validity has to be checked explicitly.
            if (esResult != null && esResult.IsValid)
            {
                result.code = 1;
                result.data = new KeywordsSuggestResponseDataEntity();

                if (esResult.Suggest != null)
                {
                    result.data.items = new List<KeywordsSuggestResponseItemEntity>();
                    int index = 1;
                    foreach (var key in keys)
                    {
                        AddSuggestItems(esResult.Suggest, key, result.data.items);

                        // Enough suggestions gathered: trim to the cap and stop.
                        if (result.data.items.Count >= maxSuggestions)
                        {
                            result.data.items = result.data.items.Take(maxSuggestions).ToList();
                            break;
                        }

                        if (index == lastExactStage)
                        {
                            // Exact stages produced something; fuzzy matches are
                            // less precise and are not mixed in.
                            if (result.data.items.Count > 0)
                            {
                                break;
                            }
                            // Nothing matched and the input is long: fuzzy
                            // matching is too loose to be useful.
                            if (request.q.Length > maxFuzzyInputLength)
                            {
                                break;
                            }
                        }

                        index++;
                    }
                    result.data.num = result.data.items.Count;
                }
                else
                {
                    result.data.num = 0;
                }
            }
            else
            {
                result.code = 0;
                result.msg = "查詢失敗";
            }
        }
        catch (Exception ex)
        {
            result.code = 0;
            result.msg = ex.Message;
        }
        return result;
    }

    /// <summary>
    /// Appends the options of the suggestion named <paramref name="key"/> to
    /// <paramref name="items"/>, skipping keywords that are already present.
    /// </summary>
    /// <param name="suggest">Suggest section of the search response.</param>
    /// <param name="key">Name of the suggestion to read.</param>
    /// <param name="items">List the new keywords are appended to.</param>
    private void AddSuggestItems(ISuggestDictionary<dynamic> suggest, string key, List<KeywordsSuggestResponseItemEntity> items)
    {
        // A missing key or an empty entry array means nothing to add.
        if (suggest == null || !suggest.ContainsKey(key))
        {
            return;
        }

        var entries = suggest[key];
        if (entries == null || entries.Length == 0 || entries[0].Options == null)
        {
            return;
        }

        foreach (var hit in entries[0].Options)
        {
            if (hit.Source == null)
            {
                continue;
            }

            string kwSource = hit.Source["kwsuggest"];
            if (string.IsNullOrEmpty(kwSource))
            {
                continue;
            }

            // Do not add a keyword that is already in the list.
            if (items.Any(m => m.kw == kwSource))
            {
                continue;
            }

            items.Add(new KeywordsSuggestResponseItemEntity()
            {
                kw = kwSource
            });
        }
    }
}