【搜索引擎】Solr全文檢索近實時查詢優化

設置多個搜索建議查找算法

<!-- Suggest component declaring two suggesters over the same source field (suggest_name). -->
<searchComponent name="suggest" class="solr.SuggestComponent">
    <!-- Suggester 1: AnalyzingLookupFactory lookup over documents of the index. -->
    <lst name="suggester">
      <str name="name">AnalyzingSuggester</str>
      <str name="lookupImpl">AnalyzingLookupFactory</str>      
      <str name="dictionaryImpl">DocumentDictionaryFactory</str>
      <str name="field">suggest_name</str>
      <!-- NOTE(review): weightField is the same field as "field"; the Solr suggester docs describe weightField as a numeric weight. Confirm this is intended. -->
      <str name="weightField">suggest_name</str>
      <!-- The gid field is returned as the payload of each suggestion. -->
      <str name="payloadField">gid</str>
      <str name="suggestAnalyzerFieldType">text_suggest</str>
      <!-- Do not build at startup; build on commit instead. -->
      <str name="buildOnStartup">false</str>
      <str name="buildOnCommit">true</str>
    </lst>
    
    <!-- Suggester 2: AnalyzingInfixLookupFactory lookup, same field/payload settings, highlighting switched off. -->
    <lst name="suggester">
      <str name="name">AnalyzingInfixSuggester</str>
      <str name="lookupImpl">AnalyzingInfixLookupFactory</str>      
      <str name="dictionaryImpl">DocumentDictionaryFactory</str>
      <str name="field">suggest_name</str>
      <!-- NOTE(review): same weightField question as for the first suggester. -->
      <str name="weightField">suggest_name</str>
      <!-- Disable highlight markup in the returned suggestions. -->
      <str name="highlight">false</str>
      <str name="payloadField">gid</str>
      <str name="suggestAnalyzerFieldType">text_suggest</str>
      <str name="buildOnStartup">false</str>
      <str name="buildOnCommit">true</str>
    </lst>
  </searchComponent>
  • 設置 AnalyzingLookupFactory 和 AnalyzingInfixLookupFactory 兩種查找算法。AnalyzingLookupFactory 會先分析傳入文本,並將分析後的形式添加到加權 FST 中,查找時再對輸入執行相同的分析;若查到的結果不足所需數量,再通過 AnalyzingInfixLookupFactory 對文本中任意詞的前綴進行匹配來補足。
  • 例如對於文本 "aaa bbb ccc",AnalyzingInfixLookupFactory 可通過 bbb 或者 ccc 搜索到,而 AnalyzingLookupFactory 必須從開頭的 a 開始匹配才能出結果。
  • AnalyzingInfixLookupFactory 可通過標籤 <str name="highlight">false</str> 關閉高亮提示。
  • <str name="buildOnCommit">true</str>:可通過此標籤設置在軟提交時才進行建議索引的構建。注意此設置只適合文檔提交不頻繁的場景。

    設置軟提交時間

  • 配置在自己 core 下 conf 文件夾中的 solrconfig.xml 文件:

vim solrconfig.xml
<!-- Soft-commit interval in milliseconds; taken from the solr.autoSoftCommit.maxTime system property, defaulting to -1 when the property is not set. -->
<autoSoftCommit>
      <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
    </autoSoftCommit>

可以將 maxTime 設置成你需要的時間,單位是毫秒(ms)。

  • 也可以在 Solr 啓動的時候通過命令行參數設置軟提交時間:
bin/solr start -force -Dsolr.autoSoftCommit.maxTime=10000

設置了軟提交時間後,當有新的文檔提交時,要達到設置的軟提交時間才會真正提交。

關閉停用詞過濾器

在建立索引的時候,fieldType 定義的字段可不加入停用詞過濾器;因為我們要檢索的詞很短,加入後會影響檢索結果。

<!-- Stop-word filter (word list: stopwords.txt, case-insensitive); the surrounding text recommends leaving this filter out of the fieldType used for suggestions. -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />

Java服務器調用suggest接口時,禁用suggest.build=true

若加入 suggest.build=true 這個條件,每輸入一個字符進行檢索時都會重新構建 suggest 索引,檢索效率會大大降低。改為通過上面的軟提交方式來達到近實時檢索。

Java服務器測試用例

/**
 * SolrJ tests for the Solr suggester plus basic query and indexing calls.
 *
 * <p>All tests talk to a live Solr instance; its address and the suggester
 * dictionary names are read through {@code PropertyReaderUtils}.
 *
 * <p>NOTE(review): the {@link HttpSolrClient} created in
 * {@link #getHttpSolrClient()} is never closed; consider adding an
 * {@code @After} teardown that calls {@code close()}.
 *
 * @author monkjavaer
 * @version V1.0
 * @date 2019/6/21 0021 22:42
 */
public class SolJTest {
    /**
     * Logger.
     */
    private static final Logger logger = LoggerFactory.getLogger(SolJTest.class);

    /**
     * Solr address, read from the {@code solr.address_url} property.
     */
    private static final String SOLR_URL = PropertyReaderUtils.getProValue("solr.address_url");

    /**
     * Suggester dictionary name for the AnalyzingLookupFactory suggester,
     * read from the {@code solr.AnalyzingSuggester} property.
     */
    public static final String SOLR_ANALYZINGSUGGESTER = PropertyReaderUtils.getProValue("solr.AnalyzingSuggester");

    /**
     * Suggester dictionary name for the AnalyzingInfixLookupFactory suggester,
     * read from the {@code solr.AnalyzingInfixSuggester} property.
     */
    public static final String SOLR_ANALYZINGINFIXSUGGESTER = PropertyReaderUtils.getProValue("solr.AnalyzingInfixSuggester");

    /**
     * Client used by every test; created in {@link #getHttpSolrClient()}.
     */
    private HttpSolrClient httpSolrClient;

    /**
     * Default socket connection timeout in ms.
     */
    private static final int DEFAULT_CONNECTION_TIMEOUT = 60000;

    /**
     * Creates and configures the {@link HttpSolrClient} before each test.
     *
     * <p>Failures are deliberately not caught here: if the client cannot be
     * built, the test should fail in setup with the real cause instead of
     * failing later with a {@code NullPointerException} on the client.
     */
    @Before
    public void getHttpSolrClient() {
        logger.info("start getHttpSolrClient......");
        if (httpSolrClient == null) {
            httpSolrClient = new HttpSolrClient.Builder(SOLR_URL).build();
            httpSolrClient.setConnectionTimeout(DEFAULT_CONNECTION_TIMEOUT);
            httpSolrClient.setDefaultMaxConnectionsPerHost(100);
            httpSolrClient.setMaxTotalConnections(100);
        }
        logger.info("end getHttpSolrClient......");
    }

    /**
     * Queries the {@code /suggest} handler with both dictionaries and logs the
     * term and payload of the first suggestion of the analyzing suggester.
     *
     * @throws IOException         if the request to Solr fails at the I/O level
     * @throws SolrServerException if Solr reports an error
     */
    @Test
    public void testSuggesterResponseObject() throws IOException, SolrServerException {
        SolrQuery query = new SolrQuery("*:*");
        query.set(CommonParams.QT, "/suggest");
        query.set("suggest.dictionary", SOLR_ANALYZINGSUGGESTER, SOLR_ANALYZINGINFIXSUGGESTER);
        query.set("suggest.q", "aoa");
        // This request explicitly asks Solr to build the suggester.
        query.set("suggest.build", true);

        QueryRequest request = new QueryRequest(query);
        QueryResponse queryResponse = request.process(httpSolrClient);
        SuggesterResponse response = queryResponse.getSuggesterResponse();
        Map<String, List<Suggestion>> suggestionsMap = response.getSuggestions();
        assertTrue(suggestionsMap.containsKey(SOLR_ANALYZINGSUGGESTER));

        List<Suggestion> mySuggester = suggestionsMap.get(SOLR_ANALYZINGSUGGESTER);
        // Fail with an assertion rather than an IndexOutOfBoundsException when nothing is suggested.
        assertTrue(!mySuggester.isEmpty());
        Suggestion first = mySuggester.get(0);
        logger.info(first.getTerm());
        logger.info(first.getPayload());
    }

    /**
     * Queries the {@code /suggest} handler with both dictionaries and checks
     * the first two suggested terms of the analyzing suggester.
     *
     * @throws Exception if the request to Solr fails
     */
    @Test
    public void testSuggesterResponseTerms() throws Exception {
        SolrQuery query = new SolrQuery("*:*");
        query.set(CommonParams.QT, "/suggest");
        query.set("suggest.dictionary", SOLR_ANALYZINGSUGGESTER, SOLR_ANALYZINGINFIXSUGGESTER);
        query.set("suggest.q", "aoa");
        // suggest.build is intentionally not set on this request.

        QueryRequest request = new QueryRequest(query);
        QueryResponse queryResponse = request.process(httpSolrClient);
        SuggesterResponse response = queryResponse.getSuggesterResponse();
        Map<String, List<String>> dictionary2suggestions = response.getSuggestedTerms();
        assertTrue(dictionary2suggestions.containsKey(SOLR_ANALYZINGSUGGESTER));

        List<String> mySuggester = dictionary2suggestions.get(SOLR_ANALYZINGSUGGESTER);
        // Two terms are inspected below; guard the indexes first.
        assertTrue(mySuggester.size() >= 2);
        assertEquals("aoa", mySuggester.get(0));
        assertEquals("aoa bob", mySuggester.get(1));
    }

    /**
     * Runs a simple query and converts the result documents to
     * {@code Adress} beans.
     *
     * @throws IOException         if the request to Solr fails at the I/O level
     * @throws SolrServerException if Solr reports an error
     */
    @Test
    public void testSolrQueryGetBeans() throws IOException, SolrServerException {
        final SolrQuery query = new SolrQuery();
        query.setQuery("Zhong Hua Yuan");
        // Fields to return.
        query.addField("id");
        query.addField("name");
        // Sort order.
        query.setSort("id", SolrQuery.ORDER.asc);

        // NOTE(review): this call names the "adress" collection while testAddIndex
        // does not pass a collection; confirm which form matches SOLR_URL.
        final QueryResponse response = httpSolrClient.query("adress", query);
        final List<Adress> adresses = response.getBeans(Adress.class);

        logger.info("Found {} documents", adresses.size());
        for (Adress adress : adresses) {
            logger.info("id:{} ; name:{}; ", adress.getId(), adress.getName());
        }
    }

    /**
     * Adds a list of beans to the index, commits, and logs the elapsed time.
     *
     * @throws IOException         if the request to Solr fails at the I/O level
     * @throws SolrServerException if Solr reports an error
     */
    @Test
    public void testAddIndex() throws IOException, SolrServerException {
        List<Adress> lists = new ArrayList<>();
        Adress adress = new Adress();
        adress.setId(1);
        adress.setName("aoa");
        lists.add(adress);

        // Add the beans to Solr in one batch and commit.
        long startNanos = System.nanoTime();
        httpSolrClient.addBeans(lists);
        httpSolrClient.commit();
        // Convert the nanosecond difference once, so the two timestamps are not truncated separately.
        long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
        logger.info("commit solr data cost {} ms.", elapsedMillis);
    }
}
相關文章
相關標籤/搜索