Hot Updates
In the previous section, 《IK分詞器配置文件講解以及自定義詞庫》 (IK analyzer config files and custom dictionaries), new words were added to the es extension dictionary by hand. That approach is painful:
(1) every time a word is added, es has to be restarted before the word takes effect, which is a hassle
(2) es is distributed and may run hundreds of nodes; you cannot go to every single node and edit the file by hand
What we want instead: without stopping es, add new words in some external place and have es hot-load them immediately.
Hot-update approaches
(1) Modify the IK analyzer source code so that it automatically reloads new words from MySQL at a fixed interval
(2) Use the hot-update mechanism the IK analyzer supports natively: deploy a web server that exposes an HTTP endpoint and signals dictionary changes through the Last-Modified and ETag response headers (a reference config for this approach is sketched below)
This article takes the first approach. The second one is the route the IK GitHub community does not recommend, considering it not very stable.
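For reference, approach (2) needs no source change: in IKAnalyzer.cfg.xml the remote dictionary entries point at an HTTP endpoint that returns one word per line, and IK polls it, using the Last-Modified and ETag response headers to decide whether anything changed. A minimal sketch (the URLs are placeholders for whatever web server you deploy):

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
    <comment>IK Analyzer extension configuration</comment>
    <!-- remote extension dictionary: one word per line, served over HTTP -->
    <entry key="remote_ext_dict">http://your-server/hot_words.txt</entry>
    <!-- remote extension stop-word dictionary -->
    <entry key="remote_ext_stopwords">http://your-server/hot_stopwords.txt</entry>
</properties>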
1. Download the source code
https://github.com/medcl/elasticsearch-analysis-ik/tree/v5.2.0
The IK analyzer is a standard Java Maven project; import it into Eclipse and you can browse the source directly.
2. Modify the source code
In the initial() method of the Dictionary singleton class, create our custom thread and start it:
/**
 * Dictionary initialization. Because IK Analyzer initializes its dictionaries through
 * static methods of the Dictionary class, the dictionaries are only loaded when the
 * Dictionary class is actually used, which prolongs the first analysis request.
 * This method provides a way to initialize the dictionaries while the application is loading.
 *
 * @return Dictionary
 */
public static synchronized Dictionary initial(Configuration cfg) {
    if (singleton == null) {
        synchronized (Dictionary.class) {
            if (singleton == null) {
                singleton = new Dictionary(cfg);
                singleton.loadMainDict();
                singleton.loadSurnameDict();
                singleton.loadQuantifierDict();
                singleton.loadSuffixDict();
                singleton.loadPrepDict();
                singleton.loadStopWordDict();

                // start our custom hot-reload thread
                new Thread(new HotDictReloadThread()).start();

                if (cfg.isEnableRemoteDict()) {
                    // create the monitor threads for the remote dictionaries
                    for (String location : singleton.getRemoteExtDictionarys()) {
                        // 10 is the initial delay (adjustable); 60 is the interval, in seconds
                        pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
                    }
                    for (String location : singleton.getRemoteExtStopWordDictionarys()) {
                        pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
                    }
                }

                return singleton;
            }
        }
    }
    return singleton;
}

The HotDictReloadThread class is simply an endless loop that keeps calling Dictionary.getSingleton().reLoadMainDict() to reload the dictionaries:
public class HotDictReloadThread implements Runnable {

    private static final Logger logger = ESLoggerFactory.getLogger(HotDictReloadThread.class.getName());

    @Override
    public void run() {
        while (true) {
            logger.info("[==========]reload hot dict from mysql......");
            // the pause between rounds comes from the Thread.sleep(jdbc.reload.interval)
            // inside loadMySQLExtDict(), which reLoadMainDict() ends up calling
            Dictionary.getSingleton().reLoadMainDict();
        }
    }
}

Dictionary class: load the hot words via this.loadMySQLExtDict(), called at the end of loadMainDict():
/**
 * Load the main dictionary and the extension dictionaries
 */
private void loadMainDict() {
    // create the main dictionary instance
    _MainDict = new DictSegment((char) 0);

    // read the main dictionary file
    Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_MAIN);
    InputStream is = null;
    try {
        is = new FileInputStream(file.toFile());
    } catch (FileNotFoundException e) {
        logger.error(e.getMessage(), e);
    }

    try {
        BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
        String theWord = null;
        do {
            theWord = br.readLine();
            if (theWord != null && !"".equals(theWord.trim())) {
                _MainDict.fillSegment(theWord.trim().toCharArray());
            }
        } while (theWord != null);
    } catch (IOException e) {
        logger.error("ik-analyzer", e);
    } finally {
        try {
            if (is != null) {
                is.close();
                is = null;
            }
        } catch (IOException e) {
            logger.error("ik-analyzer", e);
        }
    }

    // load the extension dictionaries
    this.loadExtDict();
    // load the remote custom dictionaries
    this.loadRemoteExtDict();
    // load the hot words from mysql
    this.loadMySQLExtDict();
}

/**
 * Load the hot-update dictionary from mysql
 */
private void loadMySQLExtDict() {
    Connection conn = null;
    Statement stmt = null;
    ResultSet rs = null;

    try {
        Path file = PathUtils.get(getDictRoot(), "jdbc-reload.properties");
        prop.load(new FileInputStream(file.toFile()));

        logger.info("[==========]jdbc-reload.properties");
        for (Object key : prop.keySet()) {
            logger.info("[==========]" + key + "=" + prop.getProperty(String.valueOf(key)));
        }

        logger.info("[==========]query hot dict from mysql, " + prop.getProperty("jdbc.reload.sql") + "......");

        conn = DriverManager.getConnection(
                prop.getProperty("jdbc.url"),
                prop.getProperty("jdbc.user"),
                prop.getProperty("jdbc.password"));
        stmt = conn.createStatement();
        rs = stmt.executeQuery(prop.getProperty("jdbc.reload.sql"));

        while (rs.next()) {
            String theWord = rs.getString("word");
            logger.info("[==========]hot word from mysql: " + theWord);
            _MainDict.fillSegment(theWord.trim().toCharArray());
        }

        // sleep before the next reload round triggered by HotDictReloadThread
        Thread.sleep(Integer.valueOf(String.valueOf(prop.get("jdbc.reload.interval"))));
    } catch (Exception e) {
        logger.error("error", e);
    } finally {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                logger.error("error", e);
            }
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                logger.error("error", e);
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                logger.error("error", e);
            }
        }
    }
}
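The method above reads jdbc-reload.properties into a prop field and relies on the MySQL JDBC driver being registered; neither exists in the stock Dictionary class, so additions along these lines are also needed (a minimal sketch; the driver class name assumes the mysql-connector-java 5.x JAR that gets copied in step 4):

// assumed additions to org.wltea.analyzer.dic.Dictionary (not in the original source)
private static Properties prop = new Properties();   // needs: import java.util.Properties;

static {
    try {
        // register the MySQL JDBC driver explicitly; the plugin classloader may not pick it up automatically
        Class.forName("com.mysql.jdbc.Driver");
    } catch (ClassNotFoundException e) {
        logger.error("error", e);
    }
}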
Dictionary class: load the stop words via this.loadMySQLStopwordDict(), hooked into loadStopWordDict() the same way loadMySQLExtDict() is hooked into loadMainDict() above:
/**
 * Load the stop words from mysql
 */
private void loadMySQLStopwordDict() {
    Connection conn = null;
    Statement stmt = null;
    ResultSet rs = null;

    try {
        Path file = PathUtils.get(getDictRoot(), "jdbc-reload.properties");
        prop.load(new FileInputStream(file.toFile()));

        logger.info("[==========]jdbc-reload.properties");
        for (Object key : prop.keySet()) {
            logger.info("[==========]" + key + "=" + prop.getProperty(String.valueOf(key)));
        }

        logger.info("[==========]query hot stopword dict from mysql, " + prop.getProperty("jdbc.reload.stopword.sql") + "......");

        conn = DriverManager.getConnection(
                prop.getProperty("jdbc.url"),
                prop.getProperty("jdbc.user"),
                prop.getProperty("jdbc.password"));
        stmt = conn.createStatement();
        rs = stmt.executeQuery(prop.getProperty("jdbc.reload.stopword.sql"));

        while (rs.next()) {
            String theWord = rs.getString("word");
            logger.info("[==========]hot stopword from mysql: " + theWord);
            _StopWords.fillSegment(theWord.trim().toCharArray());
        }

        Thread.sleep(Integer.valueOf(String.valueOf(prop.get("jdbc.reload.interval"))));
    } catch (Exception e) {
        logger.error("error", e);
    } finally {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                logger.error("error", e);
            }
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                logger.error("error", e);
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                logger.error("error", e);
            }
        }
    }
}

Configuration: jdbc-reload.properties, placed in the IK config directory (the same dict root the code above reads from):
jdbc.url=jdbc:mysql://localhost:3306/test?serverTimezone=GMT
jdbc.user=root
jdbc.password=root
jdbc.reload.sql=select word from hot_words
jdbc.reload.stopword.sql=select stopword as word from hot_stopwords
jdbc.reload.interval=1000
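Before rebuilding the plugin, it can be worth checking the JDBC settings and the reload SQL outside of es. A throwaway test like the following (a sketch; it assumes jdbc-reload.properties is in the working directory and the mysql-connector JAR is on the classpath) simply prints whatever the hot-word query returns:

import java.io.FileInputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Properties;

public class JdbcReloadCheck {

    public static void main(String[] args) throws Exception {
        // load the same properties file the plugin will read
        Properties prop = new Properties();
        prop.load(new FileInputStream("jdbc-reload.properties"));

        try (Connection conn = DriverManager.getConnection(
                prop.getProperty("jdbc.url"),
                prop.getProperty("jdbc.user"),
                prop.getProperty("jdbc.password"));
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(prop.getProperty("jdbc.reload.sql"))) {

            // every row should yield a non-empty "word" column, which is what fillSegment() expects
            while (rs.next()) {
                System.out.println("hot word: " + rs.getString("word"));
            }
        }
    }
}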
3. Package the code with mvn package; the plugin zip is produced at:
target\releases\elasticsearch-analysis-ik-5.2.0.zip

4. Unzip the rebuilt IK package into the es plugins directory
Copy the MySQL JDBC driver JAR into the IK plugin directory so the plugin can connect to MySQL.

5. Restart es

6. Add hot words and stop words in MySQL (for example, insert the phrase 一人飲酒醉 into the hot_words table so the verification below picks it up)

7. Verify the tokenization in Kibana
GET /my_index/_analyze
{
  "text": "一人飲酒醉",
  "analyzer": "ik_max_word"
}

The full phrase comes back as a single token, confirming that the hot word loaded from MySQL took effect:

{
  "tokens": [
    { "token": "一人飲酒醉", "start_offset": 0, "end_offset": 5, "type": "CN_WORD",   "position": 0 },
    { "token": "一人",       "start_offset": 0, "end_offset": 2, "type": "CN_WORD",   "position": 1 },
    { "token": "一",         "start_offset": 0, "end_offset": 1, "type": "TYPE_CNUM", "position": 2 },
    { "token": "人",         "start_offset": 1, "end_offset": 2, "type": "COUNT",     "position": 3 },
    { "token": "飲酒",       "start_offset": 2, "end_offset": 4, "type": "CN_WORD",   "position": 4 },
    { "token": "飲",         "start_offset": 2, "end_offset": 3, "type": "CN_WORD",   "position": 5 },
    { "token": "酒醉",       "start_offset": 3, "end_offset": 5, "type": "CN_WORD",   "position": 6 },
    { "token": "酒",         "start_offset": 3, "end_offset": 4, "type": "CN_WORD",   "position": 7 },
    { "token": "醉",         "start_offset": 4, "end_offset": 5, "type": "CN_WORD",   "position": 8 }
  ]
}