solr第四篇（solr整合數據源）

時間 2019-11-11

標籤 solr 第四整合數據简体版

原文原文鏈接

#編輯/usr/local/tomcat/solr_home/new_core/conf下的solrconfig.xml
cd /usr/local/tomcat/solr_home/new_core/conf
vim solrconfig.xml
#首先配置jar位置
<lib dir="/usr/local/tomcat/tomcat8/webapps/solr/WEB-INF/lib" regex=".*\.jar" />
#在name="/select"  class="solr.SearchHandler"之上添加以下代碼
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
        <lst name="defaults">
            <str name="config">data-config.xml</str>
        </lst>
</requestHandler>

#完成如上操作以後，在和solrconfig.xml相同的目錄下添加文件data-config.xml
#在當前目錄下建立文件（注意要用touch建立文件，mkdir建立的是目錄）
touch data-config.xml
#編輯data-config.xml
vim data-config.xml
#添加以下
<?xml version="1.0" encoding="UTF-8"?>  
<dataConfig>  
	<!-- 更改爲自己的數據源地址 -->
	<dataSource name="source1" type="JdbcDataSource" driver="com.mysql.jdbc.Driver" url="jdbc:mysql://localhost:3306/solrdata" user="root" password="root" batchSize="-1" />  
　　	<document>  
	<!-- entity對應表；deltaImportQuery 與 deltaQuery 這兩個屬性是配置增量更新需要使用的 -->
	<entity name="goods" pk="id"  dataSource="source1"   
		query="select * from  goods"  
 		deltaImportQuery="select * from goods where id='${dih.delta.id}'"  
		deltaQuery="select id from goods where updatetime> '${dataimporter.last_index_time}'">  
		<!-- field對應列字段 -->
		<field column="id" name="id"/> 
		<field column="name" name="name"/>  
		<field column="number" name="number"/>  
		<field column="updatetime" name="updatetime"/>  
　　　  	</entity> 
　　	</document>  
</dataConfig>
#其中deltaImportQuery，deltaQuery設置的內容是自動更新mysql數據到solr引擎中來所需要的。如果有多個表，再並列添加一個entity標籤

#編輯managed-schema
vim managed-schema
#添加剛纔數據源配置裏面的field字段
<field name="id" type="int" indexed="true" stored="true" required="true"  multiValued="false"/>
<field name="username" type="string" indexed="true" stored="true" />
#type="text_ik"表示使用自定義分詞，可以進行中文分詞
<field name="nickname" type="text_ik" indexed="true" stored="true" />
<field name="password" type="string" indexed="true" stored="true" />
<field name="regTime" type="date" indexed="true" stored="true" />
#我的配置如下圖

#配置自動更新數據源
#在solr_home文件夾裏面建立conf文件夾，在conf裏面建立dataimport.properties文件
cd /usr/local/tomcat/solr_home
mkdir conf
cd conf
touch dataimport.properties
#編輯dataimport.properties 添加以下內容
#內容開始---------------------------
#################################################

#                                               #

#       dataimport scheduler properties         #

#                                               #

#################################################

#  to sync or not to sync
#  1 - active; anything else - inactive
# 這裏的配置不用修改
syncEnabled=1

#  which cores to schedule
#  in a multi-core environment you can decide which cores you want syncronized
#  leave empty or comment it out if using single-core deployment

#  修改爲你所使用的core，如果自定義了多個core，用英文逗號隔開
syncCores=new_core
#syncCores=new_core,new_core2,new_core3,....

#  solr server name or IP address
#  [defaults to localhost if empty]

# 這個通常都是localhost不會變
server=localhost

#  solr server port

#  [defaults to 80 if empty]

#  安裝solr的tomcat端口，如果你使用的是默認的端口，就不用改了，否則改爲自己的端口就行了
port=8080


#  application name/context

#  [defaults to current ServletContextListener's context (app) name]

#  這裏默認不改

webapp=solr

#  URL params [mandatory]

#  remainder of URL

#  這裏改爲下面的形式，solr同步數據時請求的連接

params=/dataimport?command=delta-import&clean=false&commit=true

#  schedule interval

#  number of minutes between two runs

#  [defaults to 30 if empty]

#這裏是設置定時任務的，單位是分鐘，也就是多長時間你檢測一次數據同步，根據項目需求修改

#  開始測試的時候爲了方便看到效果，時間可以設置短一點

interval=1


#  重作索引的時間間隔，單位分鐘，默認7200，即5天;

#  爲空,爲0,或者註釋掉:表示永不重作索引

reBuildIndexInterval=7200


#  重作索引的參數

reBuildIndexParams=/select?qt=/dataimport&command=full-import&clean=true&commit=true


#  重作索引時間間隔的計時開始時間，第一次真正執行的時間=reBuildIndexBeginTime+reBuildIndexInterval*60*1000；

#  兩種格式：2012-04-11 03:10:00 或者  03:10:00，後一種會自動補全日期部分爲服務啓動時的日期

reBuildIndexBeginTime=03:10:00

#內容結束------------------------------

在/usr/local/tomcat/tomcat8/webapps/solr/WEB-INF/lib下添加自動增量更新Jar包
連接：https://pan.baidu.com/s/1KPf9qRPn3BePVLzdTFEvyQ
提取碼：wyn4
這個jar只適合solr5.5.x版本
我的數據源配置的庫和表

#執行下面這條SQL，用於自動更新的判別條件（因爲我這個表沒有設置updatetime，所以使用regTime代替），意思就是結合上面data-config.xml中的
deltaQuery="select id from user where regTime> '${dataimporter.last_index_time}'"來做更新
alter table user modify regTime TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP;

好了測試一下
第一次先手動導入一下數據

q: 查詢字符串（必須的）。*:*表示查詢全部；keyword:東看 表示按關鍵字「東看」查詢
fq: filter query 過濾查詢。使用Filter Query可以充分利用Filter Query Cache，提升檢索性能。作用：在q查詢符合的結果中同時也符合fq查詢的（類似求交集），例如：q=mm&fq=date_time:[20081001 TO 20091031]，找關鍵字mm，而且date_time是20081001到20091031之間的。
sort: 排序。格式以下：字段名 排序方式；如advertiserId desc 表示按id字段降序排列查詢結果。
start,rows:表示查回結果從第幾條數據開始顯示，共顯示多少條。
fl: field list。指定查詢結果返回哪些字段。多個時以空格「 」或逗號「,」分隔。不指定時，默認全返回。
df: default field 默認的查詢字段，一般默認指定。
Raw Query Parameters:
wt: write type。指定查詢輸出結果格式，我們常用的有json格式與xml格式。在solrconfig.xml中定義了查詢輸出格式：xml、json、python、ruby、php、phps、custom。
indent: 返回的結果是否縮進，默認關閉，用 indent=true | on 開啓，一般調試json,php,phps,ruby輸出才有必要用這個參數。
debugQuery: 設置返回結果是否顯示Debug信息。
dismax:
edismax:
hl: high light 高亮。hl=true表示啓用高亮
hl.fl ： 用空格或逗號隔開的字段列表（指定高亮的字段）。要啓用某個字段的highlight功能，就得保證該字段在schema中是stored。如果該參數未被給出，那麼就會高亮默認字段：standard handler會用df參數，dismax字段用qf參數。你可以使用星號去方便地高亮全部字段。如果你使用了通配符，那麼要考慮啓用 hl.requireFieldMatch選項。
hl.simple.pre：
hl.requireFieldMatch: 如果置爲true，只有該字段的查詢結果不爲空才會被高亮。它的默認值是false，意味着它可能匹配某個字段卻高亮一個不同的字段。如果hl.fl使用了通配符，那麼就要啓用該參數。儘管如此，如果你的查詢是all字段（可能是使用 copy-field 指令），那麼還是把它設爲false，這樣搜索結果能表明哪個字段的查詢文本未被找到。
hl.usePhraseHighlighter：如果一個查詢中含有短語（引號框起來的），那麼會保證一定要完全匹配短語的才會被高亮。
hl.highlightMultiTerm：若是使用通配符和模糊搜索，那麼會確保與通配符匹配的term會高亮。默認爲false，同時hl.usePhraseHighlighter要爲true。
facet:分組統計，在搜索關鍵字的同時,可以按照Facet的字段進行分組並統計。
facet.query：Facet Query利用類似於filter query的語法提供了更爲靈活的Facet。通過facet.query參數，可以對任意字段進行篩選。
facet.field：需要分組統計的字段，可以多個。
facet.prefix： 表示Facet字段值的前綴。比如facet.field=cpu&facet.prefix=Intel，那麼對cpu字段進行Facet查詢，返回的cpu都是以Intel開頭的，AMD開頭的cpu型號將不會被統計在內。
spatial:
spellcheck: 拼寫檢查。

進行查詢
當前查詢了全部數據，並且進行了0-10分頁

進行text_ik分詞並進行檢索。之前在managed-schema中配置了<field name="nickname" type="text_ik" indexed="true" stored="true" /> 
下面我們使用這個字段進行檢索
可以看到數據只有5條

爲了證明進行了分詞而不是模糊查詢
我進行如下搜索
可以看到如下結果，還是5條數據，並且有只爲涼城的結果也展現出來了

下面測試增量更新。之前我們設置了自動更新時間爲1分鐘做測試
當前user表數據只有22條
如今新增一條

可以看到數據已經更新爲23條了
等待一分鐘後再去查詢全部，發現已經更新了

如今測試多表多字段聯合檢索查詢
爲了方便我在虛擬機再添加一個表 只有三個字段的

已經建立成功了

添加了4條數據

去更新data-config.xml 和 managed-schema配置
cd /usr/local/tomcat/solr_home/new_core/conf
vim data-config.xml
#添加entity以下
vim managed-schema
#添加字段以下圖

product表也配置增量更新

重啓tomcat solr，再次導入數據，兩個表總共27條數據全部查詢出來了

進行多字段查詢 結果以下

完成
因爲solr整合到項目中有很多方式，這裏就不再寫了，可以百度相關博客

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。