詳見官網:elasticsearch 下載及安裝教程。
使用 pip install elasticsearch 安裝 Python 客戶端,注意版本的對應(詳見 elasticsearch python 手冊)。
下載 analysis-ik 分詞插件,並將其複製到 elasticsearch-7.1.1\plugins 目錄下。
使用 Elasticsearch.indices.create() 建立名爲 indexing_test 的索引:
from elasticsearch import Elasticsearch
es = Elasticsearch()
index = 'indexing_test'

# Index definition (settings + field mappings) sent when the index is created.
#
# The legacy per-field parameters "index": "analyzed" and "include_in_all"
# are intentionally absent: Elasticsearch 7.x (this walkthrough installs
# 7.1.1) rejects them with a 400 mapping error, and because 400 is ignored
# below the index would silently not be created.
mappings = {
    "settings": {
        "index": {
            "number_of_shards": 5,
            "number_of_replicas": 0,
        },
        "analysis": {
            "analyzer": {
                # Custom analyzer named "ik" built on the ik_max_word tokenizer.
                "ik": {
                    "tokenizer": "ik_max_word",
                },
            },
        },
    },
    "mappings": {
        "properties": {
            "sub": {
                "type": "text",
                "analyzer": "ik_max_word",
            },
            "verb": {
                "type": "text",
                "analyzer": "ik_max_word",
            },
            "obj": {
                "type": "text",
                "analyzer": "ik_max_word",
            },
        },
    },
}

# Create the index named indexing_test. HTTP 400/404 are ignored so the call
# does not raise; the error body is returned instead, so report it rather
# than dropping it on the floor.
response = es.indices.create(index=index, ignore=[400, 404], body=mappings)
if "error" in response:
    print(response["error"])
本文用的數據爲 csv 格式。使用 helpers.bulk() 批量上傳數據:
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import csv
es = Elasticsearch()
# Index the documents are written to; this is the same name the search code
# queries ('indexing_test').
index = 'indexing_test'

# Read the CSV file. The context manager closes the file once the upload is
# finished; newline='' is what the csv module expects for files it parses.
with open('data/標引.csv', encoding='utf-8', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    # Lazily build one bulk action per CSV row (columns 0-3 become the four
    # document fields). The generator is consumed by helpers.bulk below, so
    # the call must stay inside the `with` block while the file is open.
    action = (
        {
            "_index": index,
            "_source": {
                "標題": row[0],
                "摘要": row[1],
                "關鍵詞": row[2],
                "標引詞": row[3],
            },
        }
        for row in csv_reader
    )
    # Bulk-import the data. Every action names its own "_index", so no
    # separate default index is passed here.
    helpers.bulk(es, action, raise_on_error=True)
在特定字段中匹配輸入的詞並返回檢索結果。
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import sys
def get_results(word):
    """Run the two comparison searches for *word* against 'indexing_test'.

    Args:
        word: The text to search for.

    Returns:
        A ``(res_left, res_right)`` tuple of raw search responses:
        ``res_left`` is a multi_match over the 標題 / 摘要 / 關鍵詞 fields,
        ``res_right`` is a match on the 標引詞 field. Both requests ask for
        highlighting on the fields they search.
    """
    es = Elasticsearch()
    index = 'indexing_test'
    # Maximum number of hits requested per search. A single value is used in
    # the request body only, so there is no conflicting size elsewhere.
    # NOTE(review): 30 is the value that was passed as the request parameter;
    # confirm that is the intended page size.
    page_size = 30

    full_text_fields = ["標題", "摘要", "關鍵詞"]
    query = {
        "size": page_size,
        "query": {
            "multi_match": {
                "query": word,
                "fields": full_text_fields,
            },
        },
        "highlight": {
            "fields": {field: {} for field in full_text_fields},
        },
    }
    query1 = {
        "size": page_size,
        "query": {
            "match": {
                "標引詞": word,
            },
        },
        "highlight": {
            "fields": {
                "標引詞": {},
            },
        },
    }
    res_left = es.search(index=index, body=query)
    res_right = es.search(index=index, body=query1)
    return res_left, res_right
if __name__ == "__main__":
    # Command-line smoke test: search for the word given as the first
    # argument and print the total hit count reported by each query.
    # (No main() exists in this module, so calling it here would raise
    # NameError.)
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " <word>")
    res_left, res_right = get_results(sys.argv[1])
    print(res_left['hits']['total']['value'], res_right['hits']['total']['value'])
界面如圖所示:
from PyQt5 import QtCore,QtGui,QtWidgets
import sys
from query_scroll_scan import get_results
class MainUi(QtWidgets.QMainWindow):
    """Main window with a search box and two side-by-side result panes.

    The left pane shows the first response returned by ``get_results`` and
    the right pane shows the second one.
    """

    def __init__(self):
        super().__init__()
        self.init_ui()

    def init_ui(self):
        """Build the widgets, lay them out and connect the search signals."""
        self.resize(960, 700)
        self.setWindowTitle('檢索測試')

        self.main_widget = QtWidgets.QWidget()  # central widget of the window
        self.main_layout = QtWidgets.QGridLayout()  # grid layout of the central widget
        self.main_widget.setLayout(self.main_layout)

        self.right_bar_widget_search_input = QtWidgets.QLineEdit()
        self.right_bar_widget_search_input.setPlaceholderText("輸入關鍵詞,點擊按鈕/回車進行搜索")
        self.search_button = QtWidgets.QPushButton("搜索")
        # Both clicking the button and pressing Return trigger a search.
        self.search_button.clicked.connect(self.get_words)
        self.right_bar_widget_search_input.returnPressed.connect(self.get_words)

        self.up_widget = QtWidgets.QWidget()  # container holding all controls
        self.up_widget.setObjectName('up_widget')
        self.up_layout = QtWidgets.QGridLayout()  # grid layout of the container
        self.up_widget.setLayout(self.up_layout)

        # Row 0: search input and button.
        self.up_layout.addWidget(self.right_bar_widget_search_input, 0, 0)
        self.up_layout.addWidget(self.search_button, 0, 1)

        # Row 1: captions of the two result panes.
        self.left_label = QtWidgets.QLabel("全文檢索結果")
        self.right_label = QtWidgets.QLabel("主題標引後檢索結果")
        self.up_layout.addWidget(self.left_label, 1, 0)
        self.up_layout.addWidget(self.right_label, 1, 1)

        # Row 3: the two result panes.
        self.left_text = QtWidgets.QTextEdit()
        self.up_layout.addWidget(self.left_text, 3, 0)
        self.right_text = QtWidgets.QTextEdit()
        self.up_layout.addWidget(self.right_text, 3, 1)

        self.main_layout.addWidget(self.up_widget, 0, 0, 1, 1)
        self.setCentralWidget(self.main_widget)

    def get_words(self):
        """Search for the text in the input box and refresh both panes."""
        words = self.right_bar_widget_search_input.text()
        res_left, res_right = get_results(words)
        self._show_results(self.left_text, res_left)
        self._show_results(self.right_text, res_right)

    @staticmethod
    def _show_results(text_edit, response):
        """Replace the content of *text_edit* with the hits in *response*.

        Writes the total hit count first, then one block per hit with its
        標題, 摘要, 關鍵詞 and 標引詞 source fields and its score.
        """
        total = response['hits']['total']['value']
        text_edit.setText("")
        text_edit.append("<font size='3'>共檢索到<em> " + str(total) + "</em> 條結果<br/></font>")
        for hit in response['hits']['hits']:
            source = hit["_source"]
            text_edit.append(
                "<div>"
                "<font color='red' size='3'>標題:" + source["標題"] + "<br/></font>"
                "<font size='3'>摘要:" + source["摘要"] + "<br/></font>"
                "<font size='3'>關鍵詞:" + source["關鍵詞"] + "<br/></font>"
                "<font size='3'>標引詞:" + source["標引詞"] + "<br/></font>"
                "<font color='black' size='3'>相關性:" + str(hit["_score"]) + "<br/></font>"
                "</div>"
            )
def main():
    """Create the Qt application, show the main window and run the event loop.

    The process exits with the status returned by the event loop.
    """
    application = QtWidgets.QApplication(sys.argv)
    window = MainUi()
    window.show()
    exit_status = application.exec_()
    sys.exit(exit_status)


if __name__ == '__main__':
    main()
複製代碼