| elasticsearch | mysql |
| --- | --- |
| index(索引) | 數據庫 |
| type(類型) | 表 |
| document(文檔) | 行 |
| fields | 列 |
# Index initialisation
# number_of_shards   -> number of primary shards
# number_of_replicas -> number of replica copies
# (The explanations live here rather than inside the body so the body stays valid JSON.)
PUT lagou
{
  "settings": {
    "index": {
      "number_of_shards": 5,
      "number_of_replicas": 1
    }
  }
}

# Read settings: a single index, all indices, a comma-separated list of indices,
# and the form without any index name.
GET lagou/_settings
GET _all/_settings
GET .kibana,lagou/_settings
GET _settings

# Update settings
PUT lagou/_settings
{
  "number_of_replicas": 2
}

# Get index information
GET _all
GET lagou

# Create / save a document
# Option 1: PUT with an explicit document id
PUT lagou/job/1
{
  "title": "python爬蟲分佈式開發",
  "salary_min": 15000,
  "city": "北京",
  "company": {
    "name": "百度",
    "company_addr": "北京市軟件園"
  },
  "publish_date": "2019-06-15",
  "comments": 15
}

# Create a document
# Option 2: POST without an id in the path
POST lagou/job/
{
  "title": "python django 開發工程師",
  "salary_min": 30000,
  "city": "上海",
  "company": {
    "name": "美團科技",
    "company_addr": "北京市軟件園A區"
  },
  "publish_date": "2019-06-15",
  "comments": 120
}

# Fetch a document: whole document, then restricted with the _source parameter
GET lagou/job/1
GET lagou/job/1?_source=title
GET lagou/job/1?_source=title,city
GET lagou/job/1?_source

# Modify a document
# Option 1: PUT the complete document again under the same id
# (here salary_min and city differ from the version created above)
PUT lagou/job/1
{
  "title": "python爬蟲分佈式開發",
  "salary_min": 18000,
  "city": "廣州",
  "company": {
    "name": "百度",
    "company_addr": "北京市軟件園"
  },
  "publish_date": "2019-06-15",
  "comments": 15
}

# Option 2: modify a single field through the _update endpoint
POST lagou/job/1/_update
{
  "doc": {
    "comments": 20
  }
}

# Delete: one document, then a whole type, then the whole index
DELETE lagou/job/1
# NOTE(review): deleting a whole mapping type is believed to be rejected by
# Elasticsearch 2.0 and later - confirm against the target cluster version.
DELETE lagou/job
DELETE lagou
# Batch operations - data preparation
# The same document body is stored under two types (job1, job2) with ids 1 and 2.
POST lagou/job1/1
{
  "title": "python django 開發工程師",
  "salary_min": 30000,
  "city": "上海",
  "company": {
    "name": "美團科技",
    "company_addr": "北京市軟件園A區"
  },
  "publish_date": "2019-06-15",
  "comments": 120
}

POST lagou/job1/2
{
  "title": "python django 開發工程師",
  "salary_min": 30000,
  "city": "上海",
  "company": {
    "name": "美團科技",
    "company_addr": "北京市軟件園A區"
  },
  "publish_date": "2019-06-15",
  "comments": 120
}

POST lagou/job2/1
{
  "title": "python django 開發工程師",
  "salary_min": 30000,
  "city": "上海",
  "company": {
    "name": "美團科技",
    "company_addr": "北京市軟件園A區"
  },
  "publish_date": "2019-06-15",
  "comments": 120
}

POST lagou/job2/2
{
  "title": "python django 開發工程師",
  "salary_min": 30000,
  "city": "上海",
  "company": {
    "name": "美團科技",
    "company_addr": "北京市軟件園A區"
  },
  "publish_date": "2019-06-15",
  "comments": 120
}

# mget: fetch several documents in one request
# Fully qualified: index, type and id are given per document
GET _mget
{
  "docs": [
    { "_index": "lagou", "_type": "job1", "_id": 1 },
    { "_index": "lagou", "_type": "job2", "_id": 2 }
  ]
}

# Index taken from the URL; type and id per document
GET lagou/_mget
{
  "docs": [
    { "_type": "job1", "_id": 1 },
    { "_type": "job2", "_id": 2 }
  ]
}

# Index and type taken from the URL; only ids per document
GET lagou/job1/_mget
{
  "docs": [
    { "_id": 1 },
    { "_id": 2 }
  ]
}

# Shorthand for the request above: a plain list of ids
GET lagou/job1/_mget
{
  "ids": [1, 2]
}

# bulk: index / create / delete / update
# Bulk bodies are newline-delimited JSON: every action object and every source
# object must stay on a single line of its own, so do not pretty-print them.

# index: an action line followed by the document source line
POST _bulk
{"index":{"_index":"lagou","_type":"job1","_id":"3"}}
{"title": "python django 開發工程師","salary_min":30000,"city":"上海","company":{"name":"美團科技","company_addr":"北京市軟件園A區"},"publish_date":"2019-06-15","comments":120}
{"index":{"_index":"lagou","_type":"job2","_id":"3"}}
{"title": "python django 開發工程師","salary_min":30000,"city":"上海","company":{"name":"美團科技","company_addr":"北京市軟件園A區"},"publish_date":"2019-06-15","comments":120}

# create: same shape as index
# NOTE(review): job1/3 was already written by the index action above, so this
# create is expected to be reported as a conflict when run in sequence - confirm.
POST _bulk
{"create":{"_index":"lagou","_type":"job1","_id":"3"}}
{"title": "python django 開發工程師","salary_min":30000,"city":"上海","company":{"name":"美團科技","company_addr":"北京市軟件園A區"},"publish_date":"2019-06-15","comments":120}

# delete: action line only, no source line
POST _bulk
{"delete":{"_index":"lagou","_type":"job1","_id":"3"}}

# update: the source line wraps the changed fields in "doc"
# NOTE(review): run straight after the delete above, job1/3 no longer exists;
# re-run the index request first if the update should succeed - confirm.
POST _bulk
{"update":{"_index":"lagou","_type":"job1","_id":"3"}}
{"doc":{"title": "python django 開發工程師","salary_min":30000,"city":"上海","company":{"name":"美團科技","company_addr":"北京市軟件園A區"},"publish_date":"2019-06-15","comments":120}}
# Mapping operations
# Create index lagou1 with an explicit mapping for type "job".
PUT lagou1
{
  "mappings": {
    "job": {
      "properties": {
        "title":      { "type": "text" },
        "salary_min": { "type": "integer" },
        "city":       { "type": "keyword" },
        "company": {
          "properties": {
            "name":           { "type": "text" },
            "company_addr":   { "type": "text" },
            "employee_count": { "type": "integer" }
          }
        },
        "publish_date": { "type": "date", "format": "yyyy-MM-dd" },
        "comments":     { "type": "integer" }
      }
    }
  }
}

# Store one document that fills every mapped field.
PUT lagou1/job/1
{
  "title": "python爬蟲分佈式開發",
  "salary_min": 15000,
  "city": "北京",
  "company": {
    "name": "百度",
    "company_addr": "北京市軟件園",
    "employee_count": 50
  },
  "publish_date": "2019-06-15",
  "comments": 15
}

# Read mappings back: whole index, one type, one type across all indices
GET lagou1/_mapping
GET lagou1/_mapping/job
GET _all/_mapping/job

# Queries
# Index used by the search examples. title and company_name are mapped with
# store:true; title is analysed with ik_max_word (the analyzer must be
# available on the cluster).
PUT lagou2
{
  "mappings": {
    "job": {
      "properties": {
        "title": {
          "type": "text",
          "store": true,
          "analyzer": "ik_max_word"
        },
        "company_name": {
          "type": "keyword",
          "store": true
        },
        "desc":     { "type": "text" },
        "add_time": { "type": "date", "format": "yyyy-MM-dd" },
        "comments": { "type": "integer" }
      }
    }
  }
}

# Four sample job postings
POST lagou2/job
{
  "title": "python django 開發工程師",
  "company_name": "美國科技有限公司",
  "desc": "對django的概念熟悉,熟悉python基礎知識",
  "comments": 20,
  "add_time": "2017-04-01"
}

POST lagou2/job
{
  "title": "python scrapy redis 分佈式爬蟲基本",
  "company_name": "百度科技有限公司",
  "desc": "對scrapy的概念熟悉,熟悉redis的基本操做",
  "comments": 5,
  "add_time": "2017-04-15"
}

POST lagou2/job
{
  "title": "Elasticsearch打造搜索引擎",
  "company_name": "阿里巴巴科技有限公司",
  "desc": "熟悉數據結構算法,熟悉python的基本開發",
  "comments": 15,
  "add_time": "2017-06-20"
}

POST lagou2/job
{
  "title": "python打造推薦引擎系統",
  "company_name": "阿里巴巴科技有限公司",
  "desc": "熟悉推薦引擎的原理以及算法、掌握C語言",
  "comments": 60,
  "add_time": "2016-10-20"
}

# Simple queries
# Inspect how each analyzer tokenises the same text
GET _analyze
{
  "analyzer": "ik_smart",
  "text": "Python網絡開發師"
}

GET _analyze
{
  "analyzer": "ik_max_word",
  "text": "Python網絡開發師"
}

# match query (analysed): per the original note the query text is split into
# "python" and "分佈式".
# Pages with from 0 / size 2, requests the stored fields title, company_name
# and desc (desc is not mapped with store:true above), and sorts by comments
# in descending order.
GET lagou2/_search
{
  "stored_fields": ["title", "company_name", "desc"],
  "query": {
    "match": {
      "title": "python分佈式"
    }
  },
  "from": 0,
  "size": 2,
  "sort": [
    { "comments": { "order": "desc" } }
  ]
}
# range query: comments >= 10 and <= 20, with boost 2.0
GET lagou2/_search
{
  "query": {
    "range": {
      "comments": {
        "gte": 10,
        "lte": 20,
        "boost": 2.0
      }
    }
  }
}

# range on a date field: from 2017-04-01 up to "now"
GET lagou2/_search
{
  "query": {
    "range": {
      "add_time": {
        "gte": "2017-04-01",
        "lte": "now"
      }
    }
  }
}

# term query: the query text is not analysed, it is looked up as-is
# (comparable to how a keyword field is matched)
GET lagou2/_search
{
  "query": {
    "term": {
      "title": "python"
    }
  }
}

# terms query: several exact terms at once; the original note compares this
# to a match query for "django分佈工程"
GET lagou2/_search
{
  "query": {
    "terms": {
      "title": ["django", "分佈", "工程"]
    }
  }
}

# match_all
GET lagou2/_search
{
  "query": {
    "match_all": {}
  }
}

# match_phrase: phrase query
# Every term must be present (both "python" and "系統"); slop is the MAXIMUM
# number of positions the terms may be apart, here 6 - not a minimum.
GET lagou2/_search
{
  "query": {
    "match_phrase": {
      "title": {
        "query": "python系統",
        "slop": 6
      }
    }
  }
}

# multi_match: search several fields; "title^3" boosts title by a factor of 3
# relative to desc
GET lagou2/_search
{
  "query": {
    "multi_match": {
      "query": "python系統",
      "fields": ["title^3", "desc"]
    }
  }
}

# sort: all documents ordered by comments ascending
GET lagou2/_search
{
  "query": {
    "match_all": {}
  },
  "sort": [
    { "comments": { "order": "asc" } }
  ]
}

# range query: comments between 20 and 60 inclusive, boost 2.0
GET lagou2/_search
{
  "query": {
    "range": {
      "comments": {
        "gte": 20,
        "lte": 60,
        "boost": 2.0
      }
    }
  }
}

GET lagou2/_search
{
  "query": {
    "range": {
      "add_time": {
        "gte": "2017-06-07",
        "lte": "now"
      }
    }
  }
}

# wildcard query
GET lagou2/_search
{
  "query": {
    "wildcard": {
      "title": {
        "value": "pyth*n",
        "boost": 2
      }
    }
  }
}

# Compound queries
# bool query: combine clauses with must / should / must_not / filter
# Shape:
# "bool": {
#   "filter":   [],   does not take part in scoring
#   "must":     [],   like (salary=20 and title=Python)
#   "should":   [],   like (salary=20 or title=Python)
#   "must_not": []    like not
# }

# Test data for the bool examples
POST lagou/testjob/_bulk
{"index":{"_id":1}}
{"salary":10,"title":"Python"}
{"index":{"_id":2}}
{"salary":20,"title":"Scrapy"}
{"index":{"_id":3}}
{"salary":30,"title":"Django"}
{"index":{"_id":4}}
{"salary":30,"title":"Elasticsearch"}

# Simple filtered query - the simplest filter example
# select * from testjob where salary=20 and title="Scrapy"
# (the salary clause sits in must, the title clause in filter)
GET lagou/testjob/_search
{
  "query": {
    "bool": {
      "must": {
        "match": { "salary": 20 }
      },
      "filter": {
        "match": { "title": "Scrapy" }
      }
    }
  }
}

# select * from testjob
# where (salary=20 or title=Python) and salary!=30 and salary!=10
# NOTE(review): the term value is lowercase "python" although the stored title
# is "Python"; presumably the title field's analyzer lowercases tokens at index
# time - confirm against the index mapping.
GET lagou/testjob/_search
{
  "query": {
    "bool": {
      "should": [
        { "term": { "salary": 20 } },
        { "term": { "title": "python" } }
      ],
      "must_not": [
        { "term": { "salary": "30" } },
        { "term": { "salary": "10" } }
      ]
    }
  }
}

# where (salary=30 and title="django") or title="python"
# A nested bool expresses the parenthesised AND inside the outer OR.
GET lagou/testjob/_search
{
  "query": {
    "bool": {
      "should": [
        { "term": { "title": "python" } },
        {
          "bool": {
            "must": [
              { "term": { "salary": 30 } },
              { "term": { "title": "django" } }
            ]
          }
        }
      ]
    }
  }
}

# Cleanup of the bool test data. Placed after the queries that read it: run
# before them, it would remove the fixture they depend on.
# NOTE(review): deleting a whole mapping type is believed to be rejected by
# Elasticsearch 2.0 and later - confirm against the target cluster version.
DELETE lagou/testjob

# Test data for null handling: tags present, tags missing, tags null,
# and tags containing a null element
POST lagou/testjob2/_bulk
{"index":{"_id":1}}
{"tags":["search"]}
{"index":{"_id":2}}
{"tags":["search","python"]}
{"index":{"_id":3}}
{"other_filed":["some data"]}
{"index":{"_id":4}}
{"tags":null}
{"index":{"_id":5}}
{"tags":["search",null]}

# Handling null / missing values
# select tags from testjob2 where tags is not null
GET lagou/testjob2/_search
{
  "query": {
    "bool": {
      "filter": {
        "exists": { "field": "tags" }
      }
    }
  }
}

# select tags from testjob2 where tags is null
GET lagou/testjob2/_search
{
  "query": {
    "bool": {
      "must_not": {
        "exists": { "field": "tags" }
      }
    }
  }
}
gitee地址:https://gitee.com/zhangyafeii/ArticleSpider_LcvSearchpython