elasticsearch python 簡單實踐

時間 2019-11-29

原文：原文鏈接

1.建立索引

# -.- coding:utf-8 -.-
from __future__ import print_function
from pprint import pprint
from elasticsearch import Elasticsearch

# Host(s) handed to the Elasticsearch client constructor in main().
es_hosts = ["192.168.9.119:9200"]
# Name of the index that main() creates and then searches.
index_name = "log"
# Used as the mapping type key when the index is created in main().
doc_type = "20170103"


def main():
    """Create the index with an explicit mapping, then dump a match-all search.

    Connects to the hosts in ``es_hosts``, creates ``index_name`` with a
    single mapping type named by ``doc_type``, and pretty-prints the raw
    response of a ``match_all`` query against that index.
    """
    client = Elasticsearch(es_hosts)

    # Field definitions for the one mapping type of the new index.
    field_types = {
        "name": {"type": "text"},
        "gender": {"type": "text"},
        "age": {"type": "integer"},
        "phone": {"type": "keyword"},
    }
    index_body = {"mappings": {doc_type: {"properties": field_types}}}
    client.indices.create(index=index_name, body=index_body)

    match_all = {"query": {"match_all": {}}}
    pprint(client.search(index=index_name, body=match_all))
    # To inspect the cluster instead: pprint(client.info())


if __name__ == "__main__":
    main()
複製代碼

查詢建立的索引：

{
    "log": {
        "aliases": {},
        "mappings": {
            "20170103": {
                "properties": {
                    "age": {
                        "type": "integer"
                    },
                    "gender": {
                        "type": "text"
                    },
                    "name": {
                        "type": "text"
                    },
                    "phone": {
                        "type": "keyword"
                    }
                }
            }
        },
        "settings": {
            "index": {
                "creation_date": "1512980895137",
                "number_of_shards": "5",
                "number_of_replicas": "1",
                "uuid": "TOrOEfoHQiSKX8oqlZ6URw",
                "version": {
                    "created": "5050099"
                },
                "provided_name": "log"
            }
        }
    }
}
複製代碼

你也可以先建立索引，然後建立 type 再設置 mapping：

def main():
    """Put a ``name`` text field on a mapping type, then dump a search.

    Puts a mapping for type ``yinianji`` on the ``students`` index and
    pretty-prints the raw response of a ``match_all`` query on ``index_name``.
    """
    client = Elasticsearch(es_hosts)

    # One-off index creation, left disabled here:
    # client.indices.create(index="students")
    name_mapping = {"properties": {"name": {"type": "text"}}}
    client.indices.put_mapping(
        index="students",
        doc_type="yinianji",
        body=name_mapping,
    )

    # NOTE(review): the search targets ``index_name`` although the mapping
    # above was put on "students" -- confirm this is intended.
    match_all = {"query": {"match_all": {}}}
    pprint(client.search(index=index_name, body=match_all))
    # To inspect the cluster instead: pprint(client.info())


if __name__ == "__main__":
    main()
複製代碼

2.插入數據

index (單條插入)

#from datetime import datetime
#print(datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'))
#data = {
# "filename": '1233445',
# "url": '/root',
# "status": 0,
# "date": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
# #"date": datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'),
# "total": 100,
# "county": '111111 111112',
# "gender": 0,
# "agelow": 12,
# "agehigh": 18
#}
#es.index(index = index_name, doc_type = doc_type, id= '1233445', body = data)
複製代碼

bulk (批量插入)

# -.- coding:utf-8 -.-
from __future__ import print_function
from pprint import pprint
from elasticsearch import Elasticsearch
from elasticsearch import helpers

# Connection target and naming constants for this example.
es_hosts = ["192.168.9.119:9200"]
index_name = "log"
doc_type = "20170103"

# Ten bulk actions for helpers.bulk(): ids 1..10 in index "students",
# type "yinianji", each carrying the same single-field document.
body = [
    {
        "_index": "students",
        "_type": "yinianji",
        "_id": doc_id,
        "_source": {"name": 'weishihao'},
    }
    for doc_id in range(1, 11)
]

def main():
    """Bulk-index the prepared actions and print everything in ``students``.

    Sends the module-level ``body`` actions through ``helpers.bulk``,
    refreshes the index so they are searchable, and pretty-prints the raw
    response of a ``match_all`` query on ``students``.
    """
    es = Elasticsearch(es_hosts)
    helpers.bulk(es, body)
    # Newly indexed documents only become searchable after a refresh; force
    # one so the search below reflects the bulk insert that just ran.
    es.indices.refresh(index='students')
    res = es.search(index='students', body={"query": {"match_all": {}}})
    pprint(res)
    # pprint(es.info())


if __name__ == '__main__':
    main()
複製代碼

3.修改mapping結構

在 elasticsearch 中，更改 mapping 結構只能新增 field，不能修改已有的 field：

# Fields to add to the existing mapping type.
_extra_fields = {
    "county": {"type": "text"},
    "total": {"type": "integer"},
    "gender": {"type": "integer"},
    "agelow": {"type": "integer"},
    "agehigh": {"type": "integer"},
}
es.indices.put_mapping(
    index=index_name,
    doc_type=doc_type,
    body={"properties": _extra_fields},
)


複製代碼

4.查詢數據

# -.- coding:utf-8 -.-
from __future__ import print_function
from pprint import pprint
from elasticsearch import Elasticsearch
from elasticsearch import helpers

# Connection target and naming constants for this example.
es_hosts = ["192.168.9.119:9200"]
index_name = "log"
doc_type = "20170103"


def _student_action(doc_id):
    """Return one bulk action that indexes a student document under doc_id."""
    return {
        "_index": "students",
        "_type": "yinianji",
        "_id": doc_id,
        "_source": {"name": 'weishihao'},
    }


# Bulk actions with ids 1..10; only needed when the bulk call in main() is
# re-enabled.
body = [_student_action(n + 1) for n in range(10)]

def main():
    """Print a match-all search restricted to one type of ``students``.

    Connects to the hosts in ``es_hosts`` and pretty-prints the raw response
    of a ``match_all`` query on index ``students``, type ``yinianji``.
    """
    client = Elasticsearch(es_hosts)
    # Bulk loading is disabled in this example:
    # helpers.bulk(client, body)
    match_all = {"query": {"match_all": {}}}
    response = client.search(
        index='students',
        doc_type='yinianji',
        body=match_all,
    )
    pprint(response)
    # To inspect the cluster instead: pprint(client.info())


if __name__ == "__main__":
    main()
複製代碼

返回值中的 total 值會給出總數據量，但是返回的結果中默認只顯示 10 條。那麼，我們如何查詢全部的數據呢？

es = Elasticsearch(es_hosts)
# helpers.scan() returns an iterable over the hits matching the query, using
# a scroll context kept alive for 10 minutes, so iterating it is not capped
# at the 10 hits of a plain search response.
scanResp = helpers.scan(es, {"query": {"match_all": {}}}, index='quanguorenkou', scroll="10m")
for hit in scanResp:
    print(hit)
複製代碼

這樣我們就可以查詢全部的數據了。但是如果數據量比較大，而我們只需要部分數據的話，可以指定查詢條件，比如前綴查詢：

# Scan only the documents whose "studentid" field has the prefix "330".
# NOTE(review): size=40000 may exceed the index's max_result_window
# (default 10000 on recent ES versions) -- confirm the setting was raised.
_prefix_query = {"query": {"prefix": {"studentid": {"value": "330"}}}}
scanResp = helpers.scan(
    es,
    _prefix_query,
    index='students',
    scroll="100m",
    size=40000,
)
複製代碼

上述語句實現的功能是查詢 studentid 字段以 330 開頭的全部文檔。

5. 修改數據

# Set the "status" field of document '1233445' to 1 through an update script.
_status_script = {"script": "ctx._source.status = 1"}
es.update(
    index=index_name,
    doc_type=doc_type,
    id='1233445',
    body=_status_script,
)
複製代碼

6.刪除索引

# Delete the whole index. The name lives in ``index_name`` throughout these
# examples; a bare ``index`` is never defined and would raise NameError.
es.indices.delete(index=index_name)
複製代碼

7.一個完整的例子

# -.- coding:utf-8 -.-
from __future__ import print_function
from elasticsearch import Elasticsearch, helpers
from pprint import pprint
import sys
import os
sys.path.append(os.path.abspath(os.path.pardir))


from multiprocessing import current_process, Pool
from collections import deque
import time
import re
# Connection settings for the complete example.
# NOTE(review): these look like default credentials -- confirm they are not
# used against a real cluster.
es_hosts = ["192.168.31.13"]
es_auth = ("elastic", "changeme")
index_name = 'exportdata'
doc_type = 'output'
es = Elasticsearch(es_hosts, http_auth=es_auth)

# Recreate the index from scratch. ignore=404 keeps the first run, when the
# index does not exist yet, from aborting with NotFoundError.
es.indices.delete(index=index_name, ignore=404)
es.indices.create(
    index=index_name,
    body={
        "mappings": {
            doc_type: {
                "properties": {
                    "filename": {"type": "text"},
                    "url": {"type": "text"},
                    "status": {"type": "integer"},
                    # Dates are stored in "yyyy-MM-dd HH:mm:ss" format.
                    "date": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
                    "county": {"type": "text"},
                    "total": {"type": "integer"},
                    "gender": {"type": "integer"},
                    "agelow": {"type": "integer"},
                    "agehigh": {"type": "integer"},
                }
            }
        }
    },
)
#es.indices.put_mapping(
# index=index_name,
# doc_type=doc_type,
# body={
# "properties": {
# "county": {"type": "text"},
# "total" : {"type": "integer"},
# "gender": {"type": "integer"},
# "agelow": {"type": "integer"},
# "agehigh": {"type": "integer"}
# }
# }
# )
#from datetime import datetime
#print(datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'))
#data = {
# "filename": '1233445',
# "url": '/root',
# "status": 0,
# "date": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
# #"date": datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'),
# "total": 100,
# "county": '111111 111112',
# "gender": 0,
# "agelow": 12,
# "agehigh": 18
#}
#es.index(index = index_name, doc_type = doc_type, id= '1233445', body = data)
#es.update(index = index_name, doc_type = doc_type, id='1233445', body={"script": "ctx._source.status = 1"})
複製代碼

相關標籤/搜索

elasticsearch+elasticsearch

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。