Python 用 scroll 查詢大量 ES 數據

時間 2019-12-07

原文:原文鏈接

def get_all_es_data(url, query=None, size=10000, scroll="8m"):
    """Collect the ``_source`` of every matching document using the scroll API.

    The first request is a ``POST {url}/_search?scroll=...`` carrying the
    query; each following page is fetched from ``/_search/scroll`` with the
    ``_scroll_id`` returned by the previous response. Paging stops as soon as
    a page comes back empty or with fewer than ``size`` hits.

    Args:
        url: Base URL the search is issued against (no trailing
            ``/_search``). It may include an index name.
        query: Optional search body used as the filter. It is copied, so the
            caller's dict is never modified. Defaults to an empty body.
        size: Number of hits requested per page.
        scroll: How long the scroll context is kept alive between pages.

    Returns:
        A list with the ``_source`` dict of every hit retrieved. If a request
        fails or a response is malformed, the error is logged and the
        documents gathered so far are returned (best effort).
    """
    import logging
    from urllib.parse import urlsplit

    # Work on a copy so the caller's query dict is left untouched.
    body = dict(query or {})
    body["size"] = size
    headers = {'content-type': 'application/json'}

    search_url = url.rstrip('/') + '/_search?scroll=' + scroll
    # The scroll endpoint must not carry the index name, so address it at the
    # server root rather than appending to ``url``.
    # NOTE(review): this drops any path prefix (e.g. a reverse-proxy mount
    # point) present in ``url`` - confirm against the deployment.
    parts = urlsplit(url)
    scroll_url = parts.scheme + '://' + parts.netloc + '/_search/scroll'

    data = []
    scroll_id = None
    try:
        while True:
            if scroll_id is None:
                response = requests.post(
                    search_url, json.dumps(body), headers=headers)
            else:
                # Send the scroll id in the body: ids can be long enough to
                # exceed URL length limits when put in the query string.
                response = requests.post(
                    scroll_url,
                    json.dumps({"scroll": scroll, "scroll_id": scroll_id}),
                    headers=headers)
            # Fail loudly on HTTP errors instead of retrying forever.
            response.raise_for_status()

            payload = json.loads(response.text)
            scroll_id = payload['_scroll_id']
            hits = payload['hits']['hits']
            data.extend(doc["_source"] for doc in hits)

            # A short (or empty) page means the result set is exhausted.
            if not hits or len(hits) < size:
                break
    except (requests.RequestException, KeyError, ValueError):
        # Best effort: keep what was fetched, but do not hide the failure.
        logging.getLogger(__name__).exception(
            "Scroll query against %s failed after %d documents",
            url, len(data))
    return data

相關標籤/搜索