之前寫過一個bulk insert,此次又查了下資料,看到另外一種做法,說bulk裏保存的是一段JSON數據序列,按照<操作><數據><操作><數據><操作><數據>...的格式保存的。感覺比較古怪,自己寫了一段python代碼測試了一下,還能work。應該有更好的解決方法,但這種dirty code做做實驗也夠了。
#ESIP = 'localhost' ESIP = '192.168.0.2' index_name = 'testindex' doc_type_name = 'testindex_data1' #init Elasticsearch es = Elasticsearch([{'host':ESIP}]) es.create(index=index_name, doc_type=doc_type_name, body={'any':'data', 'timestamp':datetime.now()}) #read spreedsheet data terminated by comma data = [] fd = codecs.open('test.csv', 'r', 'utf-8') for line in fd: line = line.strip().split(',') json_data = {} json_data['id'] = line[1] json_data['data'] = line[1] data.append(json_data) #bulk insert cache = [] bulk_size = 500 for d in data: if len(cache) >= bulk_size: es.bulk(body = cache, index=index_name, doc_type=doc_type_name) cache = [] else: new_action = {} new_action['_index'] = index_name new_action['_type'] = doc_type_name new_action['_id'] = d['id'] action = {} action['index'] = new_action cache.append(action) cache.append(d) if len(cache) > 0: es.bulk(body = cache, index=index_name, doc_type=doc_type_name) cache = []