您的位置:首页 > 其它

Elasticsearch数据导入

2017-08-16 18:30 190 查看
import linecache
from pyes import ES

# Import settings. Comments use '#': the original '//' markers are not Python
# comment syntax ('//' is the floor-division operator).
ES_SERVER = "47.92.71.18:9200"  # Elasticsearch server address (host:port)
ES_INDEX = "cellinfo"  # target index (the "database instance")
ES_TYPE = "cell"  # document type (the "table name")
CELL_FILE = "cellinfo_v2_19(19).txt"  # input file to import
BULK_SIZE = 1000  # send one bulk request per 1000 documents

def create_document(line):
    """Build the document dict for one tab-separated cell record.

    The line is split on tab characters and the first 14 fields are mapped,
    in order, to: mcc, mnc, lac, ci, latitude, longitude, acc, date,
    validity, addr, province, city, district, township.  Latitude and
    longitude are parsed as floats, rounded to 8 decimal places and nested
    under the "location" key as "lat" / "lon"; every other field is kept as
    the raw string.

    Args:
        line: One record, optionally terminated by a newline.

    Returns:
        A dict holding the record's fields, ready to be indexed.

    Raises:
        IndexError: If the line has fewer than 14 tab-separated fields
            (this includes an empty line).
        ValueError: If the latitude or longitude field is not a valid float.
    """
    # Drop the line terminator. "\r" is stripped as well so that CRLF input
    # does not leave a stray carriage return on the last field.
    fields = line.strip('\r\n').split('\t')

    # Fields are read by position without a prior length check, so a short
    # line surfaces as IndexError at the first missing field.
    return {
        "mcc": fields[0],
        "mnc": fields[1],
        "lac": fields[2],
        "ci": fields[3],
        "location": {
            "lat": round(float(fields[4]), 8),
            "lon": round(float(fields[5]), 8),
        },
        "acc": fields[6],
        "date": fields[7],
        "validity": fields[8],
        "addr": fields[9],
        "province": fields[10],
        "city": fields[11],
        "district": fields[12],
        "township": fields[13],
    }

def main():
    """Import every record of CELL_FILE into Elasticsearch in bulk mode.

    Opens a connection to ES_SERVER, streams CELL_FILE line by line, turns
    each non-blank line into a document with create_document() and queues it
    for ES_INDEX / ES_TYPE with bulk=True.  The line number is printed every
    BULK_SIZE lines as a progress indicator.  When the whole file has been
    read, the remaining queued documents are flushed with force_bulk() and
    "end!" is printed.

    Differences from the earlier implementation:
      * The file is streamed instead of being read through linecache, which
        caches the entire file in memory.
      * There is no fixed upper bound on the number of lines, and the final
        flush always runs once the file has been read.
      * Blank lines are skipped; a malformed line is reported as an error
        instead of being mistaken for the end of the file.

    Raises:
        Exception: Any error raised while reading, parsing or indexing is
            reported together with the failing line number and re-raised.
    """
    # Local import keeps this function self-contained; io.open accepts an
    # encoding argument on both Python 2 and Python 3.
    import io

    # NOTE(review): bulk_size presumably makes pyes send the queued documents
    # every BULK_SIZE entries -- confirm against the pyes documentation.
    es_conn = ES(ES_SERVER, timeout=20.0, bulk_size=BULK_SIZE)

    line_no = 0  # 1-based number of the line being processed (0 = none yet)
    try:
        # "utf-8-sig" reads UTF-8 with or without a leading byte-order mark.
        with io.open(CELL_FILE, encoding="utf-8-sig") as cell_file:
            for line_no, raw_line in enumerate(cell_file, start=1):
                if not raw_line.strip():
                    # Blank line: nothing to index.
                    continue

                es_conn.index(create_document(raw_line), ES_INDEX, ES_TYPE, bulk=True)
                if line_no % BULK_SIZE == 0:
                    print('%d' % line_no)

        # Send whatever is still queued after the last full batch.
        es_conn.force_bulk()
        print("end!")
    except Exception as exc:
        # Report where the import stopped, then let the failure propagate so
        # it is not mistaken for a successful run.
        print("error at %d: %r" % (line_no, exc))
        raise

# Run the import only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: