Commit ad174ef1 by 张云飞

zyf

parent 78862a3a
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020-07-02 21:46
# @Author : zhangyunfei
# @File : create-es-index.py
# @Software: PyCharm
from elasticsearch import Elasticsearch
def create_index():
'''
创建索引,创建索引名称为tx_ic_bigdata_business_heming_index_01,es7没有类型
'''
# 创建映射
mappings = {
"settings": {
"number_of_shards": 5,
"number_of_replicas": 1
},
"mappings": {
"properties": {
"id": {
"type": "integer"
},
"company_name": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
},
"analyzer": "ik_max_word"
},
"company_name_standard": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
},
"analyzer": "standard"
}
}
}
}
es = Elasticsearch(['9.223.8.84'], http_auth=("elastic", "Brg2020!"), port=9200)
if es.indices.exists(index="tx_ic_bigdata_business_heming_index_01"):
print("索引已存在,正在删除----", )
es.indices.delete(index="tx_ic_bigdata_business_heming_index_01")
if es.indices.exists(index="tx_ic_bigdata_business_heming_index_01") is not True:
print("创建索引------")
res = es.indices.create(index='tx_ic_bigdata_business_heming_index_01', body=mappings)
print("成功创建索引------", res)
create_index()
......@@ -6,7 +6,6 @@
# @Software: PyCharm
from elasticsearch import Elasticsearch
def create_index():
'''
创建索引,创建索引名称为tx_ic_bigdata_business_index_01,es7没有类型
......@@ -80,14 +79,15 @@ def create_index():
}
# es = Elasticsearch(['43.247.184.94'], http_auth=("admines", "adminGSBes."), port=7200)
# es = Elasticsearch(['192.168.1.131'])
es = Elasticsearch(['9.223.8.84'], http_auth=("elastic", "Brg2020!"), port=9200)
print("创建索引:")
print("索引是否存在:",es.indices.exists(index="tx_ic_bigdata_business_index_01"))
if es.indices.exists(index="tx_ic_bigdata_business_index_01"):
print("索引已存在,正在删除----", )
es.indices.delete(index="tx_ic_bigdata_business_index_01")
if es.indices.exists(index="tx_ic_bigdata_business_index_01") is not True:
print("创建索引------")
res = es.indices.create(index='tx_ic_bigdata_business_index_01', body=mappings)
print(res)
print("成功创建索引------",res)
create_index()
......@@ -18,11 +18,11 @@ server.config['JSON_AS_ASCII'] = False
class Elastic:
def __init__(self,ES_HOST,ES_USER,ES_PWD,ES_PORT):
def __init__(self, ES_HOST, ES_USER, ES_PWD, ES_PORT):
self.es = Elasticsearch([ES_HOST], http_auth=(ES_USER, ES_PWD), port=ES_PORT)
self.index_name = 'bigdata_ic_gsb_company_04'
# self.index_name = 'tx_big_data_business_index_test'
self.index_type = '_doc'
# self.index_name = 'bigdata_ic_gsb_company_04'
self.index_name = 'tx_ic_bigdata_business_heming_index_01'
self.index_type = None
def es_search(self, parameter):
cityname = parameter['cityname']
......@@ -48,8 +48,8 @@ class Elastic:
{
"query_string": {
"default_field": "company_name",
"query": "\""+cityname+"\""
}
"query": "\"" + cityname + "\""
}
},
{
"query_string": {
......@@ -60,7 +60,7 @@ class Elastic:
{
"bool": {
"should": [
{
{
"match": {
"company_name": btname
}
......@@ -115,8 +115,9 @@ class Elastic:
}
}
# querybody["query"]["bool"]["must"]["bool"]["should"].append(conditiontext)
print(json.dumps(querybody,ensure_ascii=False))
es_result = self.es.search(index=self.index_name, doc_type=self.index_type, body=querybody, size=100, request_timeout=1000)
print(json.dumps(querybody, ensure_ascii=False))
es_result = self.es.search(index=self.index_name, doc_type=self.index_type, body=querybody, size=100,
request_timeout=1000)
items = es_result['hits']['hits']
result = {
"Status": "",
......@@ -152,4 +153,3 @@ class Elastic:
"Result": []
}
return result
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020-07-02 17:38
# @Author : zhangyunfei
# @File : tx-ex-insert.py
# @Software: PyCharm
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020-06-26 17:16
# @Author : zhangyunfei
# @File : tx_es_data.py
# @Software: PyCharm
from elasticsearch import Elasticsearch
from elasticsearch import helpers
from elasticsearch.helpers import bulk
import json
'''
读取txt里面的全部数据,插入到新的索引中
'''
def DetData(datainfo):#根据ES5中返回的结果,组织导入ES6数据结构
isTrue = True
action = {}
try:
doc = {}
action["_index"] = "tx_ic_bigdata_business_heming_index_01"
# action["_type"] = "_doc" #elasticsearch7.0+版本需要注释
if "id" in datainfo.keys():
id = datainfo["id"]
doc["id"] = id
action["_id"] = id
if "company_name" in datainfo.keys():
company_name = datainfo["company_name"]
if company_name:
doc["company_name"] = company_name
doc["company_name_standard"] = company_name
else:
doc["company_name"] = None
doc["company_name_standard"] = None
else:
doc["company_name"] = None
action["_source"] = doc
except Exception as e:
isTrue = False
return isTrue, action
def main():
error = "ES_error.txt"
f1 = open(error, "a")
index_name = "tx_ic_bigdata_business_heming_index_01"
es = Elasticsearch(['9.223.8.84'], http_auth=("elastic", "Brg2020!"), port=9200)
readfile = open('2.txt','r',encoding='utf-8')
lines = readfile.readlines() # 获取全部
bulkList = []
for line in lines:
line = line.strip()
ll = json.loads(line)
isTrue, action = DetData(ll)
if isTrue:
bulkList.append(action)
else:
f1.write("获取" + str(ll) + "的数据出错!\r\n")
if len(bulkList) == 500:
print('插入500...............')
try:
bulk(es, bulkList, index=index_name, raise_on_error=True) # 批量插入
except Exception as e:
try:
for er in e.args[1]: # 和上面的for是一样的,都能取到错误信息
f1.write("插入" + str(er["index"]["data"]) + "的数据出错!\r\n")
except:
f1.write("插入" + str(bulkList) + "的数据出错!\r\n")
bulkList = []
if len(bulkList) > 0:
try:
bulk(es, bulkList, index=index_name, raise_on_error=True) # 批量插入
except Exception as e:
try:
for er in e.args[1]: # 和上面的for是一样的,都能取到错误信息
f1.write("插入" + str(er["index"]["data"]) + "的数据出错!\r\n")
except:
f1.write("插入" + str(bulkList) + "的数据出错!\r\n")
bulkList = []
f1.flush()
f1.close()
print("over")
main()
\ No newline at end of file
......@@ -144,8 +144,6 @@ def main():
f1 = open(error, "a")
index_name = "tx_ic_bigdata_business_index_01"
es = Elasticsearch(['9.223.8.84'], http_auth=("elastic", "Brg2020!"), port=9200)
# es = Elasticsearch(['43.247.184.94'], http_auth=("admines", "adminGSBes."), port=7200)
# es = Elasticsearch(['192.168.1.131'])
readfile = open('2.txt','r',encoding='utf-8')
lines = readfile.readlines() # 获取全部
bulkList = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment