Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
Z
zhichan
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
蒋勇
zhichan
Commits
ad174ef1
Commit
ad174ef1
authored
Jul 11, 2020
by
张云飞
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
zyf
parent
78862a3a
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
167 additions
and
18 deletions
+167
-18
ic-name/create-es-heming-index.py
+57
-0
ic-name/create-es-index.py
+6
-6
ic-name/elasticsearch_ic.py
+10
-10
ic-name/tx-ex-heming-insert.py
+94
-0
ic-name/tx-ex-insert.py
+0
-2
No files found.
ic-name/create-es-heming-index.py
0 → 100755
View file @
ad174ef1
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020-07-02 21:46
# @Author : zhangyunfei
# @File : create-es-index.py
# @Software: PyCharm
from
elasticsearch
import
Elasticsearch
def
create_index
():
'''
创建索引,创建索引名称为tx_ic_bigdata_business_heming_index_01,es7没有类型
'''
# 创建映射
mappings
=
{
"settings"
:
{
"number_of_shards"
:
5
,
"number_of_replicas"
:
1
},
"mappings"
:
{
"properties"
:
{
"id"
:
{
"type"
:
"integer"
},
"company_name"
:
{
"type"
:
"text"
,
"fields"
:
{
"raw"
:
{
"type"
:
"keyword"
}
},
"analyzer"
:
"ik_max_word"
},
"company_name_standard"
:
{
"type"
:
"text"
,
"fields"
:
{
"raw"
:
{
"type"
:
"keyword"
}
},
"analyzer"
:
"standard"
}
}
}
}
es
=
Elasticsearch
([
'9.223.8.84'
],
http_auth
=
(
"elastic"
,
"Brg2020!"
),
port
=
9200
)
if
es
.
indices
.
exists
(
index
=
"tx_ic_bigdata_business_heming_index_01"
):
print
(
"索引已存在,正在删除----"
,
)
es
.
indices
.
delete
(
index
=
"tx_ic_bigdata_business_heming_index_01"
)
if
es
.
indices
.
exists
(
index
=
"tx_ic_bigdata_business_heming_index_01"
)
is
not
True
:
print
(
"创建索引------"
)
res
=
es
.
indices
.
create
(
index
=
'tx_ic_bigdata_business_heming_index_01'
,
body
=
mappings
)
print
(
"成功创建索引------"
,
res
)
create_index
()
ic-name/create-es-index.py
View file @
ad174ef1
...
@@ -6,7 +6,6 @@
...
@@ -6,7 +6,6 @@
# @Software: PyCharm
# @Software: PyCharm
from
elasticsearch
import
Elasticsearch
from
elasticsearch
import
Elasticsearch
def
create_index
():
def
create_index
():
'''
'''
创建索引,创建索引名称为tx_ic_bigdata_business_index_01,es7没有类型
创建索引,创建索引名称为tx_ic_bigdata_business_index_01,es7没有类型
...
@@ -80,14 +79,15 @@ def create_index():
...
@@ -80,14 +79,15 @@ def create_index():
}
}
# es = Elasticsearch(['43.247.184.94'], http_auth=("admines", "adminGSBes."), port=7200)
# es = Elasticsearch(['192.168.1.131'])
es
=
Elasticsearch
([
'9.223.8.84'
],
http_auth
=
(
"elastic"
,
"Brg2020!"
),
port
=
9200
)
es
=
Elasticsearch
([
'9.223.8.84'
],
http_auth
=
(
"elastic"
,
"Brg2020!"
),
port
=
9200
)
print
(
"创建索引:"
)
print
(
"索引是否存在:"
,
es
.
indices
.
exists
(
index
=
"tx_ic_bigdata_business_index_01"
))
if
es
.
indices
.
exists
(
index
=
"tx_ic_bigdata_business_index_01"
):
print
(
"索引已存在,正在删除----"
,
)
es
.
indices
.
delete
(
index
=
"tx_ic_bigdata_business_index_01"
)
if
es
.
indices
.
exists
(
index
=
"tx_ic_bigdata_business_index_01"
)
is
not
True
:
if
es
.
indices
.
exists
(
index
=
"tx_ic_bigdata_business_index_01"
)
is
not
True
:
print
(
"创建索引------"
)
res
=
es
.
indices
.
create
(
index
=
'tx_ic_bigdata_business_index_01'
,
body
=
mappings
)
res
=
es
.
indices
.
create
(
index
=
'tx_ic_bigdata_business_index_01'
,
body
=
mappings
)
print
(
res
)
print
(
"成功创建索引------"
,
res
)
create_index
()
create_index
()
ic-name/elasticsearch_ic.py
View file @
ad174ef1
...
@@ -18,11 +18,11 @@ server.config['JSON_AS_ASCII'] = False
...
@@ -18,11 +18,11 @@ server.config['JSON_AS_ASCII'] = False
class
Elastic
:
class
Elastic
:
def
__init__
(
self
,
ES_HOST
,
ES_USER
,
ES_PWD
,
ES_PORT
):
def
__init__
(
self
,
ES_HOST
,
ES_USER
,
ES_PWD
,
ES_PORT
):
self
.
es
=
Elasticsearch
([
ES_HOST
],
http_auth
=
(
ES_USER
,
ES_PWD
),
port
=
ES_PORT
)
self
.
es
=
Elasticsearch
([
ES_HOST
],
http_auth
=
(
ES_USER
,
ES_PWD
),
port
=
ES_PORT
)
self
.
index_name
=
'bigdata_ic_gsb_company_04'
#
self.index_name = 'bigdata_ic_gsb_company_04'
# self.index_name = 'tx_big_data_business_index_test
'
self
.
index_name
=
'tx_ic_bigdata_business_heming_index_01
'
self
.
index_type
=
'_doc'
self
.
index_type
=
None
def
es_search
(
self
,
parameter
):
def
es_search
(
self
,
parameter
):
cityname
=
parameter
[
'cityname'
]
cityname
=
parameter
[
'cityname'
]
...
@@ -48,8 +48,8 @@ class Elastic:
...
@@ -48,8 +48,8 @@ class Elastic:
{
{
"query_string"
:
{
"query_string"
:
{
"default_field"
:
"company_name"
,
"default_field"
:
"company_name"
,
"query"
:
"
\"
"
+
cityname
+
"
\"
"
"query"
:
"
\"
"
+
cityname
+
"
\"
"
}
}
},
},
{
{
"query_string"
:
{
"query_string"
:
{
...
@@ -60,7 +60,7 @@ class Elastic:
...
@@ -60,7 +60,7 @@ class Elastic:
{
{
"bool"
:
{
"bool"
:
{
"should"
:
[
"should"
:
[
{
{
"match"
:
{
"match"
:
{
"company_name"
:
btname
"company_name"
:
btname
}
}
...
@@ -115,8 +115,9 @@ class Elastic:
...
@@ -115,8 +115,9 @@ class Elastic:
}
}
}
}
# querybody["query"]["bool"]["must"]["bool"]["should"].append(conditiontext)
# querybody["query"]["bool"]["must"]["bool"]["should"].append(conditiontext)
print
(
json
.
dumps
(
querybody
,
ensure_ascii
=
False
))
print
(
json
.
dumps
(
querybody
,
ensure_ascii
=
False
))
es_result
=
self
.
es
.
search
(
index
=
self
.
index_name
,
doc_type
=
self
.
index_type
,
body
=
querybody
,
size
=
100
,
request_timeout
=
1000
)
es_result
=
self
.
es
.
search
(
index
=
self
.
index_name
,
doc_type
=
self
.
index_type
,
body
=
querybody
,
size
=
100
,
request_timeout
=
1000
)
items
=
es_result
[
'hits'
][
'hits'
]
items
=
es_result
[
'hits'
][
'hits'
]
result
=
{
result
=
{
"Status"
:
""
,
"Status"
:
""
,
...
@@ -152,4 +153,3 @@ class Elastic:
...
@@ -152,4 +153,3 @@ class Elastic:
"Result"
:
[]
"Result"
:
[]
}
}
return
result
return
result
ic-name/tx-ex-heming-insert.py
0 → 100755
View file @
ad174ef1
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020-07-02 17:38
# @Author : zhangyunfei
# @File : tx-ex-insert.py
# @Software: PyCharm
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2020-06-26 17:16
# @Author : zhangyunfei
# @File : tx_es_data.py
# @Software: PyCharm
from
elasticsearch
import
Elasticsearch
from
elasticsearch
import
helpers
from
elasticsearch.helpers
import
bulk
import
json
'''
读取txt里面的全部数据,插入到新的索引中
'''
def
DetData
(
datainfo
):
#根据ES5中返回的结果,组织导入ES6数据结构
isTrue
=
True
action
=
{}
try
:
doc
=
{}
action
[
"_index"
]
=
"tx_ic_bigdata_business_heming_index_01"
# action["_type"] = "_doc" #elasticsearch7.0+版本需要注释
if
"id"
in
datainfo
.
keys
():
id
=
datainfo
[
"id"
]
doc
[
"id"
]
=
id
action
[
"_id"
]
=
id
if
"company_name"
in
datainfo
.
keys
():
company_name
=
datainfo
[
"company_name"
]
if
company_name
:
doc
[
"company_name"
]
=
company_name
doc
[
"company_name_standard"
]
=
company_name
else
:
doc
[
"company_name"
]
=
None
doc
[
"company_name_standard"
]
=
None
else
:
doc
[
"company_name"
]
=
None
action
[
"_source"
]
=
doc
except
Exception
as
e
:
isTrue
=
False
return
isTrue
,
action
def
main
():
error
=
"ES_error.txt"
f1
=
open
(
error
,
"a"
)
index_name
=
"tx_ic_bigdata_business_heming_index_01"
es
=
Elasticsearch
([
'9.223.8.84'
],
http_auth
=
(
"elastic"
,
"Brg2020!"
),
port
=
9200
)
readfile
=
open
(
'2.txt'
,
'r'
,
encoding
=
'utf-8'
)
lines
=
readfile
.
readlines
()
# 获取全部
bulkList
=
[]
for
line
in
lines
:
line
=
line
.
strip
()
ll
=
json
.
loads
(
line
)
isTrue
,
action
=
DetData
(
ll
)
if
isTrue
:
bulkList
.
append
(
action
)
else
:
f1
.
write
(
"获取"
+
str
(
ll
)
+
"的数据出错!
\r\n
"
)
if
len
(
bulkList
)
==
500
:
print
(
'插入500...............'
)
try
:
bulk
(
es
,
bulkList
,
index
=
index_name
,
raise_on_error
=
True
)
# 批量插入
except
Exception
as
e
:
try
:
for
er
in
e
.
args
[
1
]:
# 和上面的for是一样的,都能取到错误信息
f1
.
write
(
"插入"
+
str
(
er
[
"index"
][
"data"
])
+
"的数据出错!
\r\n
"
)
except
:
f1
.
write
(
"插入"
+
str
(
bulkList
)
+
"的数据出错!
\r\n
"
)
bulkList
=
[]
if
len
(
bulkList
)
>
0
:
try
:
bulk
(
es
,
bulkList
,
index
=
index_name
,
raise_on_error
=
True
)
# 批量插入
except
Exception
as
e
:
try
:
for
er
in
e
.
args
[
1
]:
# 和上面的for是一样的,都能取到错误信息
f1
.
write
(
"插入"
+
str
(
er
[
"index"
][
"data"
])
+
"的数据出错!
\r\n
"
)
except
:
f1
.
write
(
"插入"
+
str
(
bulkList
)
+
"的数据出错!
\r\n
"
)
bulkList
=
[]
f1
.
flush
()
f1
.
close
()
print
(
"over"
)
main
()
\ No newline at end of file
ic-name/tx-ex-insert.py
View file @
ad174ef1
...
@@ -144,8 +144,6 @@ def main():
...
@@ -144,8 +144,6 @@ def main():
f1
=
open
(
error
,
"a"
)
f1
=
open
(
error
,
"a"
)
index_name
=
"tx_ic_bigdata_business_index_01"
index_name
=
"tx_ic_bigdata_business_index_01"
es
=
Elasticsearch
([
'9.223.8.84'
],
http_auth
=
(
"elastic"
,
"Brg2020!"
),
port
=
9200
)
es
=
Elasticsearch
([
'9.223.8.84'
],
http_auth
=
(
"elastic"
,
"Brg2020!"
),
port
=
9200
)
# es = Elasticsearch(['43.247.184.94'], http_auth=("admines", "adminGSBes."), port=7200)
# es = Elasticsearch(['192.168.1.131'])
readfile
=
open
(
'2.txt'
,
'r'
,
encoding
=
'utf-8'
)
readfile
=
open
(
'2.txt'
,
'r'
,
encoding
=
'utf-8'
)
lines
=
readfile
.
readlines
()
# 获取全部
lines
=
readfile
.
readlines
()
# 获取全部
bulkList
=
[]
bulkList
=
[]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment