建立ES对象
from elasticsearch import Elasticsearch, helpers
# Connection settings for the Elasticsearch cluster.
# NOTE(review): the "xxx" values look like placeholders; replace with real settings.
es_host="xxx"
es_user="xxx"
es_password="xxx"
# Build the client for the single configured host, passing the
# (user, password) pair as http_auth.
es = Elasticsearch([es_host], http_auth=(es_user, es_password))
构造查询语句
# Query body sent to Elasticsearch.
# NOTE(review): "xxx" is presumably a placeholder for the real query DSL.
qbody={
xxx
}
# Maximum number of hits requested from the search call below.
scroll_size = 1000
# Request up to scroll_size hits; without an explicit size the search API
# returns only its default page of 10 hits.
# NOTE(review): for result sets larger than scroll_size, consider iterating
# with helpers.scan instead of a single search call.
query = es.search(index="xxx", body=qbody, size=scroll_size)
# Keep only the list of hit documents from the response.
res = query['hits']['hits']
递归获取DataFrame
当出现嵌套情况时,使用“.”将上下级包含关系进行衔接,并删除原上级文档所对应的列
import pandas as pd
def buildDtFromES(res):
    """Flatten a list of (possibly nested) dicts into a flat DataFrame.

    A column is expanded only when *every* value in it is a dict (including
    dict subclasses such as ``OrderedDict``). The nested keys become new
    columns named ``"<parent>.<child>"``, appended after the existing
    columns, and the original parent column is removed. Expansion is
    applied recursively, so deeper levels produce names like ``"a.b.c"``.
    Columns that mix dicts with other values are left untouched.

    Args:
        res: Data accepted by ``pd.DataFrame``; typically the list of hit
            dicts taken from an Elasticsearch search response.

    Returns:
        A new, independent ``pd.DataFrame`` with nested dict columns
        flattened.
    """
    resDf = pd.DataFrame(res)
    # Snapshot the labels first: resDf is rebound while columns are expanded.
    for columnItem in list(resDf.columns):
        tmpValList = resDf[columnItem].tolist()
        # isinstance (not an exact type comparison) so that dict subclasses
        # are flattened as well.
        if not all(isinstance(tmpValItem, dict) for tmpValItem in tmpValList):
            continue
        # Recurse so that deeper nesting levels are flattened before the
        # parent prefix is applied.
        tmpDf = buildDtFromES(tmpValList)
        tmpDf = tmpDf.rename(
            columns={
                tmpColumnItem: f"{columnItem}.{tmpColumnItem}"
                for tmpColumnItem in tmpDf.columns
            }
        )
        # Append the expanded columns, then remove the original nested column.
        resDf = pd.concat([resDf, tmpDf], axis=1).drop(columnItem, axis=1)
    return resDf.copy(deep=True)
样例展示
# Preview the head of the flattened frame, restricted to the single column
# at position 10.
buildDtFromES(res).head().iloc[:,10:11]
| | _source.RecruitStaffTypeID |
|---|---|
| 0 | 2 |
| 1 | 2 |
| 2 | 2 |
| 3 | 2 |
| 4 | 2 |