建立ES对象

from elasticsearch import Elasticsearch, helpers
# Connection settings for the Elasticsearch cluster.
# NOTE(review): the "xxx" values are presumably placeholders — replace them
# with the real host and credentials before running.
es_host="xxx"
es_user="xxx"
es_password="xxx"
# Create the client for the single configured host, passing the
# (user, password) pair as http_auth.
es = Elasticsearch([es_host], http_auth=(es_user, es_password))

构造查询语句

# Query body handed to es.search() below.
# NOTE(review): `xxx` looks like a placeholder for the actual query DSL —
# fill it in before running.
qbody={
      xxx
    }

# NOTE(review): scroll_size is not referenced by any code visible here —
# presumably meant as the page size for a scroll/scan request; confirm.
scroll_size = 1000

# Run the search against the index and keep the list of hit documents
# found under response['hits']['hits'].
query = es.search(index="xxx", body=qbody)
res = query['hits']['hits']

递归获取DataFrame

当某一列的所有取值均为字典(即嵌套文档)时,递归展开该列:使用“.”连接上下级字段名作为新的列名,并删除原上级字段所对应的列

import pandas as pd

def buildDtFromES(res):
    """Build a flat DataFrame from a list of (possibly nested) dicts.

    ``res`` is handed to ``pd.DataFrame``. Every column whose values are all
    dicts is expanded recursively: each nested key becomes a column named
    ``"<parent>.<child>"`` and the parent column is removed. A column that
    holds anything other than a dict in at least one row is left untouched.

    Args:
        res: Records to tabulate, e.g. the ``hits`` list of a search
            response (a list of dicts).

    Returns:
        A new DataFrame with the non-nested columns first (in their original
        order), followed by the expanded columns of each nested parent in
        the order the parents appeared.
    """
    resDf = pd.DataFrame(res)
    nestedColumns = []
    expandedFrames = []
    for columnItem in resDf.columns:
        cellValues = resDf[columnItem].values.tolist()
        # isinstance (rather than type(...) == dict) so that dict subclasses
        # such as OrderedDict are expanded as well.
        if not all(isinstance(cell, dict) for cell in cellValues):
            continue
        # str() keeps the prefix valid even when the parent label is not a
        # string; add_prefix formats each child label into the new name.
        prefix = str(columnItem) + "."
        expandedFrames.append(buildDtFromES(cellValues).add_prefix(prefix))
        nestedColumns.append(columnItem)
    if not nestedColumns:
        return resDf.copy(deep=True)
    # A single concat at the end avoids re-copying the whole frame once per
    # nested column while producing the same column order.
    return pd.concat(
        [resDf.drop(columns=nestedColumns)] + expandedFrames, axis=1
    )

样例展示

# Flatten the hits, take the leading rows with head(), and show only the
# column at position 10 (the slice 10:11 keeps the result a DataFrame).
buildDtFromES(res).head().iloc[:,10:11]
_source.RecruitStaffTypeID
0 2
1 2
2 2
3 2
4 2