greenplum-db/pxf

PXF cannot read a composite Avro file from AWS S3 and returns the error: org.apache.avro.generic.GenericData$EnumSymbol cannot be cast to java.lang.String

se7enshidb opened this issue · 1 comment

avsc metadata

vim avro_schema.avsc 
{
"type" : "record",
  "name" : "example_schema",
  "namespace" : "com.example",
  "fields" : [ {
    "name" : "id",
    "type" : "long",
    "doc" : "Id of the user account"
  }, {
    "name" : "username",
    "type" : "string",
    "doc" : "Name of the user account"
  }, {
    "name" : "followers",
    "type" : {"type": "array", "items": "string"},
    "doc" : "Users followers"
  }, {
    "name": "fmap",
    "type": {"type": "map", "values": "long"}
  }, {
    "name": "relationship",
    "type": {
        "type": "enum",
        "name": "relationshipEnum",
        "symbols": ["MARRIED","LOVE","FRIEND","COLLEAGUE","STRANGER","ENEMY"]
    }
  }, {
    "name": "address",
    "type": {
        "type": "record",
        "name": "addressRecord",
        "fields": [
            {"name":"number", "type":"int"},
            {"name":"street", "type":"string"},
            {"name":"city", "type":"string"}]
    }
  } ],
  "doc:" : "A basic schema for storing messages"
}

avro data

vim pxf_avro.txt 
{"id":1, "username":"john","followers":["kate", "santosh"], "relationship": "FRIEND", "fmap": {"kate":10,"santosh":4}, "address":{"number":1, "street":"renaissance drive", "city":"san jose"}}
{"id":2, "username":"jim","followers":["john", "pam"], "relationship": "COLLEAGUE", "fmap": {"john":3,"pam":3}, "address":{"number":9, "street":"deer creek", "city":"palo alto"}}

java -jar ./avro-tools-1.9.1.jar fromjson --schema-file /tmp/avro_schema.avsc /tmp/pxf_avro.txt > /tmp/pxf_avro.avro

convert

java -jar ./avro-tools.jar fromjson --schema-file ./avro_schema.avsc ./pxf_avro.txt > ./pxf_avro.avro

create table

DROP FOREIGN TABLE s3_pxf_cn_with_avro;
CREATE FOREIGN TABLE s3_pxf_cn_with_avro(
id bigint, username text, followers text, fmap text, relationship text, address text
)
SERVER s3_server_cn
OPTIONS ( resource '/testpxf/avrowithenum/pxf_avro.avro', format 'avro');
select * from s3_pxf_cn_with_avro;

query

postgres=# select * from s3_pxf_cn_with_avro;
psql: ERROR:  remote component error (500) from '127.0.0.1:5888':  Type  Exception Report   Message  org.apache.avro.generic.GenericData$EnumSymbol cannot be cast to java.lang.String   Description  The server encountered an unexpected condition that prevented it from fulfilling the request.   Exception   java.io.IOException: org.apache.avro.generic.GenericData$EnumSymbol cannot be cast to java.lang.String (libchurl.c:963)  (seg0 slice1 192.168.100.14:50000 pid=452878) (libchurl.c:963)

@se7enshidb This issue is fixed in PR #696. Closing this issue now.