apache/seatunnel

[Bug] [connector-hive] can't find field which is partition key

Opened this issue · 0 comments

Search before asking

  • I had searched in the issues and found no similar issues.

What happened

When I run a job with the Hive source and the JDBC sink, an error occurs.
The Hadoop version is 3.4.
The cause is that the Hive partition being read contains no data, so the partition keys cannot be parsed from the path.

SeaTunnel Version

2.3.8

SeaTunnel Config

env {
"job.mode"=BATCH
"job.retry.times"="0"
}
source {
Hive {
    "hive.hadoop.conf" {}
    "parse_partition_from_path"="true"
    "read_columns"=[
        id,
        name,
        "u_count",
        "s_count",
        "rate"
    ]
    "read_partitions"=[
        "dt=20241216"
    ]
    parallelism=1
    "result_table_name"=Table15998381360416
    "table_name"="xx"
    "tmp_path"="/user/seatunnel"
    "hive_site_path"="hive-site.xml"
    "cluster_account"=msns
    "hdfs_site_path"="hdfs-site.xml"
    "kerberos_krb5_conf_path"="krb5.conf"
    "kerberos_keytab_path"="keytab"
    "core_site_path"=""
    "kerberos_principal"=""
    "metastore_uri"=""
}
}
transform {
Sql {
    connectorType=Sql
    query="select id,\n       name,\n       u_count,\n       s_count,\n       rate,\n       dt from Table15998381360416 "
    "result_table_name"=Table15998381360417
    "source_table_name"=Table15998381360416
}
}
sink {
Jdbc {
    "schema_save_mode"="CREATE_SCHEMA_WHEN_NOT_EXIST"
    "data_save_mode"="CUSTOM_PROCESSING"
    "create_index"="true"
    "connection_check_timeout_sec"=30
    "batch_size"=1000
    "is_exactly_once"="false"
    "max_commit_attempts"=3
    "transaction_timeout_sec"=-1
    "auto_commit"="false"
    "support_upsert_by_query_primary_key_exist"="false"
    "multi_table_sink_replica"=1
    "source_table_name"=Table15998381360417
    "generate_sink_sql"=true
     driver="com.mysql.cj.jdbc.Driver"
}
}

Running Command

The job was submitted through the web UI.

Error Exception

java.lang.IllegalArgumentException: can't find field [dt]
        at org.apache.seatunnel.transform.sql.zeta.ZetaSQLType.getExpressionType(ZetaSQLType.java:141)
        at org.apache.seatunnel.transform.sql.zeta.ZetaSQLEngine.typeMapping(ZetaSQLEngine.java:219)
        at org.apache.seatunnel.transform.sql.SQLTransform.transformTableSchema(SQLTransform.java:115)
        at org.apache.seatunnel.transform.common.AbstractCatalogSupportTransform.transformCatalogTable(AbstractCatalogSupportTransform.java:90)
        at org.apache.seatunnel.transform.common.AbstractCatalogSupportTransform.getProducedCatalogTable(AbstractCatalogSupportTransform.java:80)
        at org.apache.seatunnel.engine.core.parse.MultipleTableJobConfigParser.parseTransform(MultipleTableJobConfigParser.java:477)
        at org.apache.seatunnel.engine.core.parse.MultipleTableJobConfigParser.parseTransforms(MultipleTableJobConfigParser.java:405)
        at org.apache.seatunnel.engine.core.parse.MultipleTableJobConfigParser.parse(MultipleTableJobConfigParser.java:214)
        at org.apache.seatunnel.engine.client.job.ClientJobExecutionEnvironment.getLogicalDag(ClientJobExecutionEnvironment.java:114)
        at org.apache.seatunnel.engine.client.job.ClientJobExecutionEnvironment.execute(ClientJobExecutionEnvironment.java:182)
        at org.apache.seatunnel.app.service.impl.JobExecutorServiceImpl.lambda$executeJobBySeaTunnel$273(JobExecutorServiceImpl.java:400)
        at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:750)

Zeta or Flink or Spark Version

No response

Java or Scala Version

No response

Screenshots

No response

Are you willing to submit PR?

  • Yes I am willing to submit a PR!

Code of Conduct