GoogleCloudDataproc/spark-bigquery-connector

Failed to write to BigQuery using Spark 3.2.1


Hey team,

I am trying to write a DataFrame from Spark to BigQuery using this code:

    // Indirect write: the connector stages the DataFrame as Parquet in the
    // temporary GCS bucket, then loads it into BigQuery with a load job.
    customerOutboundTrackerUpdateData
            .write()
            .format("bigquery")
            .option("temporaryGcsBucket", "dev-bkt-01")
            .option("project", "gcp-dev-prj-01")
            .option("parentProject", "gcp-dev-prj-01")
            .mode("overwrite")
            .save("gcp-dev-prj-01.ds_cust.table_1");

And this is the error I see:

java.lang.RuntimeException: Failed to write to BigQuery
at com.google.cloud.spark.bigquery.BigQueryWriteHelper.writeDataFrameToBigQuery(BigQueryWriteHelper.scala:93)
at com.google.cloud.spark.bigquery.BigQueryInsertableRelation.insert(BigQueryInsertableRelation.scala:43)
at com.google.cloud.spark.bigquery.BigQueryRelationProvider.createRelation(BigQueryRelationProvider.scala:116)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:47)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:80)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:78)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:89)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$$nestedInanonfun$eagerlyExecuteCommands$1$1.$anonfun$applyOrElse$1(QueryExecution.scala:174)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$8(SQLExecution.scala:245)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:393)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:192)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:979)
at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:147)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:343)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$$nestedInanonfun$eagerlyExecuteCommands$1$1.applyOrElse(QueryExecution.scala:174)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$$nestedInanonfun$eagerlyExecuteCommands$1$1.applyOrElse(QueryExecution.scala:170)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:590)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:168)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:590)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:31)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:268)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:264)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:566)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$1(QueryExecution.scala:170)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:324)
at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:170)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:155)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:146)
at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:200)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:959)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:427)
at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:396)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:250)
at com.albertsons.catalog.osrr.processor.TargetAudienceDataProcessor.updateBigQueryCustomerOutboundTrackerTable(TargetAudienceDataProcessor.java:716)
at com.albertsons.catalog.osrr.processor.TargetAudienceDataProcessor.updateAndGetBigQueryCustomerOutboundTrackerInfo(TargetAudienceDataProcessor.java:678)
at com.albertsons.catalog.osrr.processor.TargetAudienceDataProcessor.writeDataToSQLDB(TargetAudienceDataProcessor.java:613)
at com.albertsons.catalog.osrr.processor.TargetAudienceDataProcessor.runCatalogDataProcessorForBigQuery(TargetAudienceDataProcessor.java:266)
at com.albertsons.catalog.osrr.processor.TargetAudienceDataProcessor.runCatalogDataProcessor(TargetAudienceDataProcessor.java:117)
at com.albertsons.catalog.osrr.processor.TargetAudienceDataProcessor.main(TargetAudienceDataProcessor.java:94)
at $linee279e11a345d4129a632361060510f9725.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command--1:1)
at $linee279e11a345d4129a632361060510f9725.$read$$iw$$iw$$iw$$iw$$iw.<init>(command--1:43)
at $linee279e11a345d4129a632361060510f9725.$read$$iw$$iw$$iw$$iw.<init>(command--1:45)
at $linee279e11a345d4129a632361060510f9725.$read$$iw$$iw$$iw.<init>(command--1:47)
at $linee279e11a345d4129a632361060510f9725.$read$$iw$$iw.<init>(command--1:49)
at $linee279e11a345d4129a632361060510f9725.$read$$iw.<init>(command--1:51)
at $linee279e11a345d4129a632361060510f9725.$read.<init>(command--1:53)
at $linee279e11a345d4129a632361060510f9725.$read$.<init>(command--1:57)
at $linee279e11a345d4129a632361060510f9725.$read$.<clinit>(command--1)
at $linee279e11a345d4129a632361060510f9725.$eval$.$print$lzycompute(:7)
at $linee279e11a345d4129a632361060510f9725.$eval$.$print(:6)
at $linee279e11a345d4129a632361060510f9725.$eval.$print()
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1020)
at scala.tools.nsc.interpreter.IMain.$anonfun$interpret$1(IMain.scala:568)
at scala.reflect.internal.util.ScalaClassLoader.asContext(ScalaClassLoader.scala:36)
at scala.reflect.internal.util.ScalaClassLoader.asContext$(ScalaClassLoader.scala:116)
at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:41)
at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:567)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:594)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:564)
at com.databricks.backend.daemon.driver.DriverILoop.execute(DriverILoop.scala:219)
at com.databricks.backend.daemon.driver.ScalaDriverLocal.$anonfun$repl$1(ScalaDriverLocal.scala:225)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.trapExit(DriverLocal.scala:1013)
at com.databricks.backend.daemon.driver.DriverLocal$TrapExit$.apply(DriverLocal.scala:966)
at com.databricks.backend.daemon.driver.ScalaDriverLocal.repl(ScalaDriverLocal.scala:225)
at com.databricks.backend.daemon.driver.DriverLocal.$anonfun$execute$13(DriverLocal.scala:644)
at com.databricks.logging.Log4jUsageLoggingShim$.$anonfun$withAttributionContext$1(Log4jUsageLoggingShim.scala:33)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:94)
at com.databricks.logging.Log4jUsageLoggingShim$.withAttributionContext(Log4jUsageLoggingShim.scala:31)
at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:205)
at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:204)
at com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:59)
at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:240)
at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:225)
at com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:59)
at com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:621)
at com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$2(DriverWrapper.scala:634)
at scala.util.Try$.apply(Try.scala:213)
at com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$1(DriverWrapper.scala:626)
at com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$3(DriverWrapper.scala:665)
at com.databricks.logging.UsageLogging.executeThunkAndCaptureResultTags$1(UsageLogging.scala:425)
at com.databricks.logging.UsageLogging.$anonfun$recordOperationWithResultTags$4(UsageLogging.scala:445)
at com.databricks.logging.Log4jUsageLoggingShim$.$anonfun$withAttributionContext$1(Log4jUsageLoggingShim.scala:33)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:94)
at com.databricks.logging.Log4jUsageLoggingShim$.withAttributionContext(Log4jUsageLoggingShim.scala:31)
at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:205)
at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:204)
at com.databricks.backend.daemon.driver.DriverWrapper.withAttributionContext(DriverWrapper.scala:56)
at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:240)
at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:225)
at com.databricks.backend.daemon.driver.DriverWrapper.withAttributionTags(DriverWrapper.scala:56)
at com.databricks.logging.UsageLogging.recordOperationWithResultTags(UsageLogging.scala:420)
at com.databricks.logging.UsageLogging.recordOperationWithResultTags$(UsageLogging.scala:339)
at com.databricks.backend.daemon.driver.DriverWrapper.recordOperationWithResultTags(DriverWrapper.scala:56)
at com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:665)
at com.databricks.backend.daemon.driver.DriverWrapper.executeCommandAndGetError(DriverWrapper.scala:543)
at com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:578)
at com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$runInnerLoop$1(DriverWrapper.scala:446)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.logging.Log4jUsageLoggingShim$.$anonfun$withAttributionContext$1(Log4jUsageLoggingShim.scala:33)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:94)
at com.databricks.logging.Log4jUsageLoggingShim$.withAttributionContext(Log4jUsageLoggingShim.scala:31)
at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:205)
at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:204)
at com.databricks.backend.daemon.driver.DriverWrapper.withAttributionContext(DriverWrapper.scala:56)
at com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:446)
at com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:383)
at com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:234)
at java.lang.Thread.run(Thread.java:750)
Caused by: com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.BigQueryException: Access Denied: File gs://osrr-dataload-dev-bkt-01/.spark-bigquery-app-20240718164623-0000-07d27c5a-8588-4b8d-9b3e-6958b5fc352a/part-00000-tid-2537579855118061111-fdcd7ce0-ca0d-4db8-af92-b78c75d6adaf-201-1-c000.snappy.parquet: Access Denied
at com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.Job.reload(Job.java:419)
at com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.Job.waitFor(Job.java:252)
at com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.connector.common.BigQueryClient.createAndWaitFor(BigQueryClient.java:179)
at com.google.cloud.spark.bigquery.BigQueryWriteHelper.finishedJob$lzycompute$1(BigQueryWriteHelper.scala:153)
at com.google.cloud.spark.bigquery.BigQueryWriteHelper.finishedJob$1(BigQueryWriteHelper.scala:153)
at com.google.cloud.spark.bigquery.BigQueryWriteHelper.$anonfun$loadDataToBigQuery$3(BigQueryWriteHelper.scala:155)
at org.apache.spark.internal.Logging.logInfo(Logging.scala:57)
at org.apache.spark.internal.Logging.logInfo$(Logging.scala:56)
at com.google.cloud.spark.bigquery.BigQueryWriteHelper.logInfo(BigQueryWriteHelper.scala:33)
at com.google.cloud.spark.bigquery.BigQueryWriteHelper.loadDataToBigQuery(BigQueryWriteHelper.scala:155)
at com.google.cloud.spark.bigquery.BigQueryWriteHelper.writeDataFrameToBigQuery(BigQueryWriteHelper.scala:90)
... 118 more

When I execute the same steps from a Databricks notebook, writing to BigQuery works fine.
I can confirm that all the required permissions have been granted on the bucket and the table, including cross-project access.

@davidrabinowitz quick help would be really appreciated.

That's the error: Caused by: com.google.cloud.spark.bigquery.repackaged.com.google.cloud.bigquery.BigQueryException: Access Denied: File gs://osrr-dataload-dev-bkt-01/.spark-bigquery-app-20240718164623-0000-07d27c5a-8588-4b8d-9b3e-6958b5fc352a/part-00000-tid-2537579855118061111-fdcd7ce0-ca0d-4db8-af92-b78c75d6adaf-201-1-c000.snappy.parquet: Access Denied. Please verify that the service account has all the permissions listed here. Alternatively, you can try the DIRECT write method.
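
For reference, a minimal sketch of the same write using the direct method (selected via the connector's writeMethod option), which streams rows through the BigQuery Storage Write API and skips the GCS staging step, so the temporary bucket and its permissions are not involved at all. The DataFrame, project, and table names are carried over from the snippet above; whether this option is available depends on the connector version in use.

    customerOutboundTrackerUpdateData
            .write()
            .format("bigquery")
            // Direct write: rows go through the BigQuery Storage Write API,
            // so no temporaryGcsBucket (and no GCS permission) is required.
            .option("writeMethod", "direct")
            .option("project", "gcp-dev-prj-01")
            .option("parentProject", "gcp-dev-prj-01")
            .mode("overwrite")
            .save("gcp-dev-prj-01.ds_cust.table_1");

If you stay on the indirect path, note that the Access Denied above is raised while the BigQuery load job reads the staged Parquet part file, so the principal running the load job needs read access on the staging bucket (e.g. roles/storage.objectViewer), in addition to its permissions on the table.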