hortonworks-spark/shc

shc fails to create table

kevintrannz opened this issue · 0 comments

Hello,
We hit an error while writing a Dataset to HBase with the following code:

    Map<String, String> hbaseOptions = new HashMap<>();
    // catalog: JSON string describing the HBase table and the DataFrame-to-column mapping
    hbaseOptions.put(HBaseTableCatalog.tableCatalog(), catalog);
    // ask SHC to create the table (with 5 regions) if it does not already exist
    hbaseOptions.put(HBaseTableCatalog.newTable(), "5");

    ds.toDF().write()
            .options(hbaseOptions)
            .format("org.apache.spark.sql.execution.datasources.hbase")
            .save();
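
For context, the catalog variable is the usual SHC catalog JSON. The snippet below only illustrates its shape: the table name matches the one in the exception, but the column family "cf" and the columns are simplified placeholders, not our real schema:

    String catalog = "{"
            + "\"table\":{\"namespace\":\"default\", \"name\":\"data_feed1\"},"
            + "\"rowkey\":\"key\","
            + "\"columns\":{"
            + "  \"id\":{\"cf\":\"rowkey\", \"col\":\"key\", \"type\":\"string\"},"
            + "  \"value\":{\"cf\":\"cf\", \"col\":\"value\", \"type\":\"string\"}"
            + "}}";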

Exception:

18/11/22 14:15:21 INFO SparkHadoopWriter: Job job_20181122141516_0193 committed.
Exception in thread "main" org.apache.hadoop.hbase.TableExistsException
  at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
  at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
  at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
  at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
  at java.util.concurrent.ForkJoinTask.getThrowableException(ForkJoinTask.java:598)
  at java.util.concurrent.ForkJoinTask.reportException(ForkJoinTask.java:677)
  at java.util.concurrent.ForkJoinTask.invoke(ForkJoinTask.java:735)
  at java.util.stream.ForEachOps$ForEachOp.evaluateParallel(ForEachOps.java:160)
  at java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateParallel(ForEachOps.java:174)
  at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:233)
  at java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:418)
  at java.util.stream.ReferencePipeline$Head.forEach(ReferencePipeline.java:583)
  at my.package.MyFile.process(MyFile.java:249)
Caused by: org.apache.hadoop.hbase.TableExistsException: data_feed1
  at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
  at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
  at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
  at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
  at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:121)
  at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:110)
  at org.apache.hadoop.hbase.util.ForeignExceptionUtil.toIOException(ForeignExceptionUtil.java:45)
  at org.apache.hadoop.hbase.client.HBaseAdmin$ProcedureFuture.convertResult(HBaseAdmin.java:4786)
  at org.apache.hadoop.hbase.client.HBaseAdmin$ProcedureFuture.waitProcedureResult(HBaseAdmin.java:4744)
  at org.apache.hadoop.hbase.client.HBaseAdmin$ProcedureFuture.get(HBaseAdmin.java:4676)
  at org.apache.hadoop.hbase.client.HBaseAdmin.createTable(HBaseAdmin.java:680)
  at org.apache.spark.sql.execution.datasources.hbase.HBaseRelation.createTable(HBaseRelation.scala:155)
  at org.apache.spark.sql.execution.datasources.hbase.DefaultSource.createRelation(HBaseRelation.scala:60)
  at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
  at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
  at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
  at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
  at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
  at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
  at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:656)
  at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:656)
  at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
  at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:656)
  at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:273)
  at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:267)
  at java.util.stream.ForEachOps$ForEachOp$OfRef.accept(ForEachOps.java:184)
  at java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948)
  at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:481)
  at java.util.stream.ForEachOps$ForEachTask.compute(ForEachOps.java:291)
  at java.util.concurrent.CountedCompleter.exec(CountedCompleter.java:731)
  at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
  at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
  at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
  at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:157)
Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.hbase.TableExistsException): data_feed1
  at org.apache.hadoop.hbase.master.procedure.CreateTableProcedure.prepareCreate(CreateTableProcedure.java:290)
  at org.apache.hadoop.hbase.master.procedure.CreateTableProcedure.executeFromState(CreateTableProcedure.java:107)
  at org.apache.hadoop.hbase.master.procedure.CreateTableProcedure.executeFromState(CreateTableProcedure.java:59)
  at org.apache.hadoop.hbase.procedure2.StateMachineProcedure.execute(StateMachineProcedure.java:139)
  at org.apache.hadoop.hbase.procedure2.Procedure.doExecute(Procedure.java:506)
  at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1167)
  at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execLoop(ProcedureExecutor.java:955)
  at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execLoop(ProcedureExecutor.java:908)
  at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.access$400(ProcedureExecutor.java:77)
  at org.apache.hadoop.hbase.procedure2.ProcedureExecutor$2.run(ProcedureExecutor.java:482)
18/11/22 14:15:21 INFO SparkContext: Invoking stop() from shutdown hook
18/11/22 14:15:21 INFO ContextCleaner: Cleaned accumulator 736

Dependencies:

...
        <spark.version>2.3.2</spark.version>
        <scala.version>2.11</scala.version>
        <hadoop.version>2.8.3</hadoop.version>
        <hbase.version>1.4.7</hbase.version>
....
        <dependency>
            <groupId>com.hortonworks</groupId>
            <artifactId>shc-core</artifactId>
            <version>1.1.1-2.1-s_2.11</version>
        </dependency>
....



The second run works fine, because the table was already created during the first run. However, if we delete the table and run again, the exception comes back on the first write.
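
One thing we noticed in the trace: save() is being invoked from inside a parallel stream (the ForEachOps/ForkJoinPool frames), so several tasks may be asking SHC to create the same table at the same time. As a temporary workaround we are considering pre-creating the table once, before the parallel writes, using the plain HBase Admin API, roughly like the sketch below (the "cf" column family is just a placeholder, and table creation would then no longer be left to SHC):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HColumnDescriptor;
    import org.apache.hadoop.hbase.HTableDescriptor;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Admin;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.ConnectionFactory;

    // Create the table once, up front, so the parallel save() calls never race on creation.
    Configuration conf = HBaseConfiguration.create();
    try (Connection connection = ConnectionFactory.createConnection(conf);
         Admin admin = connection.getAdmin()) {
        TableName table = TableName.valueOf("data_feed1");
        if (!admin.tableExists(table)) {
            HTableDescriptor desc = new HTableDescriptor(table);
            desc.addFamily(new HColumnDescriptor("cf")); // placeholder column family
            admin.createTable(desc);
        }
    }

This is only a sketch of what we have in mind; we would still prefer SHC to handle table creation safely on its own.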

Could you please share your thoughts on what might be going wrong?

Thanks,
Kevin.