`to_td` fails with nonexistent database
chezou opened this issue · 2 comments
chezou commented
While to_td works well with a nonexistent table, it doesn't work with a nonexistent database.
It'd be nice to create the database as well; alternatively, raising a specific error message would be fine.
>>> td.to_td(df, 'aki_test.test_pytd123', con, if_exists='replace', index=False)
Traceback (most recent call last):
File "/Users/ariga/src/pytd-test/.venv/lib/python3.6/site-packages/pytd/writer.py", line 90, in write_dataframe
sdf.write.mode(if_exists).format('com.treasuredata.spark').option('table', destination).save()
File "/Users/ariga/src/pytd-test/.venv/lib/python3.6/site-packages/pyspark/sql/readwriter.py", line 732, in save
self._jwrite.save()
File "/Users/ariga/src/pytd-test/.venv/lib/python3.6/site-packages/py4j/java_gateway.py", line 1257, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "/Users/ariga/src/pytd-test/.venv/lib/python3.6/site-packages/pyspark/sql/utils.py", line 63, in deco
return f(*a, **kw)
File "/Users/ariga/src/pytd-test/.venv/lib/python3.6/site-packages/py4j/protocol.py", line 328, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o61.save.
: com.treasuredata.client.TDClientHttpNotFoundException: [TARGET_NOT_FOUND] [404:Not Found] API request to /v3/table/create/aki_test/test_pytd123/log has failed: Resource not found
at com.treasuredata.client.TDRequestErrorHandler.defaultHttpResponseErrorResolver(TDRequestErrorHandler.java:119)
at com.treasuredata.client.TDHttpRequestHandler.resolveHttpResponseError(TDHttpRequestHandler.java:88)
at com.treasuredata.client.TDHttpClient.submitRequest(TDHttpClient.java:402)
at com.treasuredata.client.TDHttpClient.submitRequest(TDHttpClient.java:452)
at com.treasuredata.client.TDHttpClient.call(TDHttpClient.java:475)
at com.treasuredata.client.TDClient.doPost(TDClient.java:253)
at com.treasuredata.client.TDClient.createTable(TDClient.java:423)
at com.treasuredata.client.TDClient.createTableIfNotExists(TDClient.java:431)
at com.treasuredata.spark.DefaultSource.createRelation(DefaultSource.scala:105)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676)
at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:676)
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:676)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:285)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:271)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/ariga/src/pytd-test/.venv/lib/python3.6/site-packages/pytd/pandas_td/__init__.py", line 349, in to_td
writer.write_dataframe(frame, con.database, name, mode)
File "/Users/ariga/src/pytd-test/.venv/lib/python3.6/site-packages/pytd/writer.py", line 94, in write_dataframe
raise RuntimeError('failed to load table via td-spark: ' + str(e.java_exception))
RuntimeError: failed to load table via td-spark: com.treasuredata.client.TDClientHttpNotFoundException: [TARGET_NOT_FOUND] [404:Not Found] API request to /v3/table/create/aki_test/test_pytd123/log has failed: Resource not found
takuti commented
I believe `pytd.table.Table` (ref. #17) now raises a proper exception under this condition:
>>> td.to_td(df, 'hogerregrfjerife.foo', con, if_exists='replace', index=False)
Traceback (most recent call last):
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/pytd/table.py", line 25, in __init__
client.api_client.database(database)
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tdclient/client.py", line 81, in database
raise api.NotFoundError("Database '%s' does not exist" % (db_name))
tdclient.errors.NotFoundError: Database 'hogerregrfjerife' does not exist
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/pytd/pandas_td/__init__.py", line 413, in to_td
con.get_table(database, table).import_dataframe(frame, writer, mode)
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/pytd/client.py", line 214, in get_table
return Table(self, database, table)
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/pytd/table.py", line 29, in __init__
database, table, e
ValueError: faild to create pytd.table.Table instance for `hogerregrfjerife.foo`: Database 'hogerregrfjerife' does not exist
>>> td.to_td(df, 'hogerregrfjerife.foo', con, if_exists='replace', index=False, writer='insert_into')
Traceback (most recent call last):
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/pytd/table.py", line 25, in __init__
client.api_client.database(database)
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tdclient/client.py", line 81, in database
raise api.NotFoundError("Database '%s' does not exist" % (db_name))
tdclient.errors.NotFoundError: Database 'hogerregrfjerife' does not exist
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/pytd/pandas_td/__init__.py", line 413, in to_td
con.get_table(database, table).import_dataframe(frame, writer, mode)
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/pytd/client.py", line 214, in get_table
return Table(self, database, table)
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/pytd/table.py", line 29, in __init__
database, table, e
ValueError: faild to create pytd.table.Table instance for `hogerregrfjerife.foo`: Database 'hogerregrfjerife' does not exist
>>> td.to_td(df, 'hogerregrfjerife.foo', con, if_exists='replace', index=False, writer='spark')
Traceback (most recent call last):
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/pytd/table.py", line 25, in __init__
client.api_client.database(database)
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/tdclient/client.py", line 81, in database
raise api.NotFoundError("Database '%s' does not exist" % (db_name))
tdclient.errors.NotFoundError: Database 'hogerregrfjerife' does not exist
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/pytd/pandas_td/__init__.py", line 413, in to_td
con.get_table(database, table).import_dataframe(frame, writer, mode)
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/pytd/client.py", line 214, in get_table
return Table(self, database, table)
File "/Users/kitazawa/.pyenv/versions/3.7.3/lib/python3.7/site-packages/pytd/table.py", line 29, in __init__
database, table, e
ValueError: faild to create pytd.table.Table instance for `hogerregrfjerife.foo`: Database 'hogerregrfjerife' does not exist
@chezou Can we close this issue?
chezou commented
That's reasonable, thanks.