treasure-data/pytd

Invalid handling of `None` values in InsertIntoWriter

chezou opened this issue · 1 comments

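When a column contains a `None` value, pandas converts it to `NaN`, and InsertIntoWriter cannot handle it: the value is written into the INSERT query as the bare, unquoted token `nan`, which Presto then tries to resolve as a column name (see the `Column 'nan' cannot be resolved` error below).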

This doesn't happen with SparkWriter and BulkImportWriter.

Reproducible code:

In [1]: import os

In [2]: import pandas as pd

In [3]: import pytd.pandas_td as td

In [6]: df = pd.DataFrame({'a':[1, 2], 'b': [None, 3]})

In [7]: df
Out[7]:
   a    b
0  1  NaN
1  2  3.0

In [9]: con = td.connect(apikey=os.environ["TD_API_KEY"], endpoint=os.environ["TD_API_SERVER"])

In [10]: td.to_td(df, "aki.pytd_test", con, if_exists="overwrite", index=False, writer="insert_into")
---------------------------------------------------------------------------
PrestoUserError                           Traceback (most recent call last)
<ipython-input-10-93a602a9721b> in <module>
----> 1 td.to_td(df, "aki.pytd_test", con, if_exists="overwrite", index=False, writer="insert_into")

~/src/pytd/pytd/pandas_td/__init__.py in to_td(frame, name, con, if_exists, time_col, time_index, index, index_label, chunksize, date_format, writer)
    394
    395     database, table = name.split(".")
--> 396     con.get_table(database, table).import_dataframe(frame, writer, mode)
    397
    398

~/src/pytd/pytd/table.py in import_dataframe(self, dataframe, writer, if_exists, **kwargs)
    115             writer = Writer.from_string(writer, **kwargs)
    116
--> 117         writer.write_dataframe(dataframe, self, if_exists)
    118
    119         if writer_from_string:

~/src/pytd/pytd/writer.py in write_dataframe(self, dataframe, table, if_exists)
    122
    123         self._insert_into(
--> 124             table, dataframe.values.tolist(), column_names, column_types, if_exists
    125         )
    126

~/src/pytd/pytd/writer.py in _insert_into(self, table, list_of_list, column_names, column_types, if_exists)
    189             ", ".join(rows),
    190         )
--> 191         table.client.query(q_insert, engine="presto")
    192
    193

~/src/pytd/pytd/client.py in query(self, query, engine)
    177             engine = self.default_engine
    178         header = engine.create_header("Client#query")
--> 179         return engine.execute(header + query)
    180
    181     def get_table(self, database, table):

~/src/pytd/pytd/query_engine.py in execute(self, query)
     61         cur = self.cursor()
     62         cur.execute(query)
---> 63         rows = cur.fetchall()
     64         columns = [desc[0] for desc in cur.description]
     65         return {"data": rows, "columns": columns}

~/src/pytd/.venv/lib/python3.6/site-packages/prestodb/dbapi.py in fetchall(self)
    290     def fetchall(self):
    291         # type: () -> List[List[Any]]
--> 292         return list(self.genall())
    293
    294     def cancel(self):

~/src/pytd/.venv/lib/python3.6/site-packages/prestodb/client.py in __iter__(self)
    465         # Subsequent fetches from GET requests until next_uri is empty.
    466         while not self._query.is_finished():
--> 467             rows = self._query.fetch()
    468             for row in rows:
    469                 self._rownumber += 1

~/src/pytd/.venv/lib/python3.6/site-packages/prestodb/client.py in fetch(self)
    532         """Continue fetching data for the current query_id"""
    533         response = self._request.get(self._request.next_uri)
--> 534         status = self._request.process(response)
    535         if status.columns:
    536             self._columns = status.columns

~/src/pytd/.venv/lib/python3.6/site-packages/prestodb/client.py in process(self, http_response)
    410         logger.debug('HTTP {}: {}'.format(http_response.status_code, response))
    411         if 'error' in response:
--> 412             raise self._process_error(response['error'], response.get('id'))
    413
    414         if constants.HEADER_CLEAR_SESSION in http_response.headers:

PrestoUserError: PrestoUserError(type=USER_ERROR, name=SYNTAX_ERROR, message="line 3:53: Column 'nan' cannot be resolved", query_id=20190726_094954_33069_fe7t5)

Note: pandas has no built-in way to call `astype(str)` while skipping `NaN` or `None` values; see pandas-dev/pandas#25353.