Invalid handling of `None` values in InsertIntoWriter
chezou opened this issue · 1 comment
chezou commented
When a column has a `None` value, it is converted to `nan`, and InsertIntoWriter can't handle it: the value is written into the query without quotes, so Presto tries to resolve `nan` as a column name.
This doesn't happen with SparkWriter and BulkImportWriter.
Reproducible code:
In [1]: import os
In [2]: import pandas as pd
In [3]: import pytd.pandas_td as td
In [6]: df = pd.DataFrame({'a':[1, 2], 'b': [None, 3]})
In [7]: df
Out[7]:
a b
0 1 NaN
1 2 3.0
In [9]: con = td.connect(apikey=os.environ["TD_API_KEY"], endpoint=os.environ["TD_API_SERVER"])
In [10]: td.to_td(df, "aki.pytd_test", con, if_exists="overwrite", index=False, writer="insert_into")
---------------------------------------------------------------------------
PrestoUserError Traceback (most recent call last)
<ipython-input-10-93a602a9721b> in <module>
----> 1 td.to_td(df, "aki.pytd_test", con, if_exists="overwrite", index=False, writer="insert_into")
~/src/pytd/pytd/pandas_td/__init__.py in to_td(frame, name, con, if_exists, time_col, time_index, index, index_label, chunksize, date_format, writer)
394
395 database, table = name.split(".")
--> 396 con.get_table(database, table).import_dataframe(frame, writer, mode)
397
398
~/src/pytd/pytd/table.py in import_dataframe(self, dataframe, writer, if_exists, **kwargs)
115 writer = Writer.from_string(writer, **kwargs)
116
--> 117 writer.write_dataframe(dataframe, self, if_exists)
118
119 if writer_from_string:
~/src/pytd/pytd/writer.py in write_dataframe(self, dataframe, table, if_exists)
122
123 self._insert_into(
--> 124 table, dataframe.values.tolist(), column_names, column_types, if_exists
125 )
126
~/src/pytd/pytd/writer.py in _insert_into(self, table, list_of_list, column_names, column_types, if_exists)
189 ", ".join(rows),
190 )
--> 191 table.client.query(q_insert, engine="presto")
192
193
~/src/pytd/pytd/client.py in query(self, query, engine)
177 engine = self.default_engine
178 header = engine.create_header("Client#query")
--> 179 return engine.execute(header + query)
180
181 def get_table(self, database, table):
~/src/pytd/pytd/query_engine.py in execute(self, query)
61 cur = self.cursor()
62 cur.execute(query)
---> 63 rows = cur.fetchall()
64 columns = [desc[0] for desc in cur.description]
65 return {"data": rows, "columns": columns}
~/src/pytd/.venv/lib/python3.6/site-packages/prestodb/dbapi.py in fetchall(self)
290 def fetchall(self):
291 # type: () -> List[List[Any]]
--> 292 return list(self.genall())
293
294 def cancel(self):
~/src/pytd/.venv/lib/python3.6/site-packages/prestodb/client.py in __iter__(self)
465 # Subsequent fetches from GET requests until next_uri is empty.
466 while not self._query.is_finished():
--> 467 rows = self._query.fetch()
468 for row in rows:
469 self._rownumber += 1
~/src/pytd/.venv/lib/python3.6/site-packages/prestodb/client.py in fetch(self)
532 """Continue fetching data for the current query_id"""
533 response = self._request.get(self._request.next_uri)
--> 534 status = self._request.process(response)
535 if status.columns:
536 self._columns = status.columns
~/src/pytd/.venv/lib/python3.6/site-packages/prestodb/client.py in process(self, http_response)
410 logger.debug('HTTP {}: {}'.format(http_response.status_code, response))
411 if 'error' in response:
--> 412 raise self._process_error(response['error'], response.get('id'))
413
414 if constants.HEADER_CLEAR_SESSION in http_response.headers:
PrestoUserError: PrestoUserError(type=USER_ERROR, name=SYNTAX_ERROR, message="line 3:53: Column 'nan' cannot be resolved", query_id=20190726_094954_33069_fe7t5)
chezou commented
pandas doesn't have a way to call `astype(str)` while skipping NaN or None values; see pandas-dev/pandas#25353.