ibmdbanalytics/ibmdbpy

Hardcoded varchar size causes issues for table creation

charleybeller opened this issue · 0 comments

I attempted to use ibmdbpy to create a db2 table from an existing pandas dataframe that included a long column of long string values and hit the following error:

DataError: ('22001', '[22001] [IBM][CLI Driver][DB2/LINUXX8664] SQL0433N  Value "[redacted long string value]" is too long.  SQLSTATE=22001\n')

It looks like this hardcoding of varchar(255) may be at fault:

for column in dataframe.columns:

Though it also may be related to the pyodbc issue mention in #40 (comment)

We can work around the issue by explicitly creating the table using ibm_db_dbi but I wanted to flag the use case as a helpful feature to support as future capacity allows.

Stacktrace below:

DataError                                 Traceback (most recent call last)
<ipython-input-40-a4ab293e77d0> in <module>
----> 1 idadf = idadb_1f53f1cd54874200a0400e09388b1234.as_idadataframe(dataframe=df, tablename='INTRUSION', clear_existing=True)
      2 idadf.head()

/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/ibmdbpy/utils.py in wrapper(*args, **kwds)
     71         """Calculate elapsed time in seconds"""
     72         start = time()
---> 73         result = function(*args, **kwds)
     74         elapsed = time() - start
     75         if os.environ['VERBOSE'] == 'True':

/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/ibmdbpy/base.py in as_idadataframe(self, dataframe, tablename, clear_existing, primary_key, indexer)
    947         self._create_table(dataframe, tablename, primary_key=primary_key)
    948         idadf = ibmdbpy.frame.IdaDataFrame(self, tablename, indexer)
--> 949         self.append(idadf, dataframe)
    950 
    951         ############## Experimental ##################

/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/ibmdbpy/base.py in append(self, idadf, df, maxnrow)
   1438             print("Uploading %s rows (maxnrow was set to %s)"%(df.shape[0], maxnrow))
   1439             try:
-> 1440                 self._insert_into_database(df, idadf.schema, idadf.tablename, silent=True)
   1441             except:
   1442                 raise

/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/ibmdbpy/base.py in _insert_into_database(self, dataframe, schema, tablename, silent)
   2006         # TODO: Good idea : create a savepoint before creating the table
   2007         # Rollback in to savepoint in case of failure
-> 2008         self._prepare_and_execute(query, autocommit=False, silent=silent)
   2009 
   2010         for idadf in self._idadfs:

/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/ibmdbpy/base.py in _prepare_and_execute(self, query, autocommit, silent)
   2027         """
   2028         self._check_connection()
-> 2029         return sql._prepare_and_execute(self, query, autocommit, silent)
   2030 
   2031     def _check_procedure(self, proc_name, alg_name=None):

/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/ibmdbpy/sql.py in _prepare_and_execute(idaobject, query, autocommit, silent)
     58         query = _prepare_query(query, silent)
     59         #print(query)
---> 60         cursor.execute(query)
     61         if autocommit is True:
     62             idaobject._autocommit()

/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/pypyodbc.py in execute(self, query_string, params, many_mode, call_mode)
   1624 
   1625         else:
-> 1626             self.execdirect(query_string)
   1627         return self
   1628 

/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/pypyodbc.py in execdirect(self, query_string)
   1650             c_query_string = ctypes.c_char_p(query_string)
   1651             ret = ODBC_API.SQLExecDirect(self.stmt_h, c_query_string, len(query_string))
-> 1652         check_success(self, ret)
   1653         self._NumOfRows()
   1654         self._UpdateDesc()

/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/pypyodbc.py in check_success(ODBC_obj, ret)
   1005     if ret not in (SQL_SUCCESS, SQL_SUCCESS_WITH_INFO, SQL_NO_DATA):
   1006         if isinstance(ODBC_obj, Cursor):
-> 1007             ctrl_err(SQL_HANDLE_STMT, ODBC_obj.stmt_h, ret, ODBC_obj.ansi)
   1008         elif isinstance(ODBC_obj, Connection):
   1009             ctrl_err(SQL_HANDLE_DBC, ODBC_obj.dbc_h, ret, ODBC_obj.ansi)

/opt/conda/envs/Python-3.7-main/lib/python3.7/site-packages/pypyodbc.py in ctrl_err(ht, h, val_ret, ansi)
    975                 raise ProgrammingError(state,err_text)
    976             elif state[:2] in (raw_s('22')):
--> 977                 raise DataError(state,err_text)
    978             elif state[:2] in (raw_s('23')) or state == raw_s('40002'):
    979                 raise IntegrityError(state,err_text)

DataError: ('22001', '[22001] [IBM][CLI Driver][DB2/LINUXX8664] SQL0433N  Value "[redacted long string value]" is too long.  SQLSTATE=22001\n')