`rtd_ray.upgrade_rtd()`
Opened this issue · 0 comments
mariusdkm commented
After using rtd_ray.download_rtd()
and then rtd_ray.upgrade_rtd()
once, calling
rtd_ray.upgrade_rtd()
a second time doesn't work anymore.
I think rtd_ray.upgrade_rtd()
does something weird. The failing call produces the following error:
Function: subgraph_callable-81d1293f-236b-4a65-a47d-08df5268
args: ({'piece': ('/usr/src/app/cache/recent_change_rtd/_new/part.0.parquet', None, None)})
kwargs: {}
Exception: "ValueError('Categorical categories cannot be null')"
Traceback (most recent call last):
File "/usr/local/lib/python3.9/runpy.py", line 197, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/usr/local/lib/python3.9/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/usr/src/app/update_butler/__main__.py", line 28, in <module>
rtd_ray.upgrade_rtd()
File "/usr/src/app/helpers/RtdRay.py", line 294, in upgrade_rtd
rtd = self.load_data()
File "/usr/src/app/helpers/RtdRay.py", line 430, in load_data
rtd[key] = rtd[key].cat.set_categories(rtd[key].head(1).cat.categories)
File "/opt/venv/lib/python3.9/site-packages/dask/dataframe/core.py", line 1098, in head
return self._head(n=n, npartitions=npartitions, compute=compute, safe=safe)
File "/opt/venv/lib/python3.9/site-packages/dask/dataframe/core.py", line 1132, in _head
result = result.compute()
File "/opt/venv/lib/python3.9/site-packages/dask/base.py", line 288, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/opt/venv/lib/python3.9/site-packages/dask/base.py", line 570, in compute
results = schedule(dsk, keys, **kwargs)
File "/opt/venv/lib/python3.9/site-packages/distributed/client.py", line 2722, in get
results = self.gather(packed, asynchronous=asynchronous, direct=direct)
File "/opt/venv/lib/python3.9/site-packages/distributed/client.py", line 1977, in gather
return self.sync(
File "/opt/venv/lib/python3.9/site-packages/distributed/client.py", line 865, in sync
return sync(
File "/opt/venv/lib/python3.9/site-packages/distributed/utils.py", line 327, in sync
raise exc.with_traceback(tb)
File "/opt/venv/lib/python3.9/site-packages/distributed/utils.py", line 310, in f
result[0] = yield future
File "/opt/venv/lib/python3.9/site-packages/tornado/gen.py", line 762, in run
value = future.result()
File "/opt/venv/lib/python3.9/site-packages/distributed/client.py", line 1842, in _gather
raise exception.with_traceback(traceback)
File "/opt/venv/lib/python3.9/site-packages/dask/optimization.py", line 969, in __call__
return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
File "/opt/venv/lib/python3.9/site-packages/dask/core.py", line 149, in get
result = _execute_task(task, cache)
File "/opt/venv/lib/python3.9/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
File "/opt/venv/lib/python3.9/site-packages/dask/dataframe/io/parquet/core.py", line 87, in __call__
return read_parquet_part(
File "/opt/venv/lib/python3.9/site-packages/dask/dataframe/io/parquet/core.py", line 422, in read_parquet_part
dfs = [
File "/opt/venv/lib/python3.9/site-packages/dask/dataframe/io/parquet/core.py", line 423, in <listcomp>
func(fs, rg, columns.copy(), index, **toolz.merge(kwargs, kw))
File "/opt/venv/lib/python3.9/site-packages/dask/dataframe/io/parquet/arrow.py", line 432, in read_partition
df = cls._arrow_table_to_pandas(arrow_table, categories, **kwargs)
File "/opt/venv/lib/python3.9/site-packages/dask/dataframe/io/parquet/arrow.py", line 1574, in _arrow_table_to_pandas
return arrow_table.to_pandas(categories=categories, **_kwargs)
File "pyarrow/array.pxi", line 766, in pyarrow.lib._PandasConvertible.to_pandas
File "pyarrow/table.pxi", line 1815, in pyarrow.lib.Table._to_pandas
File "/opt/venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py", line 789, in table_to_blockmanager
blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
File "/opt/venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py", line 1130, in _table_to_blocks
return [_reconstruct_block(item, columns, extension_columns)
File "/opt/venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py", line 1130, in <listcomp>
return [_reconstruct_block(item, columns, extension_columns)
File "/opt/venv/lib/python3.9/site-packages/pyarrow/pandas_compat.py", line 728, in _reconstruct_block
cat = _pandas_api.categorical_type.from_codes(
File "/opt/venv/lib/python3.9/site-packages/pandas/core/arrays/categorical.py", line 672, in from_codes
dtype = CategoricalDtype._from_values_or_dtype(
File "/opt/venv/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py", line 296, in _from_values_or_dtype
dtype = CategoricalDtype(categories, ordered)
File "/opt/venv/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py", line 183, in __init__
self._finalize(categories, ordered, fastpath=False)
File "/opt/venv/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py", line 337, in _finalize
categories = self.validate_categories(categories, fastpath=fastpath)
File "/opt/venv/lib/python3.9/site-packages/pandas/core/dtypes/dtypes.py", line 537, in validate_categories
raise ValueError("Categorical categories cannot be null")
ValueError: Categorical categories cannot be null