DataFrame.pivot does not accept a list as the index parameter
crucis opened this issue · 0 comments
crucis commented
The following example fails with `TypeError: unhashable type: 'list'` in Databricks Runtime 8.4:
kdf = ks.DataFrame({"ui": ['C', 'D', 'D', 'C'],
"foo": ['one', 'one', 'two', 'two'],
"bar": ['A', 'A', 'B', 'C'],
"ar": [1, 2, 2, 2],
"baz": [1, 2, 3, 4]}, columns=['ui', 'foo', 'bar', 'baz', 'ar'])
kdf.pivot(index=['ui', 'foo'], columns='bar', values=['baz', 'ar'])
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<command-4107535394601473> in <module>
----> 1 df.pivot(index=['ui','foo'] , columns='bar', values=['baz', 'ar'])
/databricks/python/lib/python3.8/site-packages/databricks/koalas/usage_logging/__init__.py in wrapper(*args, **kwargs)
193 start = time.perf_counter()
194 try:
--> 195 res = func(*args, **kwargs)
196 logger.log_success(
197 class_name, function_name, time.perf_counter() - start, signature
/databricks/python/lib/python3.8/site-packages/databricks/koalas/frame.py in pivot(self, index, columns, values)
6274 index = df._internal.column_labels[: self._internal.index_level]
6275
-> 6276 df = df.pivot_table(index=index, columns=columns, values=values, aggfunc="first")
6277
6278 if should_use_existing_index:
/databricks/python/lib/python3.8/site-packages/databricks/koalas/usage_logging/__init__.py in wrapper(*args, **kwargs)
188 if hasattr(_local, "logging") and _local.logging:
189 # no need to log since this should be internal call.
--> 190 return func(*args, **kwargs)
191 _local.logging = True
192 try:
/databricks/python/lib/python3.8/site-packages/databricks/koalas/frame.py in pivot_table(self, values, index, columns, aggfunc, fill_value)
6048 index = [label if is_name_like_tuple(label) else (label,) for label in index]
6049 sdf = (
-> 6050 sdf.groupBy([self._internal.spark_column_name_for(label) for label in index])
6051 .pivot(pivot_col=self._internal.spark_column_name_for(columns))
6052 .agg(*agg_cols)
/databricks/python/lib/python3.8/site-packages/databricks/koalas/frame.py in <listcomp>(.0)
6048 index = [label if is_name_like_tuple(label) else (label,) for label in index]
6049 sdf = (
-> 6050 sdf.groupBy([self._internal.spark_column_name_for(label) for label in index])
6051 .pivot(pivot_col=self._internal.spark_column_name_for(columns))
6052 .agg(*agg_cols)
/databricks/python/lib/python3.8/site-packages/databricks/koalas/internal.py in spark_column_name_for(self, label_or_scol)
813 scol = label_or_scol
814 else:
--> 815 scol = self.spark_column_for(label_or_scol)
816 return self.spark_frame.select(scol).columns[0]
817
/databricks/python/lib/python3.8/site-packages/databricks/koalas/internal.py in spark_column_for(self, label)
803 """ Return Spark Column for the given column label. """
804 column_labels_to_scol = dict(zip(self.column_labels, self.data_spark_columns))
--> 805 if label in column_labels_to_scol:
806 return column_labels_to_scol[label]
807 else:
TypeError: unhashable type: 'list'
As a workaround, I am using
kdf.pivot_table(index=['ui','foo'] , columns='bar', values=['baz', 'ar'], aggfunc='first')
to solve my problem, but I think that pivot itself
should accept a list for index and work with a MultiIndex.