TypeError: unhashable type: 'list' in 'analyze' method when building target_dict["duplicates"]
kbroughton opened this issue · 2 comments
kbroughton commented
This is my first attempt at using sweetviz. I'm running it in a modified scipy-notebook container with Python 3.9.
import sweetviz as sv
my_report = sv.analyze(df)
my_report.show_html()
[Summarizing dataframe]
[ 0%] 00:00 -> (? left)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/tmp/ipykernel_1008/3374629033.py in <module>
1 import sweetviz as sv
2
----> 3 my_report = sv.analyze(df, target_feat='state')
4 my_report.show_html()
/opt/conda/lib/python3.9/site-packages/sweetviz/sv_public.py in analyze(source, target_feat, feat_cfg, pairwise_analysis)
10 feat_cfg: FeatureConfig = None,
11 pairwise_analysis: str = 'auto'):
---> 12 report = sweetviz.DataframeReport(source, target_feat, None,
13 pairwise_analysis, feat_cfg)
14 return report
/opt/conda/lib/python3.9/site-packages/sweetviz/dataframe_report.py in __init__(self, source, target_feature_name, compare, pairwise_analysis, fc)
127 self.progress_bar.set_description_str("[Summarizing dataframe]")
128 self.summary_source = dict()
--> 129 self.summarize_dataframe(source_df, self.source_name, self.summary_source, fc.skip)
130 # UPDATE 2021-02-05: Count the target has an actual feature!!! It is!!!
131 # if target_feature_name:
/opt/conda/lib/python3.9/site-packages/sweetviz/dataframe_report.py in summarize_dataframe(self, source, name, target_dict, skip)
357 target_dict["memory_single_row"] = 0
358
--> 359 target_dict["duplicates"] = NumWithPercent(sum(source.duplicated()), len(source))
360 target_dict["num_cmp_not_in_source"] = 0 # set later, as needed
361
/opt/conda/lib/python3.9/site-packages/pandas/core/frame.py in duplicated(self, subset, keep)
6198
6199 vals = (col.values for name, col in self.items() if name in subset)
-> 6200 labels, shape = map(list, zip(*map(f, vals)))
6201
6202 ids = get_group_index(
/opt/conda/lib/python3.9/site-packages/pandas/core/frame.py in f(vals)
6171
6172 def f(vals) -> tuple[np.ndarray, int]:
-> 6173 labels, shape = algorithms.factorize(vals, size_hint=len(self))
6174 return labels.astype("i8", copy=False), len(shape)
6175
/opt/conda/lib/python3.9/site-packages/pandas/core/algorithms.py in factorize(values, sort, na_sentinel, size_hint)
759 na_value = None
760
--> 761 codes, uniques = factorize_array(
762 values, na_sentinel=na_sentinel, size_hint=size_hint, na_value=na_value
763 )
/opt/conda/lib/python3.9/site-packages/pandas/core/algorithms.py in factorize_array(values, na_sentinel, size_hint, na_value, mask)
561
562 table = hash_klass(size_hint or len(values))
--> 563 uniques, codes = table.factorize(
564 values, na_sentinel=na_sentinel, na_value=na_value, mask=mask
565 )
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.factorize()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable._unique()
TypeError: unhashable type: 'list'
tomgallagher commented
Me too. I have structs and lists in some DataFrame columns. I'm guessing this is a problem for Sweetviz, since list values are unhashable and `DataFrame.duplicated()` has to hash every value?