liuxu77/LargeST

处理SD数据出错

ContinualGoing opened this issue · 2 comments

处理SD数据出错

您好,非常感谢您开源的大数据集,我们在尝试处理SD数据的时候,jupyter笔记本代码运行出错,错误提示如下:
year = '2019' # please specify the year, our experiments use 2019

sd_meta.ID = sd_meta.ID.astype(str)
sd_meta_id = sd_meta.ID.values.tolist()

ca_his = pd.read_hdf('../ca/ca_his_' + year +'.h5')
sd_his = ca_his[sd_meta_id]
sd_his

ValueError Traceback (most recent call last)
~/anaconda3/envs/test7/lib/python3.7/site-packages/IPython/core/formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()

~/anaconda3/envs/test7/lib/python3.7/site-packages/IPython/lib/pretty.py in pretty(self, obj)
392 if cls is not object
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
395
396 return _default_pprint(obj, self, cycle)

~/anaconda3/envs/test7/lib/python3.7/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
698 """A pprint that just redirects to the normal repr function."""
699 # Find newlines and replace them with p.break_()
--> 700 output = repr(obj)
701 lines = output.splitlines()
702 with p.group():

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/frame.py in __repr__(self)
808 line_width=width,
809 max_colwidth=max_colwidth,
--> 810 show_dimensions=show_dimensions,
811 )
812

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, min_rows, max_cols, show_dimensions, decimal, line_width, max_colwidth, encoding)
935 max_cols=max_cols,
936 show_dimensions=show_dimensions,
--> 937 decimal=decimal,
938 )
939 return fmt.DataFrameRenderer(formatter).to_string(

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/io/formats/format.py in __init__(self, frame, columns, col_space, header, index, na_rep, formatters, justify, float_format, sparsify, index_names, max_rows, min_rows, max_cols, show_dimensions, decimal, bold_rows, escape)
510
511 self.tr_frame = self.frame
--> 512 self.truncate()
513 self.adj = get_adjustment()
514

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/io/formats/format.py in truncate(self)
701
702 if self.is_truncated_vertically:
--> 703 self._truncate_vertically()
704
705 def _truncate_horizontally(self) -> None:

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/io/formats/format.py in _truncate_vertically(self)
741 head = self.tr_frame.iloc[:row_num, :]
742 tail = self.tr_frame.iloc[-row_num:, :]
--> 743 self.tr_frame = concat((head, tail))
744 else:
745 row_num = cast(int, self.max_rows)

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
293 verify_integrity=verify_integrity,
294 copy=copy,
--> 295 sort=sort,
296 )
297

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)
465 self.copy = copy
466
--> 467 self.new_axes = self._get_new_axes()
468
469 def get_result(self):

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _get_new_axes(self)
537 return [
538 self._get_concat_axis() if i == self.bm_axis else self._get_comb_axis(i)
--> 539 for i in range(ndim)
540 ]
541

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in <listcomp>(.0)
537 return [
538 self._get_concat_axis() if i == self.bm_axis else self._get_comb_axis(i)
--> 539 for i in range(ndim)
540 ]
541

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _get_concat_axis(self)
590
591 if self.keys is None:
--> 592 concat_axis = _concat_indexes(indexes)
593 else:
594 concat_axis = _make_concat_multiindex(

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _concat_indexes(indexes)
608
609 def _concat_indexes(indexes) -> Index:
--> 610 return indexes[0].append(indexes[1:])
611
612

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/indexes/base.py in append(self, other)
4354 name = None if len(names) > 1 else self.name
4355
-> 4356 return self._concat(to_concat, name)
4357
4358 def _concat(self, to_concat: List["Index"], name: Label) -> "Index":

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/indexes/base.py in _concat(self, to_concat, name)
4362 to_concat_vals = [x._values for x in to_concat]
4363
-> 4364 result = concat_compat(to_concat_vals)
4365 return Index(result, name=name)
4366

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/dtypes/concat.py in concat_compat(to_concat, axis)
153
154 elif _contains_datetime or "timedelta" in typs:
--> 155 return _concat_datetime(to_concat, axis=axis)
156
157 elif all_empty:

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/dtypes/concat.py in _concat_datetime(to_concat, axis)
375 to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat]
376
--> 377 result = type(to_concat[0])._concat_same_type(to_concat, axis=axis)
378
379 if result.ndim == 2 and is_extension_array_dtype(result.dtype):

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimelike.py in _concat_same_type(cls, to_concat, axis)
405 if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
406 pairs = zip(to_concat[:-1], to_concat[1:])
--> 407 if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs):
408 new_freq = obj.freq
409

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimelike.py in <genexpr>(.0)
405 if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
406 pairs = zip(to_concat[:-1], to_concat[1:])
--> 407 if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs):
408 new_freq = obj.freq
409

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimelike.py in __getitem__(self, key)
279 only handle list-likes, slices, and integer scalars
280 """
--> 281 result = super().__getitem__(key)
282 if lib.is_scalar(result):
283 return result

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/_mixins.py in __getitem__(self, key)
222 result = self._ndarray[key]
223 if self.ndim == 1:
--> 224 return self._box_func(result)
225 return self._from_backing_data(result)
226

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimes.py in _box_func(self, x)
483
484 def _box_func(self, x) -> Union[Timestamp, NaTType]:
--> 485 return Timestamp(x, freq=self.freq, tz=self.tz)
486
487 @property

pandas/_libs/tslibs/timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.__new__()

pandas/_libs/tslibs/offsets.pyx in pandas._libs.tslibs.offsets.to_offset()

ValueError: Invalid frequency: b'ccopy_reg\n_reconstructor\np0\n(cpandas.tseries.offsets\nMinute\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nVn\np6\nI15\nsVnormalize\np7\nI00\nsV_cache\np8\n(dp9\nsb.'


ValueError Traceback (most recent call last)
~/anaconda3/envs/test7/lib/python3.7/site-packages/IPython/core/formatters.py in __call__(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/frame.py in _repr_html_(self)
851 max_cols=max_cols,
852 show_dimensions=show_dimensions,
--> 853 decimal=".",
854 )
855 return fmt.DataFrameRenderer(formatter).to_html(notebook=True)

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/io/formats/format.py in __init__(self, frame, columns, col_space, header, index, na_rep, formatters, justify, float_format, sparsify, index_names, max_rows, min_rows, max_cols, show_dimensions, decimal, bold_rows, escape)
510
511 self.tr_frame = self.frame
--> 512 self.truncate()
513 self.adj = get_adjustment()
514

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/io/formats/format.py in truncate(self)
701
702 if self.is_truncated_vertically:
--> 703 self._truncate_vertically()
704
705 def _truncate_horizontally(self) -> None:

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/io/formats/format.py in _truncate_vertically(self)
741 head = self.tr_frame.iloc[:row_num, :]
742 tail = self.tr_frame.iloc[-row_num:, :]
--> 743 self.tr_frame = concat((head, tail))
744 else:
745 row_num = cast(int, self.max_rows)

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
293 verify_integrity=verify_integrity,
294 copy=copy,
--> 295 sort=sort,
296 )
297

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in __init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)
465 self.copy = copy
466
--> 467 self.new_axes = self._get_new_axes()
468
469 def get_result(self):

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _get_new_axes(self)
537 return [
538 self._get_concat_axis() if i == self.bm_axis else self._get_comb_axis(i)
--> 539 for i in range(ndim)
540 ]
541

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in <listcomp>(.0)
537 return [
538 self._get_concat_axis() if i == self.bm_axis else self._get_comb_axis(i)
--> 539 for i in range(ndim)
540 ]
541

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _get_concat_axis(self)
590
591 if self.keys is None:
--> 592 concat_axis = _concat_indexes(indexes)
593 else:
594 concat_axis = _make_concat_multiindex(

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/reshape/concat.py in _concat_indexes(indexes)
608
609 def _concat_indexes(indexes) -> Index:
--> 610 return indexes[0].append(indexes[1:])
611
612

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/indexes/base.py in append(self, other)
4354 name = None if len(names) > 1 else self.name
4355
-> 4356 return self._concat(to_concat, name)
4357
4358 def _concat(self, to_concat: List["Index"], name: Label) -> "Index":

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/indexes/base.py in _concat(self, to_concat, name)
4362 to_concat_vals = [x._values for x in to_concat]
4363
-> 4364 result = concat_compat(to_concat_vals)
4365 return Index(result, name=name)
4366

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/dtypes/concat.py in concat_compat(to_concat, axis)
153
154 elif _contains_datetime or "timedelta" in typs:
--> 155 return _concat_datetime(to_concat, axis=axis)
156
157 elif all_empty:

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/dtypes/concat.py in _concat_datetime(to_concat, axis)
375 to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat]
376
--> 377 result = type(to_concat[0])._concat_same_type(to_concat, axis=axis)
378
379 if result.ndim == 2 and is_extension_array_dtype(result.dtype):

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimelike.py in _concat_same_type(cls, to_concat, axis)
405 if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
406 pairs = zip(to_concat[:-1], to_concat[1:])
--> 407 if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs):
408 new_freq = obj.freq
409

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimelike.py in <genexpr>(.0)
405 if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
406 pairs = zip(to_concat[:-1], to_concat[1:])
--> 407 if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs):
408 new_freq = obj.freq
409

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimelike.py in __getitem__(self, key)
279 only handle list-likes, slices, and integer scalars
280 """
--> 281 result = super().__getitem__(key)
282 if lib.is_scalar(result):
283 return result

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/_mixins.py in __getitem__(self, key)
222 result = self._ndarray[key]
223 if self.ndim == 1:
--> 224 return self._box_func(result)
225 return self._from_backing_data(result)
226

~/anaconda3/envs/test7/lib/python3.7/site-packages/pandas/core/arrays/datetimes.py in _box_func(self, x)
483
484 def _box_func(self, x) -> Union[Timestamp, NaTType]:
--> 485 return Timestamp(x, freq=self.freq, tz=self.tz)
486
487 @property

pandas/_libs/tslibs/timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.__new__()

pandas/_libs/tslibs/offsets.pyx in pandas._libs.tslibs.offsets.to_offset()

ValueError: Invalid frequency: b'ccopy_reg\n_reconstructor\np0\n(cpandas.tseries.offsets\nMinute\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\n(dp5\nVn\np6\nI15\nsVnormalize\np7\nI00\nsV_cache\np8\n(dp9\nsb.'

Hi, thanks for your question. Does this issue also occur on other datasets, e.g., GBA?

Also, you may need to check the version of your pandas library; we use pandas==1.3.5.