Error loading PadChest dataset
dgmato opened this issue · 4 comments
dgmato commented
I downloaded the 224x224 images of the PadChest dataset and I tried to load the dataset in a Jupyter Notebook as described in the example code:
import torchxrayvision as xrv
d_pc = xrv.datasets.PC_Dataset(imgpath="D:/DATASET/images-224/")
However, I get the following error, which appears to be related to how the study date is converted:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [9], in <cell line: 1>()
----> 1 d_pc = xrv.datasets.PC_Dataset(imgpath="D:/DATASET/images-224/")
File ~\anaconda3\lib\site-packages\torchxrayvision\datasets.py:796, in PC_Dataset.__init__(self, imgpath, csvpath, views, transform, data_aug, flat_dir, seed, unique_patients)
792 # add consistent csv values
793
794 # offset_day_int
795 dt = pd.to_datetime(self.csv["StudyDate_DICOM"], format="%Y%m%d")
--> 796 self.csv["offset_day_int"] = dt.astype(int) // 10**9 // 86400
798 # patientid
799 self.csv["patientid"] = self.csv["PatientID"].astype(str)
File ~\anaconda3\lib\site-packages\pandas\core\generic.py:5912, in NDFrame.astype(self, dtype, copy, errors)
5905 results = [
5906 self.iloc[:, i].astype(dtype, copy=copy)
5907 for i in range(len(self.columns))
5908 ]
5910 else:
5911 # else, only a single dtype is given
-> 5912 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
5913 return self._constructor(new_data).__finalize__(self, method="astype")
5915 # GH 33113: handle empty frame or series
File ~\anaconda3\lib\site-packages\pandas\core\internals\managers.py:419, in BaseBlockManager.astype(self, dtype, copy, errors)
418 def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
--> 419 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
File ~\anaconda3\lib\site-packages\pandas\core\internals\managers.py:304, in BaseBlockManager.apply(self, f, align_keys, ignore_failures, **kwargs)
302 applied = b.apply(f, **kwargs)
303 else:
--> 304 applied = getattr(b, f)(**kwargs)
305 except (TypeError, NotImplementedError):
306 if not ignore_failures:
File ~\anaconda3\lib\site-packages\pandas\core\internals\blocks.py:580, in Block.astype(self, dtype, copy, errors)
562 """
563 Coerce to the new dtype.
564
(...)
576 Block
577 """
578 values = self.values
--> 580 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
582 new_values = maybe_coerce_values(new_values)
583 newb = self.make_block(new_values)
File ~\anaconda3\lib\site-packages\pandas\core\dtypes\cast.py:1292, in astype_array_safe(values, dtype, copy, errors)
1289 dtype = dtype.numpy_dtype
1291 try:
-> 1292 new_values = astype_array(values, dtype, copy=copy)
1293 except (ValueError, TypeError):
1294 # e.g. astype_nansafe can fail on object-dtype of strings
1295 # trying to convert to float
1296 if errors == "ignore":
File ~\anaconda3\lib\site-packages\pandas\core\dtypes\cast.py:1222, in astype_array(values, dtype, copy)
1214 if (
1215 values.dtype.kind in ["m", "M"]
1216 and dtype.kind in ["i", "u"]
(...)
1219 ):
1220 # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
1221 msg = rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
-> 1222 raise TypeError(msg)
1224 if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
1225 return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)
TypeError: cannot astype a datetimelike from [datetime64[ns]] to [int32]
Has anyone encountered this same issue? Any ideas on how to solve it? Thank you!
ieee8023 commented
What version of pandas is installed?
import pandas as pd
pd.__version__
Maybe try running `pip install pandas --upgrade`.
ieee8023 commented
This may be a Windows support issue (the traceback shows `astype(int)` resolving to int32). I've started some tests and will see what kind of fixes are needed and get back to you.