Clinical data download fails
Closed this issue · 1 comment
sgosline commented
Currently, calling the following code fails with the error shown below:
import cptac
dat = cptac.Brca()
clin = dat.get_clinical('mssm')
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /opt/homebrew/lib/python3.10/site-packages/cptac/tools/download_tools.py:102, in download(cancer, source, dtype, data_file)
101 repo_data = fetch_repo_data()
--> 102 get_data(f"https://zenodo.org/api/records/8394329/files/{file_name}/content", output_file)
103 # Verify checksum
File /opt/homebrew/lib/python3.10/site-packages/cptac/tools/download_tools.py:167, in get_data(url, subfolder, num_threads)
166 if repo_data['files'][num]['key'] == file_name:
--> 167 file_size = repo_data['files'][num]['filesize']
168 break
KeyError: 'filesize'
The above exception was the direct cause of the following exception:
DownloadFailedError Traceback (most recent call last)
Cell In[17], line 1
----> 1 tumor_samps = dat.get_clinical(cs)#.Sample_Tumor_Normal=='Tumor'
File /opt/homebrew/lib/python3.10/site-packages/cptac/cancers/cancer.py:99, in Cancer.get_clinical(self, source, tissue_type, imputed)
97 def get_clinical(self, source: str= None, tissue_type: str="both", imputed: bool=False) -> pd.DataFrame:
98 """Get the clinical dataframe from the specified data source."""
---> 99 df = self.get_dataframe("clinical", source, tissue_type, imputed=imputed)
100 df.columns = df.columns.str.split('/').str[-1] # Keep only the part after the slash
101 return df
File /opt/homebrew/lib/python3.10/site-packages/cptac/cancers/cancer.py:694, in Cancer.get_dataframe(self, data_type, source, tissue_type, imputed)
691 if source not in self._sources:
692 raise DataSourceNotFoundError(f"Data source {source} not found for the {self._cancer_type} dataset.")
--> 694 df = self._sources[source].get_df(data_type)
696 if tissue_type == "normal":
697 df = self._normal_only(df)
File /opt/homebrew/lib/python3.10/site-packages/cptac/cancers/source.py:76, in Source.get_df(self, df_type)
74 if df_type not in self.load_functions:
75 raise DataTypeNotInSourceError(f"The {self.source} source does not have {df_type} data for {self.cancer_type} cancer.")
---> 76 self.load_functions[df_type]()
77 return self._data[df_type]
File /opt/homebrew/lib/python3.10/site-packages/cptac/cancers/mssm/mssm.py:62, in Mssm.load_clinical(self)
60 df_type = 'clinical'
61 if df_type not in self._data:
---> 62 file_path = self.locate_files(df_type)
63 tumor_codes = {'brca':'BR', 'ccrcc':'CCRCC',
64 'ucec':'UCEC', 'gbm':'GBM', 'hnscc':'HNSCC',
65 'lscc':'LSCC', 'luad':'LUAD', 'pdac':'PDA',
66 'hcc':'HCC', 'coad':'CO', 'ov':'OV'}
68 df = pd.read_csv(file_path, sep='\t')
File /opt/homebrew/lib/python3.10/site-packages/cptac/cancers/source.py:129, in Source.locate_files(self, datatype)
126 os.remove(file_path)
128 if not os.path.isfile(file_path) and not self.no_internet:
--> 129 cptac.download(self.cancer_type, self.source, datatype, data_file)
130 elif not os.path.isfile(file_path) and self.no_internet:
131 raise MissingFileError(f"The {self.source} {data_file} file for the {self.cancer_type} is not downloaded and you are running cptac in no_internet mode.")
File /opt/homebrew/lib/python3.10/site-packages/cptac/tools/download_tools.py:117, in download(cancer, source, dtype, data_file)
115 raise HttpResponseError(f"Requesting data failed with the following error: {e}")
116 except Exception as e:
--> 117 raise DownloadFailedError(f"Failed to download data file for {source} {cancer} {dtype} with error:\n{e}") from e
DownloadFailedError: Failed to download data file for mssm brca clinical with error:
'filesize'
sabmel commented
I apologize for the error on my part. I just released a fixed version. I appreciate your patience.