PayneLab/cptac

Clinical data download fails

Closed this issue · 1 comment

Currently, calling the following code fails with the `DownloadFailedError` shown in the traceback below:

import cptac
dat = cptac.Brca()
clin = dat.get_clinical('mssm')

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /opt/homebrew/lib/python3.10/site-packages/cptac/tools/download_tools.py:102, in download(cancer, source, dtype, data_file)
    101 repo_data = fetch_repo_data()
--> 102 get_data(f"https://zenodo.org/api/records/8394329/files/{file_name}/content", output_file)
    103 # Verify checksum

File /opt/homebrew/lib/python3.10/site-packages/cptac/tools/download_tools.py:167, in get_data(url, subfolder, num_threads)
    166 if repo_data['files'][num]['key'] == file_name:
--> 167     file_size = repo_data['files'][num]['filesize']
    168     break

KeyError: 'filesize'

The above exception was the direct cause of the following exception:

DownloadFailedError                       Traceback (most recent call last)
Cell In[17], line 1
----> 1 tumor_samps = dat.get_clinical(cs)#.Sample_Tumor_Normal=='Tumor'

File /opt/homebrew/lib/python3.10/site-packages/cptac/cancers/cancer.py:99, in Cancer.get_clinical(self, source, tissue_type, imputed)
     97 def get_clinical(self, source: str= None, tissue_type: str="both", imputed: bool=False) -> pd.DataFrame:
     98     """Get the clinical dataframe from the specified data source."""
---> 99     df = self.get_dataframe("clinical", source, tissue_type, imputed=imputed)
    100     df.columns = df.columns.str.split('/').str[-1] # Keep only the part after the slash
    101     return df

File /opt/homebrew/lib/python3.10/site-packages/cptac/cancers/cancer.py:694, in Cancer.get_dataframe(self, data_type, source, tissue_type, imputed)
    691 if source not in self._sources:
    692     raise DataSourceNotFoundError(f"Data source {source} not found for the {self._cancer_type} dataset.")
--> 694 df = self._sources[source].get_df(data_type)
    696 if tissue_type == "normal":
    697     df = self._normal_only(df)

File /opt/homebrew/lib/python3.10/site-packages/cptac/cancers/source.py:76, in Source.get_df(self, df_type)
     74     if df_type not in self.load_functions:
     75         raise DataTypeNotInSourceError(f"The {self.source} source does not have {df_type} data for {self.cancer_type} cancer.")
---> 76     self.load_functions[df_type]()
     77 return self._data[df_type]

File /opt/homebrew/lib/python3.10/site-packages/cptac/cancers/mssm/mssm.py:62, in Mssm.load_clinical(self)
     60 df_type = 'clinical'
     61 if df_type not in self._data:
---> 62     file_path = self.locate_files(df_type)
     63     tumor_codes = {'brca':'BR', 'ccrcc':'CCRCC',
     64                    'ucec':'UCEC', 'gbm':'GBM', 'hnscc':'HNSCC',
     65                    'lscc':'LSCC', 'luad':'LUAD', 'pdac':'PDA',
     66                    'hcc':'HCC', 'coad':'CO', 'ov':'OV'}
     68     df = pd.read_csv(file_path, sep='\t')

File /opt/homebrew/lib/python3.10/site-packages/cptac/cancers/source.py:129, in Source.locate_files(self, datatype)
    126         os.remove(file_path)
    128 if not os.path.isfile(file_path) and not self.no_internet:
--> 129     cptac.download(self.cancer_type, self.source, datatype, data_file)
    130 elif not os.path.isfile(file_path) and self.no_internet:
    131     raise MissingFileError(f"The {self.source} {data_file} file for the {self.cancer_type} is not downloaded and you are running cptac in no_internet mode.")

File /opt/homebrew/lib/python3.10/site-packages/cptac/tools/download_tools.py:117, in download(cancer, source, dtype, data_file)
    115     raise HttpResponseError(f"Requesting data failed with the following error: {e}")
    116 except Exception as e:
--> 117     raise DownloadFailedError(f"Failed to download data file for {source} {cancer} {dtype} with error:\n{e}") from e

DownloadFailedError: Failed to download data file for mssm brca clinical with error:
'filesize'

I apologize for the error on my part. I just released a fixed version. I appreciate your patience.