Graph-Learning-Benchmarks/gli

[BUG] wiki failed to load: BadZipFile

Closed this issue · 1 comment

It looks like the bug is related to the uploaded zip file. I will try uploading the file again.

To Reproduce


In [1]: import gli

In [2]: data = gli.dataloading.get_gli_dataset("wiki", "NodeClassification")
---------------------------------------------------------------------------
BadZipFile                                Traceback (most recent call last)
Input In [2], in <cell line: 1>()
----> 1 data = gli.dataloading.get_gli_dataset("wiki", "NodeClassification")

File ~/Documents/research/gli/gli/dataloading.py:87, in get_gli_dataset(dataset, task, task_id, device, verbose)
     54 def get_gli_dataset(dataset: str,
     55                     task: str,
     56                     task_id: int = 1,
     57                     device: str = "cpu",
     58                     verbose: bool = False) -> DGLDataset:
     59     """Get a graph dataset given dataset name and task config.
     60 
     61     GLI will download the dataset if the data files do not exist.
   (...)
     85     'CORA dataset. NodeClassification'
     86     """
---> 87     g = get_gli_graph(dataset, device=device, verbose=verbose)
     88     t = get_gli_task(dataset, task, task_id=task_id, verbose=verbose)
     89     return combine_graph_and_task(g, t)

File ~/Documents/research/gli/gli/dataloading.py:141, in get_gli_graph(dataset, device, verbose)
    138     raise FileNotFoundError(f"{metadata_path} not found.")
    139 download_data(dataset, verbose=verbose)
--> 141 return read_gli_graph(metadata_path, device=device, verbose=verbose)

File ~/Documents/research/gli/gli/graph.py:40, in read_gli_graph(metadata_path, device, verbose)
     36     assert neg in metadata[
     37         "data"], f"attribute `{neg}` not in metadata.json"
     39 data = copy(metadata["data"])
---> 40 data = _dfs_read_file(pwd, data, device="cpu")
     42 if _is_single_graph(data):
     43     return _get_single_graph(data, device, hetero=hetero, name=name)

File ~/Documents/research/gli/gli/graph.py:223, in _dfs_read_file(pwd, d, device)
    221 def _dfs_read_file(pwd, d, device="cpu"):
    222     """Read file efficiently."""
--> 223     return _dfs_read_file_helper(pwd, d, device)

File ~/Documents/research/gli/gli/graph.py:234, in _dfs_read_file_helper(pwd, d, device)
    232 empty_keys = []
    233 for k in d:
--> 234     entry = _dfs_read_file_helper(pwd, d[k], device=device)
    235     if entry is None:
    236         empty_keys.append(k)

File ~/Documents/research/gli/gli/graph.py:234, in _dfs_read_file_helper(pwd, d, device)
    232 empty_keys = []
    233 for k in d:
--> 234     entry = _dfs_read_file_helper(pwd, d[k], device=device)
    235     if entry is None:
    236         empty_keys.append(k)

File ~/Documents/research/gli/gli/graph.py:230, in _dfs_read_file_helper(pwd, d, device)
    228 if "file" in d:
    229     path = os.path.join(pwd, d["file"])
--> 230     return load_data(path, d.get("key"), device)
    232 empty_keys = []
    233 for k in d:

File ~/Documents/research/gli/gli/utils.py:177, in load_data(path, key, device)
    174     return sp.load_npz(path)
    176 # Dense arrays file with a key
--> 177 raw = np.load(path, allow_pickle=False)
    178 assert key is not None
    179 array: np.ndarray = raw.get(key)

File ~/opt/miniconda3/envs/gli/lib/python3.10/site-packages/numpy/lib/npyio.py:432, in load(file, mmap_mode, allow_pickle, fix_imports, encoding)
    428 if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX):
    429     # zip-file (assume .npz)
    430     # Potentially transfer file ownership to NpzFile
    431     stack.pop_all()
--> 432     ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle,
    433                   pickle_kwargs=pickle_kwargs)
    434     return ret
    435 elif magic == format.MAGIC_PREFIX:
    436     # .npy file

File ~/opt/miniconda3/envs/gli/lib/python3.10/site-packages/numpy/lib/npyio.py:189, in NpzFile.__init__(self, fid, own_fid, allow_pickle, pickle_kwargs)
    185 def __init__(self, fid, own_fid=False, allow_pickle=False,
    186              pickle_kwargs=None):
    187     # Import is postponed to here since zipfile depends on gzip, an
    188     # optional component of the so-called standard library.
--> 189     _zip = zipfile_factory(fid)
    190     self._files = _zip.namelist()
    191     self.files = []

File ~/opt/miniconda3/envs/gli/lib/python3.10/site-packages/numpy/lib/npyio.py:112, in zipfile_factory(file, *args, **kwargs)
    110 import zipfile
    111 kwargs['allowZip64'] = True
--> 112 return zipfile.ZipFile(file, *args, **kwargs)

File ~/opt/miniconda3/envs/gli/lib/python3.10/zipfile.py:1267, in ZipFile.__init__(self, file, mode, compression, allowZip64, compresslevel, strict_timestamps)
   1265 try:
   1266     if mode == 'r':
-> 1267         self._RealGetContents()
   1268     elif mode in ('w', 'x'):
   1269         # set the modified flag so central directory gets written
   1270         # even if no files are added to the archive
   1271         self._didModify = True

File ~/opt/miniconda3/envs/gli/lib/python3.10/zipfile.py:1334, in ZipFile._RealGetContents(self)
   1332     raise BadZipFile("File is not a zip file")
   1333 if not endrec:
-> 1334     raise BadZipFile("File is not a zip file")
   1335 if self.debug > 1:
   1336     print(endrec)

BadZipFile: File is not a zip file

The problem disappeared for an unknown reason. I will reopen this issue if I can reproduce it.