[BUG] wiki failed to load: BadZipFile
Closed this issue · 1 comment
Jn-Huang commented
It looks like the bug is related to the uploaded zip
file. I will try uploading the file again.
To Reproduce
In [1]: import gli
In [2]: data = gli.dataloading.get_gli_dataset("wiki", "NodeClassification")
---------------------------------------------------------------------------
BadZipFile Traceback (most recent call last)
Input In [2], in <cell line: 1>()
----> 1 data = gli.dataloading.get_gli_dataset("wiki", "NodeClassification")
File ~/Documents/research/gli/gli/dataloading.py:87, in get_gli_dataset(dataset, task, task_id, device, verbose)
54 def get_gli_dataset(dataset: str,
55 task: str,
56 task_id: int = 1,
57 device: str = "cpu",
58 verbose: bool = False) -> DGLDataset:
59 """Get a graph dataset given dataset name and task config.
60
61 GLI will download the dataset if the data files do not exist.
(...)
85 'CORA dataset. NodeClassification'
86 """
---> 87 g = get_gli_graph(dataset, device=device, verbose=verbose)
88 t = get_gli_task(dataset, task, task_id=task_id, verbose=verbose)
89 return combine_graph_and_task(g, t)
File ~/Documents/research/gli/gli/dataloading.py:141, in get_gli_graph(dataset, device, verbose)
138 raise FileNotFoundError(f"{metadata_path} not found.")
139 download_data(dataset, verbose=verbose)
--> 141 return read_gli_graph(metadata_path, device=device, verbose=verbose)
File ~/Documents/research/gli/gli/graph.py:40, in read_gli_graph(metadata_path, device, verbose)
36 assert neg in metadata[
37 "data"], f"attribute `{neg}` not in metadata.json"
39 data = copy(metadata["data"])
---> 40 data = _dfs_read_file(pwd, data, device="cpu")
42 if _is_single_graph(data):
43 return _get_single_graph(data, device, hetero=hetero, name=name)
File ~/Documents/research/gli/gli/graph.py:223, in _dfs_read_file(pwd, d, device)
221 def _dfs_read_file(pwd, d, device="cpu"):
222 """Read file efficiently."""
--> 223 return _dfs_read_file_helper(pwd, d, device)
File ~/Documents/research/gli/gli/graph.py:234, in _dfs_read_file_helper(pwd, d, device)
232 empty_keys = []
233 for k in d:
--> 234 entry = _dfs_read_file_helper(pwd, d[k], device=device)
235 if entry is None:
236 empty_keys.append(k)
File ~/Documents/research/gli/gli/graph.py:234, in _dfs_read_file_helper(pwd, d, device)
232 empty_keys = []
233 for k in d:
--> 234 entry = _dfs_read_file_helper(pwd, d[k], device=device)
235 if entry is None:
236 empty_keys.append(k)
File ~/Documents/research/gli/gli/graph.py:230, in _dfs_read_file_helper(pwd, d, device)
228 if "file" in d:
229 path = os.path.join(pwd, d["file"])
--> 230 return load_data(path, d.get("key"), device)
232 empty_keys = []
233 for k in d:
File ~/Documents/research/gli/gli/utils.py:177, in load_data(path, key, device)
174 return sp.load_npz(path)
176 # Dense arrays file with a key
--> 177 raw = np.load(path, allow_pickle=False)
178 assert key is not None
179 array: np.ndarray = raw.get(key)
File ~/opt/miniconda3/envs/gli/lib/python3.10/site-packages/numpy/lib/npyio.py:432, in load(file, mmap_mode, allow_pickle, fix_imports, encoding)
428 if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX):
429 # zip-file (assume .npz)
430 # Potentially transfer file ownership to NpzFile
431 stack.pop_all()
--> 432 ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle,
433 pickle_kwargs=pickle_kwargs)
434 return ret
435 elif magic == format.MAGIC_PREFIX:
436 # .npy file
File ~/opt/miniconda3/envs/gli/lib/python3.10/site-packages/numpy/lib/npyio.py:189, in NpzFile.__init__(self, fid, own_fid, allow_pickle, pickle_kwargs)
185 def __init__(self, fid, own_fid=False, allow_pickle=False,
186 pickle_kwargs=None):
187 # Import is postponed to here since zipfile depends on gzip, an
188 # optional component of the so-called standard library.
--> 189 _zip = zipfile_factory(fid)
190 self._files = _zip.namelist()
191 self.files = []
File ~/opt/miniconda3/envs/gli/lib/python3.10/site-packages/numpy/lib/npyio.py:112, in zipfile_factory(file, *args, **kwargs)
110 import zipfile
111 kwargs['allowZip64'] = True
--> 112 return zipfile.ZipFile(file, *args, **kwargs)
File ~/opt/miniconda3/envs/gli/lib/python3.10/zipfile.py:1267, in ZipFile.__init__(self, file, mode, compression, allowZip64, compresslevel, strict_timestamps)
1265 try:
1266 if mode == 'r':
-> 1267 self._RealGetContents()
1268 elif mode in ('w', 'x'):
1269 # set the modified flag so central directory gets written
1270 # even if no files are added to the archive
1271 self._didModify = True
File ~/opt/miniconda3/envs/gli/lib/python3.10/zipfile.py:1334, in ZipFile._RealGetContents(self)
1332 raise BadZipFile("File is not a zip file")
1333 if not endrec:
-> 1334 raise BadZipFile("File is not a zip file")
1335 if self.debug > 1:
1336 print(endrec)
BadZipFile: File is not a zip file
Jn-Huang commented
The problem disappeared for an unknown reason. I will reopen this issue if I can reproduce it.