`InvalidPathFormatError` when calling `ds.all()`
Closed this issue · 2 comments
yonomitt commented
My code:
len(ds.all().dataframe)
My repo:
https://test.dagshub.com/yonomitt/LAION-Aesthetics-V2-6.5plus/src/main/data
My error:
---------------------------------------------------------------------------
InvalidPathFormatError Traceback (most recent call last)
Cell In[15], line 1
----> 1 len(ds.all().dataframe)
File ~/.miniforge3/envs/dagstest/lib/python3.10/site-packages/dagshub/data_engine/client/dataclasses.py:77, in QueryResult.dataframe(self)
75 for e in self.entries:
76 names.append(e.path)
---> 77 urls.append(e.download_url(self.datasource))
78 metadata_keys.update(e.metadata.keys())
80 res = pd.DataFrame({"name": names, "dagshub_download_url": urls})
File ~/.miniforge3/envs/dagstest/lib/python3.10/site-packages/dagshub/data_engine/client/dataclasses.py:21, in Datapoint.download_url(self, ds)
20 def download_url(self, ds: "Datasource"):
---> 21 return ds.source.raw_path(self)
File ~/.miniforge3/envs/dagstest/lib/python3.10/site-packages/dagshub/data_engine/model/datasource_state.py:93, in DatasourceState.raw_path(self, path)
89 """
90 Returns the url for the download path of a specified path
91 """
92 path = self._extract_path(path).strip("/")
---> 93 return self.root_raw_path + "/" + path
File ~/.miniforge3/envs/dagstest/lib/python3.10/functools.py:981, in cached_property.__get__(self, instance, owner)
979 val = cache.get(self.attrname, _NOT_FOUND)
980 if val is _NOT_FOUND:
--> 981 val = self.func(instance)
982 try:
983 cache[self.attrname] = val
File ~/.miniforge3/envs/dagstest/lib/python3.10/site-packages/dagshub/data_engine/model/datasource_state.py:111, in DatasourceState.root_raw_path(self)
104 @cached_property
105 def root_raw_path(self):
106 """
107 Returns the root raw path of the dataset for downloading files
108 This is just a "prefix" of the datasource relative to the repo.
109 In order to build a path of an entity you need to concatenate the path to this root
110 """
--> 111 return self._root_path("raw")
File ~/.miniforge3/envs/dagstest/lib/python3.10/site-packages/dagshub/data_engine/model/datasource_state.py:115, in DatasourceState._root_path(self, path_type)
113 def _root_path(self, path_type):
114 assert path_type in ["raw", "content"]
--> 115 parts = self.path_parts()
116 if self.source_type == DatasourceType.BUCKET:
117 path_elems = [parts["schema"], parts["bucket"]]
File ~/.miniforge3/envs/dagstest/lib/python3.10/site-packages/dagshub/data_engine/model/datasource_state.py:145, in DatasourceState.path_parts(self)
143 match = regex.fullmatch(self.path)
144 if match is None:
--> 145 raise InvalidPathFormatError(f"{self.path} is not valid path format for type {self.source_type}.\n"
146 f"Expected format: {expected_formats[self.source_type]}")
147 return match.groupdict()
InvalidPathFormatError: repo://yonomitt/LAION-Aesthetics-V2-6.5plus/data is not valid path format for type DatasourceType.REPOSITORY.
Expected format: repo://owner/reponame/prefix
kbolashev commented
Aha, another regex issue. Thank you very much, Yono, will fix it <3
kbolashev commented
Fixed in the latest couple of commits, but I haven't bumped the version yet