radiant-mlhub-landcovernet example failing
TomAugspurger opened this issue · 3 comments
The landcovernet example at https://github.com/microsoft/PlanetaryComputerExamples/blob/main/tutorials/radiant-mlhub-landcovernet.ipynb is currently failing for me.
>>> collection_id = "ref_landcovernet_v1_labels"
>>> collection = client.get_collection(collection_id)
---------------------------------------------------------------------------
APIError Traceback (most recent call last)
File /srv/conda/envs/notebook/lib/python3.8/site-packages/pystac_client/stac_api_io.py:136, in StacApiIO.request(self, href, method, headers, parameters)
135 if resp.status_code != 200:
--> 136 raise APIError(resp.text)
137 return resp.content.decode("utf-8")
APIError: {"detail":"Collection ref_landcovernet_v1_labels does not exist."}
During handling of the above exception, another exception occurred:
APIError Traceback (most recent call last)
Input In [4], in <cell line: 3>()
1 collection_id = "ref_landcovernet_v1_labels"
----> 3 collection = client.get_collection(collection_id)
4 collection_sci_ext = ScientificExtension.ext(collection)
5 print(f"Description: {collection.description}")
File /srv/conda/envs/notebook/lib/python3.8/site-packages/pystac_client/client.py:92, in Client.get_collection(self, collection_id)
90 if self._stac_io.conforms_to(ConformanceClasses.COLLECTIONS):
91 url = f"{self.get_self_href()}/collections/{collection_id}"
---> 92 collection = CollectionClient.from_dict(self._stac_io.read_json(url), root=self)
93 return collection
94 else:
File /srv/conda/envs/notebook/lib/python3.8/site-packages/pystac/stac_io.py:197, in StacIO.read_json(self, source, *args, **kwargs)
178 def read_json(
179 self, source: Union[str, "Link_Type"], *args: Any, **kwargs: Any
180 ) -> Dict[str, Any]:
181 """Read a dict from the given source.
182
183 See :func:`StacIO.read_text <pystac.StacIO.read_text>` for usage of
(...)
195 given source.
196 """
--> 197 txt = self.read_text(source, *args, **kwargs)
198 return self.json_loads(txt)
File /srv/conda/envs/notebook/lib/python3.8/site-packages/pystac_client/stac_api_io.py:77, in StacApiIO.read_text(self, source, parameters, *args, **kwargs)
75 href = source
76 if bool(urlparse(href).scheme):
---> 77 return self.request(href, *args, parameters=parameters, **kwargs)
78 else:
79 with open(href) as f:
File /srv/conda/envs/notebook/lib/python3.8/site-packages/pystac_client/stac_api_io.py:139, in StacApiIO.request(self, href, method, headers, parameters)
137 return resp.content.decode("utf-8")
138 except Exception as err:
--> 139 raise APIError(str(err))
APIError: {"detail":"Collection ref_landcovernet_v1_labels does not exist."}
cc @KennSmithDS. Do you know if that collection ID was deliberately removed? Is there a good alternative?
That turns up another error:
image_link = source_imagery_links[0]
response = session.get(image_link.href)
image_item = Item.from_dict(response.json())
print(f"Item ID: {image_item.id}")
print("Assets:")
for asset_key, asset in image_item.assets.items():
print(f"- Asset Key: {asset_key}")
asset_eo_ext = EOExtension.ext(asset)
if asset_eo_ext.bands is not None:
band_names = ", ".join(band.common_name for band in asset_eo_ext.bands)
print(f" Bands:{band_names}")
raises with
---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
Input In [30], in <cell line: 5>()
1 image_link = source_imagery_links[0]
3 response = session.get(image_link.href)
----> 5 image_item = Item.from_dict(response.json())
6 print(f"Item ID: {image_item.id}")
7 print("Assets:")
File /srv/conda/envs/notebook/lib/python3.8/site-packages/requests/models.py:899, in Response.json(self, **kwargs)
897 if encoding is not None:
898 try:
--> 899 return complexjson.loads(
900 self.content.decode(encoding), **kwargs
901 )
902 except UnicodeDecodeError:
903 # Wrong UTF codec detected; usually because it's not UTF-8
904 # but some other 8-bit codec. This is an RFC violation,
905 # and the server didn't bother to tell us what codec *was*
906 # used.
907 pass
File /srv/conda/envs/notebook/lib/python3.8/json/__init__.py:357, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
352 del kw['encoding']
354 if (cls is None and object_hook is None and
355 parse_int is None and parse_float is None and
356 parse_constant is None and object_pairs_hook is None and not kw):
--> 357 return _default_decoder.decode(s)
358 if cls is None:
359 cls = JSONDecoder
File /srv/conda/envs/notebook/lib/python3.8/json/decoder.py:337, in JSONDecoder.decode(self, s, _w)
332 def decode(self, s, _w=WHITESPACE.match):
333 """Return the Python representation of ``s`` (a ``str`` instance
334 containing a JSON document).
335
336 """
--> 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
338 end = _w(s, end).end()
339 if end != len(s):
File /srv/conda/envs/notebook/lib/python3.8/json/decoder.py:355, in JSONDecoder.raw_decode(self, s, idx)
353 obj, end = self.scan_once(s, idx)
354 except StopIteration as err:
--> 355 raise JSONDecodeError("Expecting value", s, err.value) from None
356 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
The image link is 'https://radiantmlhub.blob.core.windows.net/stac/landcovernet-af-v1/landcovernet_af_v1_source_sentinel_2/landcovernet_af_v1_source_sentinel_2_28QDE_00_20180103/landcovernet_af_v1_source_sentinel_2_28QDE_00_20180103.json'. Making a request there is returning a 404, so either the file doesn't exist, or I don't have permission to access it.
I made some progress on this by changing the `collection_id` to `ref_landcovernet_af_v1_labels`.
With that change it's failing at https://nbviewer.org/github/microsoft/PlanetaryComputerExamples/blob/main/tutorials/radiant-mlhub-landcovernet.ipynb#Downloading-Source-Imagery. We're getting a 404 when we try to access the source imagery linked to from the item, which maybe indicates bad metadata in the STAC item returned by the Radiant Earth API.
Here's the simplest reproducer I could get:
import getpass
MLHUB_API_KEY = "..." # your API key
MLHUB_ROOT_URL = "https://api.radiant.earth/mlhub/v1"
from pystac_client import Client
import requests
from urllib.parse import urljoin
import pystac
from pystac.extensions.label import LabelRelType
client = Client.open(
MLHUB_ROOT_URL, parameters={"key": MLHUB_API_KEY}, ignore_conformance=True
)
class MLHubSession(requests.Session):
def __init__(self, *args, api_key=None, **kwargs):
super().__init__(*args, **kwargs)
self.params.update({"key": api_key})
def request(self, method, url, *args, **kwargs):
url_prefix = MLHUB_ROOT_URL.rstrip("/") + "/"
url = urljoin(url_prefix, url)
return super().request(method, url, *args, **kwargs)
session = MLHubSession(api_key=MLHUB_API_KEY)
item_href = 'https://api.radiant.earth/mlhub/v1/collections/ref_landcovernet_af_v1_labels/items/ref_landcovernet_af_v1_labels_28QDE_00'
item = pystac.Item.from_dict(session.get(item_href).json())
source_imagery_links = item.get_links(rel=LabelRelType.SOURCE)
assert session.head(source_imagery_links[0].href).status_code == 200
For reference, that HREF is 'https://radiantmlhub.blob.core.windows.net/stac/landcovernet-af-v1/landcovernet_af_v1_source_sentinel_2/landcovernet_af_v1_source_sentinel_2_28QDE_00_20180103/landcovernet_af_v1_source_sentinel_2_28QDE_00_20180103.json'
cc @KennSmithDS / @kbgg, in case that STAC item is indeed incorrect. Or perhaps the notebook needs to be updated for a new method of accessing the source imagery.