appelmar/gdalcubes

Querying Landsat collections returns only NA values

Closed this issue · 4 comments

Hi,

when trying to use gdalcubes with a STAC collection that is not the Sentinel-2 COGS collection ('sentinel-s2-l2a-cogs'), we receive data cubes that are filled with only NA values as a result. We are not sure where exactly the issue occurs, seeing as up until the download there is no obvious difference to the Sentinel-2 results. The same occurs when trying a different catalog altogether.

This is the code we used to trial the different catalogs/collections:

### Sentinel-2 ----
library(rstac)
library(gdalcubes)

time = c("2021-04-01", "2021-04-30")

bbox_wgs84 = c(
    xmin = 11.07141
    , ymin = 49.71396
    , xmax = 11.21258
    , ymax = 49.80534
)

bbox_crs = c(
    xmin = 4398205
    , ymin = 2956338
    , xmax = 4408378
    , ymax = 2966503
)

s = stac("https://earth-search.aws.element84.com/v0")

view = cube_view(
    extent = list(
        t0 = time[1]
        , t1 = time[2]
        , left = bbox_crs[1]
        , right = bbox_crs[3]
        , top = bbox_crs[4]
        , bottom = bbox_crs[2]
    )
    , srs = "EPSG:3035"
    , dx = 10
    , dy = 10
    , dt = "P10D"
    , aggregation = "first"
    , resampling = "near"
)

s2_items = s |>
    stac_search(
        collections = "sentinel-s2-l2a-cogs"
        , bbox = bbox_wgs84
        , datetime = paste0(time[1], "/", time[2])
    ) |>
    post_request()
# s2_items

s2_col = gdalcubes::stac_image_collection(
    s2_items$features
    , asset_names = c("B02", "B03", "B04")
)

s2_cube = gdalcubes::raster_cube(
    image_collection = s2_col
    , view = view
)

gcstars_s2 = stars::st_as_stars(s2_cube)
gcstars_s2
#> stars object with 3 dimensions and 3 attributes
#> attribute(s), summary of first 1e+05 cells:
#>                          Min. 1st Qu. Median      Mean 3rd Qu.  Max.
#> file302bcad36ca.nc":B02  1830    8912  12792 11227.128   13408 14424
#> file302bcad36ca.nc":B03  1802    7984  11584 10150.159   12136 13168
#> file302bcad36ca.nc":B04  1814    7752  11256  9884.882   11800 12800
#> dimension(s):
#>      from   to  offset delta                       refsys point
#> x       1 1018 4398202    10 ETRS89-extended / LAEA Eu...    NA
#> y       1 1017 2966506   -10 ETRS89-extended / LAEA Eu...    NA
#> time    1    3      NA    NA                      POSIXct FALSE
#>                                                                         values
#> x                                                                         NULL
#> y                                                                         NULL
#> time [2021-04-01,2021-04-11), [2021-04-11,2021-04-21), [2021-04-21,2021-05-01)
#>      x/y
#> x    [x]
#> y    [y]
#> time
merge(gcstars_s2) |> plot()
#> downsample set to 4

### Landsat-8 ----
l8_items = s |>
    stac_search(
        collections = "landsat-8-l1-c1"
        , bbox = bbox_wgs84
        , datetime = paste0(time[1], "/", time[2])
    ) |>
    post_request()
# l8_items

l8_col = gdalcubes::stac_image_collection(
    l8_items$features
    , asset_names = c("B2", "B3", "B11")
)

l8_cube = gdalcubes::raster_cube(
    image_collection = l8_col
    , view = view
)

gcstars_l8 = stars::st_as_stars(l8_cube)
gcstars_l8
#> stars object with 3 dimensions and 3 attributes
#> attribute(s), summary of first 1e+05 cells:
#>                          Min. 1st Qu. Median Mean 3rd Qu. Max.  NA's
#> file302bae9b983.nc":B11    NA      NA     NA  NaN      NA   NA 1e+05
#> file302bae9b983.nc":B2     NA      NA     NA  NaN      NA   NA 1e+05
#> file302bae9b983.nc":B3     NA      NA     NA  NaN      NA   NA 1e+05
#> dimension(s):
#>      from   to  offset delta                       refsys point
#> x       1 1018 4398202    10 ETRS89-extended / LAEA Eu...    NA
#> y       1 1017 2966506   -10 ETRS89-extended / LAEA Eu...    NA
#> time    1    3      NA    NA                      POSIXct FALSE
#>                                                                         values
#> x                                                                         NULL
#> y                                                                         NULL
#> time [2021-04-01,2021-04-11), [2021-04-11,2021-04-21), [2021-04-21,2021-05-01)
#>      x/y
#> x    [x]
#> y    [y]
#> time
# all NA


## different catalog
s_usgs = stac("https://landsatlook.usgs.gov/stac-server")

usgs_items = s_usgs |>
    stac_search(
        collections = "landsat-c2l2-st"
        , bbox = bbox_wgs84
        , datetime = paste0(time[1], "/", time[2])
        ) |>
    post_request()
#usgs_items

usgs_col = gdalcubes::stac_image_collection(
    usgs_items$features
    , asset_names = c("lwir11")
)

usgs_cube = gdalcubes::raster_cube(
    image_collection = usgs_col
    , view = view
)

stars_usgs = stars::st_as_stars(usgs_cube)
stars_usgs
#> stars object with 3 dimensions and 1 attribute
#> attribute(s), summary of first 1e+05 cells:
#>                             Min. 1st Qu. Median Mean 3rd Qu. Max.  NA's
#> file302bd0172d1.nc":lwir11    NA      NA     NA  NaN      NA   NA 1e+05
#> dimension(s):
#>      from   to  offset delta                       refsys point
#> x       1 1018 4398202    10 ETRS89-extended / LAEA Eu...    NA
#> y       1 1017 2966506   -10 ETRS89-extended / LAEA Eu...    NA
#> time    1    3      NA    NA                      POSIXct FALSE
#>                                                                         values
#> x                                                                         NULL
#> y                                                                         NULL
#> time [2021-04-01,2021-04-11), [2021-04-11,2021-04-21), [2021-04-21,2021-05-01)
#>      x/y
#> x    [x]
#> y    [y]
#> time

Created on 2022-05-02 by the reprex package (v2.0.1)

Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.1.3 (2022-03-10)
#>  os       Ubuntu 20.04.4 LTS
#>  system   x86_64, linux-gnu
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       Europe/Berlin
#>  date     2022-05-02
#>  pandoc   2.17.1.1 @ /usr/lib/rstudio/bin/quarto/bin/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version date (UTC) lib source
#>  abind         1.4-5   2016-07-21 [2] CRAN (R 4.1.0)
#>  assertthat    0.2.1   2019-03-21 [2] CRAN (R 4.1.0)
#>  class         7.3-20  2022-01-13 [4] CRAN (R 4.1.2)
#>  classInt      0.4-3   2020-04-07 [2] CRAN (R 4.1.0)
#>  cli           3.2.0   2022-02-14 [1] CRAN (R 4.1.2)
#>  crayon        1.5.1   2022-03-26 [1] CRAN (R 4.1.3)
#>  curl          4.3.2   2021-06-23 [2] CRAN (R 4.1.0)
#>  DBI           1.1.2   2021-12-20 [1] CRAN (R 4.1.2)
#>  digest        0.6.29  2021-12-01 [1] CRAN (R 4.1.2)
#>  dplyr         1.0.7   2021-06-18 [2] CRAN (R 4.1.0)
#>  e1071         1.7-9   2021-09-16 [1] CRAN (R 4.1.2)
#>  ellipsis      0.3.2   2021-04-29 [2] CRAN (R 4.1.0)
#>  evaluate      0.15    2022-02-18 [1] CRAN (R 4.1.2)
#>  fansi         1.0.3   2022-03-24 [1] CRAN (R 4.1.3)
#>  fastmap       1.1.0   2021-01-25 [1] CRAN (R 4.1.1)
#>  fs            1.5.2   2021-12-08 [1] CRAN (R 4.1.3)
#>  gdalcubes   * 0.6.1   2022-03-23 [1] CRAN (R 4.1.3)
#>  generics      0.1.2   2022-01-31 [1] CRAN (R 4.1.2)
#>  glue          1.6.2   2022-02-24 [1] CRAN (R 4.1.3)
#>  highr         0.9     2021-04-16 [2] CRAN (R 4.1.0)
#>  htmltools     0.5.2   2021-08-25 [1] CRAN (R 4.1.1)
#>  httr          1.4.2   2020-07-20 [2] CRAN (R 4.1.0)
#>  jsonlite      1.8.0   2022-02-22 [1] CRAN (R 4.1.3)
#>  KernSmooth    2.23-20 2021-05-03 [4] CRAN (R 4.0.5)
#>  knitr         1.38    2022-03-25 [1] CRAN (R 4.1.3)
#>  lifecycle     1.0.1   2021-09-24 [1] CRAN (R 4.1.1)
#>  lwgeom        0.2-8   2021-10-06 [1] CRAN (R 4.1.2)
#>  magrittr      2.0.3   2022-03-30 [1] CRAN (R 4.1.3)
#>  mime          0.12    2021-09-28 [1] CRAN (R 4.1.1)
#>  ncdf4         1.19    2021-12-15 [1] CRAN (R 4.1.2)
#>  pillar        1.7.0   2022-02-01 [1] CRAN (R 4.1.2)
#>  pkgconfig     2.0.3   2019-09-22 [2] CRAN (R 4.1.0)
#>  proxy         0.4-26  2021-06-07 [2] CRAN (R 4.1.0)
#>  purrr         0.3.4   2020-04-17 [2] CRAN (R 4.1.0)
#>  R6            2.5.1   2021-08-19 [2] CRAN (R 4.1.1)
#>  Rcpp          1.0.8.3 2022-03-17 [1] CRAN (R 4.1.3)
#>  reprex        2.0.1   2021-08-05 [1] CRAN (R 4.1.2)
#>  rlang         1.0.2   2022-03-04 [1] CRAN (R 4.1.3)
#>  rmarkdown     2.13    2022-03-10 [1] CRAN (R 4.1.3)
#>  rstac       * 0.9.1-5 2021-10-31 [1] CRAN (R 4.1.1)
#>  rstudioapi    0.13    2020-11-12 [2] CRAN (R 4.1.0)
#>  sessioninfo   1.2.2   2021-12-06 [1] CRAN (R 4.1.2)
#>  sf            1.0-7   2022-03-07 [1] CRAN (R 4.1.3)
#>  stars         0.5-5   2021-12-19 [1] CRAN (R 4.1.2)
#>  stringi       1.7.6   2021-11-29 [1] CRAN (R 4.1.2)
#>  stringr       1.4.0   2019-02-10 [2] CRAN (R 4.1.0)
#>  tibble        3.1.6   2021-11-07 [1] CRAN (R 4.1.2)
#>  tidyselect    1.1.1   2021-04-30 [2] CRAN (R 4.1.0)
#>  units         0.8-0   2022-02-05 [1] CRAN (R 4.1.2)
#>  utf8          1.2.2   2021-07-24 [1] CRAN (R 4.1.2)
#>  vctrs         0.4.1   2022-04-13 [1] CRAN (R 4.1.3)
#>  withr         2.5.0   2022-03-03 [1] CRAN (R 4.1.3)
#>  xfun          0.30    2022-03-02 [1] CRAN (R 4.1.3)
#>  xml2          1.3.3   2021-11-30 [1] CRAN (R 4.1.2)
#>  yaml          2.3.5   2022-02-21 [1] CRAN (R 4.1.2)
#> 
#>  [1] /home/carola/R/x86_64-pc-linux-gnu-library/4.1
#>  [2] /usr/local/lib/R/site-library
#>  [3] /usr/lib/R/site-library
#>  [4] /usr/lib/R/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────

Thanks for any hints on this!

Best,
Carola

It seems that there are two different reasons for the two Landsat examples. In both cases, the GeoTIFFs are not accessible but for different reasons.

  1. (AWS) When setting gdalcubes_options(debug = TRUE), I get entries like:
[WARNING] GDAL cannot open '/vsicurl/https://landsat-pds.s3.us-west-2.amazonaws.com/c1/L8/193/026/LC08_L1TP_193026_20210426_20210501_01_T1/LC08_L1TP_193026_20210426_20210501_01_T1_B3.TIF'

I think the Landsat collection 1 (https://landsat-pds.s3.us-west-2.amazonaws.com/c1) is not available on AWS anymore, (see https://www.usgs.gov/landsat-missions/landsat-data-access, the table for collection 1 does not mention commercial cloud access), in favor of the collection 2, which is a requester pays bucket on AWS.

  1. (USGS) I think that accessing the images requires authentication (it redirects me to the login page when entering the URLs in a browser). There is a chance that it works by adapting the url_fun argument of stac_image_collection but I couldn't find any documentation of their API so far.

Hi @appelmar,
thanks for helping us finding this out.
Regarding the first reason it is all a bit confusing, as the URL you mentioned also statest that "Collection 1 products will remain available for search and download until December 31, 2022".
Regarding requester pay bucket access we've defined an IAM role in our AWS account and granted “AmazonS3ReadOnlyAccess” (as described here).
After configuring the credentials with the AWS CLI, we can access and download the data per scene, e.g.

aws s3api get-object --bucket usgs-landsat --key collection02/level-2/standard/oli-tirs/2022/206/112/LC09_L2SR_206112_20220211_20220213_02_T2/LC09_L2SR_206112_20220211_20220213_02_T2_SR_B2.TIF ~/Downloads/sample.tif --request-payer requester

So I think the requester pay access works. Is there a way to submit the S3 credentials to gdalcubes to work with the STAC?
If not, do you see any method to use gdalcubes for querying Landsat at this time?

Best,
Hendrik

Yes, AWS credentials can be set using GDAL's configuration options (see https://gdal.org/user/virtual_file_systems.html#vsis3-aws-s3-files), e.g., using gdalcubes_set_gdal_config(). However, I also found that the STAC response from the USGS service is a bit harder to work with: Each asset points to the data on the USGS archive and to the data on AWS, so it needs some additional preprocessing. For now, the following script works for me (you need to add the credentials):

library(rstac)
library(gdalcubes)

time = c("2021-04-01", "2021-04-30")

bbox_wgs84 = c(
  xmin = 11.07141
  , ymin = 49.71396
  , xmax = 11.21258
  , ymax = 49.80534
)

bbox_crs = c(
  xmin = 4398205
  , ymin = 2956338
  , xmax = 4408378
  , ymax = 2966503
)

s = stac("https://earth-search.aws.element84.com/v0")

view = cube_view(
  extent = list(
    t0 = time[1]
    , t1 = time[2]
    , left = bbox_crs[1]
    , right = bbox_crs[3]
    , top = bbox_crs[4]
    , bottom = bbox_crs[2]
  )
  , srs = "EPSG:3035"
  , dx = 10
  , dy = 10
  , dt = "P10D"
  , aggregation = "first"
  , resampling = "near"
)

s_usgs = stac("https://landsatlook.usgs.gov/stac-server")
usgs_items = s_usgs |>
  stac_search(
    collections = "landsat-c2l2-st"
    , bbox = bbox_wgs84
    , datetime = paste0(time[1], "/", time[2])
  ) |>
  post_request()
#usgs_items

usgs_items_s3 = usgs_items
usgs_items_s3$features = lapply(usgs_items$features, function(x) {
  x$assets = lapply(x$assets, function(y) {
    if (!is.null(y$alternate$s3$href)) {
      y$href = y$alternate$s3$href
    }
    y
  })
  x
})

usgs_col = gdalcubes::stac_image_collection(
  usgs_items_s3$features
  , asset_names = c("lwir11"),
  url_fun = function(x) { paste0("/vsis3/", substr(x, 6, nchar(x))) } # remove s3:// prefix and add /vsis3/
)


usgs_cube = gdalcubes::raster_cube(
  image_collection = usgs_col
  , view = view
)

gdalcubes_set_gdal_config("AWS_ACCESS_KEY_ID", "...............")
gdalcubes_set_gdal_config("AWS_SECRET_ACCESS_KEY", "................")
gdalcubes_set_gdal_config("AWS_REQUEST_PAYER", "requester")
gdalcubes_options(parallel = 4)

stars_usgs = stars::st_as_stars(usgs_cube)
stars_usgs

Ok, cool! That seems to work. Thank you for your help. Very much appreciated.