R-ArcGIS/arcgislayers

Another counting problem?

Closed this issue · 2 comments

Describe the bug
Large layer with lots of features leads to HTTP 403 Forbidden error.

To Reproduce
Perhaps a smidge larger than a reprex, sorry:

library(arcgislayers)
library(sf)
library(tidyverse)
library(tigris)

wbd_server <- file.path(
  "https://hydro.nationalmap.gov/arcgis/rest/services",
  "NHDPlus_HR",
  "MapServer/12"
) |> arc_open()

selection <- file.path(
  "https://raw.githubusercontent.com",
  "kbvernon",
  "hndsr-watersheds",
  "main/data",
  "selected-watersheds.txt"
) |> 
  readLines() |> 
  str_split_1(", ") |>
  as.numeric()

southwest <- states() |> 
  filter(NAME %in% c("Utah", "Colorado", "Arizona", "New Mexico") ) |> 
  select(geometry) |> 
  st_transform(4326) |> 
  st_union() |> 
  st_cast("POLYGON") |> 
  st_geometry()

huc12 <- wbd_server |> 
  arc_select(
    fields = "huc12",
    filter_geom = southwest,
    geometry = FALSE
  ) |> 
  mutate(huc10 = str_sub(huc12, 1, 10)) |> 
  filter(huc10 %in% selection) |> 
  pull(huc12)

huc12 <- paste0("'", huc12, "'", collapse = ",")
  
watersheds <- arc_select(
  wbd_server,
  fields = c("huc12", "name"),
  where = paste0("huc12 IN (", huc12, ")"),
  filter_geom = southwest,
  geometry = TRUE
)

Commenting out the WHERE statement and setting n_max = 100 works.

watersheds <- arc_select(
  wbd_server,
  fields = c("huc12", "name"),
  # where = paste0("huc12 IN (", huc12, ")"),
  filter_geom = southwest,
  n_max = 100,
  geometry = TRUE
)

Expected behavior
Should return an sf table with 4,335 features in it.

Additional context
This is the USGS Watershed Boundary Dataset, part of the National Hydrography Dataset that was causing problems before.
Link: https://hydro.nationalmap.gov/arcgis/rest/services/NHDPlus_HR/MapServer/12

Closing this because I think we figured out that the issue was the use of IN with the SQL and a better approach would be to use LIKE.

huc <- stringr::str_split_1(huc12, ",") |> 
  stringr::str_remove_all("'") |> 
  as.numeric()

desired_hucs <- data.frame(huc12 = unique(as.character(huc)))

unique_7s <- huc |> as.character() |> 
  substr(1,7) |> 
  unique()

split_ind <- unique_7s |> substr(1,2) 

splits <- split(unique_7s, split_ind)

watersheds <- arc_select(
  wbd_server,
  fields = c("huc12", "name"),
  # iterate through all of the splits
  where = paste0("huc12 LIKE '", splits[[1]][1], "%'")
)

watersheds
# do a semi join to make sure you don't have any errant ones 
semi_join(watersheds, desired_hucs)