hypertidy/silicate

vec_group_id replace group ident

mdsumner opened this issue · 0 comments

See vctrs::vec_group_id a drop-in replace for group_indices().

Discussed here: r-lib/vctrs#332

The example below is part of ongoing discussion for changes here and in sfheaders/geometries: dcooley/geometries#4

  • flush out this task (also as.integer(factor(etc)))
  • compare to use of unjoin

Maybe consider whether dplyr can be dropped (this is coming from sfheaders direction as well so can replace gibble but the joins will be tough).

new_SC0 <- function(vertex, object, index, crs = NA_character_, meta = NULL) {
  meta1 <- tibble::tibble(proj = crs, ctime = Sys.time())
  if (!is.null(meta)) {
    meta <- rbind(meta1, meta)
  }
  object[["topology_"]] <- index
  structure(list(object = object, vertex = vertex,
                 meta = meta), class = c("SC0", "sc"))
}

## build SC0 with sfheaders

sc0 <- function(x, ...) {
  df <- sfheaders::sf_to_df(x)
  crs <- crsmeta::crs_proj(x)
  
  x[[attr(x, "sf_column")]] <- NULL
  object <- tibble::as_tibble(x)
  object$object_ <- 1:nrow(object)
  
  
  ## deduplicate in xy
  df[["vertex_"]] <-  vctrs::vec_group_id(df[c("x", "y")])
  ## the vertex table, separated out (we need vertex_ to remap)
  v <- df[!duplicated(df[["vertex_"]]), c("x", "y", "vertex_")]
  ## now remap (can this be done better?) 
  ## (alt. is unjoin())
  df[["vertex_"]] <- match(df$vertex_, v$vertex_)
  
  ## cleanup
  v[["vertex_"]] <- NULL
  df[["x"]] <- NULL  ## not really necessary to remove but highlights the point
  df[["y"]] <- NULL  ## that these are now indexed in 'v'
  
  ## a global linestring_id 
  if ("multipolygon_id" %in% names(df)) {
    df[["path_"]] <- vctrs::vec_group_id(df[c("sfg_id", "polygon_id", "linestring_id")])
    
  } else {
    df[["path_"]] <- vctrs::vec_group_id(df, c("sfg_id", "linestring_id"))
    
  }
  featurelist <- split(df, df$sfg_id)
  feature_segments <- vector("list", length(featurelist))
  .path2seg <- function(x, pathid = NULL) {
    cbind(.vx0 = x[-length(x)], .vx1 = x[-1L], path_ = pathid)
  }
  
  for (i in seq_along(featurelist)) {
    segments <- lapply(split(featurelist[[i]][c("vertex_", "path_")], 
                             featurelist[[i]]$path_), 
                       function(lstring) .path2seg(lstring[["vertex_"]], 
                                                   pathid = lstring[["path_"]][1L]))
    feature_segments[[i]] <- tibble::as_tibble(do.call(rbind, segments))
  }
  names(v) <- c("x_", "y_")
  new_SC0(v, object, feature_segments, crs = crs)
}
library(silicate)
plot(sc0(inlandwaters))
rbenchmark::benchmark(sc0(inlandwaters), 
                      SC0(inlandwaters))
#1 sc0(inlandwaters)          100    5.76    1.000      5.67     0.05         NA        NA
#2 SC0(inlandwaters)          100    8.61    1.495      8.49     0.13         NA        NA