statistikat/STATcubeR

filters in sc_table_custom()

Closed this issue · 1 comments

There have now been several requests to support filtering in sc_table_custom(). Currently, the only way to do this is to generate the request.json by hand.

Example
library(STATcubeR)

schema <- sc_schema_db("detouextregsai")
region <- schema$`Other Classifications`$
  `Tourism commune [ABO]`$`Regionale Gliederung (Ebene +1)`

request <- list(
  database = schema$id,
  dimensions = list(I(region$id)),
  recodes = setNames(
    list(list(
      map = list(
        I(region$Bregenzerwald$id),
        I(region$`Vorarlberg Rest`$id),
        I(region$`Bodensee-Vorarlberg`$id)
      )
    )),
    region$id
  )
)

jsonlite::write_json(request, "request.json", pretty = TRUE, auto_unbox = TRUE)
readLines("request.json") %>% cat(sep = "\n")
x <- sc_table("request.json", add_totals = FALSE)
x$tabulate()

It might be sensible to extend the functionality of sc_table_custom() to support filters (or possibly other recodes) via additional parameters. The syntax might look like this

library(STATcubeR)

schema <- sc_schema_db("detouextregsai")
region <- schema$`Other Classifications`$
  `Tourism commune [ABO]`$`Regionale Gliederung (Ebene +1)`

sc_table_custom(
  schema,
  region,
  sc_recode(region, c(region$Bregenzerwald, 
      region$`Vorarlberg Rest`, region$`Bodensee-Vorarlberg`)) 
)

There is now a first implementation which allows to set filters

STATcubeR usage
schema <- sc_schema_db("detouextregsai")
region <- schema$`Other Classifications`$`Tourism commune [ABO]`$
  `Regionale Gliederung (Ebene +1)`
month <- schema$`Mandatory fields`$`Season/Tourism Month`

x <- sc_table_custom(
  schema,
  schema$Facts$Arrivals,
  list(month, region),
  recodes = c(
    sc_recode(region, total = FALSE, map = list(
      region$Achensee,
      list(region$Arlberg, region$`Ausseerland-Salzkammergut`)
    )),
    sc_recode(month, total = FALSE)
  )
)
x$tabulate()
resulting data.frame
# A STATcubeR tibble: 92 x 3
   `Season/Tourism Month` `Tourism commune [ABO]`           Arrivals
   <date>                 <fct>                                <dbl>
 1 2000-01-01             Achensee                             90947
 2 2000-01-01             Arlberg;Ausseerland-Salzkammergut   209356
 3 2000-06-01             Achensee                            133523
 4 2000-06-01             Arlberg;Ausseerland-Salzkammergut   109769
 5 2001-01-01             Achensee                             93178
 6 2001-01-01             Arlberg;Ausseerland-Salzkammergut   199256
 7 2001-06-01             Achensee                            137138
 8 2001-06-01             Arlberg;Ausseerland-Salzkammergut   112685
 9 2002-01-01             Achensee                            100464
10 2002-01-01             Arlberg;Ausseerland-Salzkammergut   212284
# … with 82 more rows
# ℹ Use `print(n = ...)` to see more rows
resulting API request
{
  "database": "str:database:detouextregsai",
  "measures": [
    "str:statfn:detouextregsai:F-DATA1:F-ANK:SUM",
    "str:measure:detouextregsai:F-DATA1:F-UEB"
  ],
  "dimensions": [
    [
      "str:field:detouextregsai:F-DATA1:C-SDB_TIT-0"
    ],
    [
      "str:valueset:detouextregsai:F-DATA1:C-C93-2:C-C93SUM-0"
    ]
  ],
  "recodes": {
    "str:field:detouextregsai:F-DATA1:C-SDB_TIT-0": {
      "total": true
    },
    "str:valueset:detouextregsai:F-DATA1:C-C93-2:C-C93SUM-0": {
      "total": true
    }
  }
}

This feature is currently only available in the development branch #32 . For anyone interested, the package can be instlled from the dev-branch. However, these new features are likely to be refactored before #32 is merged.

remotes::install_github("statistikat/STATcubeR", ref = "tibble_pkg")