tidymodels/recipes

step_interact() fails with long formulas

EmilHvitfeldt opened this issue · 1 comment

library(tidymodels)

# Simulate 100 rows of purely categorical data: a two-level outcome and three
# character predictors with 3, 2 and 5 distinct values. The seed is fixed so
# the sampled values are the same on every run.
set.seed(12345)
dat <- tibble(outcome = sample(c("y", "n"), 100, replace = TRUE),
              race = sample(c("b", "w", "o"), 100, replace = TRUE),
              hisp = sample(c("hisp", "nhisp"), 100, replace = TRUE),
              cat = sample(c("a", "b", "c", "d", "e"), 100, replace = TRUE))

# Working case: dummy-encode the nominal predictors, then request two
# interaction terms (race dummies x hisp dummies, race dummies x cat dummies).
# prep() completes without a warning or error.
rec <- recipe(outcome ~ ., data = dat) %>%
  step_dummy(all_nominal_predictors()) %>%
  step_interact(~ starts_with("race_"):starts_with("hisp_") + 
                  starts_with("race_"):starts_with("cat")) %>%
  prep()

# The baked data has the outcome, 7 dummy columns, and 10 `_x_` interaction
# columns (2 race x hisp, 8 race x cat).
colnames(bake(rec, new_data = dat))
#>  [1] "outcome"             "race_o"              "race_w"             
#>  [4] "hisp_nhisp"          "cat_b"               "cat_c"              
#>  [7] "cat_d"               "cat_e"               "race_o_x_hisp_nhisp"
#> [10] "race_w_x_hisp_nhisp" "race_o_x_cat_b"      "race_o_x_cat_c"     
#> [13] "race_o_x_cat_d"      "race_o_x_cat_e"      "race_w_x_cat_b"     
#> [16] "race_w_x_cat_c"      "race_w_x_cat_d"      "race_w_x_cat_e"

# Failing case: the same recipe with a third interaction term added
# (hisp dummies x cat dummies). prep() now warns that the interaction
# specification failed for `~...` and then errors inside rlang::f_rhs().
rec <- recipe(outcome ~ ., data = dat) %>%
  step_dummy(all_nominal_predictors()) %>%
  step_interact(~ starts_with("race_"):starts_with("hisp_") + 
                  starts_with("race_"):starts_with("cat") +
                  starts_with("hisp_"):starts_with("cat")) %>%
  prep()
#> Warning: ! Interaction specification failed for:
#> • ~...
#> ℹ No interactions will be created
#> Error in `step_interact()`:
#> Caused by error in `rlang::f_rhs()`:
#> ! `x` must be a formula

# NOTE: the assignment above errored, so `rec` presumably still holds the
# previously prepped two-term recipe; the columns below contain no
# hisp x cat interactions and match the two-term output exactly.
colnames(bake(rec, new_data = dat))
#>  [1] "outcome"             "race_o"              "race_w"             
#>  [4] "hisp_nhisp"          "cat_b"               "cat_c"              
#>  [7] "cat_d"               "cat_e"               "race_o_x_hisp_nhisp"
#> [10] "race_w_x_hisp_nhisp" "race_o_x_cat_b"      "race_o_x_cat_c"     
#> [13] "race_o_x_cat_d"      "race_o_x_cat_e"      "race_w_x_cat_b"     
#> [16] "race_w_x_cat_c"      "race_w_x_cat_d"      "race_w_x_cat_e"

The problem can be reduced to `rlang::as_label()`, which abbreviates a sufficiently long formula to `~...`:

# With two interaction terms, as_label() returns the full text of the formula.
rlang::as_label(
  ~ starts_with("race_"):starts_with("hisp_") + 
    starts_with("race_"):starts_with("cat")
)
#> [1] "~starts_with(\"race_\"):starts_with(\"hisp_\") + starts_with(\"race_\"):starts_with(\"cat\")"

# With a third interaction term, as_label() no longer returns the formula text
# and yields the placeholder "~..." instead -- the same string reported in the
# step_interact() warning.
rlang::as_label(
  ~ starts_with("race_"):starts_with("hisp_") + 
    starts_with("race_"):starts_with("cat") +
    starts_with("hisp_"):starts_with("cat")
)
#> [1] "~..."

This issue has been automatically locked. If you believe you have found a related problem, please file a new issue (with a reprex: https://reprex.tidyverse.org) and link to this issue.