business-science/modeltime.ensemble

Ensembles for Nested Forecasting and at local IDs

mdancho84 opened this issue · 1 comment

Add method for working with Local ID and Nested Forecast Ensembles.

I've added support for nested forecasting with ensembles. There are 2 new algorithms:

  1. Ensemble Nested Average
  2. Ensemble Nested Weighted

Example

# Setup

library(tidymodels)
library(modeltime)
library(modeltime.ensemble)
library(tidyverse)
library(timetk)

# Walmart weekly sales, reduced to the id / date / value columns used below.
data_tbl <- walmart_sales_weekly %>%
  select(id, date = Date, value = Weekly_Sales)

# Extend every series 52 periods into the future, nest the data by id,
# attach a 52-period test split, and keep only the first two nested rows.
nested_data_tbl <- data_tbl %>%
  extend_timeseries(
    .id_var = id,
    .date_var = date,
    .length_future = 52
  ) %>%
  nest_timeseries(
    .id_var = id,
    .length_future = 52,
    .length_actual = 52 * 2
  ) %>%
  split_nested_timeseries(.length_test = 52) %>%
  slice(1:2)


# Prophet workflow ----
# The recipe is specified on the training split of the first nested series
# and uses the date column as the only predictor.
rec_prophet <- recipe(
  value ~ date,
  data = extract_nested_train_split(nested_data_tbl)
)

wflw_prophet <- workflow() %>%
  add_model(
    set_engine(
      prophet_reg(mode = "regression", seasonality_yearly = TRUE),
      "prophet"
    )
  ) %>%
  add_recipe(rec_prophet)

# XGBoost workflow ----
# Starts from every column, expands the date into time-series signature
# features, removes the raw date and zero-variance predictors, then one-hot
# encodes the nominal predictors.
rec_xgb <- recipe(
  value ~ .,
  data = extract_nested_train_split(nested_data_tbl)
) %>%
  step_timeseries_signature(date) %>%
  step_rm(date) %>%
  step_zv(all_predictors()) %>%
  step_dummy(all_nominal_predictors(), one_hot = TRUE)

wflw_xgb <- workflow() %>%
  add_model(
    set_engine(boost_tree(mode = "regression"), "xgboost")
  ) %>%
  add_recipe(rec_xgb)

# Fit both workflows (Prophet, then XGBoost) to the nested data.
nested_modeltime_tbl <- modeltime_nested_fit(
  nested_data = nested_data_tbl,
  wflw_prophet,
  wflw_xgb
)
#> Fitting models on training data... ===============>--------------- 50% | ET...



# ENSEMBLE AVERAGE TESTING ----

parallel_start(6)

# Two averaging ensembles are appended in sequence: a mean ensemble, then a
# median ensemble restricted to model ids 1 and 2. Submodels are kept in both
# steps.
# NOTE(review): allow_par = FALSE is passed to both steps although a parallel
# backend was just started -- confirm this is intentional.
ensem <- nested_modeltime_tbl %>%
  ensemble_nested_average(
    type = "mean",
    keep_submodels = TRUE,
    control = control_nested_fit(allow_par = FALSE, verbose = TRUE)
  ) %>%
  ensemble_nested_average(
    type = "median",
    keep_submodels = TRUE,
    model_ids = c(1, 2),
    control = control_nested_fit(allow_par = FALSE, verbose = TRUE)
  )
#> i [1/2] Starting Modeltime Table: ID 1_1...
#> v [1/2] Finished Modeltime Table: ID 1_1
#> i [2/2] Starting Modeltime Table: ID 1_3...
#> v [2/2] Finished Modeltime Table: ID 1_3
#> Finished in: 1.125003 secs.
#> i [1/2] Starting Modeltime Table: ID 1_1...
#> v [1/2] Finished Modeltime Table: ID 1_1
#> i [2/2] Starting Modeltime Table: ID 1_3...
#> v [2/2] Finished Modeltime Table: ID 1_3
#> Finished in: 1.133003 secs.
#> 

# Print the nested modeltime table holding the two averaging ensembles.
ensem
#> # Nested Modeltime Table
#> 
#> Trained on: .splits | Model Errors: [0]
#> # A tibble: 2 x 5
#>   id    .actual_data       .future_data      .splits         .modeltime_tables  
#>   <fct> <list>             <list>            <list>          <list>             
#> 1 1_1   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_time_tbl [4 x~
#> 2 1_3   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_time_tbl [4 x~

# Modeltime table of the first nested row (default row selection).
extract_nested_modeltime_table(ensem)
#> # A tibble: 4 x 6
#>   id    .model_id .model         .model_desc                 .type .calibration_da~
#>   <fct>     <dbl> <list>         <chr>                       <chr> <list>          
#> 1 1_1           1 <workflow>     PROPHET                     Test  <tibble [52 x 4~
#> 2 1_1           2 <workflow>     XGBOOST                     Test  <tibble [52 x 4~
#> 3 1_1           3 <ensemble [2]> ENSEMBLE (MEAN): 2 MODELS   Test  <tibble [52 x 4~
#> 4 1_1           4 <ensemble [2]> ENSEMBLE (MEDIAN): 2 MODELS Test  <tibble [52 x 4~

# Test-set accuracy of every submodel and ensemble, per id.
extract_nested_test_accuracy(ensem)
#> # A tibble: 8 x 10
#>   id    .model_id .model_desc        .type    mae  mape  mase smape   rmse   rsq
#>   <fct>     <dbl> <chr>              <chr>  <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>
#> 1 1_1           1 PROPHET            Test  10071.  45.9  1.99  60.0 11777. 0.383
#> 2 1_1           2 XGBOOST            Test   6237.  25.3  1.23  24.6  9017. 0.191
#> 3 1_1           3 ENSEMBLE (MEAN): ~ Test   5419.  20.2  1.07  22.2  8655. 0.415
#> 4 1_1           4 ENSEMBLE (MEDIAN)~ Test   5419.  20.2  1.07  22.2  8655. 0.415
#> 5 1_3           1 PROPHET            Test   3540.  29.9  1.37  25.5  4708. 0.796
#> 6 1_3           2 XGBOOST            Test   3086.  18.8  1.20  20.4  5086. 0.787
#> 7 1_3           3 ENSEMBLE (MEAN): ~ Test   2662.  19.0  1.03  17.7  4038. 0.819
#> 8 1_3           4 ENSEMBLE (MEDIAN)~ Test   2662.  19.0  1.03  17.7  4038. 0.819


# ENSEMBLE WEIGHTED TESTING ----

# No parallel_start() here: a second call would start another backend while
# the one started earlier in the script is still active, and this step runs
# with allow_par = FALSE anyway.

# Weighted ensemble of the two submodels. The loadings c(2, 1) are assigned
# by ranking the submodels on test RMSE (best model receives the first
# loading), as the printed loadings for id 1_1 show: XGBOOST (lower RMSE)
# gets 0.667 and PROPHET gets 0.333.
# NOTE(review): released modeltime.ensemble names the ranking argument
# `metric`; the development-time `loading_method = "lowest_rmse"` is assumed
# to be its predecessor -- confirm against the installed version.
ensem <- nested_modeltime_tbl %>%
  ensemble_nested_weighted(
    loadings = c(2, 1),
    metric = "rmse",
    control = control_nested_fit(allow_par = FALSE, verbose = TRUE)
  )
#> i [1/2] Starting Modeltime Table: ID 1_1...
#> v [1/2] Finished Modeltime Table: ID 1_1
#> i [2/2] Starting Modeltime Table: ID 1_3...
#> v [2/2] Finished Modeltime Table: ID 1_3
#> Finished in: 1.278002 secs.
#> 

# Print the nested modeltime table holding the weighted ensemble.
ensem
#> # Nested Modeltime Table
#> 
#> Trained on: .splits | Model Errors: [0]
#> # A tibble: 2 x 5
#>   id    .actual_data       .future_data      .splits         .modeltime_tables  
#>   <fct> <list>             <list>            <list>          <list>             
#> 1 1_1   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_time_tbl [3 x~
#> 2 1_3   <tibble [104 x 2]> <tibble [52 x 2]> <split [52|52]> <mdl_time_tbl [3 x~

# Pull the ensemble object (third model) out of the first nested row to
# inspect the loadings given to each submodel.
ensem %>%
  extract_nested_modeltime_table(.row_id = 1) %>%
  slice(3) %>%
  pluck(".model", 1)
#> -- Modeltime Ensemble -------------------------------------------
#> Ensemble of 2 Models (WEIGHTED)
#> 
#> # Modeltime Table
#> # A tibble: 2 x 6
#>   .model_id .model     .model_desc .type .calibration_data .loadings
#>       <int> <list>     <chr>       <chr> <list>                <dbl>
#> 1         1 <workflow> PROPHET     Test  <tibble [52 x 4]>     0.333
#> 2         2 <workflow> XGBOOST     Test  <tibble [52 x 4]>     0.667

# Test-set accuracy of the submodels and the weighted ensemble, per id.
extract_nested_test_accuracy(ensem)
#> # A tibble: 6 x 10
#>   id    .model_id .model_desc        .type    mae  mape  mase smape   rmse   rsq
#>   <fct>     <dbl> <chr>              <chr>  <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>
#> 1 1_1           1 PROPHET            Test  10071.  45.9 1.99   60.0 11777. 0.383
#> 2 1_1           2 XGBOOST            Test   6237.  25.3 1.23   24.6  9017. 0.191
#> 3 1_1           3 ENSEMBLE (WEIGHTE~ Test   4414.  14.8 0.870  16.0  8321. 0.410
#> 4 1_3           1 PROPHET            Test   3540.  29.9 1.37   25.5  4708. 0.796
#> 5 1_3           2 XGBOOST            Test   3086.  18.8 1.20   20.4  5086. 0.787
#> 6 1_3           3 ENSEMBLE (WEIGHTE~ Test   2772.  21.2 1.08   19.1  4068. 0.820

# Plot the test-set forecasts as a static (non-interactive) chart, grouped
# by id. `FALSE` is spelled out because `F` is an ordinary variable that can
# be reassigned.
ensem %>%
  extract_nested_test_forecast() %>%
  group_by(id) %>%
  plot_modeltime_forecast(.interactive = FALSE)

# Shut down the parallel backend started earlier in the script so no worker
# processes are left running.
parallel_stop()

Created on 2021-10-12 by the reprex package (v2.0.1)