mitchelloharawild/fable.prophet

Issues with holiday

benshing1984 opened this issue · 6 comments


I'm a beginner. How can I add holidays to the model?

library(fable.prophet)
library(prophet)
library(tsibble)
library(dplyr)

# Build the holiday table passed to the model: one row per holiday date with
# the columns `holiday`, `ds`, `lower_window` and `upper_window`.
# `tibble()` is used instead of the deprecated `data_frame()`.
cn_holiday <- tibble(
  holiday = "cn",
  ds = as.Date(c("2019-01-01", "2019-02-04", "2019-02-05",
                 "2019-02-06", "2019-02-07", "2019-02-08",
                 "2019-02-09", "2019-02-10", "2019-04-05",
                 "2019-05-01", "2019-06-07", "2019-09-13",
                 "2019-10-01", "2019-10-02", "2019-10-03",
                 "2019-10-04", "2019-10-05", "2019-10-06",
                 "2019-10-07")),
  lower_window = 0,
  upper_window = 1
)
usa_holiday <- tibble(
  holiday = "usa",
  ds = as.Date(c("2019-01-01", "2019-01-21", "2019-02-18",
                 "2019-05-12", "2019-05-27", "2019-06-16",
                 "2019-07-04", "2019-09-02", "2019-10-14",
                 "2019-11-11", "2019-11-28", "2019-11-29",
                 "2019-12-25")),
  lower_window = 0,
  upper_window = 1
)

# Stack both calendars and key the tsibble by holiday name, so a date that
# appears in both sets (e.g. 2019-01-01) stays a valid, distinct row.
holidays <- bind_rows(cn_holiday, usa_holiday)
holidays <- as_tsibble(holidays, index = ds, key = holiday)

# Fit prophet to `Turnover` with a multiplicative yearly season term and the
# holiday table above. `cafe` is not defined in this snippet and must already
# exist in the session.
fit <- cafe %>%
  model(
    prophet = fable.prophet::prophet(
      Turnover ~ season("year", 4, type = "multiplicative") +
        holiday(holidays = holidays)
    )
  )

Warning message:
8 errors (1 unique) encountered for prophet
[8] object 'holidays' not found

It shows this warning message. Am I missing anything?

I don't have the cafe dataset, so I am unable to test your code above.
I've produced a similar example from the prophet docs, which seems to work fine.

Could you provide the dataset to create a minimally reproducible example?

library(tidyverse)
library(tsibble)
#> 
#> Attaching package: 'tsibble'
#> The following object is masked from 'package:dplyr':
#> 
#>     id
library(fable.prophet)
#> Loading required package: Rcpp
#> Loading required package: fabletools
# Read the example series from the prophet repository and convert it to a
# tsibble indexed by `ds`.
df <- read_csv("https://raw.githubusercontent.com/facebook/prophet/master/examples/example_wp_log_peyton_manning.csv") %>% 
  as_tsibble(index = ds)
#> Parsed with column specification:
#> cols(
#>   ds = col_date(format = ""),
#>   y = col_double()
#> )
# Holiday table for the playoff dates: one row per date, all labelled
# 'playoff', with `lower_window = 0` and `upper_window = 1`.
playoffs <- data_frame(
  holiday = 'playoff',
  ds = as.Date(c('2008-01-13', '2009-01-03', '2010-01-16',
                 '2010-01-24', '2010-02-07', '2011-01-08',
                 '2013-01-12', '2014-01-12', '2014-01-19',
                 '2014-02-02', '2015-01-11', '2016-01-17',
                 '2016-01-24', '2016-02-07')),
  lower_window = 0,
  upper_window = 1
)
#> Warning: `data_frame()` is deprecated, use `tibble()`.
#> This warning is displayed once per session.
# Same layout for the superbowl dates; each of these dates also appears in the
# playoff table above.
superbowls <- data_frame(
  holiday = 'superbowl',
  ds = as.Date(c('2010-02-07', '2014-02-02', '2016-02-07')),
  lower_window = 0,
  upper_window = 1
)
# Stack both tables and key the tsibble by holiday name, so the dates shared
# between the two sets remain distinct rows.
holidays <- bind_rows(playoffs, superbowls)
holidays <- as_tsibble(holidays, index = ds, key = holiday)

# Fit prophet to `y` with a multiplicative yearly season term and the holiday
# table above. No parallel plan is set in this snippet.
fit <- df %>% 
  model(
    prophet = prophet(y ~ season("year", 4, type = "multiplicative") + holiday( holidays = holidays))
  )
# Print the fitted mable.
fit
#> # A mable: 1 x 1
#>   prophet  
#>   <model>  
#> 1 <prophet>

Created on 2020-01-06 by the reprex package (v0.3.0)

Thanks Mitchell for your prompt reply,

I'm testing it with the Walmart test set, and I still get this error.

library(tidyverse)
library(tsibble)
library(fable.prophet)
library(data.table)
library(future)
library(lubridate)


# Download the Walmart store sales data.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
walmart_store_sales_data <- data.table::fread(
  "https://remixinstitute.box.com/shared/static/9kzyttje3kd7l41y1e14to0akwl9vuje.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Parse `Date`, then build a tsibble with one `Weekly_Sales` series per
# Store/Dept pair.
df <- walmart_store_sales_data %>%
  mutate(Date = ymd(Date)) %>%
  as_tsibble(index = Date, key = c(Store, Dept)) %>%
  select(Date, Store, Dept, Weekly_Sales)

# Holiday tables, built with `tibble()` instead of the deprecated
# `data_frame()`.
playoffs <- tibble(
  holiday = "playoff",
  ds = as.Date(c("2008-01-13", "2009-01-03", "2010-01-16",
                 "2010-01-24", "2010-02-07", "2011-01-08",
                 "2013-01-12", "2014-01-12", "2014-01-19",
                 "2014-02-02", "2015-01-11", "2016-01-17",
                 "2016-01-24", "2016-02-07")),
  lower_window = 0,
  upper_window = 1
)

superbowls <- tibble(
  holiday = "superbowl",
  ds = as.Date(c("2010-02-07", "2014-02-02", "2016-02-07")),
  lower_window = 0,
  upper_window = 1
)

# Stack both tables and key the tsibble by holiday name.
holidays <- bind_rows(playoffs, superbowls)
holidays <- as_tsibble(holidays, index = ds, key = holiday)

# Enable parallel model fitting through future.
# NOTE(review): `multiprocess` is reported as deprecated in recent releases of
# future, with `multisession` as the suggested replacement - confirm against
# the installed version. It is kept here so the script runs as posted.
plan(multiprocess)

# Fit one prophet model per Store/Dept series with a multiplicative weekly
# season term and the holiday table above.
fit <- df %>%
  model(
    prophet = prophet(
      Weekly_Sales ~ season("week", 7, type = "multiplicative") +
        holiday(holidays = holidays)
    )
  )

#Warning message:
#3331 errors (1 unique) encountered for prophet
#[3331] object 'holidays' not found
 
 fit
 A mable: 3,331 x 3
 Key:     Store, Dept [3,331]
   Store  Dept prophet     
   <int> <int> <model>     
 1     1     1 <NULL model>

Thanks for the reproducible example - I can reproduce your error now.

As far as I can tell, the issue is with the use of parallel processing, not with holidays.

Could you try a small sample of Store/Dept without parallel?

Parallel processing does cause this error. Could a future update also support multiprocessing with holiday effects? That would be nice.

I just tested a smaller sample with and without parallel processing on my laptop. Parallel processing does help when I have a bigger data set.

Thanks anyway for solving this problem.

> library(tidyverse)
> library(tsibble)
> library(fable.prophet)
> library(data.table)
> library(lubridate)
> library(fable)
> library(tictoc)
> 
> walmart_store_sales_data = data.table::fread("https://remixinstitute.box.com/shared/static/9kzyttje3kd7l41y1e14to0akwl9vuje.csv", header = T, stringsAsFactors = FALSE) 
 [0%] Downloaded 0 bytes...
 Downloaded 3087910 bytes...> 
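> # NOTE(review): in the filter below, `==` recycles c(2, 3) and 1:10 row by row instead of testing membership; `%in%` was probably intended, which would explain the 10 keys and the [70D] interval in the output.
> df<- walmart_store_sales_data%>%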
+   mutate(Date = ymd(Date))%>%
+   as_tsibble(index = Date, key = c(Store, Dept))%>%
+   select(Date, Store, Dept, Weekly_Sales)%>%
+   filter(Store == c(2, 3) & Dept == c(1:10))
> 
> 
> playoffs <- data_frame(
+   holiday = 'playoff',
+   ds = as.Date(c('2008-01-13', '2009-01-03', '2010-01-16',
+                  '2010-01-24', '2010-02-07', '2011-01-08',
+                  '2013-01-12', '2014-01-12', '2014-01-19',
+                  '2014-02-02', '2015-01-11', '2016-01-17',
+                  '2016-01-24', '2016-02-07')),
+   lower_window = 0,
+   upper_window = 1
+ )
> 
> 
> superbowls <- data_frame(
+   holiday = 'superbowl',
+   ds = as.Date(c('2010-02-07', '2014-02-02', '2016-02-07')),
+   lower_window = 0,
+   upper_window = 1
+ )
> holidays <- bind_rows(playoffs, superbowls)
> holidays <- as_tsibble(holidays, index = ds, key = holiday)
> 
> 
> tic()
> fit <- df %>% 
+   model(
+     prophet = prophet( Weekly_Sales ~ season("week", 7, type = "multiplicative") )
+   )
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
> fit%>%
+   forecast(h =1)
# A fable: 10 x 6 [70D]
# Key:     Store, Dept, .model [10]
   Store  Dept .model  Date       Weekly_Sales .distribution  
   <int> <int> <chr>   <date>            <dbl> <dist>         
 1     2     1 prophet 2012-11-23       35652. sim(=dbl[1000])
 2     2     3 prophet 2013-01-04       22304. sim(=dbl[1000])
 3     2     5 prophet 2012-12-07       35360. sim(=dbl[1000])
 4     2     7 prophet 2012-11-09       41939. sim(=dbl[1000])
 5     2     9 prophet 2012-12-21       31652. sim(=dbl[1000])
 6     3     2 prophet 2012-11-23       20013. sim(=dbl[1000])
 7     3     4 prophet 2013-01-04        9320. sim(=dbl[1000])
 8     3     6 prophet 2012-12-07        2679. sim(=dbl[1000])
 9     3     8 prophet 2012-11-09        8175. sim(=dbl[1000])
10     3    10 prophet 2012-12-21       13142. sim(=dbl[1000])
> toc()
14.85 sec elapsed
> 
> #use parallel processing
> 
> library(future)
> plan(multiprocess)
> tic()
> fit <- df %>% 
+   model(
+     prophet = prophet( Weekly_Sales ~ season("week", 7, type = "multiplicative") )
+   )
 Progress: ──────────────────────────────────────────────────────────────── 100%

n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 10
n.changepoints greater than number of observations. Using 11
> fit%>%
+   forecast(h =1)
# A fable: 10 x 6 [70D]
# Key:     Store, Dept, .model [10]
   Store  Dept .model  Date       Weekly_Sales .distribution  
   <int> <int> <chr>   <date>            <dbl> <dist>         
 1     2     1 prophet 2012-11-23       35652. sim(=dbl[1000])
 2     2     3 prophet 2013-01-04       22304. sim(=dbl[1000])
 3     2     5 prophet 2012-12-07       35360. sim(=dbl[1000])
 4     2     7 prophet 2012-11-09       41939. sim(=dbl[1000])
 5     2     9 prophet 2012-12-21       31652. sim(=dbl[1000])
 6     3     2 prophet 2012-11-23       20013. sim(=dbl[1000])
 7     3     4 prophet 2013-01-04        9320. sim(=dbl[1000])
 8     3     6 prophet 2012-12-07        2679. sim(=dbl[1000])
 9     3     8 prophet 2012-11-09        8175. sim(=dbl[1000])
10     3    10 prophet 2012-12-21       13142. sim(=dbl[1000])
> toc()
21.73 sec elapsed

Mitch. I noticed the same issue.

There's some strange conflict between fable.prophet and future. I'm digging the code to try to find exactly where fable.prophet 'loses contact' with the Environment when running through future.

Here's another example with your good old Victoria Electricity Demand dataset:

library(dplyr)
library(future)
library(tsibble)
library(fable.prophet)
library(fable)
library(tsibbledata)
library(tictoc)

# Holiday table: national Australian holidays for 2012-2014, with the `date`
# column renamed to `ds` before conversion to a tsibble.
holidays <- as_tsibble(
  rename(
    tsibble::holiday_aus(c(2012, 2013, 2014), state = "national"),
    ds = date
  )
)

# Run 1: no future plan has been set at this point in the script.
# `pr1` includes the holiday term; `pr2` is the same model without it.
tictoc::tic()
fit <- model(
  vic_elec,
  pr1 = fable.prophet::prophet(
    log(Demand + 1) ~ season(period = "week") + season(period = "day") +
      holiday(holidays)
  ),
  pr2 = fable.prophet::prophet(
    log(Demand + 1) ~ season(period = "week") + season(period = "day")
  )
)
tictoc::toc()
accuracy(fit)

# Run 2: the same two models, fitted after switching future to the
# `multiprocess` plan.
future::plan(multiprocess)
tictoc::tic()
fit2 <- model(
  vic_elec,
  pr1 = fable.prophet::prophet(
    log(Demand + 1) ~ season(period = "week") + season(period = "day") +
      holiday(holidays)
  ),
  pr2 = fable.prophet::prophet(
    log(Demand + 1) ~ season(period = "week") + season(period = "day")
  )
)
tictoc::toc()
accuracy(fit2)

Thanks for investigating this further.
I believe this is due to scoping issues with parallel modelling in fabletools: tidyverts/fabletools#146

The model formula will need to be carefully parsed to identify required objects that do not exist within the tsibble, and distribute them to the parallel worker nodes.