Nixtla/statsforecast

Exogenous Regressor having ValueError: could not broadcast input array from shape (53,3) into shape (52,3)

Closed this issue · 8 comments

Description

from statsforecast.models import AutoARIMA
models = [AutoARIMA(season_length=4)]
sf = StatsForecast(
models=models,
freq='W',
n_jobs=1,
)
horizon = 52
level = [95]

fcst = sf.forecast(df=input_df, h=horizon, X_df=ex_df, level=level)
fcst.head()

Why does this code give me the following error?


ValueError Traceback (most recent call last)
Cell In[111], line 11
8 horizon = 52
9 level = [95]
---> 11 fcst = sf.forecast(df=input_df, h=horizon, X_df=ex_df, level=level)
12 fcst.head()

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/statsforecast/core.py:1565, in StatsForecast.forecast(self, h, df, X_df, level, fitted, sort_df, prediction_intervals, id_col, time_col, target_col)
1561 raise ValueError(
1562 "You must specify level when using prediction_intervals"
1563 )
1564 if self._is_native(df=df):
-> 1565 return super().forecast(
1566 df=df,
1567 h=h,
1568 X_df=X_df,
1569 level=level,
1570 fitted=fitted,
1571 sort_df=sort_df,
1572 prediction_intervals=prediction_intervals,
1573 id_col=id_col,
1574 time_col=time_col,
1575 target_col=target_col,
1576 )
1577 assert df is not None
1578 engine = make_execution_engine(infer_by=[df])

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/statsforecast/core.py:863, in _StatsForecast.forecast(self, h, df, X_df, level, fitted, sort_df, prediction_intervals, id_col, time_col, target_col)
861 X, level = self._parse_X_level(h=h, X=X_df, level=level)
862 if self.n_jobs == 1:
--> 863 res_fcsts = self.ga.forecast(
864 models=self.models,
865 h=h,
866 fallback_model=self.fallback_model,
867 fitted=fitted,
868 X=X,
869 level=level,
870 verbose=self.verbose,
871 target_col=target_col,
872 )
873 else:
874 res_fcsts = self._forecast_parallel(
875 h=h, fitted=fitted, X=X, level=level, target_col=target_col
876 )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/statsforecast/core.py:209, in GroupedArray.forecast(self, models, h, fallback_model, fitted, X, level, verbose, target_col)
207 if fcsts_i.ndim == 1:
208 fcsts_i = fcsts_i[:, None]
--> 209 fcsts[i * h : (i + 1) * h, cuts[i_model] : cuts[i_model + 1]] = fcsts_i
210 cols += cols_m
211 if fitted:

ValueError: could not broadcast input array from shape (53,3) into shape (52,3)

Note that the shape of my input dataframe is 416 rows × 9 columns, and the shape of the exogenous dataframe is 104 rows × 8 columns.

Link

No response

Hey. Can you provide a reproducible example (along with some data)? The following works as expected:

from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA
from utilsforecast.data import generate_series

h = 52
series = generate_series(2, freq="W", min_length=200, max_length=300, n_static_features=2)
X_df = series.groupby('unique_id', observed=True).tail(h).drop(columns='y')
train = series.drop(X_df.index)

sf = StatsForecast(models=[AutoARIMA(season_length=4)], freq="W")
sf.forecast(df=train, h=h, X_df=X_df, level=[95])

The data kind of looks like this:

Input Data:

unique_id | ds | y | trend | seasonal | exogenous | exogenous_lag_3 | exogenous_lag_5 | external_signal
-- | -- | -- | -- | -- | -- | -- | -- | --
"rT:{ZÊ悴ÜF{<фï#=๑,᱄©ঋӎ° | 2020-03-08 | 7.486014 | 200.0 | 6199.476635 | 217.520839 | 335.556044 | 433.702076 | 6181.411581
"rT:{ZÊ悴ÜF{<фï#=๑,᱄©ঋӎ° | 2020-03-15 | 7.115399 | 1200.0 | 7057.529964 | 385.700711 | 326.913268 | 419.006001 | 5339.990400
"rT:{ZÊ悴ÜF{<фï#=๑,᱄©ঋӎ° | 2020-03-22 | 6.048641 | 2600.0 | 7849.874700 | 459.054781 | 563.332891 | 424.123655 | 5489.351360
"rT:{ZÊ悴ÜF{<фï#=๑,᱄©ঋӎ° | 2020-03-29 | 4.969568 | 4000.0 | 8092.339929 | 418.075968 | 358.778677 | 307.659674 | 5121.140475
"rT:{ZÊ悴ÜF{<фï#=๑,᱄©ঋӎ° | 2020-04-05 | 4.619659 | 5400.0 | 7782.813379 | 264.883827 | 344.763527 | 421.083844 | 4492.706055

Exogenous Data:

unique_id | ds | y | trend | seasonal | exogenous | exogenous_lag_3 | exogenous_lag_5 | external_signal
-- | -- | -- | -- | -- | -- | -- | -- | --
"rT:{ZÊ悴ÜF{<фï#=๑,᱄©ঋӎ° | 2020-03-08 | 7.486014 | 200.0 | 6199.476635 | 217.520839 | 335.556044 | 433.702076 | 6181.411581
"rT:{ZÊ悴ÜF{<фï#=๑,᱄©ঋӎ° | 2020-03-15 | 7.115399 | 1200.0 | 7057.529964 | 385.700711 | 326.913268 | 419.006001 | 5339.990400
"rT:{ZÊ悴ÜF{<фï#=๑,᱄©ঋӎ° | 2020-03-22 | 6.048641 | 2600.0 | 7849.874700 | 459.054781 | 563.332891 | 424.123655 | 5489.351360
"rT:{ZÊ悴ÜF{<фï#=๑,᱄©ঋӎ° | 2020-03-29 | 4.969568 | 4000.0 | 8092.339929 | 418.075968 | 358.778677 | 307.659674 | 5121.140475
"rT:{ZÊ悴ÜF{<фï#=๑,᱄©ঋӎ° | 2020-04-05 | 4.619659 | 5400.0 | 7782.813379 | 264.883827 | 344.763527 | 421.083844 | 4492.706055

I meant providing a self-contained example (like the one I provided above) that reproduces the error.

I am also getting the following error when running your chunk of code in AzureML. Is that a dependency issue?


ValueError Traceback (most recent call last)
Cell In[11], line 2
1 sf = StatsForecast(models=[AutoARIMA(season_length=4)], freq="W")
----> 2 sf.forecast(df=train, h=h, X_df=X_df, level=[95])

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/statsforecast/core.py:1565, in StatsForecast.forecast(self, h, df, X_df, level, fitted, sort_df, prediction_intervals, id_col, time_col, target_col)
1561 raise ValueError(
1562 "You must specify level when using prediction_intervals"
1563 )
1564 if self._is_native(df=df):
-> 1565 return super().forecast(
1566 df=df,
1567 h=h,
1568 X_df=X_df,
1569 level=level,
1570 fitted=fitted,
1571 sort_df=sort_df,
1572 prediction_intervals=prediction_intervals,
1573 id_col=id_col,
1574 time_col=time_col,
1575 target_col=target_col,
1576 )
1577 assert df is not None
1578 engine = make_execution_engine(infer_by=[df])

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/statsforecast/core.py:861, in _StatsForecast.forecast(self, h, df, X_df, level, fitted, sort_df, prediction_intervals, id_col, time_col, target_col)
859 self._validate_sizes_for_prediction_intervals(prediction_intervals)
860 self._set_prediction_intervals(prediction_intervals=prediction_intervals)
--> 861 X, level = self._parse_X_level(h=h, X=X_df, level=level)
862 if self.n_jobs == 1:
863 res_fcsts = self.ga.forecast(
864 models=self.models,
865 h=h,
(...)
871 target_col=target_col,
872 )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/statsforecast/core.py:681, in _StatsForecast._parse_X_level(self, h, X, level)
677 raise ValueError(
678 f"Expected X to have shape {expected_shape}, but got {X.shape}"
679 )
680 first_col = [c for c in X.columns if c not in (self.id_col, self.time_col)][0]
--> 681 _, _, data, indptr, _ = ufp.process_df(X, self.id_col, self.time_col, first_col)
682 return GroupedArray(data, indptr), level

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/utilsforecast/processing.py:657, in process_df(df, id_col, time_col, target_col)
635 """Extract components from dataframe
636
637 Parameters
(...)
654 If the data is already sorted this is None.
655 """
656 # validations
--> 657 validate_format(df, id_col, time_col, target_col)
659 # ids
660 id_counts = counts_by_id(df, id_col)

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/utilsforecast/validation.py:130, in validate_format(df, id_col, time_col, target_col)
128 is_numeric = target.is_numeric()
129 if not is_numeric:
--> 130 raise ValueError(
131 f"The target column ('{target_col}') should have a numeric data type, got '{target.dtype}')"
132 )

ValueError: The target column ('static_0') should have a numeric data type, got 'category')

Can you try upgrading utilsforecast? `pip install --upgrade utilsforecast`

Tried that. Still having the same issue.

I just updated StatsForecast and the chunk you provided works, but I am still having the same issue with my dataset. I have no idea why the shape error is being raised when my input and exogenous dataframes have the correct shapes, and the same data works fine with pmdarima.

Anyway, I solved it by predicting the exogenous features for the test set.