Inverse transform of quantiles after differentiation
brsnw250 opened this issue · 0 comments
🚀 Feature Request
Currently, we treat quantiles in the inverse transforms the same way as the target series. When using differentiation (`DifferencingTransform`), this might result in very wide and not meaningful intervals.
Mainly, this affects cases when the expected value of … [the rest of this sentence, apparently an inline formula, is missing from the extracted text].
Code to reproduce
from etna.datasets.datasets_generation import generate_ar_df
from etna.datasets import TSDataset
from etna.pipeline import Pipeline
from etna.models import SeasonalMovingAverageModel
from etna.transforms import DifferencingTransform
from etna.analysis import plot_forecast
# Reproduction: forecast a differenced series with prediction intervals and
# plot the result against the held-out tail.
HORIZON = 20  # used both as the hold-out size and as the forecast horizon

raw_df = generate_ar_df(100, "2020-01-01")
wide_df = TSDataset.to_dataset(df=raw_df)
dataset = TSDataset(df=wide_df, freq="D")
train_ts, test_ts = dataset.train_test_split(test_size=HORIZON)

# The model is fitted on the differenced "target"; the intervals it produces
# then go through the inverse transform, which is the behavior under question.
differencing = DifferencingTransform(in_column="target")
model = SeasonalMovingAverageModel(seasonality=1)
pipeline = Pipeline(
    transforms=[differencing],
    model=model,
    horizon=HORIZON,
)
pipeline.fit(train_ts)

forecast = pipeline.forecast(prediction_interval=True)
plot_forecast(
    forecast_ts=forecast,
    test_ts=test_ts,
    prediction_intervals=True,
)
Proposal
Implement interface for separate treatment of quantiles in transforms.
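Use the following inverse transform for quantiles in `DifferencingTransform`: $\hat{y}^{(q)}_t = \hat{y}_{t-1} + \hat{r}^{(q)}_t$, where $\hat{y}_{t-1}$ is the integrated (inverse-transformed) value at the previous step and $\hat{r}^{(q)}_t$ is the $q$-quantile estimated for the differenced series. (The original formula was lost in extraction; this form is reconstructed from the "proposed approach" code under "Additional context".)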
Test cases
No response
Additional context
Here is a comparison between current and proposed approaches.
import numpy as np
import matplotlib.pyplot as plt
# Timeline and Gaussian noise for a 100-step simulation.
t = np.arange(100)
noise = np.random.normal(loc=0, scale=1, size=100)
# Push the first draw up by 10 so the walk starts away from zero; that first
# value is also the level needed to integrate the differences back.
noise[0] += 10
start_level = noise[0]

# Random walk and its first difference (one element shorter than the walk).
y = noise.cumsum()
r = y[1:] - y[:-1]

# Interval bounds for the differenced series: the series itself shifted by
# its empirical 2.5% and 97.5% quantiles.
shift_lower, shift_upper = np.quantile(r, [0.025, 0.975])
r_q_upper = r + shift_upper
r_q_lower = r + shift_lower

# Current approach: the bounds are integrated exactly like the target, so the
# constant quantile shift is accumulated at every step.
y_q_upper = start_level + r_q_upper.cumsum()
y_q_lower = start_level + r_q_lower.cumsum()

# Proposed approach: integrate only the differenced series, lag it by one step
# (the first element falls back to the starting level), then add the
# differenced-series bounds on top without accumulating them.
integrated = start_level + r.cumsum()
previous_value = np.concatenate(([start_level], integrated[:-1]))
y_q_upper_adj = previous_value + r_q_upper
y_q_lower_adj = previous_value + r_q_lower
# Three stacked panels: the differenced series with its interval, then the
# original series with the interval from each of the two approaches.
fig = plt.figure(figsize=(6, 12))
steps = t[1:]  # differencing drops the first point, so plots start at t[1]

diff_ax = fig.add_subplot(3, 1, 1)
diff_ax.plot(steps, r, color="orange", label="first difference")
diff_ax.fill_between(steps, r_q_upper, r_q_lower, alpha=0.3, color="orange", label="interval")
diff_ax.legend()

# The two lower panels differ only in their title and interval bounds.
panels = (
    ("Current approach", y_q_upper, y_q_lower),
    ("Proposed approach", y_q_upper_adj, y_q_lower_adj),
)
for position, (panel_title, upper, lower) in enumerate(panels, start=2):
    series_ax = fig.add_subplot(3, 1, position)
    series_ax.set_title(panel_title)
    series_ax.plot(steps, y[1:], label="series")
    series_ax.fill_between(steps, upper, lower, alpha=0.3, label="interval", color="g")
    series_ax.legend()