ValueError: Expected parameter df (Tensor of shape (32, 168, 1)) of distribution Chi2() to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values
Yuang-Deng opened this issue · 3 comments
NOTE: ISSUES ARE NOT FOR CODE HELP - Ask for Help at https://stackoverflow.com
Your issue may already be reported!
Also, please search on the issue tracker before creating one.
- I'm submitting a ...
- bug report
- feature request
- support request => Please do not submit support request here, see note at the top of this template.
Issue Description
- When Issue Happens
- Steps To Reproduce
1. Install Auto-PyTorch from git@github.com:automl/Auto-PyTorch.git
2. Install ConfigSpace from git@github.com:automl/ConfigSpace.git
3. Run examples/APT-TS/APT_task.py with the electricity dataset
Expected Behavior
Current Behavior
Possible Solution
Your Code
import numpy as np
from autoPyTorch.api.time_series_forecasting import TimeSeriesForecastingTask
import pandas as pds
from datetime import datetime
import warnings
import os
import copy
from pathlib import Path
import argparse
import csv
import shutil
from autoPyTorch.datasets.resampling_strategy import (
CrossValTypes,
HoldoutValTypes,
)
import data_loader
from constant import VALUE_COL_NAME, TIME_COL_NAME, SEASONALITY_MAP, FREQUENCY_MAP, DATASETS
def compute_loss(forecast_horizon, seasonality, final_forecasts, test_series_list, train_series_list):
    """Aggregate forecasting errors over a collection of series.

    For every (forecast, ground truth, training series) triple the function
    computes MASE, sMAPE, mSMAPE, MAE and RMSE, then reports the mean and the
    median of each metric across all series.

    Args:
        forecast_horizon: Number of forecast steps; used to normalise the
            per-series errors.
        seasonality: Lag of the seasonal-naive differences that scale MASE.
        final_forecasts: Iterable of per-series forecasts.
        test_series_list: Iterable of per-series ground-truth values, iterated
            in lockstep with ``final_forecasts``.
        train_series_list: Iterable of per-series training values, iterated in
            lockstep with the other two; only used for the MASE denominator.

    Returns:
        dict with the keys ``'Mean <metric>'`` and ``'Median <metric>'`` for
        ``MASE``, ``sMAPE``, ``mSMAPE``, ``MAE`` and ``RMSE``. A value is NaN
        when no series contributed to that metric.
    """
    epsilon = 0.1
    sqrt_forecast_horizon = np.sqrt(forecast_horizon)

    MASE, sMAPE, msMAPE, MAE, RMSE = [], [], [], [], []

    for f, y, y_data in zip(final_forecasts, test_series_list, train_series_list):
        f = np.asarray(f, dtype=float)
        y = np.asarray(y, dtype=float)
        y_data = np.asarray(y_data, dtype=float)

        M = len(y_data)
        diff_abs = np.abs(f - y)
        # Computed unconditionally: MASE and MAE both need it, whichever
        # denominator branch is taken below.
        abs_loss = np.sum(diff_abs)

        # Seasonal-naive scale; only defined when the training series is
        # longer than one season.
        mase_denominator = 0.0
        if M > seasonality:
            mase_denominator = forecast_horizon / (M - seasonality) * \
                np.sum(np.abs(y_data[seasonality:] - y_data[:-seasonality]))

        # Fall back to the one-step naive scale when the seasonal one is
        # unavailable or zero. A single observation has no differences, so
        # the series is then left out of MASE instead of dividing by zero.
        if mase_denominator == 0 and M > 1:
            mase_denominator = forecast_horizon / (M - 1) * \
                np.sum(np.abs(y_data[1:] - y_data[:-1]))

        if mase_denominator > 0:
            mase = abs_loss / mase_denominator
            # Non-finite scores (e.g. NaNs in the data) are skipped.
            if np.isfinite(mase):
                MASE.append(mase)

        # The denominator is zero only where forecast and truth are both
        # zero; those steps contribute an error of 0.
        smape_denominator = np.abs(y) + np.abs(f)
        smape = np.divide(2 * diff_abs, smape_denominator,
                          out=np.zeros_like(diff_abs), where=smape_denominator != 0)
        sMAPE.append(np.sum(smape) / forecast_horizon)

        msmape_denominator = np.maximum(np.abs(y) + np.abs(f) + epsilon, epsilon + 0.5)
        msMAPE.append(np.sum(2 * diff_abs / msmape_denominator) / forecast_horizon)

        MAE.append(abs_loss / forecast_horizon)
        RMSE.append(np.linalg.norm(f - y) / sqrt_forecast_horizon)

    def _aggregate(values, reducer):
        # Reducing an empty list would emit a RuntimeWarning; report NaN directly.
        return reducer(values) if values else float('nan')

    res = {}
    for label, values in (('MASE', MASE), ('sMAPE', sMAPE), ('mSMAPE', msMAPE),
                          ('MAE', MAE), ('RMSE', RMSE)):
        res[f'Mean {label}'] = _aggregate(values, np.mean)
        res[f'Median {label}'] = _aggregate(values, np.median)
    return res
def main(working_dir="/home/$USER/tmp/tmp",
         dataset_name='nn5_daily',
         budget_type='dataset_size',
         res_dir="/home/ubuntu/autopytorch/Auto-PyTorch/work_dirs/tsf_res",
         validation='holdout',
         seed=1):
    """Run an Auto-PyTorch forecasting search on one .tsf dataset and store the results.

    The last ``forecast_horizon`` values of every series are held out as the
    test split. A ``TimeSeriesForecastingTask`` is searched on the remainder,
    refitted, and its predictions are scored with ``compute_loss``. The
    forecasts and the ground truth are written as CSV-formatted text files.

    Args:
        working_dir: Root directory for the run's log and output folders.
            Environment variables such as ``$USER`` are expanded.
        dataset_name: Key into ``DATASETS``.
        budget_type: ``'random_search'``, ``'full_budget'``, ``'epochs'`` or a
            member of ``FORECASTING_BUDGET_TYPE``.
        res_dir: Root directory receiving the SMAC output copy and result files.
        validation: ``'holdout'``, ``'holdout_ts'`` or ``'cv'``. ``'cv'`` falls
            back to ``'holdout'`` when the shortest series cannot supply at
            least two splits.
        seed: Random seed; also used as a path component.

    Raises:
        ValueError: If no forecast horizon is available or ``validation`` is
            not one of the supported values.
        NotImplementedError: If ``budget_type`` is unknown.
    """
    file_name, external_forecast_horizon, integer_conversion = DATASETS[dataset_name]
    dataset_path = Path("/home/ubuntu/autopytorch/tsf_data") / dataset_name / file_name

    df, frequency, forecast_horizon, contain_missing_values, contain_equal_length = \
        data_loader.convert_tsf_to_dataframe(str(dataset_path))

    # If the forecast horizon is not given within the .tsf file, it must come
    # from the DATASETS entry.
    if forecast_horizon is None:
        if external_forecast_horizon is None:
            raise ValueError("Please provide the required prediction steps")
        forecast_horizon = external_forecast_horizon

    if frequency is not None:
        freq = FREQUENCY_MAP[frequency]
        seasonality = SEASONALITY_MAP[frequency]
    else:
        freq = "1Y"
        seasonality = 1
    if isinstance(seasonality, list):
        seasonality = min(seasonality)  # Use to calculate MASE
    seasonality = int(seasonality)

    train_series_list = []
    test_series_list = []
    y_test = []
    train_start_time_list = []
    shortest_sequence = np.inf

    has_time_col = TIME_COL_NAME in df.columns
    # Dummy timestamp used when the dataset carries no start times.
    dummy_start_time = datetime.strptime('1900-01-01 00-00-00', '%Y-%m-%d %H-%M-%S')

    for _, row in df.iterrows():
        train_start_time = row[TIME_COL_NAME] if has_time_col else dummy_start_time
        # NOTE(review): the `freq` argument of Timestamp is reported as
        # deprecated in recent pandas releases — confirm against the pinned
        # pandas version before upgrading.
        train_start_time_list.append(pds.Timestamp(train_start_time, freq=freq))

        series_data = row[VALUE_COL_NAME].to_numpy()

        # Creating training and test series. Test series will be only used during evaluation
        split_at = len(series_data) - forecast_horizon
        train_series_data = series_data[:split_at]
        test_series_data = series_data[split_at:len(series_data)]

        y_test.append(series_data[-forecast_horizon:])
        train_series_list.append(train_series_data)
        test_series_list.append(test_series_data)

        shortest_sequence = min(len(train_series_data), shortest_sequence)

    resampling_strategy = None
    resampling_strategy_args = None
    if validation == 'cv':
        n_splits = 3
        # Reduce the number of splits until they fit into the shortest series.
        while n_splits > 0 and shortest_sequence - forecast_horizon - forecast_horizon * n_splits <= 0:
            n_splits -= 1
        if n_splits >= 2:
            resampling_strategy = CrossValTypes.time_series_cross_validation
            resampling_strategy_args = {'num_splits': n_splits}
        else:
            warnings.warn('The dataset is not suitable for cross validation, we will try holdout instead')
            validation = 'holdout'
    elif validation == 'holdout_ts':
        resampling_strategy = CrossValTypes.time_series_ts_cross_validation

    if validation == 'holdout':
        resampling_strategy = HoldoutValTypes.time_series_hold_out_validation

    if resampling_strategy is None:
        # Fail early instead of with a NameError when the task is built.
        raise ValueError(f"Unknown validation type: {validation!r}")

    # expandvars resolves placeholders such as $USER in the default working_dir.
    path = Path(os.path.expandvars(working_dir)) / 'APT_run'
    run_dir = path / dataset_name / budget_type / f'{seed}'
    path_log = str(run_dir / "log")
    path_pred = str(run_dir / "output")

    # Remove intermediate files. Each directory is removed on its own so a
    # missing log folder does not prevent cleaning the output folder.
    for stale_dir in (path_log, path_pred):
        try:
            shutil.rmtree(stale_dir)
        except OSError as e:
            print("Error: %s - %s." % (e.filename, e.strerror))

    smac_source_dir = Path(path_log) / "smac3-output"

    api = TimeSeriesForecastingTask(
        seed=seed,
        ensemble_size=20,
        resampling_strategy=resampling_strategy,
        resampling_strategy_args=resampling_strategy_args,
        temporary_directory=path_log,
        output_directory=path_pred,
    )

    api.set_pipeline_config(device="cuda",
                            torch_num_threads=8,
                            early_stopping=20)

    if budget_type == "random_search":
        budget_kwargs = {'budget_type': 'random_search',
                         'max_budget': None,
                         'min_budget': None}
    elif budget_type != 'full_budget':
        from autoPyTorch.constants import FORECASTING_BUDGET_TYPE
        if budget_type not in FORECASTING_BUDGET_TYPE and budget_type != 'epochs':
            raise NotImplementedError('Unknown Budget Type!')
        budget_kwargs = {'budget_type': budget_type,
                         'max_budget': 50 if budget_type == 'epochs' else 1.0,
                         'min_budget': 5 if budget_type == 'epochs' else 0.1}
    else:
        # 'full_budget': every configuration is trained for the maximum number of epochs.
        budget_kwargs = {'budget_type': 'epochs',
                         'max_budget': 50,
                         'min_budget': 50}

    api.search(
        X_train=None,
        y_train=copy.deepcopy(train_series_list),
        optimize_metric='mean_MASE_forecasting',
        n_prediction_steps=forecast_horizon,
        **budget_kwargs,
        freq=freq,
        # start_times_train=train_start_time_list,
        memory_limit=32 * 1024,
        normalize_y=False,
        total_walltime_limit=600,
        min_num_test_instances=1000,
    )

    res_dir_task = Path(res_dir) / dataset_name / budget_type / f'{seed}'
    smac_res_path = res_dir_task / 'smac3-output'
    res_dir_task.mkdir(parents=True, exist_ok=True)

    # Replace any previous copy of the SMAC output with the one from this run.
    try:
        shutil.rmtree(smac_res_path)
    except OSError as e:
        print("Error: %s - %s." % (e.filename, e.strerror))
    try:
        shutil.copytree(str(smac_source_dir), smac_res_path)
    except OSError as e:
        print("Error: %s - %s." % (e.filename, e.strerror))

    refit_dataset = api.dataset.create_refit_set()
    test_sets = api.dataset.generate_test_seqs()

    # NOTE(review): per the upstream discussion of this script, refit can raise
    # when a configuration that passed the search diverges on the refit set
    # (NaN distribution parameters); this is reported as fixed in
    # autoPyTorch 0.2.1. The error is left to propagate here because it is
    # unclear whether predicting after a failed refit is safe.
    api.refit(refit_dataset, 0)
    pred = api.predict(test_sets)

    final_forecasts = np.round(pred) if integer_conversion else pred

    # Every series needs its own training history for the MASE scale, so the
    # full list is passed rather than the last series left over from the loop.
    res = compute_loss(forecast_horizon, seasonality, pred, y_test, train_series_list)
    print(res)

    # write the forecasting results to a file
    forecast_file_path = res_dir_task / f"{dataset_name}_{budget_type}_results.txt"
    with open(forecast_file_path, "w", newline='') as output:
        writer = csv.writer(output, lineterminator='\n')
        writer.writerows(final_forecasts)

    # Write the actual results into a separate file, which is then used for error calculations
    temp_results_path = res_dir_task / f"{dataset_name}_ground_truth.txt"
    with open(str(temp_results_path), "w", newline='') as output_results:
        writer = csv.writer(output_results, lineterminator='\n')
        writer.writerows(test_series_list)
if __name__ == '__main__':
    # Command-line entry point: parse the run options and forward them to main().
    parser = argparse.ArgumentParser(description='APT_Task')
    parser.add_argument('--dataset_name', type=str, default="electricity_hourly", help='dataset name')
    parser.add_argument("--budget_type", default="epochs", type=str, help='budget type')
    parser.add_argument("--working_dir", default="/home/ubuntu/autopytorch/Auto-PyTorch/work_dirs", type=str,
                        help="directory where datasets and tmp files are stored")
    parser.add_argument('--validation', type=str, default="holdout", help='type of validation')
    # The default is given as an int so it matches the declared type.
    parser.add_argument('--seed', type=int, default=10, help='random seed')
    args = parser.parse_args()

    main(working_dir=args.working_dir,
         dataset_name=args.dataset_name,
         budget_type=args.budget_type,
         validation=args.validation,
         seed=args.seed)
Error message
Traceback (most recent call last):
File "/home/ubuntu/.pyenv/versions/3.8.9/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/ubuntu/.pyenv/versions/3.8.9/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2022.12.0/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/__main__.py", line 39, in <module>
cli.main()
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2022.12.0/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 430, in main
run()
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2022.12.0/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/../debugpy/server/cli.py", line 284, in run_file
runpy.run_path(target, run_name="__main__")
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2022.12.0/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 321, in run_path
return _run_module_code(code, init_globals, run_name,
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2022.12.0/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 135, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "/home/ubuntu/.vscode-server/extensions/ms-python.python-2022.12.0/pythonFiles/lib/python/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_runpy.py", line 124, in _run_code
exec(code, run_globals)
File "./examples/APT-TS/APT_task.py", line 346, in <module>
main(working_dir=working_dir, dataset_name=dataset_name, budget_type=budget_type, validation=validation, seed=seed)
File "./examples/APT-TS/APT_task.py", line 283, in main
api.refit(refit_dataset, 0)
File "/home/ubuntu/autopytorch/Auto-PyTorch2/autoPyTorch/api/base_task.py", line 1441, in refit
fit_and_suppress_warnings(self._logger, model, X, y=None)
File "/home/ubuntu/autopytorch/Auto-PyTorch2/autoPyTorch/evaluation/abstract_evaluator.py", line 338, in fit_and_suppress_warnings
pipeline.fit(X, y)
File "/home/ubuntu/autopytorch/Auto-PyTorch2/autoPyTorch/pipeline/base_pipeline.py", line 155, in fit
self.fit_estimator(X, y, **fit_params)
File "/home/ubuntu/autopytorch/Auto-PyTorch2/autoPyTorch/pipeline/base_pipeline.py", line 174, in fit_estimator
self._final_estimator.fit(X, y, **fit_params)
File "/home/ubuntu/autopytorch/Auto-PyTorch2/autoPyTorch/pipeline/components/training/trainer/__init__.py", line 211, in fit
self._fit(
File "/home/ubuntu/autopytorch/Auto-PyTorch2/autoPyTorch/pipeline/components/training/trainer/__init__.py", line 310, in _fit
train_loss, train_metrics = self.choice.train_epoch(
File "/home/ubuntu/autopytorch/Auto-PyTorch2/autoPyTorch/pipeline/components/training/trainer/forecasting_trainer/forecasting_base_trainer.py", line 106, in train_epoch
loss, outputs = self.train_step(data, targets)
File "/home/ubuntu/autopytorch/Auto-PyTorch2/autoPyTorch/pipeline/components/training/trainer/forecasting_trainer/forecasting_base_trainer.py", line 206, in train_step
outputs = self.model(past_targets=past_target,
File "/home/ubuntu/autopytorch/Auto-PyTorch2/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/ubuntu/autopytorch/Auto-PyTorch2/autoPyTorch/pipeline/components/setup/network/forecasting_architecture.py", line 602, in forward
output = self.head(decoder_output)
File "/home/ubuntu/autopytorch/Auto-PyTorch2/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/ubuntu/autopytorch/Auto-PyTorch2/autoPyTorch/pipeline/components/setup/network_head/forecasting_network_head/distribution.py", line 95, in forward
return self.dist_cls(*self.domain_map(*params_unbounded))
File "/home/ubuntu/autopytorch/Auto-PyTorch2/.venv/lib/python3.8/site-packages/torch/distributions/studentT.py", line 50, in __init__
self._chi2 = Chi2(self.df)
File "/home/ubuntu/autopytorch/Auto-PyTorch2/.venv/lib/python3.8/site-packages/torch/distributions/chi2.py", line 22, in __init__
super(Chi2, self).__init__(0.5 * df, 0.5, validate_args=validate_args)
File "/home/ubuntu/autopytorch/Auto-PyTorch2/.venv/lib/python3.8/site-packages/torch/distributions/gamma.py", line 52, in __init__
super(Gamma, self).__init__(batch_shape, validate_args=validate_args)
File "/home/ubuntu/autopytorch/Auto-PyTorch2/.venv/lib/python3.8/site-packages/torch/distributions/distribution.py", line 55, in __init__
raise ValueError(
ValueError: Expected parameter df (Tensor of shape (32, 168, 1)) of distribution Chi2() to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[nan],
[nan],
[nan],
...,
[nan],
[nan],
[nan]],
[[nan],
[nan],
[nan],
...,
[nan],
[nan],
[nan]],
[[nan],
[nan],
[nan],
...,
[nan],
[nan],
[nan]],
...,
[[nan],
[nan],
[nan],
...,
[nan],
[nan],
[nan]],
[[nan],
[nan],
[nan],
...,
[nan],
[nan],
[nan]],
[[nan],
[nan],
[nan],
...,
[nan],
[nan],
[nan]]], grad_fn=<MulBackward0>)
Your Local environment
- Operating System, version
- Ubuntu 20.04.4 LTS
- Python, version
- 3.8.9
- Outputs of
pip freeze
or conda list
absl-py==1.2.0
aiohttp==3.8.1
aiosignal==1.2.0
alembic==1.8.1
async-timeout==4.0.2
attrs==22.1.0
autopage==0.5.1
-e git+git@github.com:automl/Auto-PyTorch.git@c138dff1909464110f4d35dfb9c016e724a8c25a#egg=autoPyTorch
cachetools==5.2.0
catboost==1.0.6
certifi==2022.6.15
charset-normalizer==2.1.0
click==8.1.3
cliff==3.10.1
cloudpickle==2.1.0
cmaes==0.8.2
cmd2==2.4.2
colorlog==6.6.0
-e git+git@github.com:automl/ConfigSpace.git@e681dc9fa32cf113fe4a658bf0c36306f32376d2#egg=ConfigSpace
convertdate==2.4.0
cycler==0.11.0
Cython==0.29.32
dask==2022.8.0
Deprecated==1.2.13
distributed==2022.8.0
emcee==3.1.2
flaky==3.7.0
fonttools==4.34.4
frozenlist==1.3.1
fsspec==2022.7.1
gluonts==0.10.2
google-auth==2.10.0
google-auth-oauthlib==0.4.6
graphviz==0.20.1
greenlet==1.1.2
grpcio==1.47.0
HeapDict==1.0.1
hijri-converter==2.2.4
holidays==0.14.2
idna==3.3
imageio==2.21.0
imgaug==0.4.0
importlib-metadata==4.12.0
importlib-resources==5.9.0
Jinja2==3.1.2
joblib==1.1.0
kiwisolver==1.4.4
korean-lunar-calendar==0.2.1
lightgbm==3.3.2
llvmlite==0.39.0
locket==1.0.0
lockfile==0.12.2
Mako==1.2.1
Markdown==3.4.1
MarkupSafe==2.1.1
matplotlib==3.5.2
msgpack==1.0.4
multidict==6.0.2
networkx==2.8.5
numba==0.56.0
numpy==1.22.4
oauthlib==3.2.0
opencv-python==4.6.0.66
optuna==2.10.1
packaging==21.3
pandas==1.4.3
partd==1.2.0
patsy==0.5.2
pbr==5.9.0
Pillow==9.2.0
plotly==5.9.0
prettytable==3.3.0
protobuf==3.19.4
psutil==5.9.1
pyasn1==0.4.8
pyasn1-modules==0.2.8
pydantic==1.9.1
pyDeprecate==0.3.2
PyMeeus==0.5.11
pynisher==0.6.4
pyparsing==3.0.9
pyperclip==1.8.2
pyrfr==0.8.3
python-dateutil==2.8.2
pytorch-forecasting==0.10.2
pytorch-lightning==1.7.0
pytz==2022.1
PyWavelets==1.3.0
PyYAML==6.0
regex==2022.7.25
requests==2.28.1
requests-oauthlib==1.3.1
rsa==4.9
scikit-image==0.19.3
scikit-learn==0.24.2
scipy==1.8.1
Shapely==1.8.2
six==1.16.0
sktime==0.13.0
smac==1.4.0
sortedcontainers==2.4.0
SQLAlchemy==1.4.39
statsmodels==0.13.2
stevedore==4.0.0
tabulate==0.8.10
tblib==1.7.0
tenacity==8.0.1
tensorboard==2.9.1
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.1
threadpoolctl==3.1.0
tifffile==2022.8.3
toolz==0.12.0
torch==1.12.1
torchmetrics==0.9.3
torchvision==0.13.1
tornado==6.1
tqdm==4.64.0
typing-extensions==4.3.0
urllib3==1.26.11
wcwidth==0.2.5
Werkzeug==2.2.1
wrapt==1.14.1
yarl==1.8.1
zict==2.2.0
zipp==3.8.1
Make sure to add all the information needed to understand the bug so that someone can help.
If the info is missing, we'll add the 'Needs more information' label and close the issue until there is enough information.
Hi,
this error happens because one hyperparameter configuration passed the HPO process (only trained on the training set) but failed during the refit process (when the model is trained on the training+validation set). Currently, our optimizer does not consider catching the exception within refit. We will fix this ASAP and incorporate it into the next release.
Until then, you can simply remove the line that calls refit() (though this might slightly weaken the final performance).
ok, thanks for your reply.
Hi, we have fixed this issue in the latest release. You can install it using pip install autoPyTorch==0.2.1. I am closing this issue for now.