PaddlePaddle/PaddleTS

NHiTSModel 模型在 fit 时加入验证集后报错。

suntao2015005848 opened this issue · 1 comments

# Pipeline training

import pandas as pd

from paddlets.pipeline.pipeline import Pipeline
import numpy as np
import matplotlib.pyplot as plt
import datetime
import paddlets
from paddlets import TSDataset
from paddlets import TimeSeries
from paddlets.models.forecasting.dl import * #引入了全部预测模型
from paddlets.models.forecasting import * #引入了全部预测模型
from paddlets.transform import OneHot, StandardScaler,TimeFeatureGenerator ,KSigma,MinMaxScaler
from paddlets.metrics import MSE, MAE
import warnings
warnings.filterwarnings('ignore')
from paddlets.automl.autots import AutoTS
import os 
from paddlets.automl.autots import SearchSpaceConfiger
from ray.tune import uniform, qrandint, choice,quniform
from paddlets.transform import TimeFeatureGenerator

# Load the raw readings from CSV and keep only the timestamp and value columns.
df = pd.read_csv('/home/aistudio/ydl/fh_power_data.csv').filter(
    items=['monitorTime', 'presentValue']
)

# Build the initial dataset at a 5-minute frequency. 'monitorTime' is passed
# both as the time column and as a known covariate.
# NOTE(review): presumably the covariate entry is what keeps 'monitorTime'
# available as a regular column after to_dataframe() -- confirm.
# NOTE(review): fillna_method='pre' presumably fills from the previous value,
# which could leave leading gaps as NaN -- confirm, since the reported error
# is about NaN input.
target_cov_dataset = TSDataset.load_from_dataframe(
    df,
    freq='5min',
    time_col='monitorTime',
    target_cols='presentValue',
    known_cov_cols=['monitorTime'],
    fill_missing_dates=True,
    fillna_method='pre',
)

# Generate the 'is_workday', 'weekday' and 'hour' time features and attach
# them to the dataset.
time_feature_generator = TimeFeatureGenerator(
    feature_cols=['is_workday', 'weekday', 'hour']
)
target_cov_dataset = time_feature_generator.fit_transform(target_cov_dataset)



# Export the feature-enriched dataset to a DataFrame and cast the 'weekday'
# and 'hour' columns to float.
# NOTE(review): 'is_workday' is not cast here and keeps whatever dtype the
# feature generator produced -- confirm that is intended.
df_2 = target_cov_dataset.to_dataframe()
for feature_name in ('weekday', 'hour'):
    df_2[feature_name] = df_2[feature_name].astype(float)

# Rebuild the training dataset from the DataFrame, this time using the three
# generated time features as known covariates.
target_cov_dataset_1 = TSDataset.load_from_dataframe(
    df_2,
    freq='5min',
    time_col='monitorTime',
    target_cols='presentValue',
    known_cov_cols=['is_workday', 'weekday', 'hour'],
    fill_missing_dates=True,
    fillna_method='pre',
)


# Load the validation dataset from CSV with the same column layout and
# frequency settings as the training dataset.
# NOTE(review): the CSV is expected to already contain the 'is_workday',
# 'weekday' and 'hour' columns; verify it has no empty cells, since the
# reported error is about NaN input.
val_dataset = TSDataset.load_from_csv(
    filepath_or_buffer="/home/aistudio/ydl/val_test.csv",
    freq='5min',
    time_col='monitorTime',
    target_cols='presentValue',
    known_cov_cols=['is_workday', 'weekday', 'hour'],
    fill_missing_dates=True,
    fillna_method='pre',
)

# Pipeline with a single step: an NHiTSModel and its constructor arguments.
pipeline_list = [
    (
        NHiTSModel,
        dict(
            eval_metrics=["mse", "mae"],
            batch_size=256,
            max_epochs=10,
            patience=10,
            sampling_stride=12,
            in_chunk_len=288,   # length of the input sequence
            out_chunk_len=288,  # length of the output sequence
            verbose=1,          # logging verbosity level, set to 1 here
        ),
    )
]

pipeline = Pipeline(pipeline_list)

# Fit on the training dataset, then save the fitted pipeline to disk.
# NOTE(review): `val_dataset` is not passed to this fit() call, so no
# validation data is used here -- confirm whether that is intended.
pipeline.fit(target_cov_dataset_1)
pipeline.save(path="/home/aistudio/ydl/model_nhits/in288_out288_autots/")

报错:

/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
    719         if force_all_finite:
    720             _assert_all_finite(array,
--> 721                                allow_nan=force_all_finite == 'allow-nan')
    722 
    723     if ensure_min_samples > 0:

/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/sklearn/utils/validation.py in _assert_all_finite(X, allow_nan, msg_dtype)
    104                     msg_err.format
    105                     (type_err,
--> 106                      msg_dtype if msg_dtype is not None else X.dtype)
    107             )
    108     # for object dtype data, we only check for NaNs (GH-13254)

ValueError: Input contains NaN, infinity or a value too large for dtype('float32').

您好,报错信息提示输入数据中包含 NaN、无穷值或超出 float32 范围的值,请根据该提示检查数据(包括验证集)中是否存在空值。