Problem to my code identify the search space from gridsearch
Opened this issue · 0 comments
I have this code bellow as a backend of my jupyter notebook when I called the model:
imports
import numpy as np
import pandas as pd
from scipy import stats
Sklearn
from sklearn.metrics import r2_score
from ML.ml_utils import *
from sklearn.model_selection import train_test_split
FNN
import tensorflow as tf
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import EarlyStopping
from keras_tuner import HyperParameters
from keras_tuner import Objective
from keras_tuner.tuners import GridSearch, RandomSearch
from your_module import create_model
class FeedForwardNN(tf.keras.Model):
def init(self, input_dim=None, random_seed=42):
super(FeedForwardNN, self).init()
self.seed = random_seed
input_dim = 2048
self.input_dim = input_dim
self.model = self.build_model()
def build_model(self, hp=None):
if hp is None:
hp = kt.HyperParameters()
tf.random.set_seed(self.seed)
# Hyperparameters
optimizer_c = hp.Choice("optimizer", ['SGD', 'Adam'])
learning_rate = hp.Float("learning_rate", min_value=0.00001, max_value=0.1, step=10, sampling='log')
l2_reg = hp.Float("l2_reg", min_value=0.0001, max_value=0.1, step=10, sampling='log')
dropout_rate = hp.Float("dropout_rate", min_value=0, max_value=0.5, step=0.1)
output_dim = hp.Fixed("output_dim", value=1)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Input(shape=(self.input_dim,)))
model.add(tf.keras.layers.Dense(units=hp.Int("units_1", min_value=32, max_value=256, step=32),
activation='relu',
kernel_regularizer=tf.keras.regularizers.l2(l2_reg)))
for i in range(hp.Int("num_layers", 1, 3)):
model.add(tf.keras.layers.Dense(units=hp.Int(f"units_{i}", min_value=32, max_value=256, step=32),
activation='relu',
kernel_regularizer=tf.keras.regularizers.l2(l2_reg)))
if optimizer_c == "SGD":
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
elif optimizer_c == "Adam":
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
else:
raise ValueError("Unsupported optimizer")
model.add(tf.keras.layers.Dropout(dropout_rate))
model.add(tf.keras.layers.Dense(output_dim, activation='linear'))
model.compile(loss=tf.keras.losses.mean_squared_error,
optimizer=optimizer,
metrics=['mean_absolute_error'])
return model
def call(self, inputs, training=False):
# Define the forward pass of the model
return self.model(inputs, training=training)
def fit(self, X, y, *args, **kwargs):
return self.model.fit(X, y, *args, shuffle=True, **kwargs)
class MLModel:
def init(self, data, ml_algorithm, reg_class="regression", cv_fold=10, random_seed=42):
self.data = data
self.ml_algorithm = ml_algorithm
self.reg_class = reg_class
self.cv_fold = cv_fold
self.seed = random_seed
self.features_train, self.features_val, self.labels_train, self.labels_val = self.split_data()
self.best_params, self.best_model = self.train_with_hyperparameters()
self.model = self.train_with_hyperparameters_and_final_model()
def split_data(self):
features_train, features_val, labels_train, labels_val = train_test_split(
self.data.features, self.data.labels, test_size=0.2, random_state=self.seed)
return features_train, features_val, labels_train, labels_val
def train_with_hyperparameters(self):
if self.ml_algorithm == 'FNN':
tuner = GridSearch(FeedForwardNN,
objective='val_mean_absolute_error',
max_trials=self.cv_fold,
seed=self.seed,
max_retries_per_trial=2,)
tuner.search_space_summary()
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
tuner.search(self.features_train,
self.labels_train,
epochs=100,
validation_split=0.2,
callbacks=[stop_early])
best_h_params = tuner.get_best_hyperparameters(num_trials=1)[0]
best_model = tuner.get_best_models()[0]
return best_h_params, best_model
def final_model(self):
if self.ml_algorithm == "FNN":
best_h_params = self.train_with_hyperparameters()
# Create a new instance of FeedForwardNN with the best hyperparameters
final_model = FeedForwardNN(**best_h_params)
final_model.build((None, self.features_train.shape[1]))
early_stopping = EarlyStopping(monitor="val_loss",
patience=10,
restore_best_weights=True)
final_model.fit(self.features_train,
self.labels_train,
epochs=100,
validation_data=(self.features_val, self.labels_val),
callbacks=[early_stopping])
return final_model
else:
raise ValueError('Optimal parameter error')
def train_with_hyperparameters_and_final_model(self):
self.best_params, self.best_model = self.train_with_hyperparameters()
final_model = self.final_model()
return final_model
class Model_Evaluation:
def init(self, model, data, model_id=None, model_loaded=None, reg_class="regression"):
self.reg_class = reg_class
self.model_id = model_id
self.model = model
self.data = data
self.model_loaded = model_loaded
self.labels, self.y_pred, self.predictions = self.model_predict(data)
self.pred_performance = self.prediction_performance(data)
def model_predict(self, data):
if self.reg_class == "regression":
if self.model_id == "FNN":
data_features = data.features
else:
'Prediction error'
if self.model_loaded is not None:
y_prediction = self.model.predict(data_features)
else:
y_prediction = self.model.model.predict(data_features)
labels = self.data.labels
predictions = pd.DataFrame(list(zip(data.cid, labels, y_prediction)),
columns=["Cid", "Experimental", "Predicted"])
predictions['Target ID'] = data.target[0]
predictions['Algorithm'] = self.model_id
predictions['Residuals'] = [label_i - prediction_i for label_i, prediction_i in zip(labels, y_prediction)]
return labels, y_prediction, predictions
def prediction_performance(self, data, nantozero=False) -> pd.DataFrame:
if self.reg_class == "regression":
labels = self.labels
pred = self.y_pred
fill = 0 if nantozero else np.nan
if len(pred) == 0:
mae = fill
mse = fill
rmse = fill
r2 = fill
r = fill
else:
mae = tf.keras.metrics.mean_absolute_error(labels, pred).numpy().tolist()
mse = tf.keras.metrics.mean_squared_error(labels, pred).numpy().tolist()
rmse = np.sqrt(mse)
target = data.target[0]
model_name = self.model_id
#Calculate r and r2
self.labels1 = self.labels.reshape(-1, 1)
self.y_pred1 = self.y_pred.reshape(-1, 1)
correlation_matrix = np.corrcoef(self.labels1, self.y_pred1, rowvar=False)
correlation_xy = correlation_matrix[0,1]
r = correlation_xy**2
r2 = r2_score(self.labels, self.y_pred)
result_list = [{"MAE": mae,
"MSE": mse,
"RMSE": rmse,
"R2": r2,
"r": r,
"Dataset size": len(labels),
"Target ID": target,
"Algorithm": model_name}
]
# Prepare result dataset
results = pd.DataFrame(result_list)
results.set_index(["Target ID", "Algorithm", "Dataset size"], inplace=True)
results.columns = pd.MultiIndex.from_product([["Value"], ["MAE", "MSE", "RMSE", "R2", "r"]],
names=["Value", "Metric"])
results = results.stack().reset_index().set_index("Target ID")
return results
I don't know why, but the code run without the hyperparameter search and in search_space_sumary() appers only this:
Search space summary
Default search space size: 0
Anyone can help me how to correct my code? I think is something related to the hp=kt.HyperParameters, but I tried all ways possible for me and didn't have any difference.