'Parallel' object is not iterable
ggous opened this issue · 0 comments
ggous commented
Hello and thanks for this project! It seems very promising!
I am trying to train an XGBoost classifier.
My code is:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedKFold
from scipy.stats import uniform
from mango.domain.distribution import loguniform
from mango import Tuner
from joblib import Parallel, delayed
xgb_params = {
    'n_estimators': range(10, 200, 50),       # 10 to 200 in steps of 50
    'max_depth': range(1, 15),                # 1 to 14
    'reg_alpha': loguniform(-3, 6),           # 10^-3 to 10^3
    'booster': ['gbtree', 'gblinear'],
    'colsample_bylevel': uniform(0.05, 0.95), # 0.05 to 1.0
    'colsample_bytree': uniform(0.05, 0.95),  # 0.05 to 1.0
    'learning_rate': loguniform(-3, 3),       # 0.001 to 1
    'reg_lambda': loguniform(-3, 6),          # 10^-3 to 10^3
    'min_child_weight': loguniform(0, 2),     # 1 to 100
    'subsample': uniform(0.1, 0.89),          # 0.1 to 0.99
}
class MangoParallelOptimization:
    def __init__(self,
                 njobs,
                 configuration_params,
                 features_train,
                 target_train,
                 features_val,
                 target_val):
        self.njobs = njobs
        self.conf_dict = configuration_params
        self.x_train = features_train
        self.y_train = target_train
        self.x_val = features_val
        self.y_val = target_val
        self.space = xgb_params

    def _objective(self, **model_params):
        kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=123)
        results = []
        for hyper_param in model_params:
            model = xgb.XGBClassifier(**hyper_param)
            result = cross_val_score(model,
                                     self.x_train,
                                     self.y_train,
                                     scoring='accuracy',
                                     cv=kfold).mean()
            results.append(result)
        return results

    def _objective2(self, params_batch):
        global parameters
        results_batch = Parallel(self.njobs,
                                 backend='multiprocessing')
        (delayed(self._objective)(**params) for params in params_batch)
        acc = [result for result in results_batch]
        return acc

    def mango_optimization(self):
        tuner = Tuner(self.space, self._objective2, self.conf_dict)
        optimization_results = tuner.maximize()
        return optimization_results['best_params'], optimization_results['best_objective']
if __name__ == "__main__":
    df = pd.read_csv('/home/ggous/example.csv')
    df.dropna(axis=1, inplace=True)

    features = df.drop(['id', 'CLASS'], axis=1)
    labels = df['CLASS'].values

    # Split the data into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(
        features,
        labels,
        stratify=labels,
        test_size=0.2,
        random_state=123)

    # encode string class values as integers
    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(y_train)
    y_test = label_encoder.transform(y_test)

    scaler = StandardScaler()
    x_train_sc = pd.DataFrame(scaler.fit_transform(x_train),
                              index=x_train.index,
                              columns=x_train.columns)
    x_test_sc = scaler.transform(x_test)

    # Parallel optimization with Mango
    config_params = {'num_iteration': 40, 'initial_random': 10}
    optim = MangoParallelOptimization(njobs=4,
                                      configuration_params=config_params,
                                      features_train=x_train,
                                      target_train=y_train,
                                      features_val=x_test,
                                      target_val=y_test)
    best_parameters, best_objective = optim.mango_optimization()

    # Results
    print('best parameters:', best_parameters)
    print('best accuracy:', best_objective)

    # Train the model with the best hyper-parameters
    best_model = xgb.XGBClassifier(n_jobs=-1, **best_parameters)
    best_model.fit(x_train, y_train)
The file I am using is here.
I have some questions:
- First of all, running the code gives:

  'Parallel' object is not iterable
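  I suspect what happens is that the line break in _objective2 splits the call into two statements: results_batch is assigned the Parallel object itself, and the generator of delayed calls on the next line is never passed to it, so iterating results_batch fails. A minimal sketch of the invocation I believe joblib expects, where the Parallel object is called with the generator in the same expression:

  def _objective2(self, params_batch):
      # Call the Parallel object with the generator of delayed jobs in one
      # expression; the call itself returns the list of per-parameter results.
      results_batch = Parallel(n_jobs=self.njobs, backend='multiprocessing')(
          delayed(self._objective)(**params) for params in params_batch
      )
      return list(results_batch)

  Is that the right way to structure it?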
- If I want to pass the following arguments to the XGB classifier:

  'use_label_encoder': False,
  'eval_metric': 'mlogloss',
  'seed': 123,
  'enable_categorical': False

  can I do this?

  for hyper_param in model_params:
      model = xgb.XGBClassifier(**hyper_param,
                                use_label_encoder=False,
                                eval_metric='mlogloss',
                                seed=123,
                                enable_categorical=False)
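  Or would it be cleaner to collect the fixed settings once and merge them in? A rough sketch of what I mean (fixed_params is just a name I made up here):

  fixed_params = {
      'use_label_encoder': False,
      'eval_metric': 'mlogloss',
      'seed': 123,
      'enable_categorical': False,
  }

  def _objective(self, **model_params):
      # **-unpacking both dicts passes the sampled hyper-parameters and the
      # fixed settings together as one set of keyword arguments.
      model = xgb.XGBClassifier(**model_params, **fixed_params)
      ...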
- If I want to do the k-fold manually, like this:

  def _objective(self, **model_params):
      kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=123)
      for i, (train_idx, val_idx) in enumerate(kfold.split(x_train, y_train)):
          x_train_, y_train_ = x_train[train_idx, :], y_train[train_idx]
          x_val_, y_val_ = x_train[val_idx, :], y_train[val_idx]
          model = xgb.XGBClassifier(**model_params)
          history = model.fit(x_train_,
                              y_train_,
                              early_stopping_rounds=10,
                              eval_set=[(x_train_, y_train_), (x_val_, y_val_)])
          ....
  How can I do that, and how can I use the history object inside every fold iteration in order to plot things? And finally, how do I return the result that Mango wants? What form should that result take?
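  For reference, a rough sketch of what I have in mind, assuming (from the way _objective2 is written) that each _objective call scores one parameter set and returns a single scalar, so that _objective2 returns a list of scalars, one per parameter set. The verbose=False flag and the use of evals_result() for the plotting data are my guesses, and depending on the xgboost version early_stopping_rounds may need to move into the constructor:

  def _objective(self, **model_params):
      kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=123)
      fold_scores = []
      self.histories = []  # per-fold evaluation curves, kept for plotting later
      for train_idx, val_idx in kfold.split(self.x_train, self.y_train):
          x_tr, y_tr = self.x_train.iloc[train_idx], self.y_train[train_idx]
          x_va, y_va = self.x_train.iloc[val_idx], self.y_train[val_idx]
          model = xgb.XGBClassifier(**model_params)
          model.fit(x_tr, y_tr,
                    early_stopping_rounds=10,
                    eval_set=[(x_tr, y_tr), (x_va, y_va)],
                    verbose=False)
          # evals_result() holds the per-iteration metrics for both eval sets
          self.histories.append(model.evals_result())
          fold_scores.append(model.score(x_va, y_va))  # fold accuracy
      # one scalar per parameter set is what the batch objective collects
      return float(np.mean(fold_scores))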