Using multiple RL environments with Optuna
Anth0rx opened this issue · 19 comments
How do I use Optuna with multiple OpenAI Gym environments in Stable Baselines 3?
Stable Baselines 3 suggests using a SubProcVec
for running an agent through multiple environments.
When I try to wrap the environment in a SubProcVec
while using your example it results in a BrokenPipe error. Do you have some further hints or examples?
Could you share the minimal code?
Sure, so I used the following file based on your example:
""" Optuna example that optimizes the hyperparameters of
a reinforcement learning agent using A2C implementation from Stable-Baselines3
on an OpenAI Gym environment.
This is a simplified version of what can be found in https://github.com/DLR-RM/rl-baselines3-zoo.
You can run this example as follows:
$ python sb3_simple.py
"""
from typing import Callable
from typing import Any
from typing import Dict
import gym
import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler
from stable_baselines3 import A2C
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import SubprocVecEnv
import torch
import torch.nn as nn
THREADS = 2
N_TRIALS = 100
N_STARTUP_TRIALS = 5
N_EVALUATIONS = 2
N_TIMESTEPS = int(2e4)
EVAL_FREQ = int(N_TIMESTEPS / N_EVALUATIONS)
N_EVAL_EPISODES = 3
def make_env(monitor=False) -> Callable:
"""
Utility function for multiprocessed env.
:return: (Callable)
"""
def _init() -> gym.Env:
env = gym.make("CartPole-v1")
if monitor:
return Monitor(env)
else:
return env
return _init
def sample_a2c_params(trial: optuna.Trial) -> Dict[str, Any]:
"""Sampler for A2C hyperparameters."""
gamma = 1.0 - trial.suggest_float("gamma", 0.0001, 0.1, log=True)
max_grad_norm = trial.suggest_float("max_grad_norm", 0.3, 5.0, log=True)
gae_lambda = 1.0 - trial.suggest_float("gae_lambda", 0.001, 0.2, log=True)
n_steps = 2 ** trial.suggest_int("exponent_n_steps", 3, 10)
learning_rate = trial.suggest_float("lr", 1e-5, 1, log=True)
ent_coef = trial.suggest_float("ent_coef", 0.00000001, 0.1, log=True)
ortho_init = trial.suggest_categorical("ortho_init", [False, True])
net_arch = trial.suggest_categorical("net_arch", ["tiny", "small"])
activation_fn = trial.suggest_categorical("activation_fn", ["tanh", "relu"])
# Display true values.
trial.set_user_attr("gamma_", gamma)
trial.set_user_attr("gae_lambda_", gae_lambda)
trial.set_user_attr("n_steps", n_steps)
net_arch = [
{"pi": [64], "vf": [64]} if net_arch == "tiny" else {"pi": [64, 64], "vf": [64, 64]}
]
activation_fn = {"tanh": nn.Tanh, "relu": nn.ReLU}[activation_fn]
return {
"n_steps": n_steps,
"gamma": gamma,
"gae_lambda": gae_lambda,
"learning_rate": learning_rate,
"ent_coef": ent_coef,
"max_grad_norm": max_grad_norm,
"policy_kwargs": {
"net_arch": net_arch,
"activation_fn": activation_fn,
"ortho_init": ortho_init,
},
}
class TrialEvalCallback(EvalCallback):
"""Callback used for evaluating and reporting a trial."""
def __init__(
self,
eval_env: SubprocVecEnv,
trial: optuna.Trial,
n_eval_episodes: int = 5,
eval_freq: int = 10000,
deterministic: bool = True,
verbose: int = 0,
):
super().__init__(
eval_env=eval_env,
n_eval_episodes=n_eval_episodes,
eval_freq=eval_freq,
deterministic=deterministic,
verbose=verbose,
)
self.trial = trial
self.eval_idx = 0
self.is_pruned = False
def _on_step(self) -> bool:
if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
super()._on_step()
self.eval_idx += 1
self.trial.report(self.last_mean_reward, self.eval_idx)
# Prune trial if need.
if self.trial.should_prune():
self.is_pruned = True
return False
return True
def objective(trial: optuna.Trial) -> float:
kwargs = DEFAULT_HYPERPARAMS.copy()
# Sample hyperparameters.
kwargs.update(sample_a2c_params(trial))
# Create the RL model.
model = A2C(**kwargs)
# Create env used for evaluation.
eval_env = SubprocVecEnv([make_env(monitor=True) for i in range(THREADS)], start_method='spawn')
# Create the callback that will periodically evaluate and report the performance.
eval_callback = TrialEvalCallback(
eval_env, trial, n_eval_episodes=N_EVAL_EPISODES, eval_freq=EVAL_FREQ, deterministic=True
)
nan_encountered = False
try:
model.learn(N_TIMESTEPS, callback=eval_callback)
except AssertionError as e:
# Sometimes, random hyperparams can generate NaN.
print(e)
nan_encountered = True
finally:
# Free memory.
model.env.close()
eval_env.close()
# Tell the optimizer that the trial failed.
if nan_encountered:
return float("nan")
if eval_callback.is_pruned:
raise optuna.exceptions.TrialPruned()
return eval_callback.last_mean_reward
if __name__ == "__main__":
env = SubprocVecEnv([make_env(monitor=False) for i in range(THREADS)], start_method='spawn')
DEFAULT_HYPERPARAMS = {
"policy": "MlpPolicy",
"env": env,
}
# Set pytorch num threads to 1 for faster training.
torch.set_num_threads(1)
sampler = TPESampler(n_startup_trials=N_STARTUP_TRIALS)
# Do not prune before 1/3 of the max budget is used.
pruner = MedianPruner(n_startup_trials=N_STARTUP_TRIALS, n_warmup_steps=N_EVALUATIONS // 3)
study = optuna.create_study(sampler=sampler, pruner=pruner, direction="maximize")
try:
study.optimize(objective, n_trials=N_TRIALS, timeout=600)
except KeyboardInterrupt:
pass
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print(" Value: ", trial.value)
print(" Params: ")
for key, value in trial.params.items():
print(" {}: {}".format(key, value))
print(" User attrs:")
for key, value in trial.user_attrs.items():
print(" {}: {}".format(key, value))
The changes I made were:
- move the environment definition below
if __name__ == "__main__":
(This is needed for SubprocVecEnv):
env = SubprocVecEnv([make_env(monitor=False) for i in range(THREADS)], start_method='spawn')
DEFAULT_HYPERPARAMS = {
"policy": "MlpPolicy",
"env": env,
}
- add
make_env()
function:
def make_env(monitor=False) -> Callable:
"""
Utility function for multiprocessed env.
:return: (Callable)
"""
def _init() -> gym.Env:
env = gym.make("CartPole-v1")
if monitor:
return Monitor(env)
else:
return env
return _init
- Create
eval_env
withSubprocVecEnv
:
eval_env = SubprocVecEnv([make_env(monitor=True) for i in range(THREADS)], start_method='spawn')
This yields the following error:
Traceback (most recent call last):
File "/tmp/sb3_simple.py", line 175, in <module>
study.optimize(objective, n_trials=N_TRIALS, timeout=600)
File "/tmp/venv/lib64/python3.10/site-packages/optuna/study/study.py", line 425, in optimize
_optimize(
File "/tmp/venv/lib64/python3.10/site-packages/optuna/study/_optimize.py", line 66, in _optimize
_optimize_sequential(
File "/tmp/venv/lib64/python3.10/site-packages/optuna/study/_optimize.py", line 163, in _optimize_sequential
frozen_trial = _run_trial(study, func, catch)
File "/tmp/venv/lib64/python3.10/site-packages/optuna/study/_optimize.py", line 251, in _run_trial
raise func_err
File "/tmp/venv/lib64/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
value_or_values = func(trial)
File "/tmp/sb3_simple.py", line 138, in objective
model.learn(N_TIMESTEPS, callback=eval_callback)
File "/tmp/venv/lib64/python3.10/site-packages/stable_baselines3/a2c/a2c.py", line 193, in learn
return super().learn(
File "/tmp/venv/lib64/python3.10/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 236, in learn
total_timesteps, callback = self._setup_learn(
File "/tmp/venv/lib64/python3.10/site-packages/stable_baselines3/common/base_class.py", line 408, in _setup_learn
self._last_obs = self.env.reset() # pytype: disable=annotation-type-mismatch
File "/tmp/venv/lib64/python3.10/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 135, in reset
remote.send(("reset", None))
File "/usr/lib64/python3.10/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/usr/lib64/python3.10/multiprocessing/connection.py", line 411, in _send_bytes
self._send(header + buf)
File "/usr/lib64/python3.10/multiprocessing/connection.py", line 368, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
Hello,
why are you using start_method='spawn'
?
Does your env work without optuna with subprocess?
Usually I use start_method='forkserver'
. I just wanted to try spawn
as well and posted that version.
Without optuna I would implement it like that:
import gym
from typing import Callable
from stable_baselines3 import A2C
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import SubprocVecEnv
def make_env(monitor=False) -> Callable:
"""
Utility function for multiprocessed env.
:return: (Callable)
"""
def _init() -> gym.Env:
env = gym.make("CartPole-v1")
if monitor:
return Monitor(env)
else:
return env
return _init
if __name__=="__main__":
env = SubprocVecEnv([make_env(monitor=True) for i in range(4)], start_method='forkserver')
model = A2C("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=250_000)
When I try to execute it through a separate script with subprocess
I receive the following error:
Traceback (most recent call last):
File "/tmp/run_subprocess.py", line 1, in <module>
import subprocess
File "/tmp/subprocess.py", line 5, in <module>
from stable_baselines3 import A2C
File "/tmp/venv/lib64/python3.10/site-packages/stable_baselines3/__init__.py", line 5, in <module>
from stable_baselines3.a2c import A2C
File "/tmp/venv/lib64/python3.10/site-packages/stable_baselines3/a2c/__init__.py", line 1, in <module>
from stable_baselines3.a2c.a2c import A2C
File "/tmp/venv/lib64/python3.10/site-packages/stable_baselines3/a2c/a2c.py", line 3, in <module>
import torch as th
File "/tmp/venv/lib64/python3.10/site-packages/torch/__init__.py", line 877, in <module>
from torch import multiprocessing as multiprocessing
File "/tmp/venv/lib64/python3.10/site-packages/torch/multiprocessing/__init__.py", line 18, in <module>
from .reductions import init_reductions
File "/tmp/venv/lib64/python3.10/site-packages/torch/multiprocessing/reductions.py", line 7, in <module>
from multiprocessing.util import register_after_fork
File "/usr/lib64/python3.10/multiprocessing/util.py", line 17, in <module>
from subprocess import _args_from_interpreter_flags
ImportError: cannot import name '_args_from_interpreter_flags' from partially initialized module 'subprocess' (most likely due to a circular import) (/tmp/subprocess.py)
My run_subprocess.py
file looks like:
import subprocess
if __name__=="__main__":
subprocess.run(["python3", "subprocess.py"])
My run_subprocess.py file looks like:
why are you doing that? and not directly calling the first script?
It looks like the problem comes from pytorch.
I couldn't reproduce the error, I'm using python 3.7/3.9 and pytorch 1.12.1 on linux.
why are you doing that? and not directly calling the first script?
Maybe I misunderstood you. In your first answer you wrote "Does your env work without optuna with subprocess?". I thought you meant calling the script with subprocess.run()
.
I couldn't reproduce the error, I'm using python 3.7/3.9 and pytorch 1.12.1 on linux.
I am running Python 3.10.9 and PyTorch 1.13.1 on Linux
Maybe I misunderstood you. In your first answer you wrote "Does your env work without optuna with subprocess?". I thought you meant calling the script with subprocess.run().
I meant checking that this code runs (SubprocVecEnv is already creating subprocesses):
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import SubprocVecEnv
if __name__ == "__main__":
env = make_vec_env("CartPole-v1", n_envs=4, vec_env_cls=SubprocVecEnv)
model = A2C("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=1000, progress_bar=True)
With the code you provided I receive a similar error:
Traceback (most recent call last):
File "/tmp/a2c.py", line 1, in <module>
from stable_baselines3 import A2C
File "/tmp/venv-3.10/lib64/python3.10/site-packages/stable_baselines3/__init__.py", line 5, in <module>
from stable_baselines3.a2c import A2C
File "/tmp/venv-3.10/lib64/python3.10/site-packages/stable_baselines3/a2c/__init__.py", line 1, in <module>
from stable_baselines3.a2c.a2c import A2C
File "/tmp/venv-3.10/lib64/python3.10/site-packages/stable_baselines3/a2c/a2c.py", line 3, in <module>
import torch as th
File "/tmp/venv-3.10/lib64/python3.10/site-packages/torch/__init__.py", line 14, in <module>
import platform
File "/usr/lib64/python3.10/platform.py", line 119, in <module>
import subprocess
File "/tmp/a2c.py", line 4, in <module>
from stable_baselines3 import A2C
ImportError: cannot import name 'A2C' from partially initialized module 'stable_baselines3' (most likely due to a circular import) (/tmp/venv-3.10/lib64/python3.10/site-packages/stable_baselines3/__init__.py)
The same error occurs with PyTorch 1.12.1
The same error occurs with PyTorch 1.12.1
The issue is not from optuna then but from your setup.
could you try with the RL Zoo? ("python -m rl_zoo3.train --algo a2c --env CartPole-v1
from any folder)
RL Zoo: https://github.com/DLR-RM/rl-baselines3-zoo
Could you try with another python version? or in google colab?
Be also careful with the name of your scripts/folders (https://stackoverflow.com/questions/64807163/importerror-cannot-import-name-from-partially-initialized-module-m).
could you try with the RL Zoo? ("python -m rl_zoo3.train --algo a2c --env CartPole-v1 from any folder)
This works without problems for different Python versions (3.7.16, 3.8.16, 3.9.16, 3.10.9, 3.11.0).
I also tested the originally proposed optuna A2C example with SubprocVecEnv
with different Python versions (same as above). With every version I receive the same error:
Traceback (most recent call last):
File "sb3_simple.py", line 174, in <module>
study.optimize(objective, n_trials=N_TRIALS, timeout=600)
File "/tmp/venv-3.7/lib64/python3.7/site-packages/optuna/study/study.py", line 434, in optimize
show_progress_bar=show_progress_bar,
File "/tmp/venv-3.7/lib64/python3.7/site-packages/optuna/study/_optimize.py", line 76, in _optimize
progress_bar=progress_bar,
File "/tmp/venv-3.7/lib64/python3.7/site-packages/optuna/study/_optimize.py", line 163, in _optimize_sequential
frozen_trial = _run_trial(study, func, catch)
File "/tmp/venv-3.7/lib64/python3.7/site-packages/optuna/study/_optimize.py", line 251, in _run_trial
raise func_err
File "/tmp/venv-3.7/lib64/python3.7/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
value_or_values = func(trial)
File "sb3_simple.py", line 137, in objective
model.learn(N_TIMESTEPS, callback=eval_callback)
File "/tmp/venv-3.7/lib64/python3.7/site-packages/stable_baselines3/a2c/a2c.py", line 199, in learn
progress_bar=progress_bar,
File "/tmp/venv-3.7/lib64/python3.7/site-packages/stable_baselines3/common/on_policy_algorithm.py", line 241, in learn
progress_bar,
File "/tmp/venv-3.7/lib64/python3.7/site-packages/stable_baselines3/common/base_class.py", line 408, in _setup_learn
self._last_obs = self.env.reset() # pytype: disable=annotation-type-mismatch
File "/tmp/venv-3.7/lib64/python3.7/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 135, in reset
remote.send(("reset", None))
File "/usr/lib64/python3.7/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/usr/lib64/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
self._send(header + buf)
File "/usr/lib64/python3.7/multiprocessing/connection.py", line 368, in _send
n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
This works without problems for different Python versions (3.7.16, 3.8.16, 3.9.16, 3.10.9, 3.11.0).
sorry, I meant python -m rl_zoo3.train --algo a2c --env CartPole-v1 --vec-env subproc
and with optuna: python -m rl_zoo3.train --algo a2c --env CartPole-v1 -optimize --vec-env subproc -n 5000
If the zoo works fine for you then there is something weird with your setup.
Do you have a GPU? if so, could you try running script without is using CUDA_VISIBLE_DEVICES=
?
sorry, I meant
python -m rl_zoo3.train --algo a2c --env CartPole-v1 --vec-env subproc
and with optuna:python -m rl_zoo3.train --algo a2c --env CartPole-v1 -optimize --vec-env subproc -n 5000
Okay, that works.
Unfortunately the original script (with start_method="forkserver"
as well) still throws the BrokenPipe error (for all the Python versions above).
I also tried it with the make_vec_env()
function of stable_baselines3. Here is my final script:
from typing import Any
from typing import Dict
import gym
import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler
from stable_baselines3 import A2C
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import SubprocVecEnv
import torch
import torch.nn as nn
N_TRIALS = 100
N_STARTUP_TRIALS = 5
N_EVALUATIONS = 2
N_TIMESTEPS = int(2e4)
EVAL_FREQ = int(N_TIMESTEPS / N_EVALUATIONS)
N_EVAL_EPISODES = 3
def sample_a2c_params(trial: optuna.Trial) -> Dict[str, Any]:
"""Sampler for A2C hyperparameters."""
gamma = 1.0 - trial.suggest_float("gamma", 0.0001, 0.1, log=True)
max_grad_norm = trial.suggest_float("max_grad_norm", 0.3, 5.0, log=True)
gae_lambda = 1.0 - trial.suggest_float("gae_lambda", 0.001, 0.2, log=True)
n_steps = 2 ** trial.suggest_int("exponent_n_steps", 3, 10)
learning_rate = trial.suggest_float("lr", 1e-5, 1, log=True)
ent_coef = trial.suggest_float("ent_coef", 0.00000001, 0.1, log=True)
ortho_init = trial.suggest_categorical("ortho_init", [False, True])
net_arch = trial.suggest_categorical("net_arch", ["tiny", "small"])
activation_fn = trial.suggest_categorical("activation_fn", ["tanh", "relu"])
# Display true values.
trial.set_user_attr("gamma_", gamma)
trial.set_user_attr("gae_lambda_", gae_lambda)
trial.set_user_attr("n_steps", n_steps)
net_arch = [
{"pi": [64], "vf": [64]} if net_arch == "tiny" else {"pi": [64, 64], "vf": [64, 64]}
]
activation_fn = {"tanh": nn.Tanh, "relu": nn.ReLU}[activation_fn]
return {
"n_steps": n_steps,
"gamma": gamma,
"gae_lambda": gae_lambda,
"learning_rate": learning_rate,
"ent_coef": ent_coef,
"max_grad_norm": max_grad_norm,
"policy_kwargs": {
"net_arch": net_arch,
"activation_fn": activation_fn,
"ortho_init": ortho_init,
},
}
class TrialEvalCallback(EvalCallback):
"""Callback used for evaluating and reporting a trial."""
def __init__(
self,
eval_env: SubprocVecEnv,
trial: optuna.Trial,
n_eval_episodes: int = 5,
eval_freq: int = 10000,
deterministic: bool = True,
verbose: int = 0,
):
super().__init__(
eval_env=eval_env,
n_eval_episodes=n_eval_episodes,
eval_freq=eval_freq,
deterministic=deterministic,
verbose=verbose,
)
self.trial = trial
self.eval_idx = 0
self.is_pruned = False
def _on_step(self) -> bool:
if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
super()._on_step()
self.eval_idx += 1
self.trial.report(self.last_mean_reward, self.eval_idx)
# Prune trial if need.
if self.trial.should_prune():
self.is_pruned = True
return False
return True
def objective(trial: optuna.Trial) -> float:
kwargs = DEFAULT_HYPERPARAMS.copy()
# Sample hyperparameters.
kwargs.update(sample_a2c_params(trial))
# Create the RL model.
model = A2C(**kwargs)
# Create env used for evaluation.
eval_env = make_vec_env("CartPole-v1", n_envs=8, vec_env_cls=SubprocVecEnv)
# Create the callback that will periodically evaluate and report the performance.
eval_callback = TrialEvalCallback(
eval_env, trial, n_eval_episodes=N_EVAL_EPISODES, eval_freq=EVAL_FREQ, deterministic=True
)
nan_encountered = False
try:
model.learn(N_TIMESTEPS, callback=eval_callback)
except AssertionError as e:
# Sometimes, random hyperparams can generate NaN.
print(e)
nan_encountered = True
finally:
# Free memory.
model.env.close()
eval_env.close()
# Tell the optimizer that the trial failed.
if nan_encountered:
return float("nan")
if eval_callback.is_pruned:
raise optuna.exceptions.TrialPruned()
return eval_callback.last_mean_reward
if __name__ == "__main__":
env = make_vec_env("CartPole-v1", n_envs=8, vec_env_cls=SubprocVecEnv)
DEFAULT_HYPERPARAMS = {
"policy": "MlpPolicy",
"env": env,
"device": "cpu"
}
# Set pytorch num threads to 1 for faster training.
torch.set_num_threads(1)
sampler = TPESampler(n_startup_trials=N_STARTUP_TRIALS)
# Do not prune before 1/3 of the max budget is used.
pruner = MedianPruner(n_startup_trials=N_STARTUP_TRIALS, n_warmup_steps=N_EVALUATIONS // 3)
study = optuna.create_study(sampler=sampler, pruner=pruner, direction="maximize")
try:
study.optimize(objective, n_trials=N_TRIALS, timeout=600)
except KeyboardInterrupt:
pass
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print(" Value: ", trial.value)
print(" Params: ")
for key, value in trial.params.items():
print(" {}: {}".format(key, value))
print(" User attrs:")
for key, value in trial.user_attrs.items():
print(" {}: {}".format(key, value))
The relevant changes are:
- Evaluation environment in the
objective()
function:
eval_env = make_vec_env("CartPole-v1", n_envs=8, vec_env_cls=SubprocVecEnv)
- Training environment in the main routine:
env = make_vec_env("CartPole-v1", n_envs=8, vec_env_cls=SubprocVecEnv)
DEFAULT_HYPERPARAMS = {
[...]
"device": "cpu"
}
Okay, that works.
Good to hear =)
Then you can start from the zoo code: https://github.com/DLR-RM/rl-baselines3-zoo
It's weird as the RL Zoo is using the exact same thing as what I wrote.
The folder/setup where you run the script might be the problem.
I also tried it with the make_vec_env() function of stable_baselines3. Here is my final script:
do you mean that this version worked for you?
The folder/setup where you run the script might be the problem.
I placed the script and the Python venv
s inside the /tmp
folder. Why would that cause an issue?
do you mean that this version worked for you?
No, I just meant I also tried it with make_vec_env()
without success :/
@araffin Do you have an idea if the venv
setup could be a problem? The problem occurs at other locations, as well.
@araffin I had a look at rl-baselines-zoo code and the setup_experiment()
function seems to set n_envs
to 1
in case optimize_parameters
is set to True
via the command-line argument -optimize
:
def setup_experiment(self) -> Optional[Tuple[BaseAlgorithm, Dict[str, Any]]]:
"""
Read hyperparameters, pre-process them (create schedules, wrappers, callbacks, action noise objects)
create the environment and possibly the model.
:return: the initialized RL model
"""
[...]
n_envs = 1 if self.algo == "ars" or self.optimize_hyperparameters else self.n_envs
Could that be the reason why python -m rl_zoo3.train --algo a2c --env CartPole-v1 -optimize --vec-env subproc -n 5000
works?
This issue has not seen any recent activity.
This issue was closed automatically because it had not seen any recent activity. If you want to discuss it, you can reopen it freely.