How to improve evaluation efficiency?
Closed this issue · 2 comments
nuomizai commented
I used the following code to evaluate the pretrained model, but found that evaluation is very slow (about 23 minutes per episode). Is there anything wrong with the code? Or could you please provide a standard evaluation script? Thanks for your help!
import mj_envs
import gym
import numpy as np
import torch
import gpytorch
from gp_models import MultitaskGPModel
from rlkit.torch.pytorch_util import set_gpu_mode
from tqdm import tqdm
import copy
import time
device = torch.device('cuda:1')
def rollout(
        env,
        agent,
        max_path_length=np.inf,
        render=False,
        render_kwargs=None,
        preprocess_obs_for_policy_fn=None,
        get_action_kwargs=None,
        return_dict_obs=False,
        full_o_postprocess_func=None,
        reset_callback=None,
):
    if render_kwargs is None:
        render_kwargs = {}
    if get_action_kwargs is None:
        get_action_kwargs = {}
    if preprocess_obs_for_policy_fn is None:
        preprocess_obs_for_policy_fn = lambda x: x
    raw_obs = []
    raw_next_obs = []
    observations = []
    actions = []
    rewards = []
    terminals = []
    dones = []
    agent_infos = []
    env_infos = []
    next_observations = []
    path_length = 0
    # agent.reset()
    o = env.reset()
    if reset_callback:
        reset_callback(env, agent, o)
    if render:
        # todo: debug
        env.mj_render()
        # env.render(**render_kwargs)
    while path_length < max_path_length:
        print('path_length:', path_length)
        raw_obs.append(o)
        # todo: debug
        # o_for_agent = torch.from_numpy(o).cuda().float().unsqueeze(0)
        # NOTE: uses the module-level `model` and `likelihood` defined below;
        # the `agent` argument is not used for the GP policy.
        o_torch = torch.from_numpy(np.array([o])).float().to(device)
        output = model(o_torch)
        observed_pred = likelihood(output)
        a = observed_pred.mean.data.cpu().numpy()
        if len(a) == 1:
            a = a[0]
        # # o_for_agent = o
        # # a = agent.get_action(o_for_agent, **get_action_kwargs)
        # a, *_ = agent(o_for_agent, **get_action_kwargs)
        # a = a.detach().cpu().numpy()
        # # a = agent.get_action(o_for_agent, **get_action_kwargs)[0][0]
        agent_info = None
        if full_o_postprocess_func:
            full_o_postprocess_func(env, agent, o)
        next_o, r, done, env_info = env.step(copy.deepcopy(a))
        if render:
            # todo: debug
            env.mj_render()
            # env.render(**render_kwargs)
        observations.append(o)
        rewards.append(r)
        terminal = False
        if done:
            # terminal=False if TimeLimit caused termination
            if not env_info.pop('TimeLimit.truncated', False):
                terminal = True
        terminals.append(terminal)
        dones.append(done)
        actions.append(a)
        next_observations.append(next_o)
        raw_next_obs.append(next_o)
        agent_infos.append(agent_info)
        env_infos.append(env_info)
        path_length += 1
        if done:
            break
        o = next_o
    actions = np.array(actions)
    if len(actions.shape) == 1:
        actions = np.expand_dims(actions, 1)
    observations = np.array(observations)
    next_observations = np.array(next_observations)
    if return_dict_obs:
        observations = raw_obs
        next_observations = raw_next_obs
    rewards = np.array(rewards)
    if len(rewards.shape) == 1:
        rewards = rewards.reshape(-1, 1)
    return dict(
        observations=observations,
        actions=actions,
        rewards=rewards,
        next_observations=next_observations,
        terminals=np.array(terminals).reshape(-1, 1),
        dones=np.array(dones).reshape(-1, 1),
        agent_infos=agent_infos,
        env_infos=env_infos,
        full_observations=raw_obs,
        full_next_observations=raw_next_obs,
    )
def simulate_policy(env, policy, T=100, H=200, gpu=True, render=False):
    if gpu:
        set_gpu_mode(True)
        # policy.cuda()
        policy.to(device)
        print('use GPU')
    # policy = MakeDeterministic(policy)
    success_time = 0
    env.seed(1)
    for episode in tqdm(range(0, T)):
        print('episode: {}'.format(episode))
        path = rollout(
            env,
            policy,
            max_path_length=H,
            render=render,
        )
        if path['env_infos'][-1]['goal_achieved'] is True:
            success_time += 1
        if hasattr(env, "log_diagnostics"):
            env.log_diagnostics([path])
        time.sleep(0.02)
    # Average over the number of episodes (T), not the last loop index.
    success_time /= T
    return success_time
env = gym.make('door-binary-v0')
obs_dim = env.observation_space.low.size
action_dim = env.action_space.low.size

data_set = '../d4rl_model/offpolicy_hand_data/door2_sparse.npy'
model_path = '../nppac/nppac/door/gp_door_multitask_1000.pt'
data = np.load(data_set, allow_pickle=True)

keep_num = 1000
use_ard = True
gp_type = 'multitask'
gp_rank = 1
kernel_type = 'matern12'

# Ablation to randomly filter the dataset, not active by default.
if keep_num < len(data):
    print(f'Keeping {keep_num} trajectories.')
    data = np.random.choice(data, keep_num, replace=False)

if type(data[0]['observations'][0]) is dict:
    # Convert to just the states
    for traj in data:
        traj['observations'] = [t['state_observation'] for t in traj['observations']]

# Flatten the per-trajectory lists into (N, obs_dim) / (N, action_dim) training tensors.
train_x = torch.from_numpy(
    np.array([j for i in [traj['observations'] for traj in data] for j in i])).float().to(device)
train_y = torch.from_numpy(
    np.array([j for i in [traj['actions'] for traj in data] for j in i])).float().to(device)
print('Data Loaded!')

# Initialize likelihood and model
likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=action_dim).to(device)
likelihood.eval()
ard_num_dims = obs_dim if use_ard else None
model = MultitaskGPModel(train_x, train_y, likelihood, num_tasks=action_dim, rank=gp_rank,
                         ard_num_dims=ard_num_dims, kernel_type=kernel_type).to(device)
model_dict = torch.load(model_path, map_location=device)
model.load_state_dict(model_dict)
model.eval()

success_rate = simulate_policy(env, model, render=False, T=100)
print('success rate is:', success_rate)
conglu1997 commented
What you have looks good, but it is missing

with torch.no_grad(), gpytorch.settings.fast_pred_var():

around the parts that evaluate the GP.
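For reference, a minimal sketch of that change applied to the inference step inside rollout, using the same variable names as the script above: torch.no_grad() stops autograd from building a graph through the whole GP on every step, and gpytorch.settings.fast_pred_var() switches GPyTorch to its fast predictive-variance (LOVE) path.

with torch.no_grad(), gpytorch.settings.fast_pred_var():
    # Same GP forward pass and likelihood as in the script, but with
    # gradient tracking disabled and fast predictive variances enabled.
    o_torch = torch.from_numpy(np.array([o])).float().to(device)
    output = model(o_torch)
    observed_pred = likelihood(output)
    a = observed_pred.mean.cpu().numpy()

After the first prediction in eval mode, GPyTorch caches the expensive solves against the training data, so subsequent steps reuse them and only the first call should be slow.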
nuomizai commented

> What you have looks good, but it is missing
> with torch.no_grad(), gpytorch.settings.fast_pred_var():
> around the parts that evaluate the GP.

Thanks for your help! I added with torch.no_grad(), gpytorch.settings.fast_pred_var(): around the GP evaluation, and evaluation is much faster now!