danaugrs/huskarl

interfacing with other RL environments

grahamannett opened this issue · 0 comments

Are you taking pull requests for extending some of the classes to support other environments? I really like this framework and have been getting it to work with habitat-api (https://github.com/facebookresearch/habitat-api); I had tried another environment first but it was too difficult. I don't think sim.train works as is, so I've written my own train() loop and it seems to work, but habitat-api has a bunch of nuances (for example it seems you can't have multiple instances, possibly due to an OpenGL limitation? I'm still not sure, as I'm trying to understand both frameworks and habitat-api/habitat-sim is pretty extensive). What I have right now is something like:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten

import huskarl as hk
import habitat

class SimpleRLEnv(habitat.RLEnv):
    def get_reward_range(self):
        return [-1, 1]

    def get_reward(self, observations):
        return 0

    def get_done(self, observations):
        return self.habitat_env.episode_over

    def get_info(self, observations):
        return self.habitat_env.get_metrics()

config = habitat.get_config(config_paths="configs/tasks/pointnav.yaml")
create_env = lambda: SimpleRLEnv(config=config).unwrapped
dummy_env = create_env()

obs = dummy_env.observation_space.sample()
input_shape = obs["rgb"].shape
action_space_n = dummy_env.action_space.n
dummy_env.close()

model = Sequential([
    Conv2D(16, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(16, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(16, activation='relu')
])


agent = hk.agent.DQN(model, actions=action_space_n, nsteps=2)

# These are what would need to work with habitat-api, I believe
# sim = hk.Simulation(create_env, agent)
# sim.train(max_steps=30)
# sim.test(max_steps=10)

instances = 1  # habitat-api seems to only allow a single instance (see note above)
max_steps = 50

episode_reward_sequences = []
episode_step_sequences = []
episode_rewards = 0

envs = create_env()
states = envs.reset()


for step in range(max_steps):
    # Most of this is copied from simulation._sp_train()
    action = agent.act(states["rgb"])
    next_state, reward, done, info = envs.step(action)
    agent.push(hk.memory.Transition(states["rgb"], action, reward, None if done else next_state["rgb"]))
    episode_rewards += reward

    if done:
        episode_reward_sequences.append(episode_rewards)
        episode_step_sequences.append(step)
        episode_rewards = 0
        states = envs.reset()
    else:
        states = next_state
    if step % 5 == 0: print(f"step is: {step} and pointgoal is: {states['pointgoal']}")
    agent.train(step)
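
If sim.train were to work directly, I think the main missing piece is that huskarl expects the env to hand back plain arrays rather than habitat's observation dict. A rough, untested sketch of a thin wrapper that only exposes the "rgb" observation (RGBOnlyEnv is just a name I made up, not part of either library):

import gym

class RGBOnlyEnv(gym.Wrapper):
    # Hypothetical wrapper: strips habitat's dict observations down to the rgb image
    # so huskarl's Simulation sees a plain array state.
    def __init__(self, env):
        super().__init__(env)
        self.observation_space = env.observation_space.spaces["rgb"]

    def reset(self):
        return self.env.reset()["rgb"]

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        return obs["rgb"], reward, done, info

# create_env = lambda: RGBOnlyEnv(SimpleRLEnv(config=config))
# sim = hk.Simulation(create_env, agent)
# sim.train(max_steps=30)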


Also, I think I can add PPO to the agents, but it isn't fully working yet.
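
For the PPO side, the piece I'd start from is the standard clipped surrogate objective; a minimal sketch of just that loss (plain TensorFlow, not wired into huskarl's Agent interface yet, and the function name is my own):

import tensorflow as tf

def ppo_clip_loss(old_log_probs, new_log_probs, advantages, clip_eps=0.2):
    # Standard PPO clipped surrogate objective, written as a loss to minimize.
    ratio = tf.exp(new_log_probs - old_log_probs)
    unclipped = ratio * advantages
    clipped = tf.clip_by_value(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * advantages
    return -tf.reduce_mean(tf.minimum(unclipped, clipped))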