wrong policy
huangfugui00 opened this issue · 21 comments
I ran the code successfully. However, the agent cannot walk or run after training. Am I missing anything?
Any help would be great!
Before training a policy, you have to modify the step function in dp_env_v3.py to set up the correct rewards for the task. If you want to train the agent to walk, a walking demonstration also needs to be provided to define the rewards in the dp_env_v3.py step function.
For example, I have implemented one version that loads the example first and then compares it against the training posture to get the rewards:
def calc_config_reward(self):
    assert len(self.mocap.data) != 0
    err_configs = 0.0

    target_config = self.mocap.data_config[self.idx_curr][7:]  # to exclude root joint
    self.curr_frame = target_config
    curr_config = self.get_joint_configs()

    err_configs = self.calc_config_errs(curr_config, target_config)
    # reward_config = math.exp(-self.scale_err * self.scale_pose * err_configs)
    reward_config = math.exp(-err_configs)

    return reward_config, err_configs
I have modified the step function in dp_env_v3.py, but it does not work.
def step(self, action):
    self.step_len = int(self.mocap_dt // self.model.opt.timestep)
    step_times = int(self.mocap_dt // self.model.opt.timestep)

    pos_before = mass_center(self.model, self.sim)
    self.do_simulation(action, step_times)
    pos_after = mass_center(self.model, self.sim)

    observation = self._get_obs()

    reward_alive = 1.0
    reward_obs = self.calc_config_reward()
    reward_acs = np.square(self.data.ctrl).sum()
    reward_forward = 0.25 * (pos_after - pos_before)

    info = dict(reward_obs=reward_obs, reward_acs=reward_acs, reward_forward=reward_forward)

    done = self.is_done()
    if done:
        reward = 0
    else:
        reward = reward_obs - 0.1 * reward_acs + reward_forward + reward_alive

    return observation, reward, done, info
The file below was what I used to train the walking movement:
dp_env_v3.py:
#!/usr/bin/env python3
import numpy as np
import math
import random
from os import getcwd

from mujoco.mocap_v2 import MocapDM
from mujoco.mujoco_interface import MujocoInterface
from mujoco.mocap_util import JOINT_WEIGHT
from mujoco_py import load_model_from_xml, MjSim, MjViewer

from gym.envs.mujoco import mujoco_env
from gym import utils

from config import Config
from pyquaternion import Quaternion
from transformations import quaternion_from_euler, euler_from_quaternion

BODY_JOINTS = ["chest", "neck", "right_shoulder", "right_elbow",
               "left_shoulder", "left_elbow", "right_hip", "right_knee",
               "right_ankle", "left_hip", "left_knee", "left_ankle"]

DOF_DEF = {"root": 3, "chest": 3, "neck": 3, "right_shoulder": 3,
           "right_elbow": 1, "right_wrist": 0, "left_shoulder": 3, "left_elbow": 1,
           "left_wrist": 0, "right_hip": 3, "right_knee": 1, "right_ankle": 3,
           "left_hip": 3, "left_knee": 1, "left_ankle": 3}

def mass_center(model, sim):
    mass = np.expand_dims(model.body_mass, 1)
    xpos = sim.data.xipos
    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

class DPEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        xml_file_path = Config.xml_path
        self.xml_file_path = xml_file_path

        self.mocap = MocapDM()
        self.interface = MujocoInterface()
        self.load_mocap(Config.mocap_path)

        self.weight_pose = 0.5
        self.weight_vel = 0.05
        self.weight_root = 0.2
        self.weight_end_eff = 0.15
        self.weight_com = 0.1

        self.scale_pose = 2.0
        self.scale_vel = 0.1
        self.scale_end_eff = 40.0
        self.scale_root = 5.0
        self.scale_com = 10.0
        self.scale_err = 1.0

        self.reference_state_init()
        self.idx_curr = -1
        self.idx_tmp_count = -1

        mujoco_env.MujocoEnv.__init__(self, xml_file_path, 1)
        utils.EzPickle.__init__(self)

    def _quat2euler(self, quat):
        tmp_quat = np.array([quat[1], quat[2], quat[3], quat[0]])
        euler = euler_from_quaternion(tmp_quat, axes='rxyz')
        return euler

    def _get_obs(self):
        config = self.sim.data.qpos.flat.copy()[2:]  # ignore root joint: x, y
        vel = self.sim.data.qvel.flat.copy()  # ignore root joint
        return np.concatenate((config, vel))

    def reference_state_init(self):
        self.idx_init = random.randint(0, self.mocap_data_len-1)
        # self.idx_init = 0
        self.idx_curr = self.idx_init
        self.idx_tmp_count = 0

    def early_termination(self):
        target_config = self.mocap.data_config[self.idx_curr][7:]  # to exclude root joint
        curr_config = self.get_joint_configs()
        err_configs = self.calc_config_errs(curr_config, target_config)
        if err_configs >= 15.0:
            return True
        return False

    def get_joint_configs(self):
        data = self.sim.data
        return data.qpos[7:]  # to exclude root joint

    def get_root_configs(self):
        data = self.sim.data
        return data.qpos[3:7]  # to exclude x coord

    def load_mocap(self, filepath):
        self.mocap.load_mocap(filepath)
        self.mocap_dt = self.mocap.dt
        self.mocap_data_len = len(self.mocap.data)

    def calc_config_errs(self, env_config, mocap_config):
        assert len(env_config) == len(mocap_config)
        return np.sum(np.abs(env_config - mocap_config))

    def calc_root_reward(self):  # including root joint
        curr_root = self.mocap.data_config[self.idx_curr][3:7]
        target_root = self.get_root_configs()
        assert len(curr_root) == len(target_root)
        assert len(curr_root) == 4

        q_0 = Quaternion(curr_root[0], curr_root[1], curr_root[2], curr_root[3])
        q_1 = Quaternion(target_root[0], target_root[1], target_root[2], target_root[3])

        q_diff = q_0.conjugate * q_1
        tmp_diff = q_diff.angle

        err_root = abs(tmp_diff)
        reward_root = math.exp(-err_root)
        return reward_root

    def calc_config_reward(self):
        assert len(self.mocap.data) != 0
        err_configs = 0.0

        target_config = self.mocap.data_config[self.idx_curr][7:]  # to exclude root joint
        self.curr_frame = target_config
        curr_config = self.get_joint_configs()

        err_configs = self.calc_config_errs(curr_config, target_config)
        # reward_config = math.exp(-self.scale_err * self.scale_pose * err_configs)
        reward_config = math.exp(-err_configs)

        return reward_config, err_configs

    def step(self, action):
        self.step_len = 1
        step_times = 1

        self.do_simulation(action, step_times)

        reward_config, err_config = self.calc_config_reward()
        reward_root = 10 * self.calc_root_reward()
        reward = reward_config + reward_root
        info = dict()

        self.idx_curr += 1
        self.idx_curr = self.idx_curr % self.mocap_data_len

        observation = self._get_obs()
        done = bool(self.is_done() or err_config >= 10.0)

        return observation, reward, done, info

    def is_done(self):
        mass = np.expand_dims(self.model.body_mass, 1)
        xpos = self.sim.data.xipos
        z_com = (np.sum(mass * xpos, 0) / np.sum(mass))[2]
        # done = bool((z_com < 0.7) or (z_com > 1.2) or self.early_termination())
        done = bool((z_com < 0.7) or (z_com > 1.2))
        # return False
        return done

    def goto(self, pos):
        self.sim.data.qpos[:] = pos[:]
        self.sim.forward()

    def get_time(self):
        return self.sim.data.time

    def reset_model(self):
        self.reference_state_init()
        qpos = self.mocap.data_config[self.idx_init]
        qvel = self.mocap.data_vel[self.idx_init]
        # qvel = self.init_qvel
        self.set_state(qpos, qvel)
        observation = self._get_obs()
        self.idx_tmp_count = -self.step_len
        return observation

    def reset_model_init(self):
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv)
        )
        return self._get_obs()

    def viewer_setup(self):
        pass
        # self.viewer.cam.trackbodyid = 1
        # self.viewer.cam.distance = self.model.stat.extent * 1.0
        # self.viewer.cam.lookat[2] = 2.0
        # self.viewer.cam.elevation = -20

if __name__ == "__main__":
    env = DPEnv()
    env.reset_model()

    import cv2
    from VideoSaver import VideoSaver
    width = 640
    height = 480

    # vid_save = VideoSaver(width=width, height=height)
    # env.load_mocap("/home/mingfei/Documents/DeepMimic/mujoco/motions/humanoid3d_crawl.txt")

    action_size = env.action_space.shape[0]
    ac = np.zeros(action_size)
    while True:
        # target_config = env.mocap.data_config[env.idx_curr][:7] # to exclude root joint
        # env.sim.data.qpos[:7] = target_config[:]
        # env.sim.forward()

        qpos = env.mocap.data_config[env.idx_curr]
        qvel = np.zeros_like(env.mocap.data_vel[env.idx_curr])
        # qpos = np.zeros_like(env.mocap.data_config[env.idx_curr])
        # qvel = np.zeros_like(env.mocap.data_vel[env.idx_curr])
        env.set_state(qpos, qvel)
        env.sim.step()

        print("Reward root:", env.calc_config_reward())

        env.idx_curr += 1
        if env.idx_curr == env.mocap_data_len:
            # env.reset_model()
            env.idx_curr = env.idx_curr % env.mocap_data_len

        # print(env._get_obs())
        env.render()

    # vid_save.close()
config.py:
from os import getcwd

class Config(object):
    all_motions = ['backflip', 'cartwheel', 'crawl', 'dance_a', 'dance_b', 'getup_facedown',
                   'getup_faceup', 'jump', 'kick', 'punch', 'roll', 'run', 'spin', 'spinkick',
                   'walk']
    curr_path = getcwd()
    # motion = 'spinkick'
    # motion = 'walk'
    motion = 'run'
    env_name = "dp_env_v3"
    # env_name = "dp_env_carry"

    motion_folder = '/mujoco/motions'
    xml_folder = '/mujoco/humanoid_deepmimic/envs/asset'

    mocap_path = "%s%s/humanoid3d_%s.txt" % (curr_path, motion_folder, motion)
    xml_path = "%s%s/%s.xml" % (curr_path, xml_folder, env_name)
You may need to specify the motion_folder in config.py.
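If the mocap file is not found, a quick sanity check (my own snippet, not part of the repo) is to print the resolved paths, since curr_path comes from getcwd() and therefore depends on the directory you launch training from:

import os
from config import Config

# verify that the mocap and model paths resolve relative to the launch directory
print(Config.mocap_path, os.path.exists(Config.mocap_path))
print(Config.xml_path, os.path.exists(Config.xml_path))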
I will try with your code. Thanks!
The agent just stands upright instead of walking after I updated my code as you suggested. I find that the agent gets a big reward if it stands upright all the time. Did this happen to you?
Did you change the motion in config.py to 'walk'? The motion is what the agent needs to learn.
Yes, I have changed the motion in config.py. By the way, I only modified config.py and used your new dp_env_v3.py:
from os import getcwd

class Config(object):
    all_motions = ['backflip', 'cartwheel', 'crawl', 'dance_a', 'dance_b', 'getup_facedown',
                   'getup_faceup', 'jump', 'kick', 'punch', 'roll', 'run', 'spin', 'spinkick',
                   'walk']
    curr_path = getcwd()
    motion = 'walk'
    env_name = "dp_env_v3"

    motion_folder = '/mujoco/motions'
    xml_folder = '/mujoco/humanoid_deepmimic/envs/asset'
    xml_test_folder = '/mujoco_test/'

    mocap_path = "%s%s/humanoid3d_%s.txt" % (curr_path, motion_folder, motion)
    xml_path = "%s%s/%s.xml" % (curr_path, xml_folder, env_name)
Please change these lines in dp_env_v3.py:
reward_config, err_config = self.calc_config_reward()
reward_root = 10 * self.calc_root_reward()
reward = reward_config + reward_root
to
reward_config, err_config = self.calc_config_reward()
reward = 10 * reward_config
and set timesteps_per_batch in the TRPO sampling to 2048. After training, would you mind showing me your results?
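In case it helps, this is roughly where that argument goes. This is only a sketch assuming the old OpenAI-baselines trpo_mpi interface; the trpo.py bundled in this repo may name things slightly differently:

from baselines.ppo1.mlp_policy import MlpPolicy
from baselines.trpo_mpi import trpo_mpi
from dp_env_v3 import DPEnv

def policy_fn(name, ob_space, ac_space):
    # two hidden layers of 64 units, as in the baselines humanoid examples
    return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                     hid_size=64, num_hid_layers=2)

env = DPEnv()
trpo_mpi.learn(env, policy_fn,
               timesteps_per_batch=2048,   # samples collected per iteration (per MPI worker)
               max_kl=0.01, cg_iters=10, cg_damping=0.1,
               gamma=0.99, lam=0.98,
               vf_iters=5, vf_stepsize=1e-3,
               max_timesteps=int(1e7))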
Sorry, I was too busy to reply to you yesterday. I will show you the result after training. Thank you.
It looks like the character is not imitating the walking movement. Find the following code:
def early_termination(self):
    target_config = self.mocap.data_config[self.idx_curr][7:]  # to exclude root joint
    curr_config = self.get_joint_configs()
    err_configs = self.calc_config_errs(curr_config, target_config)
    if err_configs >= 15.0:
        return True
    return False
and change err_configs >= 15.0 to err_configs >= 5.0, so that the episode terminates more strictly when the movement deviates.
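For clarity, here is the function after that change (same logic, just the threshold tightened and written as a single return):

def early_termination(self):
    target_config = self.mocap.data_config[self.idx_curr][7:]  # exclude root joint
    curr_config = self.get_joint_configs()
    err_configs = self.calc_config_errs(curr_config, target_config)
    # threshold tightened from 15.0 to 5.0 so the episode ends sooner
    # once the pose drifts away from the mocap reference
    return bool(err_configs >= 5.0)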
Hi @huangfugui00 @mingfeisun ,
I'm also trying to imitate walking motion.
I modified dp_env_v3.py, config.py, trpo.py (timesteps_per_batch=2048 at line 345, num_timesteps=1e7).
However, the humanoid stands straight and it does not move.
Is there anything that should be changed?
Thank you in advance!
dp_env_v3.py
#!/usr/bin/env python3
import numpy as np
import math
import random
from os import getcwd

from mujoco.mocap_v2 import MocapDM
from mujoco.mujoco_interface import MujocoInterface
from mujoco.mocap_util import JOINT_WEIGHT
from mujoco_py import load_model_from_xml, MjSim, MjViewer

from gym.envs.mujoco import mujoco_env
from gym import utils

from config import Config
from pyquaternion import Quaternion
from transformations import quaternion_from_euler, euler_from_quaternion

BODY_JOINTS = ["chest", "neck", "right_shoulder", "right_elbow",
               "left_shoulder", "left_elbow", "right_hip", "right_knee",
               "right_ankle", "left_hip", "left_knee", "left_ankle"]

DOF_DEF = {"root": 3, "chest": 3, "neck": 3, "right_shoulder": 3,
           "right_elbow": 1, "right_wrist": 0, "left_shoulder": 3, "left_elbow": 1,
           "left_wrist": 0, "right_hip": 3, "right_knee": 1, "right_ankle": 3,
           "left_hip": 3, "left_knee": 1, "left_ankle": 3}

def mass_center(model, sim):
    mass = np.expand_dims(model.body_mass, 1)
    xpos = sim.data.xipos
    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

class DPEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        xml_file_path = Config.xml_path
        self.xml_file_path = xml_file_path

        self.mocap = MocapDM()
        self.interface = MujocoInterface()
        self.load_mocap(Config.mocap_path)

        self.weight_pose = 0.5
        self.weight_vel = 0.05
        self.weight_root = 0.2
        self.weight_end_eff = 0.15
        self.weight_com = 0.1

        self.scale_pose = 2.0
        self.scale_vel = 0.1
        self.scale_end_eff = 40.0
        self.scale_root = 5.0
        self.scale_com = 10.0
        self.scale_err = 1.0

        self.reference_state_init()
        self.idx_curr = -1
        self.idx_tmp_count = -1

        mujoco_env.MujocoEnv.__init__(self, xml_file_path, 1)
        utils.EzPickle.__init__(self)

    def _quat2euler(self, quat):
        tmp_quat = np.array([quat[1], quat[2], quat[3], quat[0]])
        euler = euler_from_quaternion(tmp_quat, axes='rxyz')
        return euler

    def _get_obs(self):
        config = self.sim.data.qpos.flat.copy()[2:]  # ignore root joint: x, y
        vel = self.sim.data.qvel.flat.copy()  # ignore root joint
        return np.concatenate((config, vel))

    def reference_state_init(self):
        self.idx_init = random.randint(0, self.mocap_data_len-1)
        # self.idx_init = 0
        self.idx_curr = self.idx_init
        self.idx_tmp_count = 0

    def early_termination(self):
        target_config = self.mocap.data_config[self.idx_curr][7:]  # to exclude root joint
        curr_config = self.get_joint_configs()
        err_configs = self.calc_config_errs(curr_config, target_config)
        if err_configs >= 5.0:
            return True
        return False

    def get_joint_configs(self):
        data = self.sim.data
        return data.qpos[7:]  # to exclude root joint

    def get_root_configs(self):
        data = self.sim.data
        return data.qpos[3:7]  # to exclude x coord

    def load_mocap(self, filepath):
        self.mocap.load_mocap(filepath)
        self.mocap_dt = self.mocap.dt
        self.mocap_data_len = len(self.mocap.data)

    def calc_config_errs(self, env_config, mocap_config):
        assert len(env_config) == len(mocap_config)
        return np.sum(np.abs(env_config - mocap_config))

    def calc_root_reward(self):  # including root joint
        curr_root = self.mocap.data_config[self.idx_curr][3:7]
        target_root = self.get_root_configs()
        assert len(curr_root) == len(target_root)
        assert len(curr_root) == 4

        q_0 = Quaternion(curr_root[0], curr_root[1], curr_root[2], curr_root[3])
        q_1 = Quaternion(target_root[0], target_root[1], target_root[2], target_root[3])

        q_diff = q_0.conjugate * q_1
        tmp_diff = q_diff.angle

        err_root = abs(tmp_diff)
        reward_root = math.exp(-err_root)
        return reward_root

    def calc_config_reward(self):
        assert len(self.mocap.data) != 0
        err_configs = 0.0

        target_config = self.mocap.data_config[self.idx_curr][7:]  # to exclude root joint
        self.curr_frame = target_config
        curr_config = self.get_joint_configs()

        err_configs = self.calc_config_errs(curr_config, target_config)
        # reward_config = math.exp(-self.scale_err * self.scale_pose * err_configs)
        reward_config = math.exp(-err_configs)

        return reward_config, err_configs

    def step(self, action):
        self.step_len = 1
        step_times = 1

        self.do_simulation(action, step_times)

        reward_config, err_config = self.calc_config_reward()
        # reward_root = 10 * self.calc_root_reward()
        # reward = reward_config + reward_root
        reward = 10 * reward_config
        info = dict()

        self.idx_curr += 1
        self.idx_curr = self.idx_curr % self.mocap_data_len

        observation = self._get_obs()
        done = bool(self.is_done() or err_config >= 10.0)

        return observation, reward, done, info

    def is_done(self):
        mass = np.expand_dims(self.model.body_mass, 1)
        xpos = self.sim.data.xipos
        z_com = (np.sum(mass * xpos, 0) / np.sum(mass))[2]
        # done = bool((z_com < 0.7) or (z_com > 1.2) or self.early_termination())
        done = bool((z_com < 0.7) or (z_com > 1.2))
        # return False
        return done

    def goto(self, pos):
        self.sim.data.qpos[:] = pos[:]
        self.sim.forward()

    def get_time(self):
        return self.sim.data.time

    def reset_model(self):
        self.reference_state_init()
        qpos = self.mocap.data_config[self.idx_init]
        qvel = self.mocap.data_vel[self.idx_init]
        # qvel = self.init_qvel
        self.set_state(qpos, qvel)
        observation = self._get_obs()
        self.idx_tmp_count = -self.step_len
        return observation

    def reset_model_init(self):
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv)
        )
        return self._get_obs()

    def viewer_setup(self):
        pass
        # self.viewer.cam.trackbodyid = 1
        # self.viewer.cam.distance = self.model.stat.extent * 1.0
        # self.viewer.cam.lookat[2] = 2.0
        # self.viewer.cam.elevation = -20

if __name__ == "__main__":
    env = DPEnv()
    env.reset_model()

    import cv2
    from VideoSaver import VideoSaver
    width = 640
    height = 480

    # vid_save = VideoSaver(width=width, height=height)
    # env.load_mocap("/home/mingfei/Documents/DeepMimic/mujoco/motions/humanoid3d_crawl.txt")

    action_size = env.action_space.shape[0]
    ac = np.zeros(action_size)
    while True:
        # target_config = env.mocap.data_config[env.idx_curr][:7] # to exclude root joint
        # env.sim.data.qpos[:7] = target_config[:]
        # env.sim.forward()

        qpos = env.mocap.data_config[env.idx_curr]
        qvel = np.zeros_like(env.mocap.data_vel[env.idx_curr])
        # qpos = np.zeros_like(env.mocap.data_config[env.idx_curr])
        # qvel = np.zeros_like(env.mocap.data_vel[env.idx_curr])
        env.set_state(qpos, qvel)
        env.sim.step()

        print("Reward root:", env.calc_config_reward())

        env.idx_curr += 1
        if env.idx_curr == env.mocap_data_len:
            # env.reset_model()
            env.idx_curr = env.idx_curr % env.mocap_data_len

        # print(env._get_obs())
        env.render()

    # vid_save.close()
config.py
from os import getcwd

class Config(object):
    all_motions = ['backflip', 'cartwheel', 'crawl', 'dance_a', 'dance_b', 'getup_facedown',
                   'getup_faceup', 'jump', 'kick', 'punch', 'roll', 'run', 'spin', 'spinkick',
                   'walk']
    curr_path = getcwd()
    # motion = 'spinkick'
    # motion = 'dance_b'
    # motion = 'run'
    motion = 'walk'
    env_name = "dp_env_v3"

    motion_folder = '/mujoco/motions'
    xml_folder = '/mujoco/humanoid_deepmimic/envs/asset'
    xml_test_folder = '/mujoco_test/'

    mocap_path = "%s%s/humanoid3d_%s.txt" % (curr_path, motion_folder, motion)
    xml_path = "%s%s/%s.xml" % (curr_path, xml_folder, env_name)
    xml_path_test = "%s%s/%s_test.xml" % (curr_path, xml_test_folder, env_name)
Hi, I just noticed your issue. You may change this line:
done = bool(self.is_done() or err_config >= 10.0)
to
done = bool(self.is_done() or err_config >= 5.0)
so the episode stops earlier when the movement deviates.
I got this result after training with TimestepsSoFar at 1e7, and the program is still running now.
Hello. I am facing the same problem. The humanoid stands but does not move, exactly as your gif shows. I also set done = bool(self.is_done() or err_config >= 5.0). Could you please tell me what you changed to make it move? Thank you!
I use PyBullet as an alternative to MuJoCo. PyBullet includes a Python version of DeepMimic; maybe you can check it out.
@kemangjaka
Hello, I did the same as the code above. Have you already solved the problem? Could you please tell me how?
Hello everyone. I think I know where the problem is. In addition to the changes above, in trpo.py you also need to:
change line 78 to ob = env.reset()
comment out line 79
comment out line 404
After debugging I found that training needs Reference State Initialization (RSI), but the original code did not actually apply it: the RSI reset was overridden by a normal initialization.
If anybody tries this, please give me feedback. Peace!
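To illustrate the intent (a minimal sketch, not the actual trpo.py diff, since the exact contents of those lines may differ in your copy): each episode should start from a random mocap frame, which DPEnv.reset_model() already does through reference_state_init(); the rollout code only has to call env.reset() and must not overwrite that state with a fixed initialization afterwards.

# Minimal rollout sketch showing Reference State Initialization (RSI):
# env.reset() calls reset_model(), which samples a random mocap frame
# and sets qpos/qvel from it, so every episode starts somewhere in the clip.
from dp_env_v3 import DPEnv

env = DPEnv()
for _ in range(3):
    ob = env.reset()                      # RSI happens here
    done = False
    while not done:
        ac = env.action_space.sample()    # placeholder for the learned policy
        ob, rew, done, info = env.step(ac)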
Sorry, I no longer use this code...
@kemangjaka Here is one training run where I got the character to walk:
https://www.dropbox.com/s/0p0gtocqcfhw3lr/src_walk_ok_done_early_horizon_2048.zip?dl=0
You can find the checkpoint and logs inside. Training the character is not easy: the batch_size and the number of MPI processes matter a lot.
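One thing to keep in mind when tuning (assuming an MPI-parallel TRPO in the style of the old baselines trpo_mpi, where each worker collects its own batch): the effective number of samples per iteration scales with the number of processes.

# rough arithmetic, assuming each MPI worker collects timesteps_per_batch samples
timesteps_per_batch = 2048        # per worker
num_mpi_workers = 8               # e.g. launched with mpirun -np 8
samples_per_iteration = timesteps_per_batch * num_mpi_workers
print(samples_per_iteration)      # 16384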