wrong policy
huangfugui00 opened this issue · 21 comments
I ran the code successfully. However, the agent cannot walk or run after training. Am I missing anything?
Any help would be great!
Before training a policy, you have to modify the step function in dp_env_v3.py to set up the correct rewards for the task. If you want to train the agent to walk, a walking demonstration also needs to be provided to define the rewards in the dp_env_v3.py step function.
For example, I have implemented one version that loads the example first and then compares it against the training posture to get the rewards:
def calc_config_reward(self):
    assert len(self.mocap.data) != 0
    err_configs = 0.0

    target_config = self.mocap.data_config[self.idx_curr][7:]  # to exclude root joint
    self.curr_frame = target_config
    curr_config = self.get_joint_configs()

    err_configs = self.calc_config_errs(curr_config, target_config)
    # reward_config = math.exp(-self.scale_err * self.scale_pose * err_configs)
    reward_config = math.exp(-err_configs)

    return reward_config, err_configs
I have modified the step function in dp_env_v3.py, but it does not work.
def step(self, action):
    self.step_len = int(self.mocap_dt // self.model.opt.timestep)
    step_times = int(self.mocap_dt // self.model.opt.timestep)

    pos_before = mass_center(self.model, self.sim)
    self.do_simulation(action, step_times)
    pos_after = mass_center(self.model, self.sim)

    observation = self._get_obs()

    reward_alive = 1.0
    reward_obs = self.calc_config_reward()
    reward_acs = np.square(self.data.ctrl).sum()
    reward_forward = 0.25 * (pos_after - pos_before)

    info = dict(reward_obs=reward_obs, reward_acs=reward_acs, reward_forward=reward_forward)

    done = self.is_done()
    if done:
        reward = 0
    else:
        reward = reward_obs - 0.1 * reward_acs + reward_forward + reward_alive

    return observation, reward, done, info
The file below was what I used to train the walking movement:
dp_env_v3.py:
#!/usr/bin/env python3
import numpy as np
import math
import random
from os import getcwd

from mujoco.mocap_v2 import MocapDM
from mujoco.mujoco_interface import MujocoInterface
from mujoco.mocap_util import JOINT_WEIGHT
from mujoco_py import load_model_from_xml, MjSim, MjViewer

from gym.envs.mujoco import mujoco_env
from gym import utils

from config import Config
from pyquaternion import Quaternion
from transformations import quaternion_from_euler, euler_from_quaternion

BODY_JOINTS = ["chest", "neck", "right_shoulder", "right_elbow",
               "left_shoulder", "left_elbow", "right_hip", "right_knee",
               "right_ankle", "left_hip", "left_knee", "left_ankle"]

DOF_DEF = {"root": 3, "chest": 3, "neck": 3, "right_shoulder": 3,
           "right_elbow": 1, "right_wrist": 0, "left_shoulder": 3, "left_elbow": 1,
           "left_wrist": 0, "right_hip": 3, "right_knee": 1, "right_ankle": 3,
           "left_hip": 3, "left_knee": 1, "left_ankle": 3}

def mass_center(model, sim):
    mass = np.expand_dims(model.body_mass, 1)
    xpos = sim.data.xipos
    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

class DPEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        xml_file_path = Config.xml_path
        self.xml_file_path = xml_file_path

        self.mocap = MocapDM()
        self.interface = MujocoInterface()
        self.load_mocap(Config.mocap_path)

        self.weight_pose = 0.5
        self.weight_vel = 0.05
        self.weight_root = 0.2
        self.weight_end_eff = 0.15
        self.weight_com = 0.1

        self.scale_pose = 2.0
        self.scale_vel = 0.1
        self.scale_end_eff = 40.0
        self.scale_root = 5.0
        self.scale_com = 10.0
        self.scale_err = 1.0

        self.reference_state_init()
        self.idx_curr = -1
        self.idx_tmp_count = -1

        mujoco_env.MujocoEnv.__init__(self, xml_file_path, 1)
        utils.EzPickle.__init__(self)

    def _quat2euler(self, quat):
        tmp_quat = np.array([quat[1], quat[2], quat[3], quat[0]])
        euler = euler_from_quaternion(tmp_quat, axes='rxyz')
        return euler

    def _get_obs(self):
        config = self.sim.data.qpos.flat.copy()[2:]  # ignore root joint: x, y
        vel = self.sim.data.qvel.flat.copy()  # ignore root joint
        return np.concatenate((config, vel))

    def reference_state_init(self):
        self.idx_init = random.randint(0, self.mocap_data_len-1)
        # self.idx_init = 0
        self.idx_curr = self.idx_init
        self.idx_tmp_count = 0

    def early_termination(self):
        target_config = self.mocap.data_config[self.idx_curr][7:]  # to exclude root joint
        curr_config = self.get_joint_configs()
        err_configs = self.calc_config_errs(curr_config, target_config)
        if err_configs >= 15.0:
            return True
        return False

    def get_joint_configs(self):
        data = self.sim.data
        return data.qpos[7:]  # to exclude root joint

    def get_root_configs(self):
        data = self.sim.data
        return data.qpos[3:7]  # to exclude x coord

    def load_mocap(self, filepath):
        self.mocap.load_mocap(filepath)
        self.mocap_dt = self.mocap.dt
        self.mocap_data_len = len(self.mocap.data)

    def calc_config_errs(self, env_config, mocap_config):
        assert len(env_config) == len(mocap_config)
        return np.sum(np.abs(env_config - mocap_config))

    def calc_root_reward(self):  # including root joint
        curr_root = self.mocap.data_config[self.idx_curr][3:7]
        target_root = self.get_root_configs()
        assert len(curr_root) == len(target_root)
        assert len(curr_root) == 4

        q_0 = Quaternion(curr_root[0], curr_root[1], curr_root[2], curr_root[3])
        q_1 = Quaternion(target_root[0], target_root[1], target_root[2], target_root[3])

        q_diff = q_0.conjugate * q_1
        tmp_diff = q_diff.angle

        err_root = abs(tmp_diff)
        reward_root = math.exp(-err_root)
        return reward_root

    def calc_config_reward(self):
        assert len(self.mocap.data) != 0
        err_configs = 0.0

        target_config = self.mocap.data_config[self.idx_curr][7:]  # to exclude root joint
        self.curr_frame = target_config
        curr_config = self.get_joint_configs()

        err_configs = self.calc_config_errs(curr_config, target_config)
        # reward_config = math.exp(-self.scale_err * self.scale_pose * err_configs)
        reward_config = math.exp(-err_configs)

        return reward_config, err_configs

    def step(self, action):
        self.step_len = 1
        step_times = 1

        self.do_simulation(action, step_times)

        reward_config, err_config = self.calc_config_reward()
        reward_root = 10 * self.calc_root_reward()
        reward = reward_config + reward_root
        info = dict()

        self.idx_curr += 1
        self.idx_curr = self.idx_curr % self.mocap_data_len

        observation = self._get_obs()
        done = bool(self.is_done() or err_config >= 10.0)

        return observation, reward, done, info

    def is_done(self):
        mass = np.expand_dims(self.model.body_mass, 1)
        xpos = self.sim.data.xipos
        z_com = (np.sum(mass * xpos, 0) / np.sum(mass))[2]
        # done = bool((z_com < 0.7) or (z_com > 1.2) or self.early_termination())
        done = bool((z_com < 0.7) or (z_com > 1.2))
        # return False
        return done

    def goto(self, pos):
        self.sim.data.qpos[:] = pos[:]
        self.sim.forward()

    def get_time(self):
        return self.sim.data.time

    def reset_model(self):
        self.reference_state_init()
        qpos = self.mocap.data_config[self.idx_init]
        qvel = self.mocap.data_vel[self.idx_init]
        # qvel = self.init_qvel
        self.set_state(qpos, qvel)
        observation = self._get_obs()
        self.idx_tmp_count = -self.step_len
        return observation

    def reset_model_init(self):
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv)
        )
        return self._get_obs()

    def viewer_setup(self):
        pass
        # self.viewer.cam.trackbodyid = 1
        # self.viewer.cam.distance = self.model.stat.extent * 1.0
        # self.viewer.cam.lookat[2] = 2.0
        # self.viewer.cam.elevation = -20

if __name__ == "__main__":
    env = DPEnv()
    env.reset_model()

    import cv2
    from VideoSaver import VideoSaver
    width = 640
    height = 480

    # vid_save = VideoSaver(width=width, height=height)
    # env.load_mocap("/home/mingfei/Documents/DeepMimic/mujoco/motions/humanoid3d_crawl.txt")

    action_size = env.action_space.shape[0]
    ac = np.zeros(action_size)
    while True:
        # target_config = env.mocap.data_config[env.idx_curr][:7] # to exclude root joint
        # env.sim.data.qpos[:7] = target_config[:]
        # env.sim.forward()

        qpos = env.mocap.data_config[env.idx_curr]
        qvel = np.zeros_like(env.mocap.data_vel[env.idx_curr])
        # qpos = np.zeros_like(env.mocap.data_config[env.idx_curr])
        # qvel = np.zeros_like(env.mocap.data_vel[env.idx_curr])
        env.set_state(qpos, qvel)
        env.sim.step()

        print("Reward root:", env.calc_config_reward())

        env.idx_curr += 1
        if env.idx_curr == env.mocap_data_len:
            # env.reset_model()
            env.idx_curr = env.idx_curr % env.mocap_data_len

        # print(env._get_obs())
        env.render()

    # vid_save.close()
config.py:
from os import getcwd

class Config(object):
    all_motions = ['backflip', 'cartwheel', 'crawl', 'dance_a', 'dance_b', 'getup_facedown',
                   'getup_faceup', 'jump', 'kick', 'punch', 'roll', 'run', 'spin', 'spinkick',
                   'walk']
    curr_path = getcwd()
    # motion = 'spinkick'
    # motion = 'walk'
    motion = 'run'
    env_name = "dp_env_v3"
    # env_name = "dp_env_carry"

    motion_folder = '/mujoco/motions'
    xml_folder = '/mujoco/humanoid_deepmimic/envs/asset'

    mocap_path = "%s%s/humanoid3d_%s.txt" % (curr_path, motion_folder, motion)
    xml_path = "%s%s/%s.xml" % (curr_path, xml_folder, env_name)
You may need to specify the motion_folder in config.py.
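If the mocap file is not found, a quick sanity check (my own snippet, not part of the repo) is to print the resolved paths, since curr_path comes from getcwd() and therefore depends on the directory you launch training from:

import os
from config import Config

# verify that the mocap and model paths resolve relative to the launch directory
print(Config.mocap_path, os.path.exists(Config.mocap_path))
print(Config.xml_path, os.path.exists(Config.xml_path))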
I will try with your code. Thanks!
The agent just stands upright instead of walking after I updated my code as you suggested. I find that the agent gets a big reward if it stands upright all the time. Did this happen to you?
Did you change the motion in config.py to 'walk'? The motion is what the agent needs to learn.
Yes, I have changed the motion in config.py. By the way, I only modified config.py and used your new dp_env_v3.py:
from os import getcwd

class Config(object):
    all_motions = ['backflip', 'cartwheel', 'crawl', 'dance_a', 'dance_b', 'getup_facedown',
                   'getup_faceup', 'jump', 'kick', 'punch', 'roll', 'run', 'spin', 'spinkick',
                   'walk']
    curr_path = getcwd()
    motion = 'walk'
    env_name = "dp_env_v3"

    motion_folder = '/mujoco/motions'
    xml_folder = '/mujoco/humanoid_deepmimic/envs/asset'
    xml_test_folder = '/mujoco_test/'

    mocap_path = "%s%s/humanoid3d_%s.txt" % (curr_path, motion_folder, motion)
    xml_path = "%s%s/%s.xml" % (curr_path, xml_folder, env_name)
Please change these lines in dp_env_v3.py:
reward_config, err_config = self.calc_config_reward()
reward_root = 10 * self.calc_root_reward()
reward = reward_config + reward_root
to
reward_config, err_config = self.calc_config_reward()
reward = 10 * reward_config
and set timesteps_per_batch in the TRPO sampling to 2048. After training, would you mind showing me your results?
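In case it helps, this is roughly where that argument goes. This is only a sketch assuming the old OpenAI-baselines trpo_mpi interface; the trpo.py bundled in this repo may name things slightly differently:

from baselines.ppo1.mlp_policy import MlpPolicy
from baselines.trpo_mpi import trpo_mpi
from dp_env_v3 import DPEnv

def policy_fn(name, ob_space, ac_space):
    # two hidden layers of 64 units, as in the baselines humanoid examples
    return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                     hid_size=64, num_hid_layers=2)

env = DPEnv()
trpo_mpi.learn(env, policy_fn,
               timesteps_per_batch=2048,   # samples collected per iteration (per MPI worker)
               max_kl=0.01, cg_iters=10, cg_damping=0.1,
               gamma=0.99, lam=0.98,
               vf_iters=5, vf_stepsize=1e-3,
               max_timesteps=int(1e7))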
Sorry, I was too busy to reply to you yesterday. I will show you the result after training. Thank you.
It looks like the character is not imitating the walking movement. Find the following code:
def early_termination(self):
    target_config = self.mocap.data_config[self.idx_curr][7:]  # to exclude root joint
    curr_config = self.get_joint_configs()
    err_configs = self.calc_config_errs(curr_config, target_config)
    if err_configs >= 15.0:
        return True
    return False
and change err_configs >= 15.0 to err_configs >= 5.0, so that the episode terminates more strictly when the movement deviates.
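For clarity, here is the function after that change (same logic, just the threshold tightened and written as a single return):

def early_termination(self):
    target_config = self.mocap.data_config[self.idx_curr][7:]  # exclude root joint
    curr_config = self.get_joint_configs()
    err_configs = self.calc_config_errs(curr_config, target_config)
    # threshold tightened from 15.0 to 5.0 so the episode ends sooner
    # once the pose drifts away from the mocap reference
    return bool(err_configs >= 5.0)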
Hi @huangfugui00 @mingfeisun ,
I'm also trying to imitate walking motion.
I modified dp_env_v3.py, config.py, trpo.py (timesteps_per_batch=2048 at line 345, num_timesteps=1e7).
However, the humanoid stands straight and it does not move.
Is there anything that should be changed?
Thank you in advance!
dp_env_v3.py
#!/usr/bin/env python3
import numpy as np
import math
import random
from os import getcwd

from mujoco.mocap_v2 import MocapDM
from mujoco.mujoco_interface import MujocoInterface
from mujoco.mocap_util import JOINT_WEIGHT
from mujoco_py import load_model_from_xml, MjSim, MjViewer

from gym.envs.mujoco import mujoco_env
from gym import utils

from config import Config
from pyquaternion import Quaternion
from transformations import quaternion_from_euler, euler_from_quaternion

BODY_JOINTS = ["chest", "neck", "right_shoulder", "right_elbow",
               "left_shoulder", "left_elbow", "right_hip", "right_knee",
               "right_ankle", "left_hip", "left_knee", "left_ankle"]

DOF_DEF = {"root": 3, "chest": 3, "neck": 3, "right_shoulder": 3,
           "right_elbow": 1, "right_wrist": 0, "left_shoulder": 3, "left_elbow": 1,
           "left_wrist": 0, "right_hip": 3, "right_knee": 1, "right_ankle": 3,
           "left_hip": 3, "left_knee": 1, "left_ankle": 3}

def mass_center(model, sim):
    mass = np.expand_dims(model.body_mass, 1)
    xpos = sim.data.xipos
    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

class DPEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        xml_file_path = Config.xml_path
        self.xml_file_path = xml_file_path

        self.mocap = MocapDM()
        self.interface = MujocoInterface()
        self.load_mocap(Config.mocap_path)

        self.weight_pose = 0.5
        self.weight_vel = 0.05
        self.weight_root = 0.2
        self.weight_end_eff = 0.15
        self.weight_com = 0.1

        self.scale_pose = 2.0
        self.scale_vel = 0.1
        self.scale_end_eff = 40.0
        self.scale_root = 5.0
        self.scale_com = 10.0
        self.scale_err = 1.0

        self.reference_state_init()
        self.idx_curr = -1
        self.idx_tmp_count = -1

        mujoco_env.MujocoEnv.__init__(self, xml_file_path, 1)
        utils.EzPickle.__init__(self)

    def _quat2euler(self, quat):
        tmp_quat = np.array([quat[1], quat[2], quat[3], quat[0]])
        euler = euler_from_quaternion(tmp_quat, axes='rxyz')
        return euler

    def _get_obs(self):
        config = self.sim.data.qpos.flat.copy()[2:]  # ignore root joint: x, y
        vel = self.sim.data.qvel.flat.copy()  # ignore root joint
        return np.concatenate((config, vel))

    def reference_state_init(self):
        self.idx_init = random.randint(0, self.mocap_data_len-1)
        # self.idx_init = 0
        self.idx_curr = self.idx_init
        self.idx_tmp_count = 0

    def early_termination(self):
        target_config = self.mocap.data_config[self.idx_curr][7:]  # to exclude root joint
        curr_config = self.get_joint_configs()
        err_configs = self.calc_config_errs(curr_config, target_config)
        if err_configs >= 5.0:
            return True
        return False

    def get_joint_configs(self):
        data = self.sim.data
        return data.qpos[7:]  # to exclude root joint

    def get_root_configs(self):
        data = self.sim.data
        return data.qpos[3:7]  # to exclude x coord

    def load_mocap(self, filepath):
        self.mocap.load_mocap(filepath)
        self.mocap_dt = self.mocap.dt
        self.mocap_data_len = len(self.mocap.data)

    def calc_config_errs(self, env_config, mocap_config):
        assert len(env_config) == len(mocap_config)
        return np.sum(np.abs(env_config - mocap_config))

    def calc_root_reward(self):  # including root joint
        curr_root = self.mocap.data_config[self.idx_curr][3:7]
        target_root = self.get_root_configs()
        assert len(curr_root) == len(target_root)
        assert len(curr_root) == 4

        q_0 = Quaternion(curr_root[0], curr_root[1], curr_root[2], curr_root[3])
        q_1 = Quaternion(target_root[0], target_root[1], target_root[2], target_root[3])

        q_diff = q_0.conjugate * q_1
        tmp_diff = q_diff.angle

        err_root = abs(tmp_diff)
        reward_root = math.exp(-err_root)
        return reward_root

    def calc_config_reward(self):
        assert len(self.mocap.data) != 0
        err_configs = 0.0

        target_config = self.mocap.data_config[self.idx_curr][7:]  # to exclude root joint
        self.curr_frame = target_config
        curr_config = self.get_joint_configs()

        err_configs = self.calc_config_errs(curr_config, target_config)
        # reward_config = math.exp(-self.scale_err * self.scale_pose * err_configs)
        reward_config = math.exp(-err_configs)

        return reward_config, err_configs

    def step(self, action):
        self.step_len = 1
        step_times = 1

        self.do_simulation(action, step_times)

        reward_config, err_config = self.calc_config_reward()
        # reward_root = 10 * self.calc_root_reward()
        # reward = reward_config + reward_root
        reward = 10 * reward_config
        info = dict()

        self.idx_curr += 1
        self.idx_curr = self.idx_curr % self.mocap_data_len

        observation = self._get_obs()
        done = bool(self.is_done() or err_config >= 10.0)

        return observation, reward, done, info

    def is_done(self):
        mass = np.expand_dims(self.model.body_mass, 1)
        xpos = self.sim.data.xipos
        z_com = (np.sum(mass * xpos, 0) / np.sum(mass))[2]
        # done = bool((z_com < 0.7) or (z_com > 1.2) or self.early_termination())
        done = bool((z_com < 0.7) or (z_com > 1.2))
        # return False
        return done

    def goto(self, pos):
        self.sim.data.qpos[:] = pos[:]
        self.sim.forward()

    def get_time(self):
        return self.sim.data.time

    def reset_model(self):
        self.reference_state_init()
        qpos = self.mocap.data_config[self.idx_init]
        qvel = self.mocap.data_vel[self.idx_init]
        # qvel = self.init_qvel
        self.set_state(qpos, qvel)
        observation = self._get_obs()
        self.idx_tmp_count = -self.step_len
        return observation

    def reset_model_init(self):
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv)
        )
        return self._get_obs()

    def viewer_setup(self):
        pass
        # self.viewer.cam.trackbodyid = 1
        # self.viewer.cam.distance = self.model.stat.extent * 1.0
        # self.viewer.cam.lookat[2] = 2.0
        # self.viewer.cam.elevation = -20

if __name__ == "__main__":
    env = DPEnv()
    env.reset_model()

    import cv2
    from VideoSaver import VideoSaver
    width = 640
    height = 480

    # vid_save = VideoSaver(width=width, height=height)
    # env.load_mocap("/home/mingfei/Documents/DeepMimic/mujoco/motions/humanoid3d_crawl.txt")

    action_size = env.action_space.shape[0]
    ac = np.zeros(action_size)
    while True:
        # target_config = env.mocap.data_config[env.idx_curr][:7] # to exclude root joint
        # env.sim.data.qpos[:7] = target_config[:]
        # env.sim.forward()

        qpos = env.mocap.data_config[env.idx_curr]
        qvel = np.zeros_like(env.mocap.data_vel[env.idx_curr])
        # qpos = np.zeros_like(env.mocap.data_config[env.idx_curr])
        # qvel = np.zeros_like(env.mocap.data_vel[env.idx_curr])
        env.set_state(qpos, qvel)
        env.sim.step()

        print("Reward root:", env.calc_config_reward())

        env.idx_curr += 1
        if env.idx_curr == env.mocap_data_len:
            # env.reset_model()
            env.idx_curr = env.idx_curr % env.mocap_data_len

        # print(env._get_obs())
        env.render()

    # vid_save.close()
config.py
from os import getcwd

class Config(object):
    all_motions = ['backflip', 'cartwheel', 'crawl', 'dance_a', 'dance_b', 'getup_facedown',
                   'getup_faceup', 'jump', 'kick', 'punch', 'roll', 'run', 'spin', 'spinkick',
                   'walk']
    curr_path = getcwd()
    # motion = 'spinkick'
    # motion = 'dance_b'
    # motion = 'run'
    motion = 'walk'
    env_name = "dp_env_v3"

    motion_folder = '/mujoco/motions'
    xml_folder = '/mujoco/humanoid_deepmimic/envs/asset'
    xml_test_folder = '/mujoco_test/'

    mocap_path = "%s%s/humanoid3d_%s.txt" % (curr_path, motion_folder, motion)
    xml_path = "%s%s/%s.xml" % (curr_path, xml_folder, env_name)
    xml_path_test = "%s%s/%s_test.xml" % (curr_path, xml_test_folder, env_name)
Hi, I just noticed your issue. You may change this line:
done = bool(self.is_done() or err_config >= 10.0)
to
done = bool(self.is_done() or err_config >= 5.0)
so the episode stops earlier when the movement deviates.
I got this result after training with TimestepsSoFar at 1e7, and the program is still running now.
Hello. I am facing the same problem. The humanoid stands but does not move, exactly as your gif shows. I also set done = bool(self.is_done() or err_config >= 5.0). Could you please tell me what you changed to make it move? Thank you!
I use PyBullet as an alternative to MuJoCo. PyBullet includes a Python version of DeepMimic; maybe you can check it out.
@kemangjaka
Hello, I did the same as the code above. Have you already solved the problem? Could you please tell me how?
Hello everyone. I think I know where the problem is. In addition to the changes above, in trpo.py you also need to:
change line 78 to ob = env.reset()
comment out line 79
comment out line 404
After debugging I found that training needs Reference State Initialization (RSI), but the original code did not actually apply it: the RSI reset was overridden by a normal initialization.
If anybody tries this, please give me feedback. Peace!
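To illustrate the intent (a minimal sketch, not the actual trpo.py diff, since the exact contents of those lines may differ in your copy): each episode should start from a random mocap frame, which DPEnv.reset_model() already does through reference_state_init(); the rollout code only has to call env.reset() and must not overwrite that state with a fixed initialization afterwards.

# Minimal rollout sketch showing Reference State Initialization (RSI):
# env.reset() calls reset_model(), which samples a random mocap frame
# and sets qpos/qvel from it, so every episode starts somewhere in the clip.
from dp_env_v3 import DPEnv

env = DPEnv()
for _ in range(3):
    ob = env.reset()                      # RSI happens here
    done = False
    while not done:
        ac = env.action_space.sample()    # placeholder for the learned policy
        ob, rew, done, info = env.step(ac)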
Sorry, I no longer use this code...
@kemangjaka Here is one training run where I got the character to walk:
https://www.dropbox.com/s/0p0gtocqcfhw3lr/src_walk_ok_done_early_horizon_2048.zip?dl=0
You can find the checkpoint and logs inside. Training the character is not easy: the batch_size and the number of MPI processes matter a lot.
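One thing to keep in mind when tuning (assuming an MPI-parallel TRPO in the style of the old baselines trpo_mpi, where each worker collects its own batch): the effective number of samples per iteration scales with the number of processes.

# rough arithmetic, assuming each MPI worker collects timesteps_per_batch samples
timesteps_per_batch = 2048        # per worker
num_mpi_workers = 8               # e.g. launched with mpirun -np 8
samples_per_iteration = timesteps_per_batch * num_mpi_workers
print(samples_per_iteration)      # 16384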