mingfeisun/DeepMimic_mujoco

wrong policy

huangfugui00 opened this issue · 21 comments

I ran the code successfully. However, the agent cannot walk or run after training. Did I miss anything?
Any help would be great !

Before training a policy, you have to modify the step function in dp_env_v3.py to set up the correct rewards for the task. If you want to train the agent to walk, a walking demonstration also needs to be provided to define the rewards in the dp_env_v3.py step function.

For example, I have implemented one version that loads the example first and then compares it against the training posture to get the rewards:

    def calc_config_reward(self):
        assert len(self.mocap.data) != 0
        err_configs = 0.0

        target_config = self.mocap.data_config[self.idx_curr][7:] # to exclude root joint
        self.curr_frame = target_config
        curr_config = self.get_joint_configs()

        err_configs = self.calc_config_errs(curr_config, target_config)
        # reward_config = math.exp(-self.scale_err * self.scale_pose * err_configs)
        reward_config = math.exp(-err_configs)

        return reward_config, err_configs
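
Note that the function returns a (reward, error) pair, so the caller has to unpack both values. A minimal sketch of the intended usage inside step, matching the unpacking used later in this thread:

    # unpack both return values of calc_config_reward()
    reward_config, err_config = self.calc_config_reward()
    reward = reward_config
    # the error term can also drive early termination
    done = bool(self.is_done() or err_config >= 10.0)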

I have modified the step function in dp_env_v3.py, but it does not work.

    def step(self, action):
        self.step_len = int(self.mocap_dt // self.model.opt.timestep)
        step_times = int(self.mocap_dt // self.model.opt.timestep)
        pos_before = mass_center(self.model, self.sim)
        self.do_simulation(action, step_times)
        pos_after = mass_center(self.model, self.sim)
        observation = self._get_obs()
        reward_alive = 1.0
        # calc_config_reward() returns a (reward, error) tuple, so unpack it
        reward_obs, err_config = self.calc_config_reward()
        reward_acs = np.square(self.data.ctrl).sum()
        reward_forward = 0.25 * (pos_after - pos_before)
        info = dict(reward_obs=reward_obs, reward_acs=reward_acs, reward_forward=reward_forward)
        done = self.is_done()
        if done:
            reward = 0
        else:
            reward = reward_obs - 0.1 * reward_acs + reward_forward + reward_alive
        return observation, reward, done, info

The file below was what I used to train the walking movement:
dp_env_v3.py:

#!/usr/bin/env python3
import numpy as np
import math
import random
from os import getcwd

from mujoco.mocap_v2 import MocapDM
from mujoco.mujoco_interface import MujocoInterface
from mujoco.mocap_util import JOINT_WEIGHT
from mujoco_py import load_model_from_xml, MjSim, MjViewer

from gym.envs.mujoco import mujoco_env
from gym import utils

from config import Config
from pyquaternion import Quaternion

from transformations import quaternion_from_euler, euler_from_quaternion

BODY_JOINTS = ["chest", "neck", "right_shoulder", "right_elbow", 
            "left_shoulder", "left_elbow", "right_hip", "right_knee", 
            "right_ankle", "left_hip", "left_knee", "left_ankle"]

DOF_DEF = {"root": 3, "chest": 3, "neck": 3, "right_shoulder": 3, 
           "right_elbow": 1, "right_wrist": 0, "left_shoulder": 3, "left_elbow": 1, 
           "left_wrist": 0, "right_hip": 3, "right_knee": 1, "right_ankle": 3, 
           "left_hip": 3, "left_knee": 1, "left_ankle": 3}

def mass_center(model, sim):
    mass = np.expand_dims(model.body_mass, 1)
    xpos = sim.data.xipos
    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

class DPEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        xml_file_path = Config.xml_path
        self.xml_file_path = xml_file_path

        self.mocap = MocapDM()
        self.interface = MujocoInterface()
        self.load_mocap(Config.mocap_path)

        self.weight_pose = 0.5
        self.weight_vel = 0.05
        self.weight_root = 0.2
        self.weight_end_eff = 0.15
        self.weight_com = 0.1

        self.scale_pose = 2.0
        self.scale_vel = 0.1
        self.scale_end_eff = 40.0
        self.scale_root = 5.0
        self.scale_com = 10.0
        self.scale_err = 1.0

        self.reference_state_init()
        self.idx_curr = -1
        self.idx_tmp_count = -1

        mujoco_env.MujocoEnv.__init__(self, xml_file_path, 1)
        utils.EzPickle.__init__(self)

    def _quat2euler(self, quat):
        tmp_quat = np.array([quat[1], quat[2], quat[3], quat[0]])
        euler = euler_from_quaternion(tmp_quat, axes='rxyz')
        return euler

    def _get_obs(self):
        config = self.sim.data.qpos.flat.copy()[2:] # ignore root joint: x, y
        vel = self.sim.data.qvel.flat.copy() # full qvel, root included

        return np.concatenate((config, vel))

    def reference_state_init(self):
        self.idx_init = random.randint(0, self.mocap_data_len-1)
        # self.idx_init = 0
        self.idx_curr = self.idx_init
        self.idx_tmp_count = 0

    def early_termination(self):
        target_config = self.mocap.data_config[self.idx_curr][7:] # to exclude root joint
        curr_config = self.get_joint_configs()
        err_configs = self.calc_config_errs(curr_config, target_config)
        if err_configs >= 15.0:
            return True
        return False

    def get_joint_configs(self):
        data = self.sim.data
        return data.qpos[7:] # to exclude root joint

    def get_root_configs(self):
        data = self.sim.data
        return data.qpos[3:7] # root orientation quaternion

    def load_mocap(self, filepath):
        self.mocap.load_mocap(filepath)
        self.mocap_dt = self.mocap.dt
        self.mocap_data_len = len(self.mocap.data)

    def calc_config_errs(self, env_config, mocap_config):
        assert len(env_config) == len(mocap_config)
        return np.sum(np.abs(env_config - mocap_config))

    def calc_root_reward(self): # including root joint
        curr_root = self.mocap.data_config[self.idx_curr][3:7]
        target_root = self.get_root_configs()
        assert len(curr_root) == len(target_root)
        assert len(curr_root) == 4

        q_0 = Quaternion(curr_root[0], curr_root[1], curr_root[2], curr_root[3])
        q_1 = Quaternion(target_root[0], target_root[1], target_root[2], target_root[3])

        q_diff =  q_0.conjugate * q_1
        tmp_diff = q_diff.angle

        err_root = abs(tmp_diff)
        reward_root = math.exp(-err_root)
        return reward_root

    def calc_config_reward(self):
        assert len(self.mocap.data) != 0
        err_configs = 0.0

        target_config = self.mocap.data_config[self.idx_curr][7:] # to exclude root joint
        self.curr_frame = target_config
        curr_config = self.get_joint_configs()

        err_configs = self.calc_config_errs(curr_config, target_config)
        # reward_config = math.exp(-self.scale_err * self.scale_pose * err_configs)
        reward_config = math.exp(-err_configs)

        return reward_config, err_configs

    def step(self, action):
        self.step_len = 1
        step_times = 1

        self.do_simulation(action, step_times)

        reward_config,  err_config = self.calc_config_reward()
        reward_root = 10 * self.calc_root_reward()
        reward = reward_config + reward_root

        info = dict()

        self.idx_curr += 1
        self.idx_curr = self.idx_curr % self.mocap_data_len

        observation = self._get_obs()
        done = bool(self.is_done() or err_config >= 10.0)

        return observation, reward, done, info

    def is_done(self):
        mass = np.expand_dims(self.model.body_mass, 1)
        xpos = self.sim.data.xipos
        z_com = (np.sum(mass * xpos, 0) / np.sum(mass))[2]
        # done = bool((z_com < 0.7) or (z_com > 1.2) or self.early_termination())
        done = bool((z_com < 0.7) or (z_com > 1.2))
        # return False
        return done

    def goto(self, pos):
        self.sim.data.qpos[:] = pos[:]
        self.sim.forward()

    def get_time(self):
        return self.sim.data.time

    def reset_model(self):
        self.reference_state_init()
        qpos = self.mocap.data_config[self.idx_init]
        qvel = self.mocap.data_vel[self.idx_init]
        # qvel = self.init_qvel
        self.set_state(qpos, qvel)
        observation = self._get_obs()
        self.idx_tmp_count = -self.step_len
        return observation

    def reset_model_init(self):
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv)
        )
        return self._get_obs()

    def viewer_setup(self):
        pass
        # self.viewer.cam.trackbodyid = 1
        # self.viewer.cam.distance = self.model.stat.extent * 1.0
        # self.viewer.cam.lookat[2] = 2.0
        # self.viewer.cam.elevation = -20

if __name__ == "__main__":
    env = DPEnv()
    env.reset_model()

    import cv2
    from VideoSaver import VideoSaver
    width = 640
    height = 480

    # vid_save = VideoSaver(width=width, height=height)

    # env.load_mocap("/home/mingfei/Documents/DeepMimic/mujoco/motions/humanoid3d_crawl.txt")
    action_size = env.action_space.shape[0]
    ac = np.zeros(action_size)
    while True:
        # target_config = env.mocap.data_config[env.idx_curr][:7] # to exclude root joint
        # env.sim.data.qpos[:7] = target_config[:]
        # env.sim.forward()

        qpos = env.mocap.data_config[env.idx_curr]
        qvel = np.zeros_like(env.mocap.data_vel[env.idx_curr])
        # qpos = np.zeros_like(env.mocap.data_config[env.idx_curr])
        # qvel = np.zeros_like(env.mocap.data_vel[env.idx_curr])
        env.set_state(qpos, qvel)
        env.sim.step()
        print("Reward root:", env.calc_config_reward())
        env.idx_curr += 1
        if env.idx_curr == env.mocap_data_len:
            # env.reset_model()
            env.idx_curr = env.idx_curr % env.mocap_data_len

        # print(env._get_obs())
        env.render()

    # vid_save.close()

config.py:

from os import getcwd

class Config(object):
    all_motions = ['backflip', 'cartwheel', 'crawl', 'dance_a', 'dance_b', 'getup_facedown',
                   'getup_faceup', 'jump', 'kick', 'punch', 'roll', 'run', 'spin', 'spinkick',
                   'walk']
    curr_path = getcwd()
    # motion = 'spinkick'
    # motion = 'walk'
    motion = 'run'
    env_name = "dp_env_v3"
    # env_name = "dp_env_carry"

    motion_folder = '/mujoco/motions'
    xml_folder = '/mujoco/humanoid_deepmimic/envs/asset'

    mocap_path = "%s%s/humanoid3d_%s.txt"%(curr_path, motion_folder, motion)
    xml_path = "%s%s/%s.xml"%(curr_path, xml_folder, env_name)

You may need to adjust motion_folder in config.py.

I will try your code. Thanks!

The agent just stands upright instead of walking after I updated my code as you suggested. I find that the agent gets a big reward if it stands upright all the time. Did that happen to you?

Did you change the motion in config.py to 'walk'? The motion is what the agent needs to learn.

Yes, I have changed the motion in config.py. By the way, I only modified config.py and used your new dp_env_v3.py.

from os import getcwd

class Config(object):
    all_motions = ['backflip', 'cartwheel', 'crawl', 'dance_a', 'dance_b', 'getup_facedown',
                   'getup_faceup', 'jump', 'kick', 'punch', 'roll', 'run', 'spin', 'spinkick',
                   'walk']
    curr_path = getcwd()
    motion = 'walk'
    env_name = "dp_env_v3"

    motion_folder = '/mujoco/motions'
    xml_folder = '/mujoco/humanoid_deepmimic/envs/asset'
    xml_test_folder = '/mujoco_test/'

    mocap_path = "%s%s/humanoid3d_%s.txt"%(curr_path, motion_folder, motion)
    xml_path = "%s%s/%s.xml"%(curr_path, xml_folder, env_name)

Please change these lines in dp_env_v3.py:

        reward_config,  err_config = self.calc_config_reward()
        reward_root = 10 * self.calc_root_reward()
        reward = reward_config + reward_root

to

        reward_config,  err_config = self.calc_config_reward()
        reward = 10 * reward_config

and set timesteps_per_batch in TRPO sampling to 2048. After training, would you mind showing me your results?
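
For reference, a minimal sketch of what that setting would look like, assuming an OpenAI-baselines-style trpo_mpi.learn entry point and that env and policy_fn are constructed as in the repo's training script; only timesteps_per_batch=2048 comes from this thread, the other hyperparameters are illustrative defaults:

    from baselines.trpo_mpi import trpo_mpi

    # hypothetical call, modeled on baselines' run_mujoco example;
    # only timesteps_per_batch=2048 is the recommendation from this thread
    trpo_mpi.learn(env, policy_fn,
                   timesteps_per_batch=2048,
                   max_kl=0.01, cg_iters=10, cg_damping=0.1,
                   gamma=0.99, lam=0.98,
                   vf_iters=5, vf_stepsize=1e-3,
                   max_timesteps=int(1e7))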

My result is as follows:
(GIF: DP_free_walking)

Sorry, I was too busy to reply to you yesterday. I will show you the result after training. Thank you.

I got a result after training, with TimestepsSoFar at 1e7; the program is still running now.
(GIFs)

It looks like the character is not imitating the walking movement. You can find the following code:

    def early_termination(self):
        target_config = self.mocap.data_config[self.idx_curr][7:] # to exclude root joint
        curr_config = self.get_joint_configs()
        err_configs = self.calc_config_errs(curr_config, target_config)
        if err_configs >= 15.0:
            return True
        return False

and change err_configs >= 15.0 to err_configs >= 5.0 to terminate the episode more strictly when the movement deviates.
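
With the stricter threshold, the function would read:

    def early_termination(self):
        target_config = self.mocap.data_config[self.idx_curr][7:] # to exclude root joint
        curr_config = self.get_joint_configs()
        err_configs = self.calc_config_errs(curr_config, target_config)
        if err_configs >= 5.0: # stricter bound: terminate sooner when tracking deviates
            return True
        return False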

Hi @huangfugui00 @mingfeisun,
I'm also trying to imitate the walking motion.
I modified dp_env_v3.py, config.py, and trpo.py (timesteps_per_batch=2048 at line 345, num_timesteps=1e7).
However, the humanoid stands straight and does not move.
Is there anything else that should be changed?

Thank you in advance!

dp_env_v3.py

#!/usr/bin/env python3
import numpy as np
import math
import random
from os import getcwd

from mujoco.mocap_v2 import MocapDM
from mujoco.mujoco_interface import MujocoInterface
from mujoco.mocap_util import JOINT_WEIGHT
from mujoco_py import load_model_from_xml, MjSim, MjViewer

from gym.envs.mujoco import mujoco_env
from gym import utils

from config import Config
from pyquaternion import Quaternion

from transformations import quaternion_from_euler, euler_from_quaternion

BODY_JOINTS = ["chest", "neck", "right_shoulder", "right_elbow", 
            "left_shoulder", "left_elbow", "right_hip", "right_knee", 
            "right_ankle", "left_hip", "left_knee", "left_ankle"]

DOF_DEF = {"root": 3, "chest": 3, "neck": 3, "right_shoulder": 3, 
           "right_elbow": 1, "right_wrist": 0, "left_shoulder": 3, "left_elbow": 1, 
           "left_wrist": 0, "right_hip": 3, "right_knee": 1, "right_ankle": 3, 
           "left_hip": 3, "left_knee": 1, "left_ankle": 3}

def mass_center(model, sim):
    mass = np.expand_dims(model.body_mass, 1)
    xpos = sim.data.xipos
    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]

class DPEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        xml_file_path = Config.xml_path
        self.xml_file_path = xml_file_path

        self.mocap = MocapDM()
        self.interface = MujocoInterface()
        self.load_mocap(Config.mocap_path)

        self.weight_pose = 0.5
        self.weight_vel = 0.05
        self.weight_root = 0.2
        self.weight_end_eff = 0.15
        self.weight_com = 0.1

        self.scale_pose = 2.0
        self.scale_vel = 0.1
        self.scale_end_eff = 40.0
        self.scale_root = 5.0
        self.scale_com = 10.0
        self.scale_err = 1.0

        self.reference_state_init()
        self.idx_curr = -1
        self.idx_tmp_count = -1

        mujoco_env.MujocoEnv.__init__(self, xml_file_path, 1)
        utils.EzPickle.__init__(self)

    def _quat2euler(self, quat):
        tmp_quat = np.array([quat[1], quat[2], quat[3], quat[0]])
        euler = euler_from_quaternion(tmp_quat, axes='rxyz')
        return euler

    def _get_obs(self):
        config = self.sim.data.qpos.flat.copy()[2:] # ignore root joint: x, y
        vel = self.sim.data.qvel.flat.copy() # full qvel, root included

        return np.concatenate((config, vel))

    def reference_state_init(self):
        self.idx_init = random.randint(0, self.mocap_data_len-1)
        # self.idx_init = 0
        self.idx_curr = self.idx_init
        self.idx_tmp_count = 0

    def early_termination(self):
        target_config = self.mocap.data_config[self.idx_curr][7:] # to exclude root joint
        curr_config = self.get_joint_configs()
        err_configs = self.calc_config_errs(curr_config, target_config)
        if err_configs >= 5.0:
            return True
        return False

    def get_joint_configs(self):
        data = self.sim.data
        return data.qpos[7:] # to exclude root joint

    def get_root_configs(self):
        data = self.sim.data
        return data.qpos[3:7] # root orientation quaternion

    def load_mocap(self, filepath):
        self.mocap.load_mocap(filepath)
        self.mocap_dt = self.mocap.dt
        self.mocap_data_len = len(self.mocap.data)

    def calc_config_errs(self, env_config, mocap_config):
        assert len(env_config) == len(mocap_config)
        return np.sum(np.abs(env_config - mocap_config))

    def calc_root_reward(self): # including root joint
        curr_root = self.mocap.data_config[self.idx_curr][3:7]
        target_root = self.get_root_configs()
        assert len(curr_root) == len(target_root)
        assert len(curr_root) == 4

        q_0 = Quaternion(curr_root[0], curr_root[1], curr_root[2], curr_root[3])
        q_1 = Quaternion(target_root[0], target_root[1], target_root[2], target_root[3])

        q_diff =  q_0.conjugate * q_1
        tmp_diff = q_diff.angle

        err_root = abs(tmp_diff)
        reward_root = math.exp(-err_root)
        return reward_root

    def calc_config_reward(self):
        assert len(self.mocap.data) != 0
        err_configs = 0.0

        target_config = self.mocap.data_config[self.idx_curr][7:] # to exclude root joint
        self.curr_frame = target_config
        curr_config = self.get_joint_configs()

        err_configs = self.calc_config_errs(curr_config, target_config)
        # reward_config = math.exp(-self.scale_err * self.scale_pose * err_configs)
        reward_config = math.exp(-err_configs)

        return reward_config, err_configs

    def step(self, action):
        self.step_len = 1
        step_times = 1

        self.do_simulation(action, step_times)

        reward_config,  err_config = self.calc_config_reward()
        #reward_root = 10 * self.calc_root_reward()
        #reward = reward_config + reward_root
        reward = 10 * reward_config

        info = dict()

        self.idx_curr += 1
        self.idx_curr = self.idx_curr % self.mocap_data_len

        observation = self._get_obs()
        done = bool(self.is_done() or err_config >= 10.0)

        return observation, reward, done, info

    def is_done(self):
        mass = np.expand_dims(self.model.body_mass, 1)
        xpos = self.sim.data.xipos
        z_com = (np.sum(mass * xpos, 0) / np.sum(mass))[2]
        # done = bool((z_com < 0.7) or (z_com > 1.2) or self.early_termination())
        done = bool((z_com < 0.7) or (z_com > 1.2))
        # return False
        return done

    def goto(self, pos):
        self.sim.data.qpos[:] = pos[:]
        self.sim.forward()

    def get_time(self):
        return self.sim.data.time

    def reset_model(self):
        self.reference_state_init()
        qpos = self.mocap.data_config[self.idx_init]
        qvel = self.mocap.data_vel[self.idx_init]
        # qvel = self.init_qvel
        self.set_state(qpos, qvel)
        observation = self._get_obs()
        self.idx_tmp_count = -self.step_len
        return observation

    def reset_model_init(self):
        c = 0.01
        self.set_state(
            self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv)
        )
        return self._get_obs()

    def viewer_setup(self):
        pass
        # self.viewer.cam.trackbodyid = 1
        # self.viewer.cam.distance = self.model.stat.extent * 1.0
        # self.viewer.cam.lookat[2] = 2.0
        # self.viewer.cam.elevation = -20

if __name__ == "__main__":
    env = DPEnv()
    env.reset_model()

    import cv2
    from VideoSaver import VideoSaver
    width = 640
    height = 480

    # vid_save = VideoSaver(width=width, height=height)

    # env.load_mocap("/home/mingfei/Documents/DeepMimic/mujoco/motions/humanoid3d_crawl.txt")
    action_size = env.action_space.shape[0]
    ac = np.zeros(action_size)
    while True:
        # target_config = env.mocap.data_config[env.idx_curr][:7] # to exclude root joint
        # env.sim.data.qpos[:7] = target_config[:]
        # env.sim.forward()

        qpos = env.mocap.data_config[env.idx_curr]
        qvel = np.zeros_like(env.mocap.data_vel[env.idx_curr])
        # qpos = np.zeros_like(env.mocap.data_config[env.idx_curr])
        # qvel = np.zeros_like(env.mocap.data_vel[env.idx_curr])
        env.set_state(qpos, qvel)
        env.sim.step()
        print("Reward root:", env.calc_config_reward())
        env.idx_curr += 1
        if env.idx_curr == env.mocap_data_len:
            # env.reset_model()
            env.idx_curr = env.idx_curr % env.mocap_data_len

        # print(env._get_obs())
        env.render()

    # vid_save.close()

config.py

from os import getcwd

class Config(object):
    all_motions = ['backflip', 'cartwheel', 'crawl', 'dance_a', 'dance_b', 'getup_facedown',
                   'getup_faceup', 'jump', 'kick', 'punch', 'roll', 'run', 'spin', 'spinkick',
                   'walk']
    curr_path = getcwd()
    # motion = 'spinkick'
    #motion = 'dance_b'
    #motion = 'run'
    motion = 'walk'
    env_name = "dp_env_v3"
    motion_folder = '/mujoco/motions'
    xml_folder = '/mujoco/humanoid_deepmimic/envs/asset'
    xml_test_folder = '/mujoco_test/'

    mocap_path = "%s%s/humanoid3d_%s.txt"%(curr_path, motion_folder, motion)
    xml_path = "%s%s/%s.xml"%(curr_path, xml_folder, env_name)
    xml_path_test = "%s%s/%s_test.xml"%(curr_path, xml_test_folder, env_name)


Hi, I just noticed your issue. You may change this line in dp_env_v3.py:

done = bool(self.is_done() or err_config >= 10.0)

to

done = bool(self.is_done() or err_config >= 5.0)

to stop the episode earlier when the movement deviates.

Hello. I am facing the same problem. The humanoid stands but does not move, exactly as the GIF above shows. I also set done = bool(self.is_done() or err_config >= 5.0). Could you please tell me what you changed to make it move? Thank you!

I use PyBullet as an alternative to MuJoCo. PyBullet includes a Python version of DeepMimic; maybe you can check it.

@kemangjaka
Hello, I did the same as the code above. Have you already solved the problem? Could you please tell me how?

Hello everyone. I think I know where the problem is. In addition to the changes above, in trpo.py you also need to:
change line 78 to ob = env.reset()
comment out line 79
comment out line 404.
After debugging, I found that training needs Reference State Initialization (RSI), but the original code didn't actually implement it; the reference-state reset was overridden by a normal initialization.
If anybody tries this, please give me feedback. Peace!
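
In other words, each episode should start from a randomly sampled mocap frame rather than a fixed initial state. A rough sketch of the difference, based on the env code in this thread (the trpo.py side is paraphrased here, since only its line numbers are given above):

    # With RSI: env.reset() calls reset_model(), which samples a random
    # reference frame via reference_state_init() and sets qpos/qvel from mocap
    ob = env.reset()

    # Without RSI (what the original rollout effectively did): every episode
    # starts from the same state, so for a cyclic motion like walking the
    # policy can settle into standing still as a local optimum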

@kemangjaka
Hello, I did the same as the code above. Have you already solved the problem? Could you please tell me how?

Sorry, I no longer use this code...

Here is one training run where I got the character to walk:
https://www.dropbox.com/s/0p0gtocqcfhw3lr/src_walk_ok_done_early_horizon_2048.zip?dl=0

You can find the checkpoint and logs. Training the character is not easy. The batch_size and number of MPI processes matter a lot. @kemangjaka
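
To illustrate why the number of MPI processes matters: assuming a baselines-style setup where each worker collects its own batch and gradients are averaged across workers, the effective samples per TRPO update scale with the worker count. A small sketch (mpi4py usage only, nothing repo-specific):

    from mpi4py import MPI

    comm = MPI.COMM_WORLD
    timesteps_per_batch = 2048  # per-worker batch, as suggested earlier in this thread
    # total samples contributing to each policy update
    effective_batch = timesteps_per_batch * comm.Get_size()
    print("rank %d: effective batch per update = %d" % (comm.Get_rank(), effective_batch))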