Replacing `"global"` with `"local"` coordinate system
pseudo-rnd-thoughts opened this issue · 9 comments
Hi,
I'm one of the developers on OpenAI Gym and Gymnasium (a fork of Gym).
In the recent Mujoco 2.3.4 release, the first point mentions the "global"
setting being removed from the coordinate system.
Rerunning our CI with the latest MuJoCo version, we found that one of our models, Hopper, uses the global coordinate system. I have experimented with just changing this to "local";
however, in testing, this does not produce the same output, which is critical for backward compatibility.
I can't find the PR that introduced this change, nor any documentation on how to update our models for it.
Could you recommend how we update our models such that the same global
model is produced?
Here is a model which explains my question:
<mujoco model="hopper">
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="1" condim="1" contype="1" margin="0.001" material="geom" rgba="0.8 0.6 .4 1" solimp=".8 .8 .01" solref=".02 1"/>
<motor ctrllimited="true" ctrlrange="-.4 .4"/>
</default>
<option integrator="RK4" timestep="0.002"/>
<visual>
<map znear="0.02"/>
</visual>
<worldbody>
<light cutoff="100" diffuse="1 1 1" dir="-0 0 -1.3" directional="true" exponent="1" pos="0 0 1.3" specular=".1 .1 .1"/>
<geom conaffinity="1" condim="3" name="floor" pos="0 0 0" rgba="0.8 0.9 0.8 1" size="20 20 .125" type="plane" material="MatPlane"/>
<body name="torso" pos="0 0 1.25">
<camera name="track" mode="trackcom" pos="0 -3 1" xyaxes="1 0 0 0 0 1"/>
<joint armature="0" axis="1 0 0" damping="0" limited="false" name="rootx" pos="0 0 0" stiffness="0" type="slide"/>
<joint armature="0" axis="0 0 1" damping="0" limited="false" name="rootz" pos="0 0 0" ref="1.25" stiffness="0" type="slide"/>
<joint armature="0" axis="0 1 0" damping="0" limited="false" name="rooty" pos="0 0 1.25" stiffness="0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.45 0 0 1.05" name="torso_geom" size="0.05" type="capsule"/>
<body name="thigh" pos="0 0 1.05">
<joint axis="0 -1 0" name="thigh_joint" pos="0 0 1.05" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 1.05 0 0 0.6" name="thigh_geom" size="0.05" type="capsule"/>
<body name="leg" pos="0 0 0.35">
<joint axis="0 -1 0" name="leg_joint" pos="0 0 0.6" range="-150 0" type="hinge"/>
<geom friction="0.9" fromto="0 0 0.6 0 0 0.1" name="leg_geom" size="0.04" type="capsule"/>
<body name="foot" pos="0.13 0 0">
<joint axis="0 -1 0" name="foot_joint" pos="0 0 0.1" range="-45 45" type="hinge"/>
<geom friction="2.0" fromto="-0.13 0 0.1 0.26 0 0.1" name="foot_geom" size="0.06" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="thigh_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="leg_joint"/>
<motor ctrllimited="true" ctrlrange="-1.0 1.0" gear="200.0" joint="foot_joint"/>
</actuator>
<asset>
<texture type="skybox" builtin="gradient" rgb1=".4 .5 .6" rgb2="0 0 0"
width="100" height="100"/>
<texture builtin="flat" height="1278" mark="cross" markrgb="1 1 1" name="texgeom" random="0.01" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" type="cube" width="127"/>
<texture builtin="checker" height="100" name="texplane" rgb1="0 0 0" rgb2="0.8 0.8 0.8" type="2d" width="100"/>
<material name="MatPlane" reflectance="0.5" shininess="1" specular="1" texrepeat="60 60" texture="texplane"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
</mujoco>
This is the basic environment-equivalence test that I have written. I copied the hopper.xml
and replaced global
with local
on line 2 and copied the hopper_v4.py
to hopper_local_v4
and changed the XML filename on line 208.
from numpy.testing import assert_array_equal
from gymnasium.envs.mujoco.hopper_v4 import HopperEnv as HopperGlobal
from gymnasium.envs.mujoco.hopper_local_v4 import HopperEnv as HopperLocal
global_env = HopperGlobal()
local_env = HopperLocal()
SEED = 123
NUM_STEPS = 1000
initial_obs_1, initial_info_1 = global_env.reset(seed=SEED)
initial_obs_2, initial_info_2 = local_env.reset(seed=SEED)
assert_array_equal(initial_obs_1, initial_obs_2)
global_env.action_space.seed(SEED)
for time_step in range(NUM_STEPS):
# We don't evaluate the determinism of actions
action = global_env.action_space.sample()
obs_1, rew_1, terminated_1, truncated_1, info_1 = global_env.step(action)
obs_2, rew_2, terminated_2, truncated_2, info_2 = local_env.step(action)
assert_array_equal(obs_1, obs_2, f"[{time_step}] ")
assert global_env.observation_space.contains(
obs_1
) # obs_2 verified by previous assertion
assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}"
assert (
terminated_1 == terminated_2
), f"[{time_step}] done 1={terminated_1}, done 2={terminated_2}"
assert (
truncated_1 == truncated_2
), f"[{time_step}] done 1={truncated_1}, done 2={truncated_2}"
# assert_equals(info_1, info_2, f"[{time_step}] ")
if (
terminated_1 or truncated_1
): # terminated_2, truncated_2 verified by previous assertion
global_env.reset(seed=SEED)
local_env.reset(seed=SEED)
global_env.close()
local_env.close()
Hi @pseudo-rnd-thoughts ,
As clearly explained in the changelog, this should be fixed by loading and saving in MuJoCo 2.3.3. Indeed, just changing global to local will not produce the right model.
Please load and save in MuJoCo 2.3.3 (you can do this in simulate
or the compile
utility (compile from xml to xml)). For your convenience, I'm pasting the model that we have internally, below.
Let me know if you experience any issues.
converted hopper3
<!--
Regenerated by loading and saving the original model in MuJoCo 2.3.3 using
/sample/compile oldmodel.xml newmodel.xml
In addition <compiler autolimit="true"> was deleted and explicit limits added, where required.
-->
<mujoco model="hopper">
<compiler angle="radian"/>
<option integrator="RK4"/>
<visual>
<map znear="0.02"/>
</visual>
<default>
<joint limited="true" armature="1" damping="1"/>
<geom condim="1" solimp="0.8 0.8 0.01 0.5 2" margin="0.001" material="self"/>
<general ctrllimited="true" ctrlrange="-0.4 0.4"/>
</default>
<custom>
<numeric name="min_torso_y_angle" size="1" data="-1"/>
<numeric name="max_torso_y_angle" size="1" data="1"/>
<numeric name="min_torso_height" size="1" data="0.45"/>
<numeric name="max_state_vector" size="1" data="100"/>
<numeric name="ctrl_cost_coeff" size="1" data="0.001"/>
<numeric name="qpos_random_init_scale" size="1" data="0.005"/>
<numeric name="qvel_random_init_scale" size="1" data="0.005"/>
<text name="qpos_random_init_method" data="uniform"/>
<text name="qvel_random_init_method" data="uniform"/>
</custom>
<asset>
<texture type="2d" name="grid" builtin="checker" mark="edge" rgb1="0.1 0.2 0.3" rgb2="0.2 0.3 0.4" markrgb="0.2 0.3 0.4" width="300" height="300"/>
<texture type="skybox" name="skybox" builtin="gradient" mark="random" rgb1="0.4 0.6 0.8" rgb2="0 0 0" markrgb="1 1 1" width="800" height="800"/>
<material name="grid" texture="grid" texuniform="true" reflectance="0.2"/>
<material name="self" rgba="0.7 0.5 0.3 1"/>
<material name="self_default" rgba="0.7 0.5 0.3 1"/>
<material name="self_highlight" rgba="0 0.5 0.3 1"/>
<material name="effector" rgba="0.7 0.4 0.2 1"/>
<material name="effector_default" rgba="0.7 0.4 0.2 1"/>
<material name="effector_highlight" rgba="0 0.5 0.3 1"/>
<material name="decoration" rgba="0.3 0.5 0.7 1"/>
<material name="eye" rgba="0 0.2 1 1"/>
<material name="target" rgba="0.6 0.3 0.3 1"/>
<material name="target_default" rgba="0.6 0.3 0.3 1"/>
<material name="target_highlight" rgba="0.6 0.3 0.3 0.4"/>
<material name="site" rgba="0.5 0.5 0.5 0.3"/>
</asset>
<worldbody>
<geom name="floor" size="20 20 0.125" type="plane" condim="3" material="grid"/>
<light pos="0 0 1.3" dir="-0.5547 0 -0.83205" directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular="0.1 0.1 0.1"/>
<body name="torso" pos="0 0 1.25">
<joint name="rootx" pos="0 0 -1.25" axis="1 0 0" limited="false" type="slide" armature="0" damping="0"/>
<joint name="rootz" pos="0 0 -1.25" axis="0 0 1" limited="false" type="slide" ref="1.25" armature="0" damping="0"/>
<joint name="rooty" pos="0 0 0" axis="0 1 0" limited="false" armature="0" damping="0"/>
<geom name="torso_geom" size="0.05 0.2" type="capsule" friction="0.9 0.005 0.0001"/>
<camera name="tilted" pos="2.7 3 -0.25" quat="0.660169 -0.660169 0.253331 -0.253331" mode="trackcom"/>
<camera name="track" pos="0 3 -0.25" quat="0.707107 -0.707107 0 0" mode="trackcom"/>
<body name="thigh" pos="0 0 -0.2">
<joint name="thigh_joint" pos="0 0 0" axis="0 -1 0" range="-2.61799 0" limited="true"/>
<geom name="thigh_geom" size="0.05 0.225" pos="0 0 -0.225" type="capsule" friction="0.9 0.005 0.0001"/>
<body name="foot" pos="0.065 0 -0.45">
<joint name="foot_joint" pos="-0.065 0 0" axis="0 -1 0" range="-0.785398 0.785398" limited="true"/>
<geom name="foot_geom" size="0.06 0.195" quat="0.707107 0 -0.707107 0" type="capsule" friction="2 0.005 0.0001"/>
</body>
</body>
</body>
</worldbody>
<actuator>
<general joint="thigh_joint" ctrlrange="-1 1" gear="200 0 0 0 0 0" ctrllimited="true"/>
<general joint="foot_joint" ctrlrange="-1 1" gear="200 0 0 0 0 0" ctrllimited="true"/>
</actuator>
</mujoco>
Thanks for that; however, neither using your model nor converting the gym model with the compile tool you mention produces the same environment. (it's a bit alarming if the internal dm hopper model is different from the gym model in terms of reproducibility).
To test if the issue originates with hopper or with compiling the model, I used the tool for Ant as well and found the same issue.
<mujoco model="ant">
<compiler angle="radian" autolimits="true"/>
<option timestep="0.01" integrator="RK4"/>
<default class="main">
<joint limited="true" armature="1" damping="1"/>
<geom conaffinity="0" friction="1 0.5 0.5" margin="0.01" density="5" rgba="0.8 0.6 0.4 1"/>
</default>
<custom>
<numeric name="init_qpos" size="15" data="0 0 0.55 1 0 0 0 0 1 0 -1 0 -1 0 1"/>
</custom>
<asset>
<texture type="skybox" builtin="gradient" rgb1="1 1 1" rgb2="0 0 0" width="100" height="600"/>
<texture type="cube" name="texgeom" builtin="flat" mark="cross" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" width="127" height="762"/>
<texture type="2d" name="texplane" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100"/>
<material name="MatPlane" texture="texplane" texrepeat="60 60" specular="1" shininess="1" reflectance="0.5"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<geom name="floor" size="40 40 40" type="plane" conaffinity="1" material="MatPlane" rgba="0.8 0.9 0.8 1"/>
<light pos="0 0 1.3" dir="0 0 -1" directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular="0.1 0.1 0.1"/>
<body name="torso" pos="0 0 0.75" gravcomp="0">
<joint name="root" limited="false" type="free" margin="0.01" armature="0" damping="0"/>
<geom name="torso_geom" size="0.25"/>
<camera name="track" pos="0 -3 0.3" quat="0.707107 0.707107 0 0" mode="trackcom"/>
<body name="front_left_leg" pos="0 0 0" gravcomp="0">
<geom name="aux_1_geom" size="0.08 0.141421" pos="0.1 0.1 0" quat="0.707107 0.5 -0.5 0" type="capsule"/>
<body name="aux_1" pos="0.2 0.2 0" gravcomp="0">
<joint name="hip_1" pos="0 0 0" axis="0 0 1" range="-0.523599 0.523599"/>
<geom name="left_leg_geom" size="0.08 0.141421" pos="0.1 0.1 0" quat="0.707107 0.5 -0.5 0" type="capsule"/>
<body pos="0.2 0.2 0" gravcomp="0">
<joint name="ankle_1" pos="0 0 0" axis="-0.707107 0.707107 0" range="0.523599 1.22173"/>
<geom name="left_ankle_geom" size="0.08 0.282843" pos="0.2 0.2 0" quat="0.707107 0.5 -0.5 0" type="capsule"/>
</body>
</body>
</body>
<body name="front_right_leg" pos="0 0 0" gravcomp="0">
<geom name="aux_2_geom" size="0.08 0.141421" pos="-0.1 0.1 0" quat="0.707107 0.5 0.5 0" type="capsule"/>
<body name="aux_2" pos="-0.2 0.2 0" gravcomp="0">
<joint name="hip_2" pos="0 0 0" axis="0 0 1" range="-0.523599 0.523599"/>
<geom name="right_leg_geom" size="0.08 0.141421" pos="-0.1 0.1 0" quat="0.707107 0.5 0.5 0" type="capsule"/>
<body pos="-0.2 0.2 0" gravcomp="0">
<joint name="ankle_2" pos="0 0 0" axis="0.707107 0.707107 0" range="-1.22173 -0.523599"/>
<geom name="right_ankle_geom" size="0.08 0.282843" pos="-0.2 0.2 0" quat="0.707107 0.5 0.5 0" type="capsule"/>
</body>
</body>
</body>
<body name="back_leg" pos="0 0 0" gravcomp="0">
<geom name="aux_3_geom" size="0.08 0.141421" pos="-0.1 -0.1 0" quat="0.707107 -0.5 0.5 0" type="capsule"/>
<body name="aux_3" pos="-0.2 -0.2 0" gravcomp="0">
<joint name="hip_3" pos="0 0 0" axis="0 0 1" range="-0.523599 0.523599"/>
<geom name="back_leg_geom" size="0.08 0.141421" pos="-0.1 -0.1 0" quat="0.707107 -0.5 0.5 0" type="capsule"/>
<body pos="-0.2 -0.2 0" gravcomp="0">
<joint name="ankle_3" pos="0 0 0" axis="-0.707107 0.707107 0" range="-1.22173 -0.523599"/>
<geom name="third_ankle_geom" size="0.08 0.282843" pos="-0.2 -0.2 0" quat="0.707107 -0.5 0.5 0" type="capsule"/>
</body>
</body>
</body>
<body name="right_back_leg" pos="0 0 0" gravcomp="0">
<geom name="aux_4_geom" size="0.08 0.141421" pos="0.1 -0.1 0" quat="0.707107 -0.5 -0.5 0" type="capsule"/>
<body name="aux_4" pos="0.2 -0.2 0" gravcomp="0">
<joint name="hip_4" pos="0 0 0" axis="0 0 1" range="-0.523599 0.523599"/>
<geom name="rightback_leg_geom" size="0.08 0.141421" pos="0.1 -0.1 0" quat="0.707107 -0.5 -0.5 0" type="capsule"/>
<body pos="0.2 -0.2 0" gravcomp="0">
<joint name="ankle_4" pos="0 0 0" axis="0.707107 0.707107 0" range="0.523599 1.22173"/>
<geom name="fourth_ankle_geom" size="0.08 0.282843" pos="0.2 -0.2 0" quat="0.707107 -0.5 -0.5 0" type="capsule"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<general joint="hip_4" ctrlrange="-1 1" gear="150 0 0 0 0 0" actdim="0"/>
<general joint="ankle_4" ctrlrange="-1 1" gear="150 0 0 0 0 0" actdim="0"/>
<general joint="hip_1" ctrlrange="-1 1" gear="150 0 0 0 0 0" actdim="0"/>
<general joint="ankle_1" ctrlrange="-1 1" gear="150 0 0 0 0 0" actdim="0"/>
<general joint="hip_2" ctrlrange="-1 1" gear="150 0 0 0 0 0" actdim="0"/>
<general joint="ankle_2" ctrlrange="-1 1" gear="150 0 0 0 0 0" actdim="0"/>
<general joint="hip_3" ctrlrange="-1 1" gear="150 0 0 0 0 0" actdim="0"/>
<general joint="ankle_3" ctrlrange="-1 1" gear="150 0 0 0 0 0" actdim="0"/>
</actuator>
</mujoco>
Is it expected for the compiled model to diverge, or do you expect the compiled model to act identically to the original XML model?
Is there any particular way that you recommend debugging this?
- Can you check the original model vs the converted model in 2.3.3? (in case you were comparing to 2.3.4).
- Can you elaborate on the kinds of differences you are seeing? Are they large in a single step or small/numerical which grow when integrating?
- In order to do a proper debug, I would load both models in Python (in version 2.3.3) and compare the arrays of mjModel in Python.
- the only differences between those 2 models are the following callable member variables (note:
_address
is the only non-callable)
__copy__
__deepcopy__
__delattr__
__dir__
__eq__
__format__
__ge__
__getattribute__
__getstate__
__gt__
__hash__
__init__
__le__
__lt__
__ne__
__reduce__
__reduce_ex__
__repr__
__setattr__
__setstate__
__sizeof__
__str__
_address
actuator
/home/master-andreas/test/test.py:18: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.
if getattr(global_env.unwrapped.model, t) != getattr(local_env.unwrapped.model, t):
body
cam
camera
eq
equality
exclude
geom
hfield
jnt
joint
key
keyframe
light
mat
material
mesh
numeric
pair
sensor
site
skin
tendon
tex
texture
tuple
- the difference of the observations after 4 simulation time steps is
[ 0.00000000e+00 4.33680869e-19 3.25260652e-19 0.00000000e+00
4.33680869e-19 -2.77555756e-17 -6.93889390e-18 0.00000000e+00
1.38777878e-17 0.00000000e+00 1.11022302e-16]
Note: I used this hopper.xml
<mujoco model="hopper">
<compiler angle="radian" autolimits="true"/>
<option integrator="RK4"/>
<visual>
<map znear="0.02"/>
</visual>
<default class="main">
<joint limited="true" armature="1" damping="1"/>
<geom condim="1" solimp="0.8 0.8 0.01 0.5 2" margin="0.001" material="geom" rgba="0.8 0.6 0.4 1"/>
<general ctrllimited="true" ctrlrange="-0.4 0.4"/>
</default>
<asset>
<texture type="skybox" builtin="gradient" rgb1="0.4 0.5 0.6" rgb2="0 0 0" width="100" height="600"/>
<texture type="cube" name="texgeom" builtin="flat" mark="cross" rgb1="0.8 0.6 0.4" rgb2="0.8 0.6 0.4" markrgb="1 1 1" width="127" height="762"/>
<texture type="2d" name="texplane" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100"/>
<material name="MatPlane" texture="texplane" texrepeat="60 60" specular="1" shininess="1" reflectance="0.5"/>
<material name="geom" texture="texgeom" texuniform="true"/>
</asset>
<worldbody>
<geom name="floor" size="20 20 0.125" type="plane" condim="3" material="MatPlane" rgba="0.8 0.9 0.8 1"/>
<light pos="0 0 1.3" dir="0 0 -1" directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular="0.1 0.1 0.1"/>
<body name="torso" pos="0 0 1.25" gravcomp="0">
<joint name="rootx" pos="0 0 -1.25" axis="1 0 0" limited="false" type="slide" armature="0" damping="0"/>
<joint name="rootz" pos="0 0 -1.25" axis="0 0 1" limited="false" type="slide" ref="1.25" armature="0" damping="0"/>
<joint name="rooty" pos="0 0 0" axis="0 1 0" limited="false" armature="0" damping="0"/>
<geom name="torso_geom" size="0.05 0.2" type="capsule" friction="0.9 0.005 0.0001"/>
<camera name="track" pos="0 -3 -0.25" quat="0.707107 0.707107 0 0" mode="trackcom"/>
<body name="thigh" pos="0 0 -0.2" gravcomp="0">
<joint name="thigh_joint" pos="0 0 0" axis="0 -1 0" range="-2.61799 0"/>
<geom name="thigh_geom" size="0.05 0.225" pos="0 0 -0.225" type="capsule" friction="0.9 0.005 0.0001"/>
<body name="leg" pos="0 0 -0.7" gravcomp="0">
<joint name="leg_joint" pos="0 0 0.25" axis="0 -1 0" range="-2.61799 0"/>
<geom name="leg_geom" size="0.04 0.25" type="capsule" friction="0.9 0.005 0.0001"/>
<body name="foot" pos="0.13 0 -0.35" gravcomp="0">
<joint name="foot_joint" pos="-0.13 0 0.1" axis="0 -1 0" range="-0.785398 0.785398"/>
<geom name="foot_geom" size="0.06 0.195" pos="-0.065 0 0.1" quat="0.707107 0 -0.707107 0" type="capsule" friction="2 0.005 0.0001"/>
</body>
</body>
</body>
</body>
</worldbody>
<actuator>
<general joint="thigh_joint" ctrlrange="-1 1" gear="200 0 0 0 0 0" actdim="0"/>
<general joint="leg_joint" ctrlrange="-1 1" gear="200 0 0 0 0 0" actdim="0"/>
<general joint="foot_joint" ctrlrange="-1 1" gear="200 0 0 0 0 0" actdim="0"/>
</actuator>
</mujoco>
Thanks @Kallinteris-Andreas. The error for the first 4 time steps is very small, but could you plot the average error over the next 1000 time steps?
These small differences could cause a massive difference over time.
I will do the same test on my side to verify, but looking at your numbers I believe this works as intended.
Let me clarify. The nature of physics simulation, especially when contacts are involved, is that any difference, no matter how small, will quickly grow larger with integration. This fact is not MuJoCo-specific and is related to high Lyapunov exponents around contacts. This means that, for example, every time we optimise any core numerical function, even something like replacing a + (b + (c + d))
with (a + b) + (c + d)
, we lose exact numerical reproducibility.
Our policy (which admittedly is not publicly documented, something to fix) is:
- Small numerical changes like my addition-order example above are not considered breaking.
- Large breaking changes are made while giving the user a way to revert or disable the change, such that it is no longer breaking.
The change discussed here (removing the global
option) is of type 2: by following the load-and-save procedure, it reverts to being of type 1.
Regarding your comment
it's a bit alarming if the internal dm hopper model is different from the gym model in terms of reproducibility
In a sense you are right, though of course after you update the external model we will sync to upstream. However, you can also see this as a way for us to verify that our changes are indeed benign (no-op or numerical). If I didn't have internal tests based on gym that were breaking and that I had to fix, I couldn't have sent you the converted model above. So there is a silver lining 🙂
Closing for now, if my detailed comparison of the original and converted models (hopper
and walker2d
) leads me to suspect that there is a not-merely-numerical breakage, I will reopen.
Finally, for reference, this is the commit: 7cc42ec
@yuvaltassa Thanks for explaining your position and it all makes sense to me.
For now, we will pin the mujoco version to remove the issue for users.
For the future, we will look into updating all of the models (in gymnasium, gym is not being maintained currently) using the compile process that you mention and rerun some training algorithms to assert that the training performance is not affected.
I will get back to you if we find something strange.
@pseudo-rnd-thoughts after 4000 simulation steps (1 full episode for gymnasium)
we get an observation delta of
[ 0.06988656 -0.04739737 0.58156766 -0.04711179 0.72577743 -0.19325011
0.16538888 0.15741661 0.8933585 -0.08551425 0.96036895 0.30047739]
where the first 6 are qpos, and the next 6 are qvel
Update: regarding the evaluation of the gymnasium hopper
If I stop the simulation when the hopper is unhealthy (i.e. the hopper is about to fall into a non-recoverable state),
the observable state delta is much smaller (note: this runs for considerably fewer steps).
ic| max_error: 3.7220149626815413e-06
Code for reference:
from numpy.testing import assert_array_equal
import hopper_v4
import hopper_v4_new
import numpy as np
from icecream import ic
global_env = hopper_v4.HopperEnv(exclude_current_positions_from_observation=False, reset_noise_scale=0, render_mode=None)
local_env = hopper_v4_new.HopperEnv(exclude_current_positions_from_observation=False, reset_noise_scale=0, render_mode=None)
"""
for t in dir(global_env.unwrapped.model):
if isinstance(getattr(global_env.unwrapped.model, t), np.ndarray) and getattr(global_env.unwrapped.model, t).size > 0:
if (getattr(global_env.unwrapped.model, t) != getattr(local_env.unwrapped.model, t)).all():
print(t)
#elif callable(getattr(global_env.unwrapped.model, t)):
#None
else:
if getattr(global_env.unwrapped.model, t) != getattr(local_env.unwrapped.model, t):
print(t)
"""
#breakpoint()
NUM_STEPS = 1000
max_error = 0
for SEED in range(10000):
ic(SEED)
initial_obs_1, initial_info_1 = global_env.reset(seed=SEED)
initial_obs_2, initial_info_2 = local_env.reset(seed=SEED)
assert_array_equal(initial_obs_1, initial_obs_2)
global_env.action_space.seed(SEED)
for time_step in range(NUM_STEPS):
# We don't evaluate the determinism of actions
action = global_env.action_space.sample()
obs_1, rew_1, terminated_1, truncated_1, info_1 = global_env.step(action)
obs_2, rew_2, terminated_2, truncated_2, info_2 = local_env.step(action)
if (terminated_1 or truncated_1 or terminated_1 or terminated_2):
error = obs_1-obs_2
ic(time_step)
ic(error)
ic((terminated_1, terminated_2, truncated_1))
max_error = max(max_error, max(error))
break
ic(max_error)
global_env.close()
local_env.close()