isaac-sim/IsaacGymEnvs

Question about adding camera sensors in IsaacGym


Hi,
I attempted to incorporate camera sensors into the FrankaCubeStack task. First, I created one camera per environment in _create_envs:

    #camera settings
    camera_props = gymapi.CameraProperties()
    camera_props.width = 360
    camera_props.height = 240
    camera_props.enable_tensors = True

    # Create environments
    for i in range(self.num_envs):
        # create env instance
        env_ptr = self.gym.create_env(self.sim, lower, upper, num_per_row)

        # Create actors and define aggregate group appropriately depending on setting
        # NOTE: franka should ALWAYS be loaded first in sim!
        if self.aggregate_mode >= 3:
            self.gym.begin_aggregate(env_ptr, max_agg_bodies, max_agg_shapes, True)

        # Create franka
        # Potentially randomize start pose
        if self.franka_position_noise > 0:
            rand_xy = self.franka_position_noise * (-1. + np.random.rand(2) * 2.0)
            franka_start_pose.p = gymapi.Vec3(-0.45 + rand_xy[0], 0.0 + rand_xy[1],
                                             1.0 + table_thickness / 2 + table_stand_height)
        if self.franka_rotation_noise > 0:
            rand_rot = torch.zeros(1, 3)
            rand_rot[:, -1] = self.franka_rotation_noise * (-1. + np.random.rand() * 2.0)
            new_quat = axisangle2quat(rand_rot).squeeze().numpy().tolist()
            franka_start_pose.r = gymapi.Quat(*new_quat)
        franka_actor = self.gym.create_actor(env_ptr, franka_asset, franka_start_pose, "franka", i, 0, 0)
        self.gym.set_actor_dof_properties(env_ptr, franka_actor, franka_dof_props)

        if self.aggregate_mode == 2:
            self.gym.begin_aggregate(env_ptr, max_agg_bodies, max_agg_shapes, True)

        # Create table
        table_actor = self.gym.create_actor(env_ptr, table_asset, table_start_pose, "table", i, 1, 0)
        table_stand_actor = self.gym.create_actor(env_ptr, table_stand_asset, table_stand_start_pose, "table_stand", i, 1, 0)
        print(table_start_pose)
        print(table_stand_start_pose)

        # Create camera
        cam_handle = self.gym.create_camera_sensor(env_ptr, camera_props)
        self.gym.set_camera_location(cam_handle, env_ptr, gymapi.Vec3(0.5, 0, 2), gymapi.Vec3(-0.5, 0, 1.15))


        # Create wall
        wall_actor = self.gym.create_actor(env_ptr, wall_asset, wall_start_pose, "wall", i, 1, 0)

        if self.aggregate_mode == 1:
            self.gym.begin_aggregate(env_ptr, max_agg_bodies, max_agg_shapes, True)

        # Create cubes
        self._cubeA_id = self.gym.create_actor(env_ptr, cubeA_asset, cubeA_start_pose, "cubeA", i, 2, 0)
        self._cubeB_id = self.gym.create_actor(env_ptr, cubeB_asset, cubeB_start_pose, "cubeB", i, 4, 0)
        # Set colors
        self.gym.set_rigid_body_color(env_ptr, self._cubeA_id, 0, gymapi.MESH_VISUAL, cubeA_color)
        self.gym.set_rigid_body_color(env_ptr, self._cubeB_id, 0, gymapi.MESH_VISUAL, cubeB_color)

        if self.aggregate_mode > 0:
            self.gym.end_aggregate(env_ptr)

        # Store the created env pointers
        self.envs.append(env_ptr)
        self.frankas.append(franka_actor)
        self.cameras.append(cam_handle)
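
As a point of comparison, the graphics interop example that ships with Isaac Gym (interop_torch.py, if I remember right) wraps each camera's GPU image tensor once inside this creation loop and reuses it every step, roughly as sketched below; cam_tensors is an illustrative name and not part of FrankaCubeStack:

    # Sketch of the wrap-once pattern from the Isaac Gym interop example (not my code):
    # obtain the GPU image tensor for this env's camera and wrap it as a torch tensor,
    # then keep it around and read from it after render_all_camera_sensors each step.
    cam_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, env_ptr, cam_handle, gymapi.IMAGE_COLOR)
    torch_cam_tensor = gymtorch.wrap_tensor(cam_tensor)
    self.cam_tensors.append(torch_cam_tensor)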

Subsequently, I tried to read and save the images from the camera sensors in compute_observations:

    def compute_observations(self):
        self._refresh()
        obs = ["cubeA_quat", "cubeA_pos", "cubeA_to_cubeB_pos", "eef_pos", "eef_quat"]
        obs += ["q_gripper"] if self.control_type == "osc" else ["q"]
        self.obs_buf = torch.cat([self.states[ob] for ob in obs], dim=-1)
        maxs = {ob: torch.max(self.states[ob]).item() for ob in obs}
        # get camera images
        imgs = []
        self.gym.render_all_camera_sensors(self.sim)
        self.gym.start_access_image_tensors(self.sim)
        for i in range(self.num_envs):
            camera_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, self.envs[i], self.cameras[i], gymapi.IMAGE_COLOR)
            torch_camera_tensor = gymtorch.wrap_tensor(camera_tensor)
            # save images (to_pil_image is torchvision.transforms.functional.to_pil_image)
            img = torch_camera_tensor.permute(2, 0, 1)
            RGB = img[:3]  # This will give you the first 3 channels (RGB)
            alpha = img[3]  # This will give you the 4th channel (alpha)
            # Scale RGB channels to [0, 255]
            RGB = RGB * 255
            # Add an extra dimension to alpha so it can be concatenated with RGB
            alpha = alpha.unsqueeze(0)
            # Concatenate RGB and alpha channels
            img = torch.cat((RGB, alpha), dim=0)
            # Convert to PIL Image
            img = to_pil_image(img)
            # Save image
            img.save('image%d.png' % i)

However, the saved images show that only the camera in the first environment correctly captures the state of the Franka robot. Have I overlooked any camera settings?
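
Looking at the camera examples shipped with Isaac Gym, the per-step readback sequence there seems to be roughly the sketch below; the step_graphics, fetch_results and end_access_image_tensors calls are the parts my compute_observations above does not include, so maybe that is related:

    # Per-step camera readback sketch (adapted from the Isaac Gym camera examples,
    # not from FrankaCubeStack). self.sim, self.envs and self.cameras are as above.
    self.gym.fetch_results(self.sim, True)         # make sure physics results are ready
    self.gym.step_graphics(self.sim)               # sync graphics state with the simulation
    self.gym.render_all_camera_sensors(self.sim)   # render every camera in every env
    self.gym.start_access_image_tensors(self.sim)  # begin access to the image tensors
    for i in range(self.num_envs):
        cam_tensor = self.gym.get_camera_image_gpu_tensor(
            self.sim, self.envs[i], self.cameras[i], gymapi.IMAGE_COLOR)
        rgba = gymtorch.wrap_tensor(cam_tensor)    # (height, width, 4) image for env i
        # ... copy / save the per-env image here ...
    self.gym.end_access_image_tensors(self.sim)    # release access when done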

Hi.
Did you manage to get the data from all of the cameras?
And did you use the camera data as input for your DRL algorithm?
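
To be concrete, by "camera data as input" I mean roughly the sketch below, which stacks the per-env RGB images into one batched tensor for the policy; cam_obs_buf is just an illustrative name, not something defined in the task:

    # Rough sketch: stack per-env camera images into a (num_envs, 3, H, W) batch.
    # Assumes the IMAGE_COLOR tensor is 8-bit RGBA, as in the Isaac Gym examples.
    imgs = []
    for i in range(self.num_envs):
        cam_tensor = self.gym.get_camera_image_gpu_tensor(
            self.sim, self.envs[i], self.cameras[i], gymapi.IMAGE_COLOR)
        rgba = gymtorch.wrap_tensor(cam_tensor)                # (H, W, 4), uint8
        rgb = rgba[..., :3].permute(2, 0, 1).float() / 255.0   # (3, H, W) in [0, 1]
        imgs.append(rgb)
    cam_obs_buf = torch.stack(imgs, dim=0)                     # (num_envs, 3, H, W)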