valeoai/WoodScape

How to get start with the SynWoodScape?

Xcco1 opened this issue · 8 comments

Xcco1 commented

Hello,
Can anybody guide me how to use the SynWoodScape with the task of 3d detection on Bev

I also encountered the same problem, did you solve it?

I also encountered the same problem, did you solve it?

Hi the 3D bounding boxes are expressed in the world reference.
Here is a small python code to plot them into the image.

import numpy as np
import pickle 
import cv2
import matplotlib.pyplot as plt

def get_matrix(x, y, z, yaw, pitch, roll):
    c_y = np.cos(np.radians(yaw))
    s_y = np.sin(np.radians(yaw))
    c_r = np.cos(np.radians(roll))
    s_r = np.sin(np.radians(roll))
    c_p = np.cos(np.radians(pitch))
    s_p = np.sin(np.radians(pitch))
    matrix = np.matrix(np.identity(4))
    matrix[0, 3] = x
    matrix[1, 3] = y
    matrix[2, 3] = z
    matrix[0, 0] = c_p * c_y
    matrix[0, 1] = c_y * s_p * s_r - s_y * c_r
    matrix[0, 2] = -c_y * s_p * c_r - s_y * s_r
    matrix[1, 0] = s_y * c_p
    matrix[1, 1] = s_y * s_p * s_r + c_y * c_r
    matrix[1, 2] = -s_y * s_p * c_r + c_y * s_r
    matrix[2, 0] = s_p
    matrix[2, 1] = -c_p * s_r
    matrix[2, 2] = c_p * c_r
    return matrix

def build_projection_matrix(w, h, fov):
    focal = w / (2.0 * np.tan(fov * np.pi / 360.0))
    K = np.identity(3)
    K[0, 0] = K[1, 1] = focal
    K[0, 2] = w / 2.0
    K[1, 2] = h / 2.0
    return K

def get_image_point(point, K, w2c):
    # Calculate 2D projection of 3D coordinate
    
    # transform to camera coordinates
    point_camera = np.array(np.dot(w2c, point))[0]

    # New we must change from UE4's coordinate system to an "standard"
    # (x, y ,z) -> (y, -z, x)
    # and we remove the fourth componebonent also
    point_camera = [point_camera[1], -point_camera[2], point_camera[0]]

    # now project 3D->2D using the camera matrix
    point_img = np.dot(K, point_camera)
    # normalize
    point_img[0] /= point_img[2]
    point_img[1] /= point_img[2]

    return point_img[0:2]

edges = [[0,1], [1,2], [2,3], [3,0], [4,5], [5,6], [6,7], [7,4], [0,4], [1,5], [2,6], [3,7]]

with open('SynWoodScape_V0.1.1/box_3d_annotations/00000.pkl', 'rb') as f:
    data = pickle.load(f)

img = cv2.imread('SynWoodScape_V0.1.1/rgb_images/00000_BEV.png')
K = build_projection_matrix(img.shape[1], img.shape[0], 90)

with open('SynWoodScape_V0.1.1/vehicle_data/rgb_images/00000.txt') as f:
    contents = f.readlines()
txt_transform = contents[4]
x = float(txt_transform.split("x=")[1].split(',')[0])
y = float(txt_transform.split("y=")[1].split(',')[0])
z = float(txt_transform.split("z=")[1].split(')')[0])
pitch = float(txt_transform.split("pitch=")[1].split(',')[0])
yaw = float(txt_transform.split("yaw=")[1].split(',')[0])
roll = float(txt_transform.split("roll=")[1].split(')')[0])

vehicle_world_matrix = get_matrix(x, y, z, yaw, pitch, roll)
sensor_vehicle_matrix = get_matrix(0, 0, 15, 0, -90, 0)

world_2_camera = np.linalg.inv(np.dot(vehicle_world_matrix, sensor_vehicle_matrix))

for key in data.keys():
    for edge in edges:
        p1 = get_image_point(np.array(data[key][edge[0]])[0], K, world_2_camera)
        p2 = get_image_point(np.array(data[key][edge[1]])[0],  K, world_2_camera)
        cv2.line(img, (int(p1[0]),int(p1[1])), (int(p2[0]),int(p2[1])), (255,0,0, 255), 1)        

plt.imshow(img)
plt.show()
Xcco1 commented

Can we get more data about the SynWoodscape,the data released is a little thin

More data will be provided but I dont know when it will be released.

Hi the 3D bounding boxes are expressed in the world reference. Here is a small python code to plot them into the image.

import numpy as np
import pickle 
import cv2
import matplotlib.pyplot as plt

def get_matrix(x, y, z, yaw, pitch, roll):
    c_y = np.cos(np.radians(yaw))
    s_y = np.sin(np.radians(yaw))
    c_r = np.cos(np.radians(roll))
    s_r = np.sin(np.radians(roll))
    c_p = np.cos(np.radians(pitch))
    s_p = np.sin(np.radians(pitch))
    matrix = np.matrix(np.identity(4))
    matrix[0, 3] = x
    matrix[1, 3] = y
    matrix[2, 3] = z
    matrix[0, 0] = c_p * c_y
    matrix[0, 1] = c_y * s_p * s_r - s_y * c_r
    matrix[0, 2] = -c_y * s_p * c_r - s_y * s_r
    matrix[1, 0] = s_y * c_p
    matrix[1, 1] = s_y * s_p * s_r + c_y * c_r
    matrix[1, 2] = -s_y * s_p * c_r + c_y * s_r
    matrix[2, 0] = s_p
    matrix[2, 1] = -c_p * s_r
    matrix[2, 2] = c_p * c_r
    return matrix

def build_projection_matrix(w, h, fov):
    focal = w / (2.0 * np.tan(fov * np.pi / 360.0))
    K = np.identity(3)
    K[0, 0] = K[1, 1] = focal
    K[0, 2] = w / 2.0
    K[1, 2] = h / 2.0
    return K

def get_image_point(point, K, w2c):
    # Calculate 2D projection of 3D coordinate
    
    # transform to camera coordinates
    point_camera = np.array(np.dot(w2c, point))[0]

    # New we must change from UE4's coordinate system to an "standard"
    # (x, y ,z) -> (y, -z, x)
    # and we remove the fourth componebonent also
    point_camera = [point_camera[1], -point_camera[2], point_camera[0]]

    # now project 3D->2D using the camera matrix
    point_img = np.dot(K, point_camera)
    # normalize
    point_img[0] /= point_img[2]
    point_img[1] /= point_img[2]

    return point_img[0:2]

edges = [[0,1], [1,2], [2,3], [3,0], [4,5], [5,6], [6,7], [7,4], [0,4], [1,5], [2,6], [3,7]]

with open('SynWoodScape_V0.1.1/box_3d_annotations/00000.pkl', 'rb') as f:
    data = pickle.load(f)

img = cv2.imread('SynWoodScape_V0.1.1/rgb_images/00000_BEV.png')
K = build_projection_matrix(img.shape[1], img.shape[0], 90)

with open('SynWoodScape_V0.1.1/vehicle_data/rgb_images/00000.txt') as f:
    contents = f.readlines()
txt_transform = contents[4]
x = float(txt_transform.split("x=")[1].split(',')[0])
y = float(txt_transform.split("y=")[1].split(',')[0])
z = float(txt_transform.split("z=")[1].split(')')[0])
pitch = float(txt_transform.split("pitch=")[1].split(',')[0])
yaw = float(txt_transform.split("yaw=")[1].split(',')[0])
roll = float(txt_transform.split("roll=")[1].split(')')[0])

vehicle_world_matrix = get_matrix(x, y, z, yaw, pitch, roll)
sensor_vehicle_matrix = get_matrix(0, 0, 15, 0, -90, 0)

world_2_camera = np.linalg.inv(np.dot(vehicle_world_matrix, sensor_vehicle_matrix))

for key in data.keys():
    for edge in edges:
        p1 = get_image_point(np.array(data[key][edge[0]])[0], K, world_2_camera)
        p2 = get_image_point(np.array(data[key][edge[1]])[0],  K, world_2_camera)
        cv2.line(img, (int(p1[0]),int(p1[1])), (int(p2[0]),int(p2[1])), (255,0,0, 255), 1)        

plt.imshow(img)
plt.show()

Thanks so much for the useful code. Is there any chance that you could kindly provide code for 3d bbox projection on other views and lidar plane? Thanks so much !