facebookresearch/open-eqa

Potentially incorrect camera pose in HM3D

Closed this issue · 1 comments

Hello,

I am trying to project the HM3D part of the dataset into a 3D point cloud for visualization, using the extracted RGB frames, depth, pose, and intrinsics. The code I used successfully generates a normal visualization result on the ScanNet part, while the result looks somewhat strange on the HM3D part. I wonder if there is any possibility of an error in HM3D's camera poses?

the correct render result on scannet
scannet

the weird render result on HM3D (I am aware the axis direction in HM3D is different but the 3D point cloud is still weird even neglecting the axis part)
hm3d

And here is the code used to generate the 3D point cloud:

import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import random
from tqdm import tqdm

# Sampling knobs for building the point cloud.
# NOTE(review): frame_sample_ratio is never used in this snippet — the frame
# loop below hard-codes range(10); presumably it was meant as a frame stride.
frame_sample_ratio = 30
# Probability of keeping each depth pixel when sampling points.
pixel_sample_ratio = 0.05

def load_matrix_from_txt(path, shape=(4, 4)):
    """Read whitespace-separated floats from a text file into an array of *shape*."""
    with open(path) as fh:
        values = fh.read().split()
    return np.array([float(tok) for tok in values]).reshape(shape)

def load_image(path):
    """Load the image at *path* and return its pixel data as a numpy array."""
    return np.array(Image.open(path))

def convert_from_uvd(u, v, d, intr, pose, depth_scale=6553.5):
    """Back-project a single depth pixel into world coordinates.

    Args:
        u, v: pixel column and row in the depth image.
        d: raw depth value at (v, u); 0 marks an invalid/missing reading.
        intr: camera intrinsic matrix (fx, fy at [0,0]/[1,1]; cx, cy at [0,2]/[1,2]).
        pose: 4x4 camera-to-world transform applied to the homogeneous point.
        depth_scale: divisor converting raw depth units to metric depth
            (default 6553.5, the constant previously hard-coded here).

    Returns:
        (x, y, z) world coordinates, or (None, None, None) when d == 0.
    """
    if d == 0:
        # Zero depth means no valid reading for this pixel.
        return None, None, None

    fx = intr[0, 0]
    fy = intr[1, 1]
    cx = intr[0, 2]
    cy = intr[1, 2]

    # Pinhole back-projection into the camera frame.
    z = d / depth_scale
    x = (u - cx) * z / fx
    y = (v - cy) * z / fy

    # Transform the camera-frame point to world space (homogeneous coords),
    # then dehomogenize.
    world = pose @ np.array([x, y, z, 1.0])
    world = world[:3] / world[3]
    # Return a plain 3-tuple so both branches have the same arity and type
    # (the original returned a numpy array here but a 3-tuple for d == 0).
    return world[0], world[1], world[2]

def plot_3d(xdata, ydata, zdata, color=None, b_min=0, b_max=8, view=(45, 45)):
    """Scatter-plot a 3D point cloud with identical fixed bounds on all axes.

    Args:
        xdata, ydata, zdata: per-point coordinates.
        color: optional per-point RGB(A) values, used directly as colors.
        b_min, b_max: shared lower/upper limit for the x, y, and z axes.
        view: (elevation, azimuth) viewing angles in degrees.
    """
    fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, dpi=200)
    ax.view_init(view[0], view[1])

    ax.set_xlim(b_min, b_max)
    ax.set_ylim(b_min, b_max)
    ax.set_zlim(b_min, b_max)

    # `color` carries explicit RGB(A) triplets, which matplotlib uses as-is.
    # The original also passed cmap='rgb', which is not a registered colormap
    # name (recent matplotlib versions raise on it) and is ignored anyway when
    # explicit colors are supplied, so it is dropped here.
    ax.scatter3D(xdata, ydata, zdata, c=color, s=0.1)

# Scene directory holding per-frame RGB/depth images, per-frame pose .txt
# files, and the shared depth-camera intrinsic matrix.
root = '/data/frames/hm3d-v0/084-hm3d-zt1RVoi7PcG'
intrinsic_depth = np.loadtxt(root+'/intrinsic_depth.txt')

# Accumulators for sampled world-space coordinates and their colors.
x_data, y_data, z_data, c_data = [], [], [], []

# Number of frames in the scene, counted via the depth images.
# NOTE(review): never used below — the frame loop hard-codes range(10).
length = len([x for x in os.listdir(root) if x.endswith('depth.png')])

from collections import defaultdict
# NOTE(review): these two dicts are never populated or read in this snippet.
dic = defaultdict(dict)
dic_c = defaultdict(dict)

# Back-project a random subset of depth pixels from the first 10 frames into
# world space, collecting their colors from the corresponding RGB pixels.
for idx in tqdm(range(10)):
    rgb_image_path = root+'/{:05d}-rgb.png'.format(idx)
    depth_image_path = root+'/{:05d}-depth.png'.format(idx)

    # Per-frame camera pose (4x4 matrix stored as whitespace-separated text).
    p = load_matrix_from_txt(root+'/{:05d}.txt'.format(idx))
    c = load_image(rgb_image_path)
    d = load_image(depth_image_path)


    for i in range(d.shape[0]):
        for j in range(d.shape[1]):
            if random.random() < pixel_sample_ratio:
                # (j, i) = (column, row) -> (u, v) pixel coordinates.
                x, y, z = convert_from_uvd(j, i, d[i, j], intrinsic_depth, p)
                if x is None:
                    # Depth was 0 at this pixel (invalid reading) — skip it.
                    continue
                    
                x_data.append(x)
                y_data.append(y)
                z_data.append(z)
                
                # RGB and depth images may differ in resolution: scale the
                # depth-pixel index to the nearest RGB pixel.
                ci = int(i * c.shape[0] / d.shape[0])
                cj = int(j * c.shape[1] / d.shape[1])
                c_data.append(c[ci, cj] / 255.0)

plot_3d(x_data, y_data, z_data, color=c_data)

plt.show()

Hello, I want to ask if you’ve solved this problem? I have a similar question. Thank you very much!