Potentially incorrect camera pose in HM3D
Closed this issue · 1 comments
hsiangwei0903 commented
Hello,
I am trying to project the HM3D part of the dataset into a 3D point cloud for visualization, using the extracted RGB frames, depth, pose, and intrinsics. The code I used successfully generates a normal visualization result on the ScanNet part, while the result looks somewhat strange on the HM3D part. I wonder whether there might be an error in HM3D's camera poses?
the correct render result on scannet
the weird render result on HM3D (I am aware that the axis directions in HM3D are different, but the 3D point cloud still looks wrong even when ignoring the axis issue)
and here is the code used to generate the 3D point cloud.
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import random
from tqdm import tqdm
frame_sample_ratio = 30
pixel_sample_ratio = 0.05
def load_matrix_from_txt(path, shape=(4, 4)):
with open(path) as f:
txt = f.readlines()
txt = ''.join(txt).replace('\n', ' ')
matrix = [float(v) for v in txt.split()]
return np.array(matrix).reshape(shape)
def load_image(path):
image = Image.open(path)
imm = np.array(image)
return imm
def convert_from_uvd(u, v, d, intr, pose):
if d == 0:
return None, None, None
fx = intr[0, 0]
fy = intr[1, 1]
cx = intr[0, 2]
cy = intr[1, 2]
depth_scale = 6553.5
z = d / depth_scale
x = (u - cx) * z / fx
y = (v - cy) * z / fy
world = (pose @ np.array([x, y, z, 1]))
return world[:3] / world[3]
def plot_3d(xdata, ydata, zdata, color=None, b_min=0, b_max=8, view=(45, 45)):
fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, dpi=200)
ax.view_init(view[0], view[1])
ax.set_xlim(b_min, b_max)
ax.set_ylim(b_min, b_max)
ax.set_zlim(b_min, b_max)
ax.scatter3D(xdata, ydata, zdata, c=color, cmap='rgb', s=0.1)
root = '/data/frames/hm3d-v0/084-hm3d-zt1RVoi7PcG'
intrinsic_depth = np.loadtxt(root+'/intrinsic_depth.txt')
x_data, y_data, z_data, c_data = [], [], [], []
length = len([x for x in os.listdir(root) if x.endswith('depth.png')])
from collections import defaultdict
dic = defaultdict(dict)
dic_c = defaultdict(dict)
for idx in tqdm(range(10)):
rgb_image_path = root+'/{:05d}-rgb.png'.format(idx)
depth_image_path = root+'/{:05d}-depth.png'.format(idx)
p = load_matrix_from_txt(root+'/{:05d}.txt'.format(idx))
c = load_image(rgb_image_path)
d = load_image(depth_image_path)
for i in range(d.shape[0]):
for j in range(d.shape[1]):
if random.random() < pixel_sample_ratio:
x, y, z = convert_from_uvd(j, i, d[i, j], intrinsic_depth, p)
if x is None:
continue
x_data.append(x)
y_data.append(y)
z_data.append(z)
ci = int(i * c.shape[0] / d.shape[0])
cj = int(j * c.shape[1] / d.shape[1])
c_data.append(c[ci, cj] / 255.0)
plot_3d(x_data, y_data, z_data, color=c_data)
plt.show()
qirui-chen commented
Hello, I want to ask if you’ve solved this problem? I have a similar question. Thank you very much!