Reproducing NYUv2 Results
jbrownkramer opened this issue · 2 comments
jbrownkramer commented
This code documents the processing pipeline well, but it starts from disparity images, whereas NYUv2 provides depth images.
What baseline and focal length are you using for converting NYUv2 depth to disparity? My best guess is
f = 518.857901
b = 75
However, that seems like it could be off by an order of magnitude. Help would be appreciated.
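For reference, here is roughly the conversion I have in mind (a minimal sketch with a hypothetical file name; whether the baseline should be in millimeters or meters is exactly the part I am unsure about):

import numpy as np
from PIL import Image

# assuming the depth PNG stores depth in millimeters
depth_m = np.asarray(Image.open("nyu_depth_0001.png"), dtype=np.float32) / 1000.0

f = 518.857901   # focal length in pixels
b = 0.075        # baseline guess: 75 mm expressed in meters

disparity = b * f / np.clip(depth_m, 0.01, None)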
StanLei52 commented
Hi, please see the attached code snippet for converting depth to disparity (along with checking the data). Please make sure that you have downloaded the same data as ours, and use the depths in the depth_bfx folders as inputs for the conversion.
import json
import os

import numpy as np
import scipy.io
import torch
from PIL import Image
from tqdm import tqdm

def get_sensor_type_baseline(path):
    # Infer the stereo baseline (in meters) from the sensor name in the file path.
    if "kv1" in path:
        return 0.075
    elif "kv2" in path:
        return 0.075
    elif "realsense" in path:
        return 0.095
    elif "xtion" in path:
        return 0.095
    else:
        raise ValueError(f"Unknown sensor type in path: {path}")
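# Example (hypothetical path): a depth image stored under a "kv1" directory, e.g.
#   get_sensor_type_baseline("/pathto/dataset/SUNRGBD/kv1/NYUdata/NYU0001/depth_bfx/abc.png")
# returns a 0.075 m baseline; the sensor type is inferred purely from the path string.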
def convert_depth_to_disparity(depth_file, intrinsics_file, min_depth=0.01, max_depth=50):
    """
    depth_file is a png file that contains the scene depth
    intrinsics_file is a txt file supplied in SUNRGBD with sensor information
    Can be found at the path: os.path.join(root_dir, room_name, "intrinsics.txt")
    """
    with open(intrinsics_file, 'r') as fh:
        lines = fh.readlines()
        focal_length = float(lines[0].strip().split()[0])
    baseline = get_sensor_type_baseline(depth_file)
    depth_image = np.array(Image.open(depth_file))
    depth = np.array(depth_image).astype(np.float32)
    depth_in_meters = depth / 1000.
    if min_depth is not None:
        depth_in_meters = depth_in_meters.clip(min=min_depth, max=max_depth)
    disparity = baseline * focal_length / depth_in_meters
    return torch.from_numpy(disparity).float()
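# Example usage (hypothetical sample paths; the exact folder and file names below
# are assumptions, adjust them to your local SUN-RGBD layout):
# disp = convert_depth_to_disparity(
#     "/pathto/dataset/SUNRGBD/kv1/NYUdata/NYU0001/depth_bfx/NYU0001.png",
#     "/pathto/dataset/SUNRGBD/kv1/NYUdata/NYU0001/intrinsics.txt",
# )
# print(disp.shape, disp.min().item(), disp.max().item())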
def check_sun_rgbd():
    keep_labels = ['bathroom', 'bedroom', 'classroom', 'computer_room',
                   'conference_room', 'corridor', 'dining_area', 'dining_room',
                   'discussion_area', 'furniture_store', 'home_office', 'kitchen',
                   'lab', 'lecture_theatre', 'library', 'living_room', 'office',
                   'rest_space', 'study_space']
    mat = scipy.io.loadmat("/pathto/dataset/SUNRGBD/SUNRGBDtoolbox/traintestSUNRGBD/allsplit.mat")
    anno_test = mat["alltest"][0]
    test_scene_set = set()
    n_keep = 0
    n_nyu = 0
    test_meta = []
    for i in tqdm(range(len(anno_test)), total=len(anno_test), desc="sun-rgbd-Val"):
        path = str(anno_test[i][0])
        path = path.replace("/n/fs/sun3d/data/SUNRGBD", "/pathto/dataset/SUNRGBD")
        scene_lbl_path = os.path.join(path, "scene.txt")
        intrinsic_path = os.path.join(path, "intrinsics.txt")
        img_dir = os.path.join(path, "image")
        depth_dir = os.path.join(path, "depth_bfx")
        img_fn = os.listdir(img_dir)[0]
        depth_fn = os.listdir(depth_dir)
        depth_fn = [i for i in depth_fn if i.endswith(".png")][0]
        depth_path = os.path.join(depth_dir, depth_fn)
        disparity_path = depth_path.replace(".png", "_disparity.pt")
        disparity = convert_depth_to_disparity(depth_path, intrinsic_path)
        torch.save(disparity, disparity_path)
        with open(scene_lbl_path, "r") as f:
            scene_lbl = f.readlines()[0]
        if scene_lbl in keep_labels:
            n_keep += 1
            # follow ImageBind to only use those in `keep_labels`
            test_meta.append(
                {
                    "image_path": os.path.join(img_dir, img_fn),
                    "depth_path": depth_path,
                    "disparity_path": disparity_path,
                    "label": scene_lbl,
                    "cleaned_label": scene_lbl.replace("_", " ")
                }
            )
        if "NYU" in scene_lbl_path:
            n_nyu += 1
        test_scene_set.add(scene_lbl)
    print("====== Test info : ")
    print(test_scene_set, len(test_scene_set), n_keep, n_nyu, len(anno_test), len(test_meta))
    with open("/pathto/code/open_clip/src/open_clip/modal_depth/data/SUN-RGBD_val.json", "w") as f:
        json.dump(test_meta, f, indent=2)

    anno_train = mat["alltrain"][0]
    train_scene_set = set()
    n_keep = 0
    n_nyu = 0
    train_meta = []
    for i in tqdm(range(len(anno_train)), total=len(anno_train), desc="sun-rgbd-Train"):
        path = str(anno_train[i][0])
        path = path.replace("/n/fs/sun3d/data/SUNRGBD", "/pathto/dataset/SUNRGBD")
        scene_lbl_path = os.path.join(path, "scene.txt")
        intrinsic_path = os.path.join(path, "intrinsics.txt")
        img_dir = os.path.join(path, "image")
        depth_dir = os.path.join(path, "depth_bfx")
        img_fn = os.listdir(img_dir)[0]
        depth_fn = os.listdir(depth_dir)
        depth_fn = [i for i in depth_fn if i.endswith(".png")][0]
        depth_path = os.path.join(depth_dir, depth_fn)
        disparity_path = depth_path.replace(".png", "_disparity.pt")
        disparity = convert_depth_to_disparity(depth_path, intrinsic_path)
        torch.save(disparity, disparity_path)
        with open(scene_lbl_path, "r") as f:
            scene_lbl = f.readlines()[0]
        if scene_lbl in keep_labels:
            n_keep += 1
        if "NYU" in scene_lbl_path:
            n_nyu += 1
        train_scene_set.add(scene_lbl)
        train_meta.append(
            {
                "image_path": os.path.join(img_dir, img_fn),
                "depth_path": depth_path,
                "disparity_path": disparity_path,
                "label": scene_lbl,
                "cleaned_label": scene_lbl.replace("_", " ")
            }
        )
    print("====== Train info:")
    print(train_scene_set, len(train_scene_set), n_keep, n_nyu, len(anno_train), len(train_meta))
    with open("/pathto/code/open_clip/src/open_clip/modal_depth/data/SUN-RGBD_train.json", "w") as f:
        json.dump(train_meta, f, indent=2)
def check_nyu_rgbd():
    # for dataset: rgb path, depth path, label tag (possible prompt)
    official_scene_names = json.load(open("/pathto/code/open_clip/src/open_clip/modal_depth/data/nyu-depth-v2_scene_name.json", "r"))
    keep_labels = [
        "bedroom", "kitchen", "living_room", "bathroom", "dining_room", "office", "home_office", "classroom", "bookstore",
    ]
    meta = []
    mat = scipy.io.loadmat("/pathto/dataset/SUNRGBD/SUNRGBDtoolbox/traintestSUNRGBD/allsplit.mat")
    anno_test = mat["alltest"][0]
    test_scene_set = set()
    n_nyu = 0
    for i in tqdm(range(len(anno_test)), total=len(anno_test)):
        path = str(anno_test[i][0])
        path = path.replace("/n/fs/sun3d/data/SUNRGBD", "/pathto/dataset/SUNRGBD")
        scene_lbl_path = os.path.join(path, "scene.txt")
        img_dir = os.path.join(path, "image")
        depth_dir = os.path.join(path, "depth_bfx")
        img_fn = os.listdir(img_dir)[0]
        # keep only the depth PNG; check_sun_rgbd also writes *_disparity.pt files into depth_bfx
        depth_fn = [fn for fn in os.listdir(depth_dir) if fn.endswith(".png")][0]
        disparity_fn = depth_fn.replace(".png", "_disparity.pt")
        with open(scene_lbl_path, "r") as f:
            scene_lbl = f.readlines()[0]
        if "NYU" not in scene_lbl_path:
            continue
        nyu_idx = int(img_fn[3:7])
        n_nyu += 1
        test_scene_set.add(scene_lbl)
        meta.append(
            {
                "image_path": os.path.join(img_dir, img_fn),
                "depth_path": os.path.join(depth_dir, depth_fn),
                "disparity_path": os.path.join(depth_dir, disparity_fn),
                "label": scene_lbl,
                "cleaned_label": scene_lbl.replace("_", " "),
                "benchmark_label": scene_lbl.replace("_", " ") if scene_lbl in keep_labels else "others",
                "official_label": official_scene_names[nyu_idx - 1]
            }
        )
    print("====== NYU Data info : ")
    print(test_scene_set, len(test_scene_set), n_nyu, len(anno_test), len(meta))
    with open("/pathto/code/open_clip/src/open_clip/modal_depth/data/NYU-Depth-v2_val.json", "w") as f:
        json.dump(meta, f, indent=2)
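As a rough sanity check (my own numbers, assuming the NYUv2 frames fall under the kv1 branch above): with f ≈ 518.86 px read from intrinsics.txt and b = 0.075 m, a depth value stored as 2000 in the PNG (2 m after dividing by 1000) gives disparity = 0.075 * 518.86 / 2 ≈ 19.5.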
You can find the f and b from the code above and the downloaded data. Hope that helps :)
jbrownkramer commented
This is amazing! Thank you!