Questions about depth and normal estimators
Opened this issue · 0 comments
Wi-sc commented
Hi, there.
Thanks for the release. I'm interested in your work and am trying to re-implement it. I get reasonable results when using the images directly, without the depth and normal estimators, but it doesn't work once the depth and normal estimators are added. I'd like to confirm whether the two parts are trained completely separately. Also, do you apply any additional operations to the depth maps, such as a sigmoid or clamp function? I ask because I also found some values less than 0.
This is my code for the depth and normal estimators:
class Net(nn.Module):
    """Shared ResNet-18 encoder with one skip-connected decoder per output head.

    The encoder is split into five stages (stem + ``layer1``..``layer4``) and
    the output of every stage is kept.  Each decoder starts from the deepest
    encoder feature map and, after every stage except its last, concatenates
    the encoder feature map of matching spatial size along the channel axis.

    Decoders are registered as ``decoder_<layer_name>``.  ``forward`` reads
    ``decoder_depth``, ``decoder_mask`` and ``decoder_normal``, so
    ``layer_names`` must contain ``'depth'``, ``'mask'`` and ``'normal'``.

    The module is built on the default device; move it with ``.to(device)``
    or ``.cuda()`` after construction so encoder and decoders end up on the
    same device.
    """

    def __init__(self, out_planes, layer_names, input_planes=3):
        """Build the encoder and one decoder per ``(out_plane, layer_name)`` pair.

        Args:
            out_planes: iterable of values forwarded one by one to
                ``revuresnet18(out_planes=...)``; presumably the number of
                output channels of each head -- confirm against
                ``revuresnet18``.
            layer_names: iterable of head names, zipped with ``out_planes``;
                each decoder is stored as ``decoder_<name>``.
            input_planes: number of input channels.  When it is not 3, the
                ResNet stem convolution is replaced by a freshly initialised
                ``nn.Conv2d`` with ``input_planes`` input channels.
        """
        super().__init__()

        # Encoder
        resnet = resnet18(pretrained=True)
        if input_planes == 3:
            stem_conv = resnet.conv1
        else:
            # Only allocate a replacement stem when the ResNet one cannot be
            # reused for this number of input channels.
            stem_conv = nn.Conv2d(
                input_planes, 64, kernel_size=7, stride=2, padding=3, bias=False
            )
        self.encoder = nn.ModuleList([
            nn.Sequential(stem_conv, resnet.bn1, resnet.relu, resnet.maxpool),
            resnet.layer1,
            resnet.layer2,
            resnet.layer3,
            resnet.layer4,
        ])

        # Decoder
        for out_plane, layer_name in zip(out_planes, layer_names):
            revresnet = revuresnet18(out_planes=out_plane)
            decoder = nn.ModuleList([
                revresnet.layer1,
                revresnet.layer2,
                revresnet.layer3,
                revresnet.layer4,
                nn.Sequential(
                    revresnet.deconv1,
                    revresnet.bn1,
                    revresnet.relu,
                    revresnet.deconv2,
                ),
            ])
            # No device is forced here: the encoder is not moved either, so
            # the caller has to place the whole model on a device anyway.
            setattr(self, 'decoder_' + layer_name, decoder)

    def _decode(self, decoder, feat_maps):
        """Run one decoder over the encoder features with skip connections.

        Args:
            decoder: sequence of decoder stages (an ``nn.ModuleList``).
            feat_maps: encoder stage outputs, shallowest first.

        Returns:
            The output of the last decoder stage; nothing is applied to it
            afterwards.
        """
        x = feat_maps[-1]
        # Use this decoder's own length so heads of different depth each get
        # the right number of skip connections.
        last_idx = len(decoder) - 1
        for idx, stage in enumerate(decoder):
            x = stage(x)
            if idx < last_idx:
                # feat_maps[-1] was the decoder input, so stage ``idx`` pairs
                # with the encoder output ``idx + 2`` from the end.
                skip = feat_maps[-(idx + 2)]
                assert skip.shape[2:4] == x.shape[2:4], (
                    f'skip {tuple(skip.shape)} and decoder output '
                    f'{tuple(x.shape)} differ in spatial size at stage {idx}'
                )
                x = torch.cat((x, skip), dim=1)
        return x

    def forward(self, im):
        """Encode ``im`` once and decode it with the depth, mask and normal heads.

        Args:
            im: input tensor; assumed to be laid out as (N, C, H, W), since
                channels are concatenated on dim 1 and dims 2-3 are compared
                as spatial sizes -- confirm against the caller.

        Returns:
            Tuple ``(depth_output, mask_output, normal_output)`` holding the
            final-stage output of each decoder.
        """
        # Encode
        feat = im
        feat_maps = []
        for stage in self.encoder:
            feat = stage(feat)
            feat_maps.append(feat)

        depth_output = self._decode(self.decoder_depth, feat_maps)
        mask_output = self._decode(self.decoder_mask, feat_maps)
        normal_output = self._decode(self.decoder_normal, feat_maps)
        return depth_output, mask_output, normal_output
For inference:
# Predict all three maps from the input image in a single pass.
depth, mask, normal = depth_normal_model(img_input)
# Weight the depth map element-wise by the predicted mask.
# NOTE(review): the mask is used exactly as the estimator returns it; if it is
# an un-activated network output it is not limited to [0, 1] -- confirm
# whether a sigmoid/threshold is expected before this multiplication.
depth = torch.mul(depth, mask)
# Stack masked depth and normals along dim 1 as conditioning for the SDF model.
input_2 = torch.cat((depth, normal), dim=1)
sdf = model(points_input, input_2)