Inference on onnx/mlmodel
Opened this issue · 1 comments
stev3 commented
Has anyone succeeded in porting this for inference on mobile? I've converted it, but cannot figure out how to transform the bounding box and classification outputs into x,y,w,h.
I thought I might be able to grab the highest confidence in the class I'm looking for and use that row index to find the corresponding bounding box, but the bounding box is not in the typical range (between 0 and 1).
# For each column of the first batch element's confidence output, take the
# row index holding the largest value.
# (assumes rows are candidate boxes and columns are classes -- TODO confirm)
ans = preds['confidence'][0].argmax(axis=0)
# Fetch the coordinate row at the index selected for column 3.
coords = preds['coordinates'][0][ans[3]] # ans[3] corresponds to the class I'm looking for
# NOTE(review): coords_softmax is computed but never used afterwards; the
# print below shows the raw `coords` values, not the softmax result.
coords_softmax = softmax(coords, axis=0)
print(coords)
[ 0.35327148 2.9023438 1.5244141 -10.9296875 ]
Below is my conversion code.
# Input resolution (height, width) used when building the dummy export input.
size = (224, 224)


class ImageScale(nn.Module):
    """Preprocess a raw RGB image tensor inside the model graph.

    The module divides the input by 255, subtracts a per-channel mean,
    divides by a per-channel standard deviation and finally adds a leading
    batch dimension, so the wrapped network can be fed an un-batched
    ``(3, H, W)`` image with values in the 0-255 range.

    The constants are registered as buffers, so they follow the module
    through ``.to()``, ``.cuda()`` and ``.cpu()``. They are stored with
    shape ``(3, 1, 1)`` / ``(1, 1, 1)`` and broadcast over the spatial
    dimensions, which means any ``H`` x ``W`` is accepted.

    Args:
        device: Device on which the constants are created. When ``None``
            (the default) CUDA is used if it is available, otherwise the
            CPU; this keeps the previous GPU behaviour while no longer
            failing on machines without a GPU.
    """

    def __init__(self, device=None):
        super().__init__()
        if device is None:
            device = torch.device(
                "cuda" if torch.cuda.is_available() else "cpu"
            )

        # Per-channel statistics in R, G, B order.
        means = torch.tensor([0.485, 0.456, 0.406], device=device)
        stds = torch.tensor([0.229, 0.224, 0.225], device=device)

        # Buffers (not parameters): moved with the module, never trained.
        self.register_buffer(
            "denominator", torch.full((1, 1, 1), 255.0, device=device)
        )
        self.register_buffer("means", means.view(3, 1, 1))
        self.register_buffer("stds", stds.view(3, 1, 1))

    def forward(self, x):
        """Return the normalised image with shape ``(1, 3, H, W)``.

        Args:
            x: Image tensor of shape ``(3, H, W)`` with values in 0-255.
        """
        normalized = torch.div(x, self.denominator)
        numerator = torch.sub(normalized, self.means)
        out = torch.div(numerator, self.stds)
        # The downstream network expects a batch dimension.
        return out.unsqueeze(0)
# Put the preprocessing module in front of the trained network so the
# exported graph contains the scaling/normalisation step.
final_model = nn.Sequential(ImageScale(), ssd.learn.model)

model_name = "ssd_resnet_5_epochs.onnx"

# Un-batched random example input on the GPU, used to trace the model.
dummy_input = Variable(torch.randn(3, size[0], size[1])).cuda()

# Export to ONNX, naming the input and the two outputs explicitly.
torch.onnx.export(
    final_model,
    dummy_input,
    model_name,
    input_names=['image'],
    output_names=['confidence', 'coordinates'],
)

# Reload the ONNX file and convert it, treating 'image' as an image input.
onnx_model = onnx.load(model_name)
mlmodel = convert(onnx_model, image_input_names=['image'], target_ios='13')

# Attach human-readable descriptions to the model's input and outputs.
mlmodel.input_description['image'] = 'Image'
mlmodel.output_description['coordinates'] = 'Coordinates'
mlmodel.output_description['confidence'] = 'confidence'

mlmodel.save('ssd_resnet_5_epochs.mlmodel')
stev3 commented
I figured it out.
For anyone looking to run inference on iOS, you must create your anchors and grid sizes and pass them along with your bounding boxes into the _actn_to_bb function. Then rescale the decoded boxes to pixel coordinates and draw the bounding boxes like this:
# Decode the raw coordinate activations into boxes using the anchors and
# grid sizes the model was trained with.
coords = torch.tensor(coords)
coords_actn_to_bb = _actn_to_bb(coords, anchors, grid_sizes)

# One rectangle per entry: (index into `ans`, colour passed to OpenCV).
for class_idx, color in ((2, [0, 255, 0]), (3, [255, 0, 0])):
    # ans[class_idx] is the box with max value of the class
    coords_ans = coords_actn_to_bb[ans[class_idx]]

    # Re-centre and double the values; for inputs in [0, 1] this gives
    # [-1, 1]. (assumes the decoded boxes are in [0, 1] -- TODO confirm)
    coords_ans_bb = (coords_ans - 0.5) * 2

    # 112 * (1 + v) maps [-1, 1] onto [0, 224] pixels. The box is read as
    # (y1, x1, y2, x2). OpenCV expects integer pixel coordinates, so each
    # value is converted explicitly (truncating towards zero).
    x1 = int(112 * (1 + coords_ans_bb[1]))
    y1 = int(112 * (1 + coords_ans_bb[0]))
    x2 = int(112 * (1 + coords_ans_bb[3]))
    y2 = int(112 * (1 + coords_ans_bb[2]))

    cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness=2)