Tried to test the models, but got wrong scores and therefore wrong boxes
Manolo1988 opened this issue · 6 comments
I'm new to PyTorch. I tried to use your pretrained models (rgb or fast-op) to test the performance, but the output of the net does not seem correct: the first column of the output conf_scores is large (nearly 0.99), and the scores in the other columns are all very small (far less than 0.01). I use the UCF24 dataset, and my PyTorch code is:
# Build the evaluation dataset and loader: every image is resized so its
# shorter side is 300 px, centre-cropped to 300x300 and converted to a tensor.
img_data = torchvision.datasets.ImageFolder(
    '/home/vision1/hdd/dataset/test_data/',
    transform=transforms.Compose([
        # `Scale` is the deprecated former name of `Resize`.
        transforms.Resize(300),
        transforms.CenterCrop(300),
        transforms.ToTensor(),
        # ToTensor scales pixels to [0, 1], whereas the single-image script
        # later in this thread feeds the network 0-255 values with the
        # per-channel means (104, 117, 123 in BGR order) subtracted.
        # Normalize(mean=m/255, std=1/255) computes 255*x - m, which
        # reproduces that input range for RGB-ordered images.
        # NOTE(review): confirm this matches the preprocessing used to
        # train the checkpoint.
        transforms.Normalize(mean=[123 / 255.0, 117 / 255.0, 104 / 255.0],
                             std=[1 / 255.0, 1 / 255.0, 1 / 255.0]),
    ]),
)
torch.set_default_tensor_type('torch.cuda.FloatTensor')
batch_size = 20
conf_thresh = 0.01
nms_thresh = 0.45
topk = 20
print(len(img_data))
data_loader = torch.utils.data.DataLoader(img_data, batch_size=batch_size, shuffle=False)
print(len(data_loader))
def show_batch(imgs):
    """Plot a batch of image tensors as one grid with five images per row."""
    grid = utils.make_grid(imgs, nrow=5)
    # Move axis 0 to the end before plotting (imshow is given the
    # transposed array).
    channels_last = grid.numpy().transpose((1, 2, 0))
    plt.imshow(channels_last)
    plt.title('Batch from dataloader')
# Load the pretrained SSD300 checkpoint and run it over every batch produced
# by `data_loader`, splitting the network output into its three parts.
trained_model_path = '/home/vision1/hdd/models/rgb-ssd300_ucf24_120000.pth'
num_classes = 25  # 24 action classes + 1 background
net = build_ssd(300, num_classes)  # initialize SSD
net.load_state_dict(torch.load(trained_model_path))
net.eval()
net = net.cuda()
cudnn.benchmark = True
print('Finished loading model %d !' % 1)
torch.cuda.synchronize()
for i, (images, batch_y) in enumerate(data_loader):
    # ToTensor yields C x H x W images, so a batch is (N, C, H, W):
    # dim 2 is the height and dim 3 is the width.
    height = images.size()[2]
    width = images.size()[3]
    print(i, images.size(), batch_y.size())
    images = Variable(images.cuda())
    # `async` is a reserved keyword from Python 3.7 on, so `cuda(async=True)`
    # no longer parses. The loader above does not pin memory, in which case
    # the asynchronous flag has no effect anyway, so a plain copy is used.
    # (On PyTorch >= 0.4 the flag is spelled `non_blocking=True`.)
    batch_y = batch_y.cuda()
    output = net(images)
    print(len(output))
    loc_data = output[0]
    conf_preds = output[1]
    prior_data = output[2]
Another thing that confuses me: in line 133 of test_ucf24.py (`for cl_ind in range(1, num_classes):`), why does the range go from 1 to num_classes rather than from 0 to num_classes-1?
The first class is the 'background' class, which is why we have 1+24=25 classes. Most of the time, most of the boxes are background, so the system is biased towards the background class; that is why the first column mostly has a very high value.
I still cannot get correct boxes and scores. The following is my code, which takes only one picture as input; I don't know what is going wrong...
def main():
    """Run the pretrained SSD300 detector on a single image.

    Reads one hard-coded image, resizes it to 300x300, subtracts the channel
    means and feeds it through the network. The raw class scores and decoded
    boxes are saved to a ``.mat`` file. For every non-background class the
    scores are thresholded with ``args.conf_thresh``, passed through NMS, and
    the surviving boxes are scaled to the network input size and clipped to
    the image bounds.

    Raises:
        IOError: if the input image cannot be read.
    """
    means = (104, 117, 123)  # only support voc now
    exp_name = 'CONV-SSD-{}-{}-bs-{}-{}-lr-{:05d}'.format(
        args.dataset, args.input_type, args.batch_size,
        args.basenet[:-14], int(args.lr * 100000))
    args.save_root += args.dataset+'/'
    args.data_root += args.dataset+'/'
    args.listid = '01'  ## would be usefull in JHMDB-21
    print('Exp name', exp_name, args.listid)

    trained_model_path = '/home/vision1/hdd/models/rgb.pth'
    num_classes = len(CLASSES) + 1  # action classes + 1 background
    net = build_ssd(300, num_classes)  # initialize SSD
    net.load_state_dict(torch.load(trained_model_path))
    net.eval()

    image_path = '/home/vision1/hdd/dataset/onepic/2.jpg'
    output_dir = "/home/vision1/hdd/dataset/"
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        raise IOError('could not read image: ' + image_path)

    # Preprocess: resize, subtract the means while the channels are still in
    # the order cv2 loaded them, then reverse the channel order and go
    # from H x W x C to C x H x W.
    image = cv2.resize(image, (300, 300)).astype(np.float32)
    image -= means
    image = image[:, :, (2, 1, 0)]
    image = torch.from_numpy(image).permute(2, 0, 1)

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True

    image = image.unsqueeze(0)
    # The tensor is now (N, C, H, W). Take the box scale from it on every
    # path; previously the CPU path kept values unpacked from the H x W x C
    # array as (channels, width, height), which left height == 3.
    # NOTE(review): boxes are therefore in 300x300 network-input coordinates;
    # scale by the source image size instead if they must be drawn on the
    # original picture.
    height, width = image.size(2), image.size(3)
    if args.cuda:
        image = image.cuda()
    # Wrap on both CPU and GPU paths so the forward pass sees the same type.
    image = Variable(image, volatile=True)

    output = net(image)
    loc_data = output[0]
    conf_preds = output[1]
    prior_data = output[2]

    for b in range(1):
        decoded_boxes = decode(loc_data[b].data, prior_data.data, cfg['variance']).clone()
        conf_scores = net.softmax(conf_preds[b]).data.clone()
        output_file_name = output_dir + '/{:05d}.mat'.format(int(1))
        sio.savemat(output_file_name,
                    mdict={'scores': conf_scores.cpu().numpy(),
                           'loc': decoded_boxes.cpu().numpy()})

        # Column 0 is the background class, so start at 1.
        for cl_ind in range(1, num_classes):
            scores = conf_scores[:, cl_ind].squeeze()
            c_mask = scores.gt(args.conf_thresh)  # greater than minimum threshold
            # No squeeze here: with exactly one surviving score it would
            # collapse the result to a 0-dim tensor and break the indexing
            # further down.
            scores = scores[c_mask]
            print('scores size', torch.numel(scores))
            if torch.numel(scores) == 0:
                continue
            boxes = decoded_boxes.clone()
            l_mask = c_mask.unsqueeze(1).expand_as(boxes)
            boxes = boxes[l_mask].view(-1, 4)
            # idx of highest scoring and non-overlapping boxes per class
            ids, counts = nms(boxes, scores, args.nms_thresh, args.topk)  # ids after nms
            scores = scores[ids[:counts]].cpu().numpy()
            print(scores)
            boxes = boxes[ids[:counts]].cpu().numpy()
            # Scale columns 0/2 by the width and 1/3 by the height.
            boxes[:, 0] *= width
            boxes[:, 2] *= width
            boxes[:, 1] *= height
            boxes[:, 3] *= height
            # Clip to the image bounds (vectorized instead of a per-row loop).
            boxes[:, 0] = np.maximum(boxes[:, 0], 0)
            boxes[:, 2] = np.minimum(boxes[:, 2], width)
            boxes[:, 1] = np.maximum(boxes[:, 1], 0)
            boxes[:, 3] = np.minimum(boxes[:, 3], height)
It looks alright to me. It could be that the model predictions are not correct.
Does the image belong to the UCF24 dataset?
Some pictures are; some are not, but they show the same actions.
I would suggest running on one of the videos from the ice dancing action to verify that you have it working correctly.