jhb86253817/PIPNet

The predicted landmarks appear outside the face box

try-agaaain opened this issue · 3 comments

To shorten face detection time, I predict the face box for the current frame from the landmarks of the previous frame. In some cases, the landmarks predicted by PIPNet appear outside my face box.
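
Roughly, I derive the box like this (the helper name and the margin value here are just illustrative, not something from PIPNet):

import numpy as np

def box_from_prev_landmarks(landmarks, image_width, image_height, margin=0.2):
    # landmarks: (N, 2) array of [x, y] from the previous frame.
    xmin, ymin = landmarks.min(axis=0)
    xmax, ymax = landmarks.max(axis=0)
    w, h = xmax - xmin, ymax - ymin
    # Pad the tight landmark bounds a bit and clamp to the image.
    xmin = max(int(xmin - margin * w), 0)
    ymin = max(int(ymin - margin * h), 0)
    xmax = min(int(xmax + margin * w), image_width - 1)
    ymax = min(int(ymax + margin * h), image_height - 1)
    return [xmin, ymin, xmax, ymax]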

In the image below, the area inside the red box is the region I fed into the network, and the green dots are the landmarks predicted by PIPNet.

[image: frame showing the red face box and the green predicted landmarks]

Some of these landmarks appear outside the box; does anyone know why?

Here's part of my code:

import os

import cv2
import numpy as np
import torch
from PIL import Image
from torchvision import transforms

# FaceBoxesDetector, forward_pip and get_meanface are imported elsewhere in my
# project from the PIPNet / FaceBoxesV2 code.

def face_feature(image, cfg, net, device, box=[], use_face_boxes_detector=False):
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    preprocess = transforms.Compose([transforms.Resize((cfg.input_size, cfg.input_size)),
                                     transforms.ToTensor(), normalize])
    # If use_face_boxes_detector is True, box may be empty; otherwise it must not be empty.
    assert len(box) != 0 or use_face_boxes_detector, \
        ('You must pass a face box that is not empty, '
         'or set use_face_boxes_detector=True to detect the face box')

    image_height, image_width, _ = image.shape
    if use_face_boxes_detector:
        # Detect the face box with FaceBoxesV2.
        detector = FaceBoxesDetector('FaceBoxes', '../FaceBoxesV2/weights/FaceBoxesV2.pth', cfg.use_gpu, device)
        my_thresh = 0.6
        detections, _ = detector.detect(image, my_thresh, 1)
        if len(detections) >= 1:
            box = detections[0][2:]  # [xmin, ymin, width, height]
            det_xmin = box[0]
            det_ymin = box[1]
            det_width = box[2]
            det_height = box[3]
            det_xmax = det_xmin + det_width - 1
            det_ymax = det_ymin + det_height - 1
    else:
        # Use the box passed in from the previous frame.
        det_xmin = box[0]
        det_ymin = box[1]
        det_xmax = box[2] - 1
        det_ymax = box[3] - 1
        det_width = det_xmax - det_xmin + 1
        det_height = det_ymax - det_ymin + 1
        cv2.rectangle(image, (det_xmin, det_ymin), (det_xmax, det_ymax), (0, 0, 255), 2)

    # Crop the face region and convert BGR (OpenCV) to RGB (PIL).
    det_crop = image[det_ymin:det_ymax, det_xmin:det_xmax, :]
    # cv2.imshow('The cropped image', det_crop)
    # cv2.waitKey(0)
    # det_crop = cv2.resize(det_crop, (cfg.input_size, cfg.input_size))
    inputs = Image.fromarray(det_crop[:, :, ::-1].astype('uint8'), 'RGB')

    inputs = preprocess(inputs).unsqueeze(0)
    inputs = inputs.to(device)

    # Run PIPNet and merge each landmark with its neighbor predictions.
    lms_pred_x, lms_pred_y, lms_pred_nb_x, lms_pred_nb_y, outputs_cls, max_cls = forward_pip(net, inputs, preprocess, cfg.input_size, cfg.net_stride, cfg.num_nb)
    lms_pred = torch.cat((lms_pred_x, lms_pred_y), dim=1).flatten()
    meanface_indices, reverse_index1, reverse_index2, max_len = get_meanface(os.path.join('../data', cfg.data_name, 'meanface.txt'), cfg.num_nb)
    tmp_nb_x = lms_pred_nb_x[reverse_index1, reverse_index2].view(cfg.num_lms, max_len)
    tmp_nb_y = lms_pred_nb_y[reverse_index1, reverse_index2].view(cfg.num_lms, max_len)
    tmp_x = torch.mean(torch.cat((lms_pred_x, tmp_nb_x), dim=1), dim=1).view(-1, 1)
    tmp_y = torch.mean(torch.cat((lms_pred_y, tmp_nb_y), dim=1), dim=1).view(-1, 1)
    lms_pred_merge = torch.cat((tmp_x, tmp_y), dim=1).flatten()
    lms_pred = lms_pred.cpu().numpy()
    lms_pred_merge = lms_pred_merge.cpu().numpy()
    landmarks = []
    for i in range(cfg.num_lms):
        # Predictions are in crop-normalized coordinates (they may fall outside [0, 1]),
        # so map them back to image coordinates with the box size and offset.
        x_pred = lms_pred_merge[i*2] * det_width + det_xmin
        y_pred = lms_pred_merge[i*2+1] * det_height + det_ymin
        landmarks.append([x_pred, y_pred])
        cv2.circle(image, (int(x_pred), int(y_pred)), 1, (0, 255, 0), 1)
    cv2.imshow('In face_feature function', image)
    cv2.waitKey(0)
    return np.array(landmarks)
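
And this is roughly how I call it per frame (the image path and the box values are placeholders; cfg, net and device are set up as in the PIPNet demo code):

frame = cv2.imread('frame_0001.jpg')
prev_box = [120, 80, 360, 320]  # estimated from the previous frame's landmarks
landmarks = face_feature(frame, cfg, net, device, box=prev_box)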

Hi, I think it is actually normal for predictions to fall outside the box, because part of the face is outside it and there is no constraint preventing the landmarks from being predicted outside.
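
If you need the points to stay inside the box, one option (not something PIPNet does internally, just a post-hoc sketch) is to clip the returned coordinates to the box afterwards:

import numpy as np

# landmarks and det_xmin/det_ymin/det_xmax/det_ymax as in your face_feature function.
landmarks = np.asarray(landmarks)
landmarks[:, 0] = np.clip(landmarks[:, 0], det_xmin, det_xmax)
landmarks[:, 1] = np.clip(landmarks[:, 1], det_ymin, det_ymax)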

I am a beginner; thank you very much for your answer.