gv22ga/dlib-face-recognition-android

recognize result without confidence

liaoweiguo opened this issue · 6 comments

VisionDetRet[] detRets = jniBitmapRec(bitmap);

detRets[i].getConfidence()

always returns 0.0, so I cannot pick the best face.

        results = mFaceRec.recognize(mCroppedBitmap);
        long endTime = System.currentTimeMillis();
        Log.d(TAG, "Time cost: " + String.valueOf((endTime - startTime) / 1000f) + " sec");

        ArrayList<String> names = new ArrayList<>();
        for(VisionDetRet n:results) {
            names.add(n.getLabel() + n.getConfidence());
        }

I haven't worked on the getConfidence() method; it will always return 0. The idea is that getLabel() will return the name of the recognized person only when it's confident enough.

I added getConfidence(), but it fails to build.

Anyway, the ability to tune the confidence threshold is absolutely necessary. Please consider it when you have time.

  1. recognizer.h, class DLibFaceRecognizer, line 74:

     std::vector<float> rec_confidences;

  2. recognizer.h, DLibFaceRecognizer::rec, line 180:

     float confidence;
     for (size_t i = 0; i < face_descriptors.size(); ++i) {
       for (size_t j = 0; j < rec_face_descriptors.size(); ++j) {
         confidence = length(face_descriptors[i] - rec_face_descriptors[j]);
         if (confidence < 0.5) {
           LOG(INFO) << rec_names[j] << " FOUND!!!!";
           dlib::rectangle r = frects[i];
           rec_rects.push_back(r);
           rec_labels.push_back(rec_names[j]);
           rec_confidences.push_back(confidence);
         }
       }
     }

  3. recognizer.h, last line:

     inline std::vector<float> getRecResultConfidences() { return rec_confidences; }

  4. jni_face_rec.cpp, line 92:

     jobjectArray getRecResult(JNIEnv* env, RecPtr faceRecognizer,
                               const int& size) {
       LOG(INFO) << "getRecResult";
       jobjectArray jDetRetArray = JNI_VisionDetRet::createJObjectArray(env, size);
       for (int i = 0; i < size; i++) {
         jobject jDetRet = JNI_VisionDetRet::createJObject(env);
         env->SetObjectArrayElement(jDetRetArray, i, jDetRet);
         dlib::rectangle rect = faceRecognizer->getRecResultRects()[i];
         std::string label = faceRecognizer->getRecResultLabels()[i];
         float confidence = faceRecognizer->getRecResultConfidences()[i];
         g_pJNI_VisionDetRet->setRect(env, jDetRet, rect.left(), rect.top(),
                                      rect.right(), rect.bottom());
         g_pJNI_VisionDetRet->setLabel(env, jDetRet, label);
         g_pJNI_VisionDetRet->setConfidence(env, jDetRet, confidence);
       }
       return jDetRetArray;
     }

Rebuild the .so and it works.
With the threshold at 0.6, the lib gives a lot of false matches, so exposing the confidence lets the app deal with that. Note that this "confidence" is really the Euclidean distance between two 128-D face descriptors, so a lower value means a better match.
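
For example, on the app side, a sketch like the one below could apply a stricter cutoff than the library's built-in 0.6. The MatchFilter class and the 0.45 value are illustrative choices, not part of the library, and assume the patched getConfidence() returns the descriptor distance (lower = closer match):

import com.tzutalin.dlib.VisionDetRet;

import java.util.ArrayList;
import java.util.List;

public class MatchFilter {
    // Illustrative cutoff, stricter than the library's 0.6; tune it on your own data.
    private static final float APP_CUTOFF = 0.45f;

    // Keep only the results whose descriptor distance beats the cutoff.
    public static List<VisionDetRet> filterMatches(List<VisionDetRet> results) {
        List<VisionDetRet> accepted = new ArrayList<>();
        for (VisionDetRet ret : results) {
            if (ret.getConfidence() < APP_CUTOFF) {
                accepted.add(ret);
            }
        }
        return accepted;
    }
}

Raising or lowering APP_CUTOFF then trades false accepts against false rejects without rebuilding the .so.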

The threshold of 0.6 gives over 99% accuracy, as shown in this example on the official site. I think the problem is image quality, which varies with the device hardware.
You can try changing some parameters to control image quality, as described in the readme.

I did all of this but I always get a confidence of 0.0. I want to get the best match in the result, which is not possible without a value that differentiates the candidates. Please help me resolve my problem.

Here is my code so far, following the suggestions above.

recognizer.h

// Created by Gaurav on Feb 23, 2018

#pragma once

#include <dlib/dnn.h>
#include <dlib/string.h>
#include <jni_common/jni_fileutils.h>
#include <jni_common/jni_utils.h>
#include <dlib/image_processing.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/opencv/cv_image.h>
#include <dlib/image_loader/load_image.h>
#include <glog/logging.h>
#include <jni.h>
#include <memory>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <stdio.h>
#include <string>
#include <vector>
#include <unordered_map>
#include <time.h>
#include <dirent.h>

using namespace dlib;
using namespace std;

// ResNet network copied from dnn_face_recognition_ex.cpp in dlib/examples
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;

template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>;

template <int N, template <typename> class BN, int stride, typename SUBNET>
using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>;

template <int N, typename SUBNET> using ares      = relu<residual<block,N,affine,SUBNET>>;
template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;

template <typename SUBNET> using alevel0 = ares_down<256,SUBNET>;
template <typename SUBNET> using alevel1 = ares<256,ares<256,ares_down<256,SUBNET>>>;
template <typename SUBNET> using alevel2 = ares<128,ares<128,ares_down<128,SUBNET>>>;
template <typename SUBNET> using alevel3 = ares<64,ares<64,ares<64,ares_down<64,SUBNET>>>>;
template <typename SUBNET> using alevel4 = ares<32,ares<32,ares<32,SUBNET>>>;

using anet_type = loss_metric<fc_no_bias<128,avg_pool_everything<
                            alevel0<
                            alevel1<
                            alevel2<
                            alevel3<
                            alevel4<
                            max_pool<3,3,2,2,relu<affine<con<32,7,7,2,2,
                            input_rgb_image_sized<150>
                            >>>>>>>>>>>>;

class DLibFaceRecognizer {
private:
std::string landmark_model;
std::string model_dir_path;
std::string image_dir_path;
std::string dnn_model;
anet_type net;
dlib::shape_predictor sp;
std::unordered_map<int, dlib::full_object_detection> mFaceShapeMap;
dlib::frontal_face_detector face_detector;
std::vector<dlib::rectangle> rects;
std::vector<std::string> rec_names;
std::vector<matrix<float,0,1>> rec_face_descriptors;
std::vector<dlib::rectangle> rec_rects;
std::vector<std::string> rec_labels;
std::vector<float> rec_confidences;
bool is_training;

inline void init() {
LOG(INFO) << "init DLibFaceRecognizer";
face_detector = dlib::get_frontal_face_detector();
landmark_model = model_dir_path + "/shape_predictor_5_face_landmarks.dat";
dnn_model = model_dir_path + "/dlib_face_recognition_resnet_model_v1.dat";
image_dir_path = model_dir_path + "/images";
is_training = false;
}

public:
inline void train() {
LOG(INFO) << "train DLibFaceRecognizer";
struct dirent *entry;
DIR *dp;

dp = opendir((image_dir_path).c_str());
if (dp == NULL) {
    LOG(INFO) << "Opendir: Path does not exist or could not be read.";
    return;  // nothing to train on; readdir below would crash on a NULL DIR*
}

std::vector<matrix<rgb_pixel>> faces;
std::vector<std::string> names;

// load images from dlib image directory and extract faces
while ((entry = readdir(dp))) {
  std::string filename = entry->d_name;
  if (filename=="." || filename=="..") continue;

  cv::Mat file_image = cv::imread(image_dir_path + "/" + filename, CV_LOAD_IMAGE_COLOR);
  LOG(INFO) << "Load image " << (entry->d_name);
  dlib::cv_image<dlib::bgr_pixel> img(file_image);

  std::vector<dlib::rectangle> frects = face_detector(img);
  if (frects.size()==1) {
    auto face = frects[0];
    auto shape = sp(img, face);
    matrix<rgb_pixel> face_chip;
    extract_image_chip(img, get_face_chip_details(shape,150,0.25), face_chip);
    faces.push_back(move(face_chip));
    names.push_back(filename);
    LOG(INFO) << "Added image " << filename;
  } else if (frects.size()==0) {
    LOG(INFO) << "No face found in image " << filename;
  } else {
    LOG(INFO) << "More than one face found in image " << filename;
  }
}
closedir(dp);

is_training = true;
// calculate face descriptors and set global vars
LOG(INFO) << "Calculating face descriptors " << jniutils::currentDateTime();
rec_face_descriptors = net(faces);
LOG(INFO) << "Calculated face descriptors  " << jniutils::currentDateTime()<<" Size "<<rec_face_descriptors.size();
rec_names = names;
is_training = false;

}

DLibFaceRecognizer() { init(); }

DLibFaceRecognizer(const std::string& dlib_rec_example_dir)
: model_dir_path(dlib_rec_example_dir) {
init();
if (!landmark_model.empty() && jniutils::fileExists(landmark_model) && !dnn_model.empty() && jniutils::fileExists(dnn_model)) {
// load the model weights
dlib::deserialize(landmark_model) >> sp;
dlib::deserialize(dnn_model) >> net;
LOG(INFO) << "Models loaded";
}
}

inline int rec(const cv::Mat& image) {
if (is_training) return 0;
if (image.empty())
return 0;
if (image.channels() == 1) {
cv::cvtColor(image, image, CV_GRAY2BGR);
}
CHECK(image.channels() == 3);

dlib::cv_image<dlib::bgr_pixel> img(image);

std::vector<matrix<rgb_pixel>> faces;
std::vector<dlib::rectangle> frects = face_detector(img);
for (auto face : frects)
{
  auto shape = sp(img, face);
  matrix<rgb_pixel> face_chip;
  extract_image_chip(img, get_face_chip_details(shape,150,0.25), face_chip);
  faces.push_back(move(face_chip));
}

if (faces.size() == 0)
{
  LOG(INFO) << "No faces found in image!";
}
LOG(INFO) << "calculating face descriptor in image..." << jniutils::currentDateTime();
std::vector<matrix<float,0,1>> face_descriptors = net(faces);
LOG(INFO) << "face descriptors in camera image calculated   "<<jniutils::currentDateTime()<<" Size "<<face_descriptors.size();

rec_rects.clear();
rec_labels.clear();
rec_confidences.clear();  // keep confidences aligned with rects and labels across calls
for (size_t i = 0; i < face_descriptors.size(); ++i) {
  for (size_t j = 0; j < rec_face_descriptors.size(); ++j) {
    // Euclidean distance between descriptors: lower means a better match
    float confidence = length(face_descriptors[i] - rec_face_descriptors[j]);

    if (confidence < 0.6) {
      LOG(INFO) << rec_names[j] << " FOUND!!!!";
      dlib::rectangle r = frects[i];
      rec_rects.push_back(r);
      rec_labels.push_back(rec_names[j]);
      rec_confidences.push_back(confidence);
    }
  }
}

return rec_rects.size();

}

virtual inline int det(const cv::Mat& image) {
if (is_training) return 0;
if (image.empty())
return 0;
if (image.channels() == 1) {
cv::cvtColor(image, image, CV_GRAY2BGR);
}
CHECK(image.channels() == 3);
// TODO : Convert to gray image to speed up detection
// It's unnecessary to use color image for face/landmark detection

dlib::cv_image<dlib::bgr_pixel> img(image);

std::vector<matrix<rgb_pixel>> faces;
rects = face_detector(img);
return rects.size();

}

inline std::vector<dlib::rectangle> getRecResultRects() { return rec_rects; }
inline std::vector<std::string> getRecResultLabels() { return rec_labels; }
inline std::vector<dlib::rectangle> getDetResultRects() { return rects; }
inline std::vector<float> getRecResultConfidences() { return rec_confidences; }
};

//------------------------------
jni_face_rec.cpp

/*
 * Created on: Oct 20, 2015
 *     Author: Tzutalin
 *
 * Copyright (c) 2015 Tzutalin. All rights reserved.
 */
// Modified by Gaurav on Feb 23, 2018

#include <android/bitmap.h>
#include <jni_common/jni_bitmap2mat.h>
#include <jni_common/jni_primitives.h>
#include <jni_common/jni_fileutils.h>
#include <jni_common/jni_utils.h>
#include <recognizer.h>
#include <jni.h>

using namespace cv;

extern JNI_VisionDetRet* g_pJNI_VisionDetRet;

namespace {

#define JAVA_NULL 0
using RecPtr = DLibFaceRecognizer*;

class JNI_FaceRec {
public:
JNI_FaceRec(JNIEnv* env) {
jclass clazz = env->FindClass(CLASSNAME_FACE_REC);
mNativeContext = env->GetFieldID(clazz, "mNativeFaceRecContext", "J");
env->DeleteLocalRef(clazz);
}

RecPtr getRecognizerPtrFromJava(JNIEnv* env, jobject thiz) {
RecPtr const p = (RecPtr)env->GetLongField(thiz, mNativeContext);
return p;
}

void setRecognizerPtrToJava(JNIEnv* env, jobject thiz, jlong ptr) {
env->SetLongField(thiz, mNativeContext, ptr);
}

jfieldID mNativeContext;
};

// Protect getting/setting and creating/deleting pointer between java/native
std::mutex gLock;

std::shared_ptr<JNI_FaceRec> getJNI_FaceRec(JNIEnv* env) {
static std::once_flag sOnceInitflag;
static std::shared_ptr<JNI_FaceRec> sJNI_FaceRec;
std::call_once(sOnceInitflag, [env] {
  sJNI_FaceRec = std::make_shared<JNI_FaceRec>(env);
});
return sJNI_FaceRec;
}

RecPtr const getRecPtr(JNIEnv* env, jobject thiz) {
std::lock_guard<std::mutex> lock(gLock);
return getJNI_FaceRec(env)->getRecognizerPtrFromJava(env, thiz);
}

// The function to set a pointer to java and delete it if newPtr is empty
void setRecPtr(JNIEnv* env, jobject thiz, RecPtr newPtr) {
std::lock_guard<std::mutex> lock(gLock);
RecPtr oldPtr = getJNI_FaceRec(env)->getRecognizerPtrFromJava(env, thiz);
if (oldPtr != JAVA_NULL) {
DLOG(INFO) << "setMapManager delete old ptr : " << oldPtr;
delete oldPtr;
}

if (newPtr != JAVA_NULL) {
DLOG(INFO) << "setMapManager set new ptr : " << newPtr;
}

getJNI_FaceRec(env)->setRecognizerPtrToJava(env, thiz, (jlong)newPtr);
}

}  // end unnamed namespace

#ifdef __cplusplus
extern "C" {
#endif

#define DLIB_FACE_JNI_METHOD(METHOD_NAME) \
  Java_com_tzutalin_dlib_FaceRec_##METHOD_NAME

void JNIEXPORT
DLIB_FACE_JNI_METHOD(jniNativeClassInit)(JNIEnv* env, jclass _this) {}

jobjectArray getRecResult(JNIEnv* env, RecPtr faceRecognizer,
const int& size) {
LOG(INFO) << "getRecResult";
jobjectArray jDetRetArray = JNI_VisionDetRet::createJObjectArray(env, size);
for (int i = 0; i < size; i++) {
jobject jDetRet = JNI_VisionDetRet::createJObject(env);
env->SetObjectArrayElement(jDetRetArray, i, jDetRet);
dlib::rectangle rect = faceRecognizer->getRecResultRects()[i];
std::string label = faceRecognizer->getRecResultLabels()[i];
float confidence = faceRecognizer->getRecResultConfidences()[i];
g_pJNI_VisionDetRet->setRect(env, jDetRet, rect.left(), rect.top(),
rect.right(), rect.bottom());
g_pJNI_VisionDetRet->setLabel(env, jDetRet, label);
g_pJNI_VisionDetRet->setConfidence(env, jDetRet, confidence);
}
return jDetRetArray;
}

jobjectArray getDetResult(JNIEnv* env, RecPtr faceRecognizer,
const int& size) {
LOG(INFO) << "getDetResult";
jobjectArray jDetRetArray = JNI_VisionDetRet::createJObjectArray(env, size);
for (int i = 0; i < size; i++) {
jobject jDetRet = JNI_VisionDetRet::createJObject(env);
env->SetObjectArrayElement(jDetRetArray, i, jDetRet);
dlib::rectangle rect = faceRecognizer->getDetResultRects()[i];
std::string label = "face";
g_pJNI_VisionDetRet->setRect(env, jDetRet, rect.left(), rect.top(),
rect.right(), rect.bottom());
g_pJNI_VisionDetRet->setLabel(env, jDetRet, label);
}
return jDetRetArray;
}

JNIEXPORT jobjectArray JNICALL
DLIB_FACE_JNI_METHOD(jniBitmapDetect)(JNIEnv* env, jobject thiz,
jobject bitmap) {
LOG(INFO) << "jniBitmapFaceDet";
cv::Mat rgbaMat;
cv::Mat bgrMat;
jniutils::ConvertBitmapToRGBAMat(env, bitmap, rgbaMat, true);
cv::cvtColor(rgbaMat, bgrMat, cv::COLOR_RGBA2BGR);
RecPtr mRecPtr = getRecPtr(env, thiz);
jint size = mRecPtr->det(bgrMat);
LOG(INFO) << "det face size: " << size;
return getDetResult(env, mRecPtr, size);
}

JNIEXPORT jobjectArray JNICALL
DLIB_FACE_JNI_METHOD(jniBitmapRec)(JNIEnv* env, jobject thiz,
jobject bitmap) {
LOG(INFO) << "jniBitmapFaceRec";
cv::Mat rgbaMat;
cv::Mat bgrMat;
jniutils::ConvertBitmapToRGBAMat(env, bitmap, rgbaMat, true);
cv::cvtColor(rgbaMat, bgrMat, cv::COLOR_RGBA2BGR);
RecPtr mRecPtr = getRecPtr(env, thiz);
jint size = mRecPtr->rec(bgrMat);
LOG(INFO) << "rec face size: " << size;
return getRecResult(env, mRecPtr, size);
}

jint JNIEXPORT JNICALL DLIB_FACE_JNI_METHOD(jniInit)(JNIEnv* env, jobject thiz,
jstring jDirPath) {
LOG(INFO) << "jniInit";
std::string dirPath = jniutils::convertJStrToString(env, jDirPath);
RecPtr mRecPtr = new DLibFaceRecognizer(dirPath);
setRecPtr(env, thiz, mRecPtr);
return JNI_OK;
}

jint JNIEXPORT JNICALL DLIB_FACE_JNI_METHOD(jniTrain)(JNIEnv* env, jobject thiz) {
LOG(INFO) << "jniTrain";
RecPtr mRecPtr = getRecPtr(env, thiz);
mRecPtr->train();
return JNI_OK;
}

jint JNIEXPORT JNICALL
DLIB_FACE_JNI_METHOD(jniDeInit)(JNIEnv* env, jobject thiz) {
LOG(INFO) << "jniDeInit";
setRecPtr(env, thiz, JAVA_NULL);
return JNI_OK;
}

#ifdef __cplusplus
}
#endif

//------------------
VisionDetRet.java

/*
 * Copyright (C) 2015 TzuTaLin
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.tzutalin.dlib;

/**
 * Created by Tzutalin on 2015/10/20.
 */

import android.graphics.Point;

import java.util.ArrayList;

/**
 * A VisionDetRet contains all the information identifying the location and
 * confidence value of the detected object in a bitmap.
 */
public final class VisionDetRet {
    private String mLabel;
    private float mConfidence;
    private int mLeft;
    private int mTop;
    private int mRight;
    private int mBottom;
    private ArrayList<Point> mLandmarkPoints = new ArrayList<>();

    VisionDetRet() {
    }

    /**
     * @param label      Label name
     * @param confidence A confidence factor between 0 and 1. This indicates how
     *                   certain what has been found is actually the label.
     * @param l          The X coordinate of the left side of the result
     * @param t          The Y coordinate of the top of the result
     * @param r          The X coordinate of the right side of the result
     * @param b          The Y coordinate of the bottom of the result
     */
    public VisionDetRet(String label, float confidence, int l, int t, int r, int b) {
        mLabel = label;
        mLeft = l;
        mTop = t;
        mRight = r;
        mBottom = b;
        mConfidence = confidence;
    }

    /**
     * @return The X coordinate of the left side of the result
     */
    public int getLeft() {
        return mLeft;
    }

    /**
     * @return The Y coordinate of the top of the result
     */
    public int getTop() {
        return mTop;
    }

    /**
     * @return The X coordinate of the right side of the result
     */
    public int getRight() {
        return mRight;
    }

    /**
     * @return The Y coordinate of the bottom of the result
     */
    public int getBottom() {
        return mBottom;
    }

    /**
     * @return A confidence factor between 0 and 1. This indicates how certain
     *         what has been found is actually the label.
     */
    public float getConfidence() {
        return mConfidence;
    }

    /**
     * @return The label of the result
     */
    public String getLabel() {
        return mLabel;
    }

    /**
     * Add a landmark to the list. Usually called by jni.
     * @param x Point x
     * @param y Point y
     * @return true if the landmark was added successfully
     */
    public boolean addLandmark(int x, int y) {
        return mLandmarkPoints.add(new Point(x, y));
    }

    /**
     * Return the list of landmark points.
     * @return ArrayList of android.graphics.Point
     */
    public ArrayList<Point> getFaceLandmarks() {
        return mLandmarkPoints;
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("Left:");
        sb.append(mLeft);
        sb.append(", Top:");
        sb.append(mTop);
        sb.append(", Right:");
        sb.append(mRight);
        sb.append(", Bottom:");
        sb.append(mBottom);
        sb.append(", Label:");
        sb.append(mLabel);
        return sb.toString();
    }
}

So please help me with your valuable suggestions: what can I do to get an accurate match when there are 250-300 images?
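
As a sketch of one approach, assuming the patched getConfidence() really holds the descriptor distance (lower = better): with 250-300 gallery images, the matching loop in rec() can emit several names under the 0.6 cutoff for the same face, so instead of taking them all, pick the single result with the smallest confidence on the Java side. The BestMatch class below is a hypothetical helper, not part of the library:

import com.tzutalin.dlib.VisionDetRet;

import java.util.List;

public class BestMatch {
    // Return the closest match among all recognition results,
    // i.e. the one with the smallest descriptor distance.
    public static VisionDetRet bestMatch(List<VisionDetRet> results) {
        VisionDetRet best = null;
        for (VisionDetRet ret : results) {
            if (best == null || ret.getConfidence() < best.getConfidence()) {
                best = ret;
            }
        }
        return best;  // null when no result passed the library's 0.6 cutoff
    }
}

Also double-check that rec_confidences is cleared at the top of rec() along with rec_rects and rec_labels, otherwise stale entries from earlier calls push the confidences out of alignment with the labels, and make sure the rebuilt .so is actually the one packaged into the APK, since a constant 0.0 is exactly what the unpatched native code returns.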