JustasBart/yolov8_CPP_Inference_OpenCV_ONNX

False detections on YOLOv8

oguzalp7 opened this issue · 1 comments

I'm working on a similar project to yours and have successfully run yolov5s (ref: https://learnopencv.com/object-detection-using-yolov5-and-opencv-dnn-in-c-and-python/) in my project. I also have a model in YOLOv8, yet I couldn't make it work. I thought I had made a normalization mistake, so I tried your repository with your images and your model (I have them too), and I got the same results as your project.

Here's the output of the terminal: $ ./Yolov8CPPInference

Running on CUDA
Number of detections:1
Number of detections:1

and the output images are:

im0
im1

I'd like to get your attention on the colored lines located at the top left areas of the images.

I was able to run it via a dummy script. I guess my problem was caused by an OOP mistake.

`
#include <iostream>
#include <vector>

#include <opencv2/dnn.hpp>
#include <opencv2/opencv.hpp>

struct Detection
{
int class_id{0};
float confidence{0.0};
cv::Rect box{};
};

void test(cv::Mat &image, cv::dnn::Net &net){
// Preprocess the input image (if needed)
// Example: You can resize the image to match the input size expected by the model
cv::Size inputSize = cv::Size(640, 480); // Set to the model's input size
cv::Mat blob;
cv::dnn::blobFromImage(image, blob, 1./255., inputSize, cv::Scalar(), true, false);

// Set the input blob for the network
net.setInput(blob);

cv::Mat outputs = net.forward();

int rows = outputs.size[2];
int dims = outputs.size[1];
outputs = outputs.reshape(1, dims);
cv::transpose(outputs, outputs);


//std::cout << rows << std::endl;
float *data = (float *)outputs.data;
float x_factor = image.cols / 640.0;
float y_factor = image.rows / 480.0;

std::vector<int> class_ids;
std::vector<float> confidences;
std::vector<cv::Rect> boxes;

for(int i=0; i<rows; i++){
    float *classes_scores = data + 4;
    cv::Mat scores(1, 1, CV_32FC1, classes_scores);

    cv::Point class_id;
    double maxClassScore;
    cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
    if (maxClassScore > 0.5){

        confidences.push_back(maxClassScore);
        class_ids.push_back(class_id.x);

        float x = data[0];
        float y = data[1];
        float w = data[2];
        float h = data[3];

        int left = int((x - 0.5 * w) * x_factor);
        int top = int((y - 0.5 * h) * y_factor);

        int width = int(w * x_factor);
        int height = int(h * y_factor);

        boxes.push_back(cv::Rect(left, top, width, height));
    }
    data+=dims;
}

std::vector<int> nms_result;
cv::dnn::NMSBoxes(boxes, confidences, 0.5, 0.4, nms_result);

std::vector<Detection> detections{};
for (unsigned long i = 0; i < nms_result.size(); ++i){
    int idx = nms_result[i];
    Detection result;
    result.class_id = class_ids[idx];
    result.confidence = confidences[idx];

    
    result.box = boxes[idx];

    detections.push_back(result);
}

//std::cout << detections.size() << std::endl;
for(int i=0; i<detections.size(); i++){
    Detection detection = detections[i];

    cv::Rect box = detection.box;

    // Detection box
    cv::rectangle(image, box, cv::Scalar(255, 160, 0), 2);
}

cv::imshow("Output", image);
cv::waitKey(0);

}

int main() {
// Load the ONNX model
cv::dnn::Net net = cv::dnn::readNet("model.onnx");

if (net.empty()) {
    std::cerr << "Failed to load ONNX model!" << std::endl;
    return -1;
}

// Load an input image
cv::Mat image = cv::imread("image.jpg");

if (image.empty()) {
    std::cerr << "Failed to load input image!" << std::endl;
    return -1;
}
test(image, net);


return 0;

}
`