yaoyi30/YOLOv5-cls-TensorRT-Cplusplus

批处理出问题

Opened this issue · 2 comments

如果使用批处理，一次推理多张图片时会出问题：推理的结果为空。

解决方法很简单，按下面的方式修改这两个函数就可以了。
batch_preprocess(std::vectorcv::Mat& imgs, float* input_data)
{
std::vectorcv::Mat InputImage;

for (size_t i = 0; i < imgs.size(); i++) {
	cv::Mat img_ori;
	cv::resize(imgs.at(i), img_ori, cv::Size(IMAGE_WIDTH, IMAGE_HEIGHT));
	InputImage.push_back(img_ori);
}
int ImgCount = InputImage.size();
for (int b = 0; b < ImgCount; b++) {
	cv::Mat img = InputImage.at(b);
	int w = img.cols;
	int h = img.rows;
	int i = 0;
	for (int row = 0; row < h; ++row) {
		uchar* uc_pixel = img.data + row * img.step;
		for (int col = 0; col < img.cols; ++col) {
			input_data[b * 3 * img.rows * img.cols + i] = ((float)uc_pixel[2] / 255.0 - 0.485) / 0.229;
			input_data[b * 3 * img.rows * img.cols + i + img.rows * img.cols] = ((float)uc_pixel[1] / 255.0 - 0.456) / 0.224;
			input_data[b * 3 * img.rows * img.cols + i + 2 * img.rows * img.cols] = ((float)uc_pixel[0] / 255.0 - 0.406) / 0.225;
			uc_pixel += 3;
			++i;
		}
	}

}

}

BatchInferimg(std::vectorcv::Mat& src_imgs)
{
int input_size = BATCH_SIZE * INPUT_CHANNEL * IMAGE_WIDTH * IMAGE_HEIGHT;
int out_size = BATCH_SIZE * OUTPUT_SIZE;
std::vector<std::vector> boxes2;

for (size_t i = 0; i < src_imgs.size(); i+= BATCH_SIZE) {
	std::vector<cv::Mat> imgs;
	std::vector<float> boxes;
	float* img_data = new float[input_size];
	float* prob_result = new float[out_size];
	for (size_t j = i; j <i+ BATCH_SIZE && j < src_imgs.size(); j++) {
		cv::Mat src_img = src_imgs.at(j);
		cv::Mat ori_img = src_img.clone();
		imgs.push_back(ori_img);
	}
	auto pre_start = std::chrono::high_resolution_clock::now();
	this->batch_preprocess(imgs, img_data);
	auto pre_end = std::chrono::high_resolution_clock::now();
	float total_pre = std::chrono::duration<float, std::milli>(pre_end - pre_start).count();
	std::cout << "preprocess take: " << total_pre << " ms." << std::endl;

	// do inference
	auto t_start = std::chrono::high_resolution_clock::now();
	doInference(*context, img_data, prob_result, BATCH_SIZE);
	auto t_end = std::chrono::high_resolution_clock::now();
	float total_inf = std::chrono::duration<float, std::milli>(t_end - t_start).count();
	std::cout << "Inference take: " << total_inf << " ms." << std::endl;
	float* prob_result2 = new float[OUTPUT_SIZE];
	for (int i = 0; i < imgs.size(); i++) {
		for (int j = i * OUTPUT_SIZE; j < (i + 1) * OUTPUT_SIZE; j++) {
			prob_result2[j%OUTPUT_SIZE] = prob_result[j];
		}
		boxes = softmax(prob_result2, OUTPUT_SIZE);
		boxes2.push_back(boxes);
	}
}
return boxes2;

}
这样就可以批处理了