template<>
__global__ voidHSwishKernel(float* input, float* output, int edge) {
KernelPositionBlock;
float x = input[position];
float a = x + 3;
a = a < 0 ? 0 : (a >= 6 ? 6 : a);
output[position] = x * a / 6;
}
intHSwish::enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) {
int count = inputs[0].count();
auto grid = CUDATools::grid_dims(count);
auto block = CUDATools::block_dims(count);
HSwishKernel <<<grid, block, 0, stream >>> (inputs[0].ptr<float>(), outputs[0].ptr<float>(), count);
return0;
}
RegisterPlugin(HSwish);
执行方式
配置好Makefile中的依赖项路径
make yolo -j64即可
执行结果
[2021-07-22 14:37:11][info][_main.cpp:160]:===================== test fp32 ==================================
[2021-07-22 14:37:11][info][trt_builder.cpp:430]:Compile FP32 Onnx Model 'yolov5m.onnx'.
[2021-07-22 14:37:18][warn][trt_infer.cpp:27]:NVInfer WARNING: src/tensorRT/onnx_parser/ModelImporter.cpp:257: Change input batch size: images, final dimensions: (1, 3, 640, 640), origin dimensions: (5, 3, 640, 640)
[2021-07-22 14:37:18][info][trt_builder.cpp:548]:Input shape is 1 x 3 x 640 x 640
[2021-07-22 14:37:18][info][trt_builder.cpp:549]:Set max batch size = 3
[2021-07-22 14:37:18][info][trt_builder.cpp:550]:Set max workspace size = 1024.00 MB
[2021-07-22 14:37:18][info][trt_builder.cpp:551]:Dynamic batch dimension is true
[2021-07-22 14:37:18][info][trt_builder.cpp:554]:Network has 1 inputs:
[2021-07-22 14:37:18][info][trt_builder.cpp:560]: 0.[images] shape is 1 x 3 x 640 x 640
[2021-07-22 14:37:18][info][trt_builder.cpp:566]:Network has 3 outputs:
[2021-07-22 14:37:18][info][trt_builder.cpp:571]: 0.[470] shape is 1 x 255 x 80 x 80
[2021-07-22 14:37:18][info][trt_builder.cpp:571]: 1.[471] shape is 1 x 255 x 40 x 40
[2021-07-22 14:37:18][info][trt_builder.cpp:571]: 2.[472] shape is 1 x 255 x 20 x 20
[2021-07-22 14:37:18][verbo][trt_builder.cpp:575]:Network has 226 layers:
[2021-07-22 14:37:18][verbo][trt_builder.cpp:606]: >>> 0. Slice 1 x 3 x 640 x 640 -> 1 x 3 x 320 x 640
[2021-07-22 14:37:18][verbo][trt_builder.cpp:606]: 1. Slice 1 x 3 x 320 x 640 -> 1 x 3 x 320 x 320
[2021-07-22 14:37:18][verbo][trt_builder.cpp:606]: >>> 2. Slice 1 x 3 x 640 x 640 -> 1 x 3 x 320 x 640
[2021-07-22 14:37:18][verbo][trt_builder.cpp:606]: 3. Slice 1 x 3 x 320 x 640 -> 1 x 3 x 320 x 320
[2021-07-22 14:37:18][verbo][trt_builder.cpp:606]: >>> 4. Slice 1 x 3 x 640 x 640 -> 1 x 3 x 320 x 640
[2021-07-22 14:37:18][verbo][trt_builder.cpp:606]: 5. Slice 1 x 3 x 320 x 640 -> 1 x 3 x 320 x 320
[2021-07-22 14:37:18][verbo][trt_builder.cpp:606]: >>> 6. Slice 1 x 3 x 640 x 640 -> 1 x 3 x 320 x 640
[2021-07-22 14:37:18][verbo][trt_builder.cpp:606]: 7. Slice 1 x 3 x 320 x 640 -> 1 x 3 x 320 x 320
[2021-07-22 14:37:18][verbo][trt_builder.cpp:606]: 222.LeakyRelu 1 x 768 x 20 x 20 -> 1 x 768 x 20 x 20
[2021-07-22 14:37:18][verbo][trt_builder.cpp:606]: *** 223.Convolution 1 x 192 x 80 x 80 -> 1 x 255 x 80 x 80 channel: 255, kernel: 1 x 1, padding: 0 x 0, stride: 1 x 1, dilation: 1 x 1, group: 1
[2021-07-22 14:37:18][verbo][trt_builder.cpp:606]: *** 224.Convolution 1 x 384 x 40 x 40 -> 1 x 255 x 40 x 40 channel: 255, kernel: 1 x 1, padding: 0 x 0, stride: 1 x 1, dilation: 1 x 1, group: 1
[2021-07-22 14:37:18][verbo][trt_builder.cpp:606]: *** 225.Convolution 1 x 768 x 20 x 20 -> 1 x 255 x 20 x 20 channel: 255, kernel: 1 x 1, padding: 0 x 0, stride: 1 x 1, dilation: 1 x 1, group: 1
[2021-07-22 14:37:18][info][trt_builder.cpp:615]:Building engine...
[2021-07-22 14:37:19][warn][trt_infer.cpp:27]:NVInfer WARNING: Detected invalid timing cache, setup a local cache instead
[2021-07-22 14:37:40][info][trt_builder.cpp:635]:Build done 22344 ms !
Engine 0x23dd7780 detail
Max Batch Size: 3
Dynamic Batch Dimension: true
Inputs: 1
0.images : shape {1 x 3 x 640 x 640}
Outputs: 3
0.470 : shape {1 x 255 x 80 x 80}
1.471 : shape {1 x 255 x 40 x 40}
2.472 : shape {1 x 255 x 20 x 20}
[2021-07-22 14:37:42][info][_main.cpp:77]:input.shape = 3 x 3 x 640 x 640
[2021-07-22 14:37:42][info][_main.cpp:96]:input->shape_string() = 3 x 3 x 640 x 640
[2021-07-22 14:37:42][info][_main.cpp:124]:outputs[0].size = 2
[2021-07-22 14:37:42][info][_main.cpp:124]:outputs[1].size = 5
[2021-07-22 14:37:42][info][_main.cpp:124]:outputs[2].size = 1