- 自定义安装指定版本库(这里可以直接用本仓库中的)
git clone -b 'v0.5.0' https://github.com/open-mmlab/mmyolo.git --single-branch mmyolo
git clone -b 'v0.7.2' https://github.com/open-mmlab/mmengine.git --single-branch mmengine
git clone -b 'v3.0.0rc6' https://github.com/open-mmlab/mmdetection.git --single-branch mmdetection
git clone -b 'v2.0.0rc4' https://github.com/open-mmlab/mmcv.git --single-branch mmcv
conda create -n mmyolo_zl python=3.8
# CUDA 10.2
pip install torch==1.12.1+cu102 torchvision==0.13.1+cu102 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu102
# 切换到代码目录
wget https://github.com/microsoft/onnxruntime/releases/download/v1.12.1/onnxruntime-linux-x64-1.12.1.tgz
tar -zxvf onnxruntime-linux-x64-1.12.1.tgz
cd onnxruntime-linux-x64-1.12.1
export ONNXRUNTIME_DIR=$(pwd)
export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH
pip install onnx==1.11.0
pip install onnxruntime==1.12.1
pip install onnxsim==0.4.8
pip install opencv-python==4.7.0.72
pip install yapf==0.32.0
pip install pyyaml==6.0
cd mmengine
pip install -v -e .
cd ..
cd mmcv
# 编译安装mmcv相关onnxruntime算子
MMCV_WITH_OPS=1 python setup.py develop
cd ..
cd mmdetection
pip install -v -e .
cd ..
# mmrotate
git clone -b 'v1.0.0rc1' https://github.com/open-mmlab/mmrotate.git --single-branch mmrotate
cd mmrotate
pip install -v -e .
cd ..
cd mmyolo
# Install albumentations
pip install -r requirements/albu.txt
pip install -v -e .
# downgrade protobuf package version 3.20.x
pip install protobuf==3.20.0
pip install netron
# install tensorrt
pip install pycuda==2022.1
# 到自己的tensorrt下面找到whl包并安装
pip install numpy==1.21.6
https://mmyolo.readthedocs.io/zh_CN/dev/get_started/15_minutes_object_detection.html#easydeploy
# 导出onnx tensorrt版本
# yolov5
python projects/easydeploy/tools/export.py \
configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \
--work-dir work_dirs/yolov5_s-v61_fast_1xb12-40e_cat \
--img-size 640 640 \
--batch 1 \
--device cpu \
--simplify \
--opset 11 \
--backend 2 \
--pre-topk 1000 \
--keep-topk 100 \
--iou-threshold 0.65 \
--score-threshold 0.25
# yolov6
pip install openmim==0.3.7
workdir="yolov6_n_syncbn_fast_8xb32-400e_coco"
mim download mmyolo --config $workdir --dest ./work_dirs/$workdir
pth_path=`ls ./work_dirs/$workdir/*.pth`
python projects/easydeploy/tools/export.py \
work_dirs/$workdir/$workdir.py \
${pth_path} \
--work-dir work_dirs/$workdir \
--img-size 640 640 \
--batch 1 \
--device cpu \
--simplify \
--opset 11 \
--backend 2 \
--pre-topk 30000 \
--keep-topk 300 \
--iou-threshold 0.65 \
--score-threshold 0.001
# 测试精度, 为加快速度还请去掉config文件中的 visualization hook
# python tools/test.py work_dirs/$workdir/$workdir.py \
# $pth_path \
# --json-prefix scripts/yolov6_n_coco_result
# yolov7
workdir="yolov7_tiny_syncbn_fast_8x16b-300e_coco"
mim download mmyolo --config $workdir --dest ./work_dirs/$workdir
pth_path=`ls ./work_dirs/$workdir/*.pth`
python projects/easydeploy/tools/export.py \
work_dirs/$workdir/$workdir.py \
${pth_path} \
--work-dir work_dirs/$workdir \
--img-size 640 640 \
--batch 1 \
--device cpu \
--simplify \
--opset 11 \
--backend 2 \
--pre-topk 1000 \
--keep-topk 100 \
--iou-threshold 0.65 \
--score-threshold 0.25
# yolov8
workdir="yolov8_n_syncbn_fast_8xb16-500e_coco"
mim download mmyolo --config $workdir --dest ./work_dirs/$workdir
pth_path=`ls ./work_dirs/$workdir/*.pth`
python projects/easydeploy/tools/export.py \
work_dirs/$workdir/$workdir.py \
${pth_path} \
--work-dir work_dirs/$workdir \
--img-size 640 640 \
--batch 1 \
--device cpu \
--simplify \
--opset 11 \
--backend 2 \
--pre-topk 1000 \
--keep-topk 100 \
--iou-threshold 0.65 \
--score-threshold 0.25
# yolox
workdir="yolox_tiny_fast_8xb8-300e_coco"
mim download mmyolo --config $workdir --dest ./work_dirs/$workdir
pth_path=`ls ./work_dirs/$workdir/*.pth`
python projects/easydeploy/tools/export.py \
work_dirs/$workdir/$workdir.py \
${pth_path} \
--work-dir work_dirs/$workdir \
--img-size 640 640 \
--batch 1 \
--device cpu \
--simplify \
--opset 11 \
--backend 3 \
--pre-topk 1000 \
--keep-topk 100 \
--iou-threshold 0.65 \
--score-threshold 0.25
# ppyoloe
workdir="ppyoloe_plus_s_fast_8xb8-80e_coco"
mim download mmyolo --config $workdir --dest ./work_dirs/$workdir
pth_path=`ls ./work_dirs/$workdir/*.pth`
python projects/easydeploy/tools/export.py \
work_dirs/$workdir/$workdir.py \
${pth_path} \
--work-dir work_dirs/$workdir \
--img-size 640 640 \
--batch 1 \
--device cpu \
--simplify \
--opset 11 \
--backend 2 \
--pre-topk 1000 \
--keep-topk 100 \
--iou-threshold 0.65 \
--score-threshold 0.25
# rtmdet
workdir="rtmdet_tiny_syncbn_fast_8xb32-300e_coco"
mim download mmyolo --config $workdir --dest ./work_dirs/$workdir
pth_path=`ls ./work_dirs/$workdir/*.pth`
python projects/easydeploy/tools/export.py \
work_dirs/$workdir/$workdir.py \
${pth_path} \
--work-dir work_dirs/$workdir \
--img-size 640 640 \
--batch 1 \
--device cpu \
--simplify \
--opset 11 \
--backend 2 \
--pre-topk 1000 \
--keep-topk 100 \
--iou-threshold 0.65 \
--score-threshold 0.25
# rtmdet 旋转框
workdir="rtmdet-r_tiny_fast_1xb8-36e_dota"
mim download mmyolo --config $workdir --dest ./work_dirs/$workdir
pth_path=`ls ./work_dirs/$workdir/*.pth`
python projects/easydeploy/tools/export.py \
work_dirs/$workdir/$workdir.py \
${pth_path} \
--work-dir work_dirs/$workdir \
--img-size 640 640 \
--batch 1 \
--device cpu \
--simplify \
--opset 11 \
--backend 2 \
--pre-topk 1000 \
--keep-topk 100 \
--iou-threshold 0.65 \
--score-threshold 0.25
workdir="yolov6_n_syncbn_fast_8xb32-400e_coco"
mim download mmyolo --config $workdir --dest ./work_dirs/$workdir
pth_path=`ls ./work_dirs/$workdir/*.pth`
config_file="work_dirs/$workdir/$workdir.py"
python projects/easydeploy/tools/export.py \
$config_file \
${pth_path} \
--work-dir work_dirs/$workdir \
--img-size 640 640 \
--batch 1 \
--device cpu \
--simplify \
--opset 11 \
--backend 2 \
--pre-topk 1000 \
--keep-topk 300 \
--iou-threshold 0.65 \
--score-threshold 0.3
python projects/easydeploy/tools/build_engine.py \
./work_dirs/$workdir/end2end.onnx \
--img-size 640 640 \
--device cuda:0
python projects/easydeploy/tools/image-demo.py \
demo/dog.jpg \
$config_file \
./work_dirs/$workdir/end2end.engine \
--device cuda:0
workdir="yolov6_n_syncbn_fast_8xb32-400e_coco"
mim download mmyolo --config $workdir --dest ./work_dirs/$workdir
pth_path=`ls ./work_dirs/$workdir/*.pth`
config_file="work_dirs/$workdir/$workdir.py"
python projects/easydeploy/tools/export.py \
$config_file \
${pth_path} \
--work-dir work_dirs/$workdir \
--img-size 640 640 \
--batch 1 \
--device cpu \
--simplify \
--opset 11 \
--backend 2 \
--pre-topk 30000 \
--keep-topk 300 \
--iou-threshold 0.65 \
--score-threshold 0.001
python projects/easydeploy/tools/build_engine.py \
./work_dirs/$workdir/end2end.onnx \
--img-size 640 640 \
--device cuda:0
python self_study_scripts/01cmp_output_with_torch_tensorrt.py \
demo/dog.jpg \
$config_file \
./work_dirs/$workdir/end2end.engine \
$pth_path \
--device cuda:0
python self_study_scripts/01cmp_output_with_torch_tensorrt.py \
~/dataset/coco/val2017/ \
$config_file \
./work_dirs/$workdir/end2end.engine \
$pth_path \
--device cuda:0
目标:
- 探索相同输入下tensorrt和pytorch两个框架的输出有无不同
- 针对 coco-val 数据集下的精度,tensorrt和pytorch得到的有何区别?
得到yolov6 pytorch版本输出
# 测试精度, 为加快速度还请去掉config文件中的 visualization hook
workdir="yolov6_n_syncbn_fast_8xb32-400e_coco"
pth_path=`ls ./work_dirs/$workdir/*.pth`
python tools/test.py work_dirs/$workdir/$workdir.py \
$pth_path \
--json-prefix scripts/yolov6_n_coco_result
定位问题: 解码后的score和boxes完全一致,定位到nms后处理部分不一致。 猜测原因:
- pytorch中有些框在取topk的过程当中被过滤掉了,比如其分值满足>score_threshold, 但是不在topk当中
- pytorch和tensorrt有些算子在实现上不一致
topk的不一致 nms算子与torch的nms可能也不一致
workdir="yolov6_n_syncbn_fast_8xb32-400e_coco"
pth_path=`ls ./work_dirs/$workdir/*.pth`
config_file="work_dirs/$workdir/$workdir.py"
# onnxruntime
python projects/easydeploy/tools/export.py $config_file ${pth_path} --work-dir work_dirs/$workdir --img-size 640 640 --batch 1 --device cpu --simplify --opset 11 --backend 5 --pre-topk 30000 --keep-topk 300 --iou-threshold 0.65 --score-threshold 0.001
python self_study_scripts/03modify_onnx.py
python self_study_scripts/01cmp_output_with_torch_tensorrt.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/re_end2end.onnx $pth_path --device cuda:0 &> err.log
# tensorrt
python projects/easydeploy/tools/export.py $config_file ${pth_path} --work-dir work_dirs/$workdir --img-size 640 640 --batch 1 --device cpu --simplify --opset 11 --backend 5 --pre-topk 3840 --keep-topk 300 --iou-threshold 0.65 --score-threshold 0.001
python self_study_scripts/03modify_onnx.py
python projects/easydeploy/tools/build_engine.py ./work_dirs/$workdir/re_end2end.onnx --img-size 640 640 --device cuda:0
python self_study_scripts/01cmp_output_with_torch_tensorrt.py \
~/dataset/coco/val2017/ \
$config_file \
./work_dirs/$workdir/re_end2end.engine \
$pth_path \
--device cuda:0
# note 把配置文件中nms_pre=30000 参数改为nms_pre=3840
python self_study_scripts/01cmp_output_with_torch_tensorrt.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/re_end2end.engine $pth_path --device cuda:0 --result-json-path=self_study_scripts/trt-1.json &> err.log
python self_study_scripts/05infer_tensorrt_dynamic_axes.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx --max-batch-size=4
topk 在pytorch和onnxruntime上实现不一致
pip install tqdm==4.65.0
workdir="yolov6_n_syncbn_fast_8xb32-400e_coco"
pth_path=`ls ./work_dirs/$workdir/*.pth`
config_file="work_dirs/$workdir/$workdir.py"
# onnxruntime
python projects/easydeploy/tools/export.py $config_file ${pth_path} --work-dir work_dirs/$workdir --img-size 640 640 --batch 1 --device cpu --simplify --opset 11 --backend 2 --pre-topk 30000 --keep-topk 300 --iou-threshold 0.65 --score-threshold 0.001
python self_study_scripts/05infer_tensorrt_dynamic_axes.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx --max-batch-size=4 --enable-fp16
fp16精度
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.360
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.516
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.390
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.167
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.398
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.519
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.311
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.520
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.572
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.337
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.641
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.771
tensorrt 7
- 采用自带Calibrator用TRT内含的QAT算法,
- 采用set dynamic_range api 全量化
tensorrt8 支持api添加QDQ或者导入onnx QDQ算子 全量化/部分量化 partial quantization
对比 batch=1/4 时 fp32/fp16/int8的精度和latency
rm ./work_dirs/yolov6_n_syncbn_fast_8xb32-400e_coco/*.engine -rf; rm ./calibration.cache;
workdir="yolov6_n_syncbn_fast_8xb32-400e_coco"
pth_path=`ls ./work_dirs/$workdir/*.pth`
config_file="work_dirs/$workdir/$workdir.py"
python self_study_scripts/06infer_tensorrt_dynamic_axes_native_ptq.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx --max-batch-size=4 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16
默认量化部分数据 默认
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.333
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.477
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.358
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.146
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.367
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.493
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.299
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.501
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.552
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.303
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.615
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.758
int8 trt7 PTQ 全量训练集数据数据 ENTROPY_CALIBRATION_2
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.344
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.493
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.371
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.154
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.377
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.506
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.303
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.507
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.559
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.311
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.626
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.764
int8 trt7 PTQ 全量数据 MINMAX_CALIBRATION
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.343
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.491
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.369
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.155
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.376
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.506
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.303
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.506
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.557
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.308
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.625
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.764
int8 trt7 PTQ 全量数据 MINMAX_CALIBRATION
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.343
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.491
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.369
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.155
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.376
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.506
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.303
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.506
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.557
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.308
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.625
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.764
安装MQbench
cd MQBench
pip install -r requirements.txt
python setup.py develop
rm ./work_dirs/yolov6_n_syncbn_fast_8xb32-400e_coco/*.engine -rf; rm ./calibration.cache;
python self_study_scripts/07infer_tensorrt_dynamic_axes_mqbench_ptq.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx $pth_path --max-batch-size=4 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16
python self_study_scripts/09infer_tensorrt_dynamic_axes_mqbench_adaround.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx $pth_path --max-batch-size=1 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16
MQBench weight 和activation MinMaxObserver 用的数据量比较少
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.330
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.476
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.356
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.146
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.358
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.483
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.293
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.498
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.547
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.309
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.608
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.758
MQBench weight MinMaxObserver activation HistogramObserver 全量数据
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.346
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.496
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.372
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.158
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.376
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.506
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.305
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.510
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.561
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.317
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.626
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.768
Adaround advanced PTQ 对传统PTQ方式做了一些更改,比如在fp32->int8 是round还是ceil还是floor
1000 数据
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.354
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.507
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.380
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.162
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.393
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.510
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.308
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.515
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.566
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.319
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.633
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.772
4000 数据
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.354
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.507
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.380
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.162
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.393
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.510
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.308
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.515
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.566
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.319
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.633
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.772
分别量化
python self_study_scripts/11infer_tensorrt_dynamic_axes_mqbench_minmax.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx $pth_path --max-batch-size=4 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 &> err.log
结果如下
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.354
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.507
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.380
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.162
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.393
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.510
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.308
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.515
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.566
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.319
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.633
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.772
去除Adaround 采用全部数据 + 分别量化weight和activation 得到的结果与上面一致,结论就是Adaround没有起作用,分别量化起作用。
试一下yolov6 small上的表现 和在 yolov8上面的表现
workdir="yolov6_s_syncbn_fast_8xb32-400e_coco"
pth_path=`ls ./work_dirs/$workdir/*.pth`
config_file="work_dirs/$workdir/$workdir.py"
mim download mmyolo --config $workdir --dest ./work_dirs/$workdir
python projects/easydeploy/tools/export.py \
$config_file \
${pth_path} \
--work-dir work_dirs/$workdir \
--img-size 640 640 \
--batch 1 \
--device cpu \
--simplify \
--opset 11 \
--backend 2 \
--pre-topk 3000 \
--keep-topk 300 \
--iou-threshold 0.65 \
--score-threshold 0.001
torch version
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.434
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.606
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.467
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.237
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.484
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.594
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.349
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.577
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.629
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.415
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.698
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.806
tensorrt fp32
python self_study_scripts/05infer_tensorrt_dynamic_axes.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx --max-batch-size=4 &> err.log
cp output/result.json self_study_scripts/dynamic_batch_trt_result_int8.json
python self_study_scripts/02evel_trt_json_output.py
结果
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.434
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.606
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.467
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.237
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.484
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.594
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.349
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.577
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.629
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.415
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.698
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.806
tensorrt fp16
python self_study_scripts/05infer_tensorrt_dynamic_axes.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx --max-batch-size=4 --enable-fp16 &> err.log
cp output/result.json self_study_scripts/dynamic_batch_trt_result_int8.json
python self_study_scripts/02evel_trt_json_output.py
结果
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.434
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.606
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.467
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.237
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.484
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.594
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.348
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.577
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.630
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.414
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.699
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.806
tensorrt int8
rm ./work_dirs/$workdir/*.engine -rf;
python self_study_scripts/11infer_tensorrt_dynamic_axes_mqbench_minmax.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx $pth_path --max-batch-size=4 \
--calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 &> err.log
cp output/result.json self_study_scripts/dynamic_batch_trt_result_int8.json
python self_study_scripts/02evel_trt_json_output.py
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.416
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.586
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.448
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.234
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.460
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.569
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.340
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.563
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.613
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.406
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.673
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.790
python self_study_scripts/06infer_tensorrt_dynamic_axes_native_ptq.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx --max-batch-size=4 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 &> err1.log
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.408 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.580 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.438 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.215 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.455 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.574 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.337 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.561 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.614 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.387 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.683 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.792
yolov8 nano测试
workdir="yolov8_n_syncbn_fast_8xb16-500e_coco"
pth_path=`ls ./work_dirs/$workdir/*.pth`
config_file="work_dirs/$workdir/$workdir.py"
python projects/easydeploy/tools/export.py \
$config_file \
${pth_path} \
--work-dir work_dirs/$workdir \
--img-size 640 640 \
--batch 1 \
--device cpu \
--simplify \
--opset 11 \
--backend 2 \
--pre-topk 1000 \
--keep-topk 300 \
--iou-threshold 0.7 \
--score-threshold 0.001
python self_study_scripts/05infer_tensorrt_dynamic_axes.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx --max-batch-size=4 &> err.log
cp output/result.json self_study_scripts/dynamic_batch_trt_result_int8.json
python self_study_scripts/02evel_trt_json_output.py
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.372
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.527
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.402
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.189
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.405
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.524
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.317
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.533
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.589
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.364
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.650
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.766
python self_study_scripts/05infer_tensorrt_dynamic_axes.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx --max-batch-size=4 --enable-fp16 &> err.log
cp output/result.json self_study_scripts/dynamic_batch_trt_result_int8.json
python self_study_scripts/02evel_trt_json_output.py
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.371
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.527
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.402
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.188
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.405
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.524
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.316
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.533
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.589
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.364
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.650
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.766
python self_study_scripts/06infer_tensorrt_dynamic_axes_native_ptq.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx --max-batch-size=4 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 &> err1.log
mv output/result.json self_study_scripts/dynamic_batch_trt_result_int8.json
python self_study_scripts/02evel_trt_json_output.py
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.325
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.470
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.352
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.152
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.356
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.481
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.293
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.494
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.547
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.305
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.613
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.739
python self_study_scripts/11infer_tensorrt_dynamic_axes_mqbench_minmax.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx $pth_path --max-batch-size=1 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 &> err.log
cp output/result.json self_study_scripts/dynamic_batch_trt_result_int8.json
python self_study_scripts/02evel_trt_json_output.py
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.371
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.531
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.399
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.188
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.405
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.523
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.317
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.530
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.582
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.360
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.642
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.758
支持导出trt识别的QDD算子
英伟达TensorRT团队出的,负责插入量化算子,且支持PTQ和QAT,但是QAT支持有限。 并不是基于torch.fx做的,所以需要自己插入量化算子或者用其api自动插入(不好用)
cd TensorRT/tools/pytorch-quantization
pip install -r requirements.txt
python setup.py develop
# 修改 DeConv1D
num_spatial_dims = xxx
workdir="yolov6_s_syncbn_fast_8xb32-400e_coco"
pth_path=`ls ./work_dirs/$workdir/*.pth`
config_file="work_dirs/$workdir/$workdir.py"
python self_study_scripts/12infer_tensorrt_dynamic_axes_nvidia_ptq.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx $pth_path --max-batch-size=4 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 &> quant_ptq.log
cp output/result.json self_study_scripts/dynamic_batch_trt_result_int8.json
python self_study_scripts/02evel_trt_json_output.py
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.416
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.586
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.449
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.221
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.468
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.569
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.341
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.566
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.618
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.391
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.687
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.796
当采用torch-quantization包来全量化yolov6时,精度出现了问题
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.325
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.480
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.348
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.198
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.418
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.372
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.307
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.543
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.604
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.373
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.685
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.770
采用trex可视化engine
cd TensorRT/tools/experimental/trt-engine-explorer
pip install -r requirements.txt
python setup.py develop
trtexec --loadEngine=./output/yolov6_s_syncbn_fast_8xb32-400e_coco_deploy_model.int8.engine --dumpRefit --dumpProfile --profilingVerbosity=detailed --dumpLayerInfo --exportLayerInfo=./output/layer.json --exportProfile=./output/profile.json
python self_study_scripts/11infer_tensorrt_dynamic_axes_mqbench_minmax.py ~/dataset/coco/val2017/ $config_file ./work_dirs/$workdir/end2end.onnx $pth_path --max-batch-size=4 \
--calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 --out-dir="yolov6-mqbench-ptq" &> err.log
parent_dir='yolov6-mqbench-ptq'
trtexec --loadEngine=./$parent_dir/mqbench_deploy_model.int8.engine --dumpRefit --dumpProfile --profilingVerbosity=detailed --dumpLayerInfo --exportLayerInfo=./$parent_dir/layer.json --exportProfile=./$parent_dir/profile.json
map
python self_study_scripts/13sensitive_analysis.py ~/dataset/coco/val2017/ $config_file $pth_path --max-batch-size=1 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 --eval-metric="map" --out-dir="./map_sensitive_analysis" &> map_sensitive_analysis.log
python self_study_scripts/14load_sensitive_analysis.py ~/dataset/coco/val2017/ $config_file $pth_path ./map_sensitive_analysis --max-batch-size=4 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 --out-dir="./map_sensitive_analysis" &> quant_ptq_map.log
精度
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.424
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.594
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.458
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.231
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.476
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.585
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.344
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.570
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.621
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.402
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.690
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.800
mse
python self_study_scripts/13sensitive_analysis.py ~/dataset/coco/val2017/ $config_file $pth_path --max-batch-size=1 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 --eval-metric="mse" --out-dir="./mse_sensitive_analysis" &> mse_sensitive_analysis.log
python self_study_scripts/14load_sensitive_analysis.py ~/dataset/coco/val2017/ $config_file $pth_path ./mse_sensitive_analysis --max-batch-size=4 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 --out-dir="./mse_sensitive_analysis" &> quant_ptq_mse.log
精度
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.420
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.590
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.453
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.225
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.473
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.571
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.345
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.570
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.622
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.396
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.690
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.801
cosine similarity
python self_study_scripts/13sensitive_analysis.py ~/dataset/coco/val2017/ $config_file $pth_path --max-batch-size=1 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 --eval-metric="cosine" --out-dir="./cosine_sensitive_analysis" &> cosine_sensitive_analysis.log
python self_study_scripts/14load_sensitive_analysis.py ~/dataset/coco/val2017/ $config_file $pth_path ./cosine_sensitive_analysis --max-batch-size=4 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 --out-dir="./cosine_sensitive_analysis" &> quant_ptq_cosine.log
精度
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.420
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.591
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.454
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.225
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.473
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.572
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.345
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.570
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.622
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.398
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.690
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.800
sqnr
python self_study_scripts/13sensitive_analysis.py ~/dataset/coco/val2017/ $config_file $pth_path --max-batch-size=1 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 --eval-metric="sqnr" --out-dir="./sqnr_sensitive_analysis" &> sqnr_sensitive_analysis.log
python self_study_scripts/14load_sensitive_analysis.py ~/dataset/coco/val2017/ $config_file $pth_path ./sqnr_sensitive_analysis --max-batch-size=4 --calib-imgs=./data/coco/train2017 --enable-int8 --enable-fp16 --out-dir="./sqnr_sensitive_analysis" &> quant_ptq_sqnr.log
精度
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.422
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.589
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.455
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.225
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.474
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.582
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.344
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.568
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.620
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.393
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.689
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.799
workdir="yolov6_s_syncbn_fast_8xb32-400e_coco"
# 表示预加载的PTQ模型
pth_path="./quant_model_rm_sensitive_map/${workdir}_deploy_model.pth"
config_file="work_dirs/$workdir/$workdir.py"
python self_study_scripts/15qat_train_nvidia_trt.py ./data/coco/val2017 $config_file $pth_path --enable-int8 --enable-fp16 --out-dir="qat_trainning" &> quant_qat.log
quantized_config_file="work_dirs/${workdir}_qat/${workdir}.py"
quantized_model_pth_path="work_dirs/${workdir}_qat/quantized_model.pth"
python self_study_scripts/16qat_export_nvidia_trt.py ./data/coco/val2017 $quantized_config_file $quantized_model_pth_path --max-batch-size=4 --enable-int8 --enable-fp16 --out-dir="quant_model_rm_sensitive_qat"
cp -f quant_model_rm_sensitive_qat/result.json self_study_scripts/dynamic_batch_trt_result_int8.json
python self_study_scripts/02evel_trt_json_output.py
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.430
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.602
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.464
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.232
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.479
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.587
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.346
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.575
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.628
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.414
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.694
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.808
workdir="rtmdet-r_tiny_fast_1xb8-36e_dota"
mim download mmyolo --config $workdir --dest ./work_dirs/$workdir
pth_path=`ls ./work_dirs/$workdir/*.pth`
config_file="work_dirs/$workdir/$workdir.py"
python projects/easydeploy/tools/export.py \
work_dirs/$workdir/$workdir.py \
${pth_path} \
--work-dir work_dirs/$workdir \
--img-size 1024 1024 \
--batch 1 \
--device cpu \
--simplify \
--opset 11 \
--backend 2 \
--pre-topk 1000 \
--keep-topk 100 \
--iou-threshold 0.1 \
--score-threshold 0.05
python projects/easydeploy/tools/build_engine.py \
./work_dirs/$workdir/end2end.onnx \
--img-size 1024 1024 \
--device cuda:0 \
--scale="[[1,3,1024,1024],[1,3,1024,1024],[1,3,1024,1024]]"
python self_study_scripts/01cmp_output_with_torch_tensorrt.py \
demo/dog.jpg \
$config_file \
./work_dirs/$workdir/end2end.engine \
$pth_path \
--device cuda:0