Very slow inference
fmmohammadi opened this issue · 2 comments
I ran this on a GTX 2080 with images of size 1920×1080×3.
The inference time was 250 to 300 ms for each image! I don't know why but it's terrible!
@fmmohammadi can you share the code for inference on GPU? For me, the GPU is not being utilized during inference.
@fmmohammadi can you share the code for inference on GPU? For me, the GPU is not being utilized during inference.
Main code, with a few rough draft edits:
`from absl import app, flags, logging
from absl.flags import FLAGS
import cv2
import os
import pathlib
import numpy as np
import tensorflow as tf
from modules.models import RetinaFaceModel
from modules.utils import (set_memory_growth, load_yaml, draw_bbox_landm,
pad_input_image, recover_pad_output)
# Command-line options, registered with absl and read through FLAGS.

# Model configuration and device selection.
flags.DEFINE_string(
    name='cfg_path',
    default='./configs/retinaface_mbv2.yaml',
    help='config file path',
)
flags.DEFINE_string(name='gpu', default='1', help='which gpu to use')

# Where and how evaluation results are written.
flags.DEFINE_string(
    name='save_folder',
    default='./widerface_evaluate/widerface_txt/',
    help='folder path to save evaluate results',
)
flags.DEFINE_boolean(
    name='origin_size',
    default=True,
    help='whether use origin image size to evaluate',
)
flags.DEFINE_boolean(
    name='save_image',
    default=True,
    help='whether save evaluation images',
)

# Detection and visualization thresholds.
flags.DEFINE_float(name='iou_th', default=0.4, help='iou threshold for nms')
flags.DEFINE_float(
    name='score_th', default=0.02, help='score threshold for nms')
flags.DEFINE_float(
    name='vis_th', default=0.5, help='threshold for visualization')
def load_info(txt_path):
    """Load image paths and per-image labels from an annotation text file.

    The file is read line by line:

    * A line starting with ``#`` opens a new image record. The text after
      the first two characters is taken as the image's relative path and is
      appended to ``txt_path`` with ``'label.txt'`` replaced by
      ``'images/'``.
    * Any other non-blank line is parsed as whitespace-separated floats and
      added to the labels of the current image.

    Args:
        txt_path: Path to the annotation file.

    Returns:
        A tuple ``(img_paths, words)``. ``img_paths`` is a list of image
        path strings. ``words`` is a list holding, for each image, a list
        of float lists (one per label line). For a file without any ``#``
        line, ``words`` still contains a single (possibly empty) label list.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a label line contains a token that is not a float.
    """
    img_paths = []
    words = []
    labels = []
    is_first = True
    image_root = txt_path.replace('label.txt', 'images/')

    # The context manager guarantees the handle is closed, even when a
    # malformed label line raises ValueError mid-file.
    with open(txt_path, 'r') as f:
        for raw_line in f:
            line = raw_line.rstrip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no label data.
                continue
            if line.startswith('#'):
                if is_first:
                    is_first = False
                else:
                    # Close the previous image's record and start a new one.
                    words.append(labels)
                    labels = []
                img_paths.append(image_root + line[2:])
            else:
                # split() without arguments tolerates repeated spaces/tabs.
                labels.append([float(x) for x in line.split()])

    # Flush the labels collected for the last image.
    words.append(labels)
    return img_paths, words
import time
def main(_argv):
    """Measure RetinaFace forward-pass latency over a folder of .jpg images.

    Loads the config named by ``FLAGS.cfg_path``, builds the model, restores
    the latest checkpoint from ``./checkpoints/<sub_name>``, then runs the
    model once per ``.jpg`` file found directly inside
    ``cfg['testing_dataset_path']``. The first 10 inferences are treated as
    warm-up and excluded from the average; after every 20th inference the
    running mean latency (in milliseconds) is printed.

    Detection results are not post-processed or saved; only timing is
    reported.

    Args:
        _argv: Positional command-line arguments passed by ``app.run``
            (unused).

    Raises:
        SystemExit: If no checkpoint is found in the checkpoint directory.
    """
    warmup_runs = 10     # initial inferences excluded from the average
    report_every = 20    # print the running mean every N inferences

    # NOTE: FLAGS.gpu is not applied here; CUDA_VISIBLE_DEVICES is left as
    # provided by the environment.
    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)
    set_memory_growth()

    cfg = load_yaml(FLAGS.cfg_path)

    # define network
    model = RetinaFaceModel(cfg, training=False, iou_th=FLAGS.iou_th,
                            score_th=FLAGS.score_th)

    # load checkpoint (resolve the latest checkpoint path only once)
    checkpoint_dir = './checkpoints/' + cfg['sub_name']
    checkpoint = tf.train.Checkpoint(model=model)
    latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_ckpt:
        checkpoint.restore(latest_ckpt)
        print("[*] load ckpt from {}.".format(latest_ckpt))
    else:
        print("[*] Cannot find ckpt from {}.".format(checkpoint_dir))
        # Equivalent to exit(), without relying on the `site` builtin.
        raise SystemExit

    # Collect the images up front so progress can show the real total and
    # the processing order is deterministic.
    testset_folder = cfg['testing_dataset_path']
    img_names = sorted(
        name for name in os.listdir(testset_folder) if name.endswith('.jpg'))

    total_time = 0.0
    runs = 0
    for img_index, img_name in enumerate(img_names):
        # os.path.join works whether or not the folder has a trailing slash.
        img_path = os.path.join(testset_folder, img_name)
        print(" [{} / {}] det {}".format(img_index + 1, len(img_names),
                                         img_path))

        img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img_raw is None:
            # cv2.imread signals an unreadable file by returning None.
            print("[*] Cannot read image {}, skipped.".format(img_path))
            continue

        img = cv2.cvtColor(np.float32(img_raw), cv2.COLOR_BGR2RGB)

        # pad input image to avoid unmatched shape problem
        img, _ = pad_input_image(img, max_steps=max(cfg['steps']))

        # Add the leading batch axis outside the timed region so only the
        # model call (and fetching its result via .numpy()) is measured.
        batch = img[np.newaxis, ...]

        start = time.perf_counter()
        outputs = model(batch).numpy()  # noqa: F841 - result unused, timing only
        end = time.perf_counter()

        runs += 1
        if runs > warmup_runs:
            total_time += end - start
            if runs % report_every == 0:
                # mean latency in milliseconds over the post-warm-up runs
                print(total_time / (runs - warmup_runs) * 1000)
# Script entry point. The dunder names are required: a bare `name` is
# undefined and would raise NameError before main could ever run.
if __name__ == '__main__':
    try:
        app.run(main)
    except SystemExit:
        # app.run exits via SystemExit when main returns; swallow it so the
        # script ends quietly.
        pass
`