Non-maximum suppression and conversion

Hi, thank you for creating such a beautiful package.

I am having trouble converting YOLOv4-tiny from weights to tflite with int8 quantization, because I don't know how to use the data_set parameter of the save_as_tflite function.

Besides, I used the float16 tflite yolov4-tiny model, but it showed a lot of bounding boxes. Can you suggest how to add non-maximum suppression so that the bounding boxes are more accurate?

Thank you a lot.

Tested on

TF: v2.2.1
yolov4: v1.2.1

ref:

from yolov4.tf import YOLOv4

# Build the tiny YOLOv4 model and load the "yolo"-type weights file
yolo = YOLOv4(tiny=True)
yolo.classes = "coco.names"
yolo.make_model()
yolo.load_weights("yolov4-tiny.weights", weights_type="yolo")

# Data set that is handed to save_as_tflite() through its data_set parameter
dataset = yolo.load_dataset(
    "train2017.txt", training=False, image_path_prefix=r"D:\coco\train2017"
)

# Export the model as a fully int8-quantized tflite file
yolo.save_as_tflite(
    "yolov4-tiny.tflite",
    data_set=dataset,
    num_calibration_steps=500,
    quantization="full_int8",
)

yolov4-tiny-int8.tflite: yolov4-tiny.zip
leaky-relu for tflite is in TF 2.3.1(?) or higher. tensorflow/tensorflow#36876

NMS is in yolo.candidates_to_pred_bboxes

tensorflow-yolov4/py_src/yolov4/tflite/__init__.py

Lines 73 to 114 in 1149d76

    
               def predict(
                   self,
                   frame: np.ndarray,
                   iou_threshold: float = 0.3,
                   score_threshold: float = 0.25,
               ):
                   """
                   Run the tflite interpreter on one frame and return its boxes

                   @param frame: Dim(height, width, channels)
                   @param iou_threshold: forwarded to candidates_to_pred_bboxes
                   @param score_threshold: forwarded to candidates_to_pred_bboxes

                   @return pred_bboxes == Dim(-1, (x, y, w, h, class_id, probability))
                   """
                   # Network input == Dim(1, input_size[1], input_size[0], channels),
                   # scaled to [0, 1] and cast to float32
                   resized = self.resize_image(frame)
                   batch = (resized / 255)[np.newaxis, ...].astype(np.float32)

                   self.interpreter.set_tensor(self.input_index, batch)
                   self.interpreter.invoke()

                   # Every output (s_pred, m_pred, l_pred) is
                   # Dim(1, g_height, g_width, anchors, (bbox)); flatten each one to
                   # Dim(1, g_height * g_width * 3, -1) so they can be joined on axis 1
                   flattened = []
                   for index in self.output_index:
                       raw = self.interpreter.get_tensor(index)
                       g_height, g_width = raw.shape[1:3]
                       flattened.append(
                           np.reshape(raw[0], (1, g_height * g_width * 3, -1))
                       )
                   merged = np.concatenate(flattened, axis=1)

                   # Thresholding happens inside candidates_to_pred_bboxes; the boxes
                   # are then mapped back to the shape of the original frame
                   boxes = self.candidates_to_pred_bboxes(
                       merged[0],
                       iou_threshold=iou_threshold,
                       score_threshold=score_threshold,
                   )
                   return self.fit_pred_bboxes_to_original(boxes, frame.shape)

Hi, I am currently using the predict function combined with pygame to update the frame in real time, but when I increase the iou_threshold above 0.5, the frame crashes and it stops predicting. Can you help me out?

from yolov4.tflite import YOLOv4
import tensorflow as tf
import cv2
from time import sleep
from PIL import Image, ImageFont, ImageDraw
import pygame
import os
import colorsys
import time
import numpy as np
from keras import backend as K
from keras.models import load_model
from keras.layers import Input

from PIL import Image, ImageFont, ImageDraw
from timeit import default_timer as timer
import matplotlib.pyplot as plt
import cv2
h,w=480,680
border=50
N=0

def getFrame():
    """Return the next frame to display as a numpy array.

    The current frame is stored on the function itself as ``getFrame.z``.
    When nothing has been stored yet, a normalised 2-D pattern of shape
    ``(h, w)`` is generated first. Every call then shifts the stored array by
    1 along axis 0 and by 2 along axis 1 (with wrap-around), stores the
    shifted array back in ``getFrame.z`` and returns it.
    """
    # ``getFrame.z`` does not exist until something assigns it, so a plain
    # attribute read would raise AttributeError on a first call.
    if getattr(getFrame, "z", None) is None:
        xx, yy = np.meshgrid(np.linspace(0, 2 * np.pi, w), np.linspace(0, 2 * np.pi, h))
        # NOTE(review): sin2d is not defined in this snippet - confirm it is
        # provided elsewhere before relying on this branch.
        getFrame.z = sin2d(xx, yy)
        getFrame.z = cv2.normalize(getFrame.z, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

    # Just roll data for subsequent calls
    getFrame.z = np.roll(getFrame.z, (1, 2), (0, 1))
    return getFrame.z
import cv2
from time import sleep
# NOTE: this value is read once, before the loop starts, and is never
# refreshed, so it cannot be used to detect a key press while running.
# Quitting is handled through pygame events below instead.
key = cv2.waitKey(1)
webcam = cv2.VideoCapture(0)
sleep(2)
frame_count = 0

pygame.init()
screen = pygame.display.set_mode((w+(2*border), h+(2*border)))
pygame.display.set_caption("Yolov4- Test")
done = False
clock = pygame.time.Clock()
prev_time = 0
basicfont = pygame.font.SysFont(None, 32)
if __name__ == '__main__':
    yolo = YOLOv4(tiny = True)

    yolo.classes = "custom.names"
    #yolo.make_model()
    #yolo.load_weights("yolov4-tiny.weights", weights_type="yolo")
    yolo.load_tflite("detect.tflite")
    try:
        # ``done`` is set by the event handling below, so closing the window
        # or pressing "q" really ends the loop.
        while not done:
            check, frame = webcam.read()
            if not check:
                # No frame was delivered; handing it to cvtColor would raise.
                print("Could not read a frame from the camera.")
                break

            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    done = True
                elif event.type == pygame.KEYDOWN and event.key == pygame.K_q:
                    done = True

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            _bboxes = yolo.predict(frame, iou_threshold=0.5, score_threshold=0.25)
            curr_time = time.time()

            # getFrame() returns whatever is stored in getFrame.z (shifted)
            getFrame.z = yolo.draw_bboxes(frame, _bboxes)
            npimage = getFrame()

            # Clear screen to white before drawing
            screen.fill((255, 255, 255))
            # Convert to a surface and splat onto screen offset by border width and height
            surface = pygame.surfarray.make_surface(npimage)
            surface = pygame.transform.rotate(surface, 270)
            surface = pygame.transform.flip(surface, True, False)
            screen.blit(surface, (border, border))

            # Display and update frame counter
            text = basicfont.render('FPS: ' + str(round(N,2)), True, (255, 0, 0), (255, 255, 255))
            screen.blit(text, (border,h+border))
            # Guard against two identical timestamps
            elapsed = curr_time - prev_time
            N = 1 / elapsed if elapsed > 0 else 0

            pygame.display.flip()
            clock.tick(10)
            prev_time = curr_time
    except KeyboardInterrupt:
        print("Turning off camera.")
    finally:
        # Release the camera and windows on every exit path, not only Ctrl+C
        webcam.release()
        print("Camera off.")
        cv2.destroyAllWindows()
        pygame.quit()
        print("Program ended.")

The lower the iou_threshold, the fewer duplicated boxes remain.

Okay, thank you a lot, but in my program, when I increase the iou_threshold, the program crashes and stops predicting. Can you test the code? Or is there any other way to run yolov4-tiny inference on a webcam in real time?

from yolov4.tf import YOLOv4
import tensorflow as tf
import cv2
from time import sleep
from PIL import Image, ImageFont, ImageDraw
import pygame
import os
import colorsys
import time
import numpy as np

from PIL import Image, ImageFont, ImageDraw
from timeit import default_timer as timer
import matplotlib.pyplot as plt
import cv2
h,w=480,680
border=50
N=0

def getFrame():
    """Return the next frame to display as a numpy array.

    The current frame is stored on the function itself as ``getFrame.z``.
    When nothing has been stored yet, a normalised 2-D pattern of shape
    ``(h, w)`` is generated first. Every call then shifts the stored array by
    1 along axis 0 and by 2 along axis 1 (with wrap-around), stores the
    shifted array back in ``getFrame.z`` and returns it.
    """
    # ``getFrame.z`` does not exist until something assigns it, so a plain
    # attribute read would raise AttributeError on a first call.
    if getattr(getFrame, "z", None) is None:
        xx, yy = np.meshgrid(np.linspace(0, 2 * np.pi, w), np.linspace(0, 2 * np.pi, h))
        # NOTE(review): sin2d is not defined in this snippet - confirm it is
        # provided elsewhere before relying on this branch.
        getFrame.z = sin2d(xx, yy)
        getFrame.z = cv2.normalize(getFrame.z, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

    # Just roll data for subsequent calls
    getFrame.z = np.roll(getFrame.z, (1, 2), (0, 1))
    return getFrame.z
import cv2
from time import sleep
# NOTE: this value is read once, before the loop starts, and is never
# refreshed, so it cannot be used to detect a key press while running.
# Quitting is handled through pygame events below instead.
key = cv2.waitKey(1)
webcam = cv2.VideoCapture(0)
sleep(2)
frame_count = 0

pygame.init()
screen = pygame.display.set_mode((w+(2*border), h+(2*border)))
pygame.display.set_caption("Yolov4- Test")
done = False
clock = pygame.time.Clock()
prev_time = 0
basicfont = pygame.font.SysFont(None, 32)
if __name__ == '__main__':
    yolo = YOLOv4(tiny = True)

    yolo.classes = "custom.names"
    yolo.make_model()
    yolo.load_weights("yolov4-tiny.weights", weights_type="yolo")
    try:
        # ``done`` is set by the event handling below, so closing the window
        # or pressing "q" really ends the loop.
        while not done:
            check, frame = webcam.read()
            if not check:
                # No frame was delivered; handing it to cvtColor would raise.
                print("Could not read a frame from the camera.")
                break

            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    done = True
                elif event.type == pygame.KEYDOWN and event.key == pygame.K_q:
                    done = True

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            _bboxes = yolo.predict(frame, iou_threshold=0.1, score_threshold=0.25)
            curr_time = time.time()

            # getFrame() returns whatever is stored in getFrame.z (shifted)
            getFrame.z = yolo.draw_bboxes(frame, _bboxes)
            npimage = getFrame()

            # Clear screen to white before drawing
            screen.fill((255, 255, 255))
            # Convert to a surface and splat onto screen offset by border width and height
            surface = pygame.surfarray.make_surface(npimage)
            surface = pygame.transform.rotate(surface, 270)
            surface = pygame.transform.flip(surface, True, False)
            screen.blit(surface, (border, border))

            # Display and update frame counter
            text = basicfont.render('FPS: ' + str(round(N,2)), True, (255, 0, 0), (255, 255, 255))
            screen.blit(text, (border,h+border))
            # Guard against two identical timestamps
            elapsed = curr_time - prev_time
            N = 1 / elapsed if elapsed > 0 else 0

            pygame.display.flip()
            clock.tick(10)
            prev_time = curr_time
    except KeyboardInterrupt:
        print("Turning off camera.")
    finally:
        # Release the camera and windows on every exit path, not only Ctrl+C
        webcam.release()
        print("Camera off.")
        cv2.destroyAllWindows()
        pygame.quit()
        print("Program ended.")

it works well.

On PC, tflite is slower than tf.(I tested on CPU only)

your script works like yolo.inference(0, is_image=False)

I did it but

Traceback (most recent call last):
File "C:\Users\Son\Downloads\RIVER_SAVER\yolov4-tiny\webcam.py", line 8, in
yolo.inference(0, is_image=False)
File "C:\Users\Son\AppData\Local\Programs\Python\Python36\lib\site-packages\yolov4\common\base_class.py", line 235, in inference
raise FileNotFoundError("{} does not exist".format(media_path))
FileNotFoundError: 0 does not exist

Tested on

TF: v2.2.1

yolov4: v1.2.1

ref:

https://wiki.loliot.net/docs/lang/python/libraries/yolov4/python-yolov4-training#prepare-dataset

https://github.com/hhk7734/tensorflow-yolov4/blob/master/test/make_edgetpu_tflite.ipynb
from yolov4.tf import YOLOv4

# Build the tiny YOLOv4 model and load the "yolo"-type weights file
yolo = YOLOv4(tiny=True)
yolo.classes = "coco.names"
yolo.make_model()
yolo.load_weights("yolov4-tiny.weights", weights_type="yolo")

# Data set that is handed to save_as_tflite() through its data_set parameter
dataset = yolo.load_dataset(
    "train2017.txt", training=False, image_path_prefix=r"D:\coco\train2017"
)

# Export the model as a fully int8-quantized tflite file
yolo.save_as_tflite(
    "yolov4-tiny.tflite",
    data_set=dataset,
    num_calibration_steps=500,
    quantization="full_int8",
)
yolov4-tiny-int8.tflite: yolov4-tiny.zip
leaky-relu for tflite is in TF 2.3.1(?) or higher. tensorflow/tensorflow#36876

NMS is in yolo.candidates_to_pred_bboxes

tensorflow-yolov4/py_src/yolov4/tflite/__init__.py

Lines 73 to 114 in 1149d76

def predict(
    self,
    frame: np.ndarray,
    iou_threshold: float = 0.3,
    score_threshold: float = 0.25,
):
    """
    Predict one frame

    @param frame: Dim(height, width, channels)
    @param iou_threshold: forwarded to candidates_to_pred_bboxes
    @param score_threshold: forwarded to candidates_to_pred_bboxes

    @return pred_bboxes == Dim(-1, (x, y, w, h, class_id, probability))
    """
    # image_data == Dim(1, input_size[1], input_size[0], channels)
    image_data = self.resize_image(frame)
    image_data = image_data / 255
    image_data = image_data[np.newaxis, ...].astype(np.float32)

    # s_pred, m_pred, l_pred
    # x_pred == Dim(1, g_height, g_width, anchors, (bbox))
    self.interpreter.set_tensor(self.input_index, image_data)
    self.interpreter.invoke()
    candidates = [
        self.interpreter.get_tensor(index) for index in self.output_index
    ]

    # Flatten each output to Dim(1, g_height * g_width * 3, -1) so that all
    # of them can be joined along axis 1
    _candidates = []
    for candidate in candidates:
        grid_size = candidate.shape[1:3]
        _candidates.append(
            np.reshape(
                candidate[0], (1, grid_size[0] * grid_size[1] * 3, -1)
            )
        )
    candidates = np.concatenate(_candidates, axis=1)

    # NMS is in candidates_to_pred_bboxes
    pred_bboxes = self.candidates_to_pred_bboxes(
        candidates[0],
        iou_threshold=iou_threshold,
        score_threshold=score_threshold,
    )
    pred_bboxes = self.fit_pred_bboxes_to_original(pred_bboxes, frame.shape)
    return pred_bboxes

this is the error when I try to convert my yolov4-tiny

WARNING:tensorflow:From C:\Users\Son\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\training\tracking\tracking.py:111: Model.state_updates (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
WARNING:tensorflow:From C:\Users\Son\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\training\tracking\tracking.py:111: Layer.updates (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Traceback (most recent call last):
  File "C:\Users\Son\Downloads\RIVER_SAVER\yolov4-tiny\quantization_tflite.py", line 17, in <module>
    num_calibration_steps=200)
  File "C:\Users\Son\AppData\Local\Programs\Python\Python36\lib\site-packages\yolov4\tf\__init__.py", line 158, in save_as_tflite
    tflite_model = converter.convert()
  File "C:\Users\Son\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\lite\python\lite.py", line 831, in convert
    self).convert(graph_def, input_tensors, output_tensors)
  File "C:\Users\Son\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\lite\python\lite.py", line 638, in convert
    result = self._calibrate_quantize_model(result, **flags)
  File "C:\Users\Son\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\lite\python\lite.py", line 452, in _calibrate_quantize_model
    inference_output_type, allow_float, activations_type)
  File "C:\Users\Son\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\lite\python\optimize\calibrator.py", line 98, in calibrate_and_quantize
    np.dtype(activations_type.as_numpy_dtype()).num)
RuntimeError: Unsupported output type INT8 for output tensor 'Identity' of type FLOAT32.

If you used tensorflow v2.3.0 or v2.1.0, uninstall it and install tensorflow==2.2.1.

On PC, tflite is slower than tf.(I tested on CPU only)

your script works like yolo.inference(0, is_image=False)

Hi, do you know why I get this error:

Traceback (most recent call last):
  File "C:\Users\Son\Downloads\RIVER_SAVER\yolov4-tiny\webcam.py", line 8, in <module>
    yolo.inference(0, is_image=False)
  File "C:\Users\Son\AppData\Local\Programs\Python\Python36\lib\site-packages\yolov4\common\base_class.py", line 235, in inference
    raise FileNotFoundError("{} does not exist".format(media_path))
FileNotFoundError: 0 does not exist

Again, you are a lifesaver, thank you a lot!!

I tested on Windows 10 now. it works.
Check cam connection.

I tested on Windows 10 now. it works.
Check cam connection.

Oh, I understand now: "compile and run" won't work, but running the .py file will. Anyway, can you test this model? I have just converted it successfully and feel very happy, but I cannot test it, while the other models work well. Thank you.

yolov4-tiny.zip

It works, but the performance seems to be poor.

It works, but the performance seems to be poor.

Yep, I tested it and it seems to be very slow, even slower than the float16 one. Am I doing something wrong?
This is my quantization code:
from yolov4.tf import YOLOv4

# Build the tiny YOLOv4 model for the custom classes and load its weights
yolo = YOLOv4(tiny=True)

yolo.classes = "custom.names"

yolo.make_model()
yolo.load_weights("yolov4-tiny.weights", weights_type="yolo")

# Data set that is handed to save_as_tflite() through its data_set parameter
dataset = yolo.load_dataset(
    "val.txt",
    training=False,
    image_path_prefix=r"data/valid",
)

# Export the model as a fully int8-quantized tflite file
yolo.save_as_tflite(
    "yolov4-tiny-int.tflite",
    quantization="full_int8",
    data_set=dataset,
    num_calibration_steps=200,
)

Moreover, when using 'inference(0, is_image=False)', if I raise the iou_threshold above 0.5, the program crashes.

this is the fp16 one
yolov4-tiny-fp16.zip

For quantization, the process of converting to int takes a long time because it repeats the inference as many as num_calibration_steps.

This may be due to computer performance or memory capacity.
I have no way of diagnosing such errors.

Thanks a lot. Anyway, can you test inference with the tiny-fp16 model at an iou_threshold of 0.6 or 0.7 and then try to detect a bottle? Mine crashes :/

When the value rises to a certain level, it seems that it cannot escape the loop.

	def predict(
	    self,
	    frame: np.ndarray,
	    iou_threshold: float = 0.3,
	    score_threshold: float = 0.25,
	):
	    """
	    Predict one frame

	    @param frame: Dim(height, width, channels)
	    @param iou_threshold: forwarded to candidates_to_pred_bboxes
	    @param score_threshold: forwarded to candidates_to_pred_bboxes

	    @return pred_bboxes == Dim(-1, (x, y, w, h, class_id, probability))
	    """
	    # image_data == Dim(1, input_size[1], input_size[0], channels)
	    image_data = self.resize_image(frame)
	    image_data = image_data / 255
	    image_data = image_data[np.newaxis, ...].astype(np.float32)

	    # s_pred, m_pred, l_pred
	    # x_pred == Dim(1, g_height, g_width, anchors, (bbox))
	    self.interpreter.set_tensor(self.input_index, image_data)
	    self.interpreter.invoke()
	    candidates = [
	        self.interpreter.get_tensor(index) for index in self.output_index
	    ]

	    # Flatten each output to Dim(1, g_height * g_width * 3, -1) so that all
	    # of them can be joined along axis 1
	    _candidates = []
	    for candidate in candidates:
	        grid_size = candidate.shape[1:3]
	        _candidates.append(
	            np.reshape(
	                candidate[0], (1, grid_size[0] * grid_size[1] * 3, -1)
	            )
	        )
	    candidates = np.concatenate(_candidates, axis=1)

	    # Thresholds are consumed by candidates_to_pred_bboxes; the resulting
	    # boxes are then mapped back to the shape of the original frame
	    pred_bboxes = self.candidates_to_pred_bboxes(
	        candidates[0],
	        iou_threshold=iou_threshold,
	        score_threshold=score_threshold,
	    )
	    pred_bboxes = self.fit_pred_bboxes_to_original(pred_bboxes, frame.shape)
	    return pred_bboxes