tensorflow/model-optimization

Naive quantization: quantize an .h5 model without converting to TFLite?

STAROFWIND opened this issue · 0 comments

Hi all,
I am working on quantization. I have a .h5 model, and I want to convert its weights from float32 to int8 or float16. This seems to be "post-training quantization". How can I do that without converting to TFLite (i.e., still save it as an .h5 model)?
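For the float16 case, the most naive thing I can think of is to round-trip every weight tensor through float16 and save the model back to .h5. A minimal sketch of that idea (file paths are placeholders; note the model still computes in float32 at inference time, only the stored values are rounded):

```python
import numpy as np
import tensorflow as tf

model = tf.keras.models.load_model("model.h5")  # placeholder path
for layer in model.layers:
    # Round-trip each weight tensor through float16 to simulate the
    # precision loss; the stored dtype goes back to float32.
    layer.set_weights([w.astype(np.float16).astype(np.float32)
                       for w in layer.get_weights()])
model.save("model_fp16.h5")  # placeholder path
```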

I tried:

```python
import os, argparse, json, cv2

# Import necessary items from Keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dropout, UpSampling2D
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

# Import local packages
import tensorflow_model_optimization as tfmot
import tensorflow as tf
from tensorflow.keras.models import model_from_json

DEBUG = False

LastValueQuantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer
MovingAverageQuantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer

def apply_quantization(skip_layers):
    # Returns a clone_function that annotates every layer for quantization
    # except the layer types listed in skip_layers.
    def wrapper(layer):
        if type(layer) in skip_layers:
            print(layer.name)
            return layer
        else:
            return tfmot.quantization.keras.quantize_annotate_layer(layer)
    return wrapper

# def wrapper(layer):
#     if type(layer) in skip_layers:
#         print(layer.name)
#         return tfmot.quantization.keras.quantize_annotate_layer(layer)
#     else:
#         return layer
# return wrapper

class DefaultQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    # Quantize the kernel weights to 4 bits.
    def get_weights_and_quantizers(self, layer):
        return [(layer.kernel, LastValueQuantizer(num_bits=4, symmetric=True, narrow_range=False, per_axis=False))]

    # Quantize the activations to 4 bits.
    def get_activations_and_quantizers(self, layer):
        return [(layer.activation, MovingAverageQuantizer(num_bits=4, symmetric=False, narrow_range=False, per_axis=False))]

    def set_quantize_weights(self, layer, quantize_weights):
        layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
        layer.activation = quantize_activations[0]

    def get_output_quantizers(self, layer):
        return [MovingAverageQuantizer(num_bits=4, per_axis=False, symmetric=False, narrow_range=False)]

    def get_config(self):
        return {}

"""
# Configure how to quantize weights.
def get_weights_and_quantizers(self, layer):
    return [(layer.kernel, LastValueQuantizer(num_bits=8, symmetric=True, narrow_range=False, per_axis=False))]

# Configure how to quantize activations.
def get_activations_and_quantizers(self, layer):
    return [
        (layer.activation, MovingAverageQuantizer(num_bits=8, symmetric=False, narrow_range=False, per_axis=False))]

def set_quantize_weights(self, layer, quantize_weights):
    # Add this line for each item returned in `get_weights_and_quantizers`
    # , in the same order
    layer.kernel = quantize_weights[0]

def set_quantize_activations(self, layer, quantize_activations):
    # Add this line for each item returned in `get_activations_and_quantizers`
    # , in the same order.
    layer.activation = quantize_activations[0]

# Configure how to quantize outputs (may be equivalent to activations).
def get_output_quantizers(self, layer):
    return []

def get_config(self):
    return {}
"""

if __name__ == '__main__':

    input_shape = (320, 320, 3)

    # Rebuild the model from its JSON architecture and load the weights.
    with open("path/to/1_model_quantize.json") as f:
        json_model = f.read()
    model = model_from_json(json_model)
    model.load_weights("path to h5")
    model.summary()

    # Quantize
    quantize_model = tfmot.quantization.keras.quantize_model
    # q_aware stands for quantization aware.
    # q_aware_model = quantize_model(model)
    q_aware_model = tf.keras.models.clone_model(
        model, clone_function=apply_quantization(skip_layers=[BatchNormalization]))
    with tfmot.quantization.keras.quantize_scope({'DefaultQuantizeConfig': DefaultQuantizeConfig}):
        quant_aware_model = tfmot.quantization.keras.quantize_apply(q_aware_model)

    # `quantize_apply` requires a recompile.
    quant_aware_model.compile(optimizer='Adam', loss='mean_squared_error',
                              metrics=['mean_squared_error', 'accuracy'])
    quantize_file = "save quantize .h5"
    quant_aware_model.summary()
    tf.keras.models.save_model(quant_aware_model, quantize_file, include_optimizer=False)
```
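To check what came out, I printed the dtype of every weight in the quantized model, with a quick loop along these lines:

```python
for w in quant_aware_model.weights:
    print(w.name, w.dtype)  # every weight still reports float32
```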

The result is that all layers are still in float32.
Thank you so much.
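P.S. For reference, I know the standard post-training route goes through the TFLite converter, roughly like the float16 sketch below (paths are placeholders), but that is exactly the conversion I am trying to avoid:

```python
import tensorflow as tf

model = tf.keras.models.load_model("model.h5")  # placeholder path
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]  # float16 weights
with open("model_fp16.tflite", "wb") as f:  # placeholder path
    f.write(converter.convert())
```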