tensorflow/model-optimization

Unable to quantize to 4-bits

MounikaVaddeboina opened this issue · 0 comments

Describe the bug
I'm trying to quantize the Conv2D and Dense layers in my model to 4 bits, but after quantization the weights are not in the range -7 to 7.
Kindly help me solve this issue.

System information

TensorFlow version (installed from source or binary): 2.8.0

TensorFlow Model Optimization version (installed from source or binary): 0.7.1

Python version: 3.8.0

Describe the expected behavior

I expected the Conv2D and Dense layer weights to be in the range -7 to 7.
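
(For reference, plain signed-integer arithmetic, not taken from the TFMOT docs: a 4-bit grid with narrow_range=False spans [-2**3, 2**3 - 1] = [-8, 7]; narrow_range=True drops -8, giving [-7, 7]. Note that the repro below passes narrow_range=False. A quick check:)

num_bits = 4
print(-(2 ** (num_bits - 1)), 2 ** (num_bits - 1) - 1)      # -8 7  (narrow_range=False)
print(-(2 ** (num_bits - 1)) + 1, 2 ** (num_bits - 1) - 1)  # -7 7  (narrow_range=True)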

Describe the current behavior
Instead, the weights in the converted TFLite model are in the range -128 to 127.

Code to reproduce the issue

import tempfile
import os

import tensorflow as tf
import numpy as np
import tensorflow_model_optimization as tfmot
from keras import backend as K
from tensorflow import keras

LastValueQuantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer
MovingAverageQuantizer = tfmot.quantization.keras.quantizers.MovingAverageQuantizer

class DefaultDenseQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    # Configure how to quantize weights.
    def get_weights_and_quantizers(self, layer):
        return [(layer.kernel, LastValueQuantizer(num_bits=4, symmetric=True, narrow_range=False, per_axis=False))]

    # Configure how to quantize activations.
    def get_activations_and_quantizers(self, layer):
        return [(layer.activation, MovingAverageQuantizer(num_bits=4, symmetric=False, narrow_range=False, per_axis=False))]

    def set_quantize_weights(self, layer, quantize_weights):
        # Add this line for each item returned in `get_weights_and_quantizers`,
        # in the same order.
        layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
        # Add this line for each item returned in `get_activations_and_quantizers`,
        # in the same order.
        layer.activation = quantize_activations[0]

    # Configure how to quantize outputs (may be equivalent to activations).
    def get_output_quantizers(self, layer):
        return []

    def get_config(self):
        return {}

quantize_annotate_layer = tfmot.quantization.keras.quantize_annotate_layer
quantize_annotate_model = tfmot.quantization.keras.quantize_annotate_model
quantize_scope = tfmot.quantization.keras.quantize_scope

class ModifiedDenseQuantizeConfig(DefaultDenseQuantizeConfig):
    # Configure weights to quantize with 4 bits instead of the default 8 bits.
    def get_weights_and_quantizers(self, layer):
        return [(layer.kernel, LastValueQuantizer(num_bits=4, symmetric=True, narrow_range=False, per_axis=False))]
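
For reference, here is a minimal standalone sketch (not part of the original repro; the names holder, qvars, and demo are illustrative) of what this 4-bit LastValueQuantizer emits during training. It assumes the TFMOT 0.7.x quantizer API, where build() attaches the min/max variables to a host layer and the quantizer is then called directly. The point is that the output is still float32, fake-quantized to at most 16 levels; the integer range never appears explicitly in the Keras weights:

import numpy as np
import tensorflow as tf
import tensorflow_model_optimization as tfmot

quantizer = tfmot.quantization.keras.quantizers.LastValueQuantizer(
    num_bits=4, symmetric=True, narrow_range=False, per_axis=False)

# The quantizer's min/max variables need a layer to live on; a bare Layer works.
holder = tf.keras.layers.Layer()
qvars = quantizer.build(tf.TensorShape([3, 3]), 'demo', holder)

x = tf.random.uniform((3, 3), minval=-1.0, maxval=1.0)
fq = quantizer(x, training=True, weights=qvars)
print(np.unique(fq.numpy()).size)  # at most 16 distinct float32 values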

# Load the MNIST dataset.
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize the input images so that each pixel value is between 0 and 1.
train_images = train_images / 255.0
test_images = test_images / 255.0

new_model = quantize_annotate_model(tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(28, 28)),
    tf.keras.layers.Reshape(target_shape=(28, 28, 1)),
    # Pass in the modified QuantizeConfig to quantize these layers.
    #tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),
    quantize_annotate_layer(tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),
                            ModifiedDenseQuantizeConfig()),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    quantize_annotate_layer(tf.keras.layers.Dense(10), ModifiedDenseQuantizeConfig()),
    #tf.keras.layers.Dense(10),
    tf.keras.layers.Flatten()
]))
new_model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
new_model.fit(
    train_images,
    train_labels,
    epochs=2,
    validation_split=0.1,
)
new_model.summary()

# `quantize_apply` requires mentioning `ModifiedDenseQuantizeConfig` with `quantize_scope`:
with quantize_scope(
        {'ModifiedDenseQuantizeConfig': ModifiedDenseQuantizeConfig}):
    # Use `quantize_apply` to actually make the model quantization aware.
    quant_aware_model = tfmot.quantization.keras.quantize_apply(new_model)

quant_aware_model.compile(optimizer='adam',
                          loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                          metrics=['accuracy'])
quant_aware_model.fit(
    train_images,
    train_labels,
    epochs=2,
    validation_split=0.1,
)
quant_aware_model.summary()

converter = tf.lite.TFLiteConverter.from_keras_model(quant_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()
with open('No_prune_4bit.tflite', 'wb') as f:
    f.write(tflite_model)
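
For what it's worth, here is a hedged sketch of how to verify the observation above (filtering on int8 dtype and calling get_tensor on constant tensors are assumptions about the converted model's layout). TFLite in this version has no 4-bit weight type, so the converter stores quantized weights as int8, which is where the -128 to 127 range comes from. If the 4-bit fake quantization took effect during training, each weight tensor should contain only about 16 distinct int8 values, even though the dtype itself spans -128 to 127:

import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path='No_prune_4bit.tflite')
interpreter.allocate_tensors()

for detail in interpreter.get_tensor_details():
    if detail['dtype'] != np.int8:
        continue
    try:
        values = interpreter.get_tensor(detail['index'])
    except ValueError:
        continue  # activation tensors carry no stored data
    print(detail['name'],
          'min/max:', values.min(), values.max(),
          'distinct values:', np.unique(values).size)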
