Unable to quantize to 4 bits
MounikaVaddeboina opened this issue · 0 comments
Prior to filing: check that this should be a bug instead of a feature request. Everything supported, including the compatible versions of TensorFlow, is listed in the overview page of each technique. For example, the overview page of quantization-aware training is here. An issue for anything not supported should be a feature request.
Describe the bug
I'm trying to quantize the Conv2D and Dense layers in my model to 4 bits, but after quantization the weights are not in the range -7 to 7.
Please help me resolve this issue.
System information
TensorFlow version (installed from source or binary): 2.8.0
TensorFlow Model Optimization version (installed from source or binary): 0.7.1
Python version: 3.8.0
Describe the expected behavior
I expected the Conv2D and Dense layer weights to be in the range -7 to 7.
Describe the current behavior
Instead, the weights are in the range -128 to 127.
Code to reproduce the issue
Provide reproducible code that is the bare minimum necessary to generate the
problem.
import tempfile
import os
import tensorflow as tf
import numpy as np
import tensorflow_model_optimization as tfmot
from keras import backend as K
from tensorflow import keras
# Short aliases for the two tfmot quantizer classes used by the configs below.
LastValueQuantizer, MovingAverageQuantizer = (
    tfmot.quantization.keras.quantizers.LastValueQuantizer,
    tfmot.quantization.keras.quantizers.MovingAverageQuantizer,
)
class DefaultDenseQuantizeConfig(tfmot.quantization.keras.QuantizeConfig):
    """QuantizeConfig that quantizes a layer's kernel and activation with 4 bits.

    The kernel is paired with a symmetric ``LastValueQuantizer`` and the
    activation with an asymmetric ``MovingAverageQuantizer``. Both are
    created with ``narrow_range=False`` and ``per_axis=False``. No output
    quantizers are configured.
    """

    def get_weights_and_quantizers(self, layer):
        """Return the ``(weight, quantizer)`` pairs for ``layer``: its kernel only."""
        kernel_quantizer = LastValueQuantizer(
            num_bits=4,
            symmetric=True,
            narrow_range=False,
            per_axis=False,
        )
        return [(layer.kernel, kernel_quantizer)]

    def get_activations_and_quantizers(self, layer):
        """Return the ``(activation, quantizer)`` pairs for ``layer``."""
        activation_quantizer = MovingAverageQuantizer(
            num_bits=4,
            symmetric=False,
            narrow_range=False,
            per_axis=False,
        )
        return [(layer.activation, activation_quantizer)]

    def set_quantize_weights(self, layer, quantize_weights):
        """Assign the quantized kernel back onto ``layer``.

        One assignment is needed per item returned by
        ``get_weights_and_quantizers``, in the same order.
        """
        layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
        """Assign the quantized activation back onto ``layer``.

        One assignment is needed per item returned by
        ``get_activations_and_quantizers``, in the same order.
        """
        layer.activation = quantize_activations[0]

    def get_output_quantizers(self, layer):
        """Return no output quantizers (outputs may be equivalent to activations)."""
        return []

    def get_config(self):
        """Return an empty dict; this config serializes no state."""
        return {}
# Short aliases for the tfmot Keras quantization entry points used below.
quantize_annotate_layer, quantize_annotate_model, quantize_scope = (
    tfmot.quantization.keras.quantize_annotate_layer,
    tfmot.quantization.keras.quantize_annotate_model,
    tfmot.quantization.keras.quantize_scope,
)
class ModifiedDenseQuantizeConfig(DefaultDenseQuantizeConfig):
    """Variant of ``DefaultDenseQuantizeConfig`` that overrides the kernel quantizer.

    The override is meant to configure 4-bit instead of 8-bit weights. As
    written, its quantizer arguments are identical to the base class's.
    """

    def get_weights_and_quantizers(self, layer):
        """Return the layer's kernel paired with a 4-bit symmetric quantizer."""
        four_bit_quantizer = LastValueQuantizer(
            num_bits=4,
            symmetric=True,
            narrow_range=False,
            per_axis=False,
        )
        return [(layer.kernel, four_bit_quantizer)]
# Load MNIST dataset
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalize the input image so that each pixel value is between 0 and 1.
train_images = train_images / 255.0
test_images = test_images / 255.0
# Build the annotated model. The Conv2D and Dense layers are wrapped with
# `quantize_annotate_layer` so that they carry the custom QuantizeConfig.
new_model = quantize_annotate_model(tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(28, 28)),
    tf.keras.layers.Reshape(target_shape=(28, 28, 1)),
    # Pass in modified QuantizeConfig to modify this Conv2D layer.
    #tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),
    quantize_annotate_layer(
        tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),
        ModifiedDenseQuantizeConfig()),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    # Pass in modified QuantizeConfig to modify this Dense layer.
    quantize_annotate_layer(tf.keras.layers.Dense(10), ModifiedDenseQuantizeConfig()),
    #tf.keras.layers.Dense(10),
    tf.keras.layers.Flatten()
]))
# Train the annotated (not yet quantization-aware) model for two epochs,
# holding out 10% of the training data for validation.
new_model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
new_model.fit(x=train_images, y=train_labels, epochs=2, validation_split=0.1)

new_model.summary()
# `quantize_apply` requires mentioning `ModifiedDenseQuantizeConfig` with
# `quantize_scope`:
with quantize_scope(
        {'ModifiedDenseQuantizeConfig': ModifiedDenseQuantizeConfig}):
    # Use `quantize_apply` to actually make the model quantization aware.
    quant_aware_model = tfmot.quantization.keras.quantize_apply(new_model)
# Train the quantization-aware model with the same settings as the
# annotated model: two epochs, 10% validation split.
quant_aware_model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
quant_aware_model.fit(
    x=train_images, y=train_labels, epochs=2, validation_split=0.1)

quant_aware_model.summary()
# Convert the quantization-aware Keras model to TFLite with the converter's
# default optimizations enabled.
# NOTE(review): the tfmot quantization-aware-training guide says TFLite
# conversion only supports 8-bit quantization, which would explain weights in
# the range -128 to 127 despite num_bits=4 — confirm against the docs.
converter = tf.lite.TFLiteConverter.from_keras_model(quant_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Write the converted model bytes to disk.
with open('No_prune_4bit.tflite', 'wb') as model_file:
    model_file.write(tflite_model)
Screenshots
If applicable, add screenshots to help explain your problem.
Additional context
Add any other context about the problem here.