Obtained baseline accuracy for ResNet50v2 is different than officially reported
gcunhase opened this issue · 4 comments
gcunhase commented
Summary
Obtained baseline accuracy for ResNet50v2 is different than officially reported:
Model | Top-1 (%) |
---|---|
Baseline (official) | 75.96 |
Baseline (obtained) | 66.85 |
ResNet50v1 works fine, so I don't think the issue is with data loading or with the eval code. My current guess is that there is a mismatch between the loaded checkpoint and the input pre-processing function used for ResNetv2 (`keras.applications.resnet_v2.preprocess_input`).
Any advice is appreciated.
Environment
- Python version: 3.8
- Keras backend with version: TF 2.7
Logs or source codes for reproduction
- Download the ImageNet `tfrecord` validation dataset.
- Run the evaluation script:
import os
import tensorflow as tf
# Directory holding the ImageNet TFRecord shards; main() passes it to load_data().
DATA_DIR = "/media/Data/ImageNet/train-val-tfrecord"
# Height and width (in pixels) that every image is resized to before inference.
_DEFAULT_IMAGE_SIZE = 224
# Number of color channels requested when decoding an image.
_NUM_CHANNELS = 3
# NOTE(review): not referenced anywhere in the visible script. Presumably the
# shorter-side size for an aspect-preserving resize before cropping -- confirm.
_RESIZE_MIN = 256
def load_data(data_dir='./data/imagenet', batch_size=8, num_val_files=128):
    """Build the batched ImageNet validation dataset from TFRecord shards.

    The full ImageNet2012 dataset must be downloaded manually and converted to
    TFRecord shards before calling this function.

    Args:
        data_dir: Directory containing the validation TFRecord shards.
        batch_size: Number of examples per batch. A trailing partial batch is
            dropped (``drop_remainder=True``), so up to ``batch_size - 1``
            examples are not evaluated.
        num_val_files: Number of validation shards, forwarded to
            ``get_filenames``.

    Returns:
        A ``tf.data.Dataset`` of batches produced by mapping
        ``preprocess_image_record_tf_v2`` over the records.

    Raises:
        FileNotFoundError: If ``data_dir`` does not exist.
    """
    # Explicit check instead of `assert`: assertions are stripped when Python
    # runs with -O, which would defer the failure to an obscure read error.
    if not os.path.exists(data_dir):
        raise FileNotFoundError(
            "ImageNet data directory not found: {!r}".format(data_dir)
        )

    # Read the validation shards and preprocess each record individually.
    filenames = get_filenames(data_dir, num_val_files=num_val_files)
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(map_func=preprocess_image_record_tf_v2)
    dataset = dataset.batch(batch_size, drop_remainder=True)
    return dataset
def preprocess_image_record_tf_v2(record):
    """Decode one serialized ImageNet record and preprocess it for ResNetV2.

    This function is called by "dataset.map()" in the "map_func" argument.
    That function doesn't allow for multiple input arguments, only "record".

    Args:
        record: A serialized example, as accepted by
            ``_deserialize_image_record``.

    Returns:
        A tuple ``(image, label)``: the image cast to float32, resized to
        ``_DEFAULT_IMAGE_SIZE`` x ``_DEFAULT_IMAGE_SIZE`` and passed through
        ``tf.keras.applications.resnet_v2.preprocess_input``; and the label
        shifted down by one.
    """
    # Bounding boxes and the class text are parsed but not needed here.
    imgdata, label, _, _ = _deserialize_image_record(record)
    # Subtract one so that labels are in [0, 1000)
    label -= 1

    try:
        image = tf.image.decode_jpeg(
            imgdata,
            channels=_NUM_CHANNELS,
            fancy_upscaling=False,
            dct_method='INTEGER_FAST',
        )
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt / SystemExit are
        # no longer swallowed.
        # NOTE(review): when this function is traced by dataset.map(), this
        # fallback presumably only triggers on trace-time errors, not on
        # per-record decode failures -- confirm.
        image = tf.image.decode_image(imgdata, channels=_NUM_CHANNELS)

    # V2
    image = tf.cast(image, tf.float32)
    # NOTE(review): resizing straight to a square does not preserve the aspect
    # ratio, and _RESIZE_MIN is unused -- confirm this matches the
    # preprocessing used to produce the reported accuracy.
    image = tf.image.resize(image, (_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE))
    image = tf.keras.applications.resnet_v2.preprocess_input(image)
    return image, label
def get_filenames(data_dir, num_val_files=128):
    """List the paths of all validation TFRecord shards under ``data_dir``.

    Shards are named ``validation-<index>-of-<total>`` with both numbers
    zero-padded to five digits, and ``index`` running from 0 up to (but not
    including) ``num_val_files``.
    """
    shard_template = 'validation-{:05d}-of-{:05d}'
    shard_paths = []
    for shard_index in range(num_val_files):
        shard_name = shard_template.format(shard_index, num_val_files)
        shard_paths.append(os.path.join(data_dir, shard_name))
    return shard_paths
def _deserialize_image_record(record):
    """Parse one serialized example into its image, label, boxes and text.

    Args:
        record: A serialized example containing the ``image/*`` features
            declared in the schema below.

    Returns:
        A tuple ``(imgdata, label, bbox, text)``:
        the encoded image string, the class label cast to int32, the bounding
        boxes stacked in ``[ymin, xmin, ymax, xmax]`` order and rearranged to
        shape ``(1, num_boxes, 4)``, and the class text string.
    """
    schema = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string, ''),
        'image/class/label': tf.io.FixedLenFeature([], tf.int64, -1),
        'image/class/text': tf.io.FixedLenFeature([], tf.string, ''),
        'image/object/bbox/xmin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.io.VarLenFeature(dtype=tf.float32)
    }

    with tf.name_scope('deserialize_image_record'):
        parsed = tf.io.parse_single_example(record, schema)

        imgdata = parsed['image/encoded']
        label = tf.cast(parsed['image/class/label'], tf.int32)

        # Collect the dense values of each sparse coordinate feature, in the
        # order ymin, xmin, ymax, xmax.
        edge_values = []
        for edge in ['ymin', 'xmin', 'ymax', 'xmax']:
            edge_values.append(parsed['image/object/bbox/%s' % edge].values)

        # (4, num_boxes) -> (1, 4, num_boxes) -> (1, num_boxes, 4)
        stacked_edges = tf.stack(edge_values)
        with_leading_axis = tf.expand_dims(stacked_edges, 0)
        bbox = tf.transpose(with_leading_axis, [0, 2, 1])

        text = parsed['image/class/text']

    return imgdata, label, bbox, text
def main():
    """Evaluate the pretrained ResNet50V2 on the validation set and print accuracy."""
    # Load tfrecord data
    validation_dataset = load_data(data_dir=DATA_DIR, batch_size=128)

    # Instantiate Baseline model
    baseline_model = tf.keras.applications.ResNet50V2(
        include_top=True,
        weights='imagenet',
        input_tensor=None,
        input_shape=None,
        pooling=None,
        classes=1000,
        classifier_activation='softmax',
    )

    # Compiling is only needed so that evaluate() reports loss and accuracy;
    # no training happens here.
    sparse_cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy()
    baseline_model.compile(
        optimizer="sgd",
        loss=sparse_cross_entropy,
        metrics=["accuracy"],
    )

    _loss, baseline_model_accuracy = baseline_model.evaluate(validation_dataset)
    print("Baseline val accuracy:", baseline_model_accuracy)


# Run the evaluation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
gcunhase commented
Any updates on this?
gcunhase commented
Any updates on this?
gcunhase commented
Any updates on this?
qlzh727 commented
Let me close this bug and track the progress in keras-team/keras#15822