Obtained baseline accuracy for ResNet50v2 is different than officially reported

Question

Obtained baseline accuracy for ResNet50v2 is different than officially reported

gcunhase opened this issue 3 years ago · 4 comments

Summary

The baseline accuracy I obtain for ResNet50V2 differs from the officially reported value:

Model	Top-1 (%)
Baseline (official)	75.96
Baseline (obtained)	66.85

ResNet50v1 works fine, so I don't think the issue is with data loading or with the eval code. My current guess is that there is a mismatch between the loaded checkpoint and the input pre-processing function for ResNetV2 (keras.applications.resnet_v2.preprocess_input).

Any advice is appreciated.

Environment

Python version: 3.8
Keras backend with version: TF 2.7

Logs or source codes for reproduction

Download ImageNet tfrecord validation dataset.
Run evaluation script:

import os
import tensorflow as tf

# Directory with the ImageNet TFRecord shards; passed to load_data() in main().
DATA_DIR = "/media/Data/ImageNet/train-val-tfrecord"
# Height and width (in pixels) that every image is resized to.
_DEFAULT_IMAGE_SIZE = 224
# Number of colour channels requested when decoding an image.
_NUM_CHANNELS = 3
# NOTE(review): not referenced anywhere in the visible code. Presumably the
# shorter-side length for an aspect-preserving resize before a central crop;
# confirm whether it should be used or removed.
_RESIZE_MIN = 256


def load_data(data_dir='./data/imagenet', batch_size=8, num_val_files=128):
    """Build a batched ``tf.data`` pipeline over the ImageNet validation shards.

    The full ImageNet2012 dataset has to be downloaded and converted to
    TFRecord files manually before calling this function.

    Args:
        data_dir: Directory that contains the validation TFRecord shards.
        batch_size: Number of examples per batch.
        num_val_files: Total number of validation shards to read.

    Returns:
        A ``tf.data.Dataset`` of batches produced by mapping
        ``preprocess_image_record_tf_v2`` over every record.

    Raises:
        FileNotFoundError: If ``data_dir`` does not exist.
    """
    # Explicit check rather than ``assert`` so that the validation is not
    # stripped when Python runs with ``-O``.
    if not os.path.exists(data_dir):
        raise FileNotFoundError(
            "ImageNet data directory does not exist: {}".format(data_dir)
        )

    # Read the validation shards and decode/preprocess every record.
    filenames = get_filenames(data_dir, num_val_files=num_val_files)
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(map_func=preprocess_image_record_tf_v2)
    # NOTE: drop_remainder=True discards the final partial batch, so metrics
    # are computed on slightly fewer examples whenever the number of records
    # is not a multiple of batch_size.
    dataset = dataset.batch(batch_size, drop_remainder=True)

    return dataset


def preprocess_image_record_tf_v2(record):
    """Decode one serialized example and prepare it for a ResNetV2 model.

    This function is called by "dataset.map()" in the "map_func" argument.
    That function doesn't allow for multiple input arguments, only "record".

    Args:
        record: One serialized example read from the TFRecord files.

    Returns:
        A tuple ``(image, label)``. ``image`` is the decoded picture cast to
        float32, resized to ``_DEFAULT_IMAGE_SIZE`` x ``_DEFAULT_IMAGE_SIZE``
        and passed through ``resnet_v2.preprocess_input``. ``label`` is the
        stored class label minus one.
    """
    # The bounding boxes and the class text are parsed but not needed here.
    imgdata, label, _, _ = _deserialize_image_record(record)
    # Subtract one so that labels are in [0, 1000)
    label -= 1

    # NOTE(review): inside dataset.map() this body is presumably traced into a
    # graph, in which case the fallback below would only trigger on errors
    # raised while building the op, not on run-time decode failures - confirm.
    try:
        image = tf.image.decode_jpeg(imgdata, channels=_NUM_CHANNELS, fancy_upscaling=False, dct_method='INTEGER_FAST')
    except Exception:
        # Narrowed from a bare ``except`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed by the fallback decoder.
        image = tf.image.decode_image(imgdata, channels=_NUM_CHANNELS)

    # V2
    image = tf.cast(image, tf.float32)
    # NOTE(review): this resizes straight to a square without preserving the
    # aspect ratio and without a central crop; _RESIZE_MIN is not used.
    # Confirm that this matches the pre-processing the reference accuracy
    # was measured with.
    image = tf.image.resize(image, (_DEFAULT_IMAGE_SIZE, _DEFAULT_IMAGE_SIZE))
    image = tf.keras.applications.resnet_v2.preprocess_input(image)

    return image, label


def get_filenames(data_dir, num_val_files=128):
    """Build the list of validation shard paths located under ``data_dir``.

    Shards are named ``validation-<index>-of-<total>`` with both numbers
    zero-padded to five digits; ``index`` runs from 0 to ``num_val_files - 1``.
    """
    shard_paths = []
    for shard_idx in range(num_val_files):
        shard_name = 'validation-{:05d}-of-{:05d}'.format(shard_idx, num_val_files)
        shard_paths.append(os.path.join(data_dir, shard_name))
    return shard_paths


def _deserialize_image_record(record):
    """Parse one serialized example into its individual fields.

    Returns:
        A tuple ``(imgdata, label, bbox, text)``: the encoded image bytes,
        the class label cast to int32, the bounding boxes arranged as a
        ``(1, num_boxes, 4)`` tensor whose last axis is ordered
        ``ymin, xmin, ymax, xmax``, and the class text.
    """
    # Feature specs shared by several keys of the schema below.
    string_scalar = tf.io.FixedLenFeature([], tf.string, '')
    float_list = tf.io.VarLenFeature(dtype=tf.float32)
    schema = {
        'image/encoded': string_scalar,
        'image/class/label': tf.io.FixedLenFeature([], tf.int64, -1),
        'image/class/text': string_scalar,
        'image/object/bbox/xmin': float_list,
        'image/object/bbox/ymin': float_list,
        'image/object/bbox/xmax': float_list,
        'image/object/bbox/ymax': float_list,
    }
    with tf.name_scope('deserialize_image_record'):
        parsed = tf.io.parse_single_example(record, schema)
        encoded_image = parsed['image/encoded']
        class_id = tf.cast(parsed['image/class/label'], tf.int32)

        # Collect the dense values of each coordinate, one row per coordinate.
        coord_rows = []
        for coord in ('ymin', 'xmin', 'ymax', 'xmax'):
            coord_rows.append(parsed['image/object/bbox/%s' % coord].values)
        stacked = tf.stack(coord_rows)
        # (4, num_boxes) -> (1, 4, num_boxes) -> (1, num_boxes, 4)
        boxes = tf.transpose(tf.expand_dims(stacked, 0), [0, 2, 1])

        class_text = parsed['image/class/text']

    return encoded_image, class_id, boxes, class_text


def main():
    """Evaluate the pretrained Keras ResNet50V2 on the validation data and print its accuracy."""
    # Validation pipeline built from the TFRecord shards.
    eval_dataset = load_data(data_dir=DATA_DIR, batch_size=128)

    # Baseline network with the pretrained ImageNet weights.
    resnet = tf.keras.applications.ResNet50V2(
        include_top=True,
        weights='imagenet',
        input_tensor=None,
        input_shape=None,
        pooling=None,
        classes=1000,
        classifier_activation='softmax',
    )

    # The model is compiled so that evaluate() reports a loss and the accuracy metric.
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    resnet.compile(optimizer="sgd", loss=loss_fn, metrics=["accuracy"])
    _loss, baseline_model_accuracy = resnet.evaluate(eval_dataset)
    print("Baseline val accuracy:", baseline_model_accuracy)


# Run the evaluation only when this file is executed as a script.
if __name__ == '__main__':
    main()

Answer 1 · 2021-12-14T23:11:50.000Z

Any updates on this?

Answer 2 · 2021-12-21T01:53:46.000Z

Any updates on this?

Answer 3 · 2022-01-04T03:00:43.000Z

Any updates on this?

Answer 4 · 2022-02-01T02:04:28.000Z

Let me close this bug and track the progress in keras-team/keras#15822