BIGBALLON/cifar-10-cnn

Problem training NIN

FlyEgle opened this issue · 5 comments

Hi, when training the NIN model, I first used your initialization method and then tried he_normal. But the training results are still unsatisfactory and don't even reach LeNet-level accuracy. What could be the reason? Also, I noticed that you don't use mlpconv, but rather a stack of conv + relu + bn.

@jiangshaoye
Hello, can you paste your code for me? I want to check something.

The code is the same as yours, but I set kernel_initializer to 'he_normal'.
The accuracy and loss are shown below:
[screenshots: accuracy and loss curves]

@jiangshaoye
That doesn't make sense; he_normal is a good choice for weight initialization.
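
For reference, he_normal draws weights with stddev sqrt(2 / fan_in), so wider layers get proportionally smaller initial weights. A minimal sketch of the idea (illustrative only; the shapes here are hypothetical and Keras computes this internally):

import numpy as np

# he_normal uses stddev = sqrt(2 / fan_in); for a conv layer,
# fan_in = kernel_h * kernel_w * input_channels.
# (Keras actually samples from a truncated normal with this stddev.)
kh, kw, in_ch, out_ch = 3, 3, 192, 192   # hypothetical 3x3 conv, 192 -> 192
fan_in = kh * kw * in_ch
stddev = np.sqrt(2.0 / fan_in)
weights = np.random.normal(0.0, stddev, size=(kh, kw, in_ch, out_ch))
print('stddev = %.4f' % stddev)          # ~0.0340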

@jiangshaoye can you try this code?

import keras
import numpy as np
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Activation, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras import optimizers
from keras.callbacks import LearningRateScheduler, TensorBoard

batch_size    = 128
epochs        = 200
iterations    = 391
num_classes   = 10
dropout       = 0.5
weight_decay  = 0.0001
log_filepath  = './nin_bn'

def color_preprocessing(x_train, x_test):
    # normalize each channel with the CIFAR-10 per-channel mean and std
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    mean = [125.307, 122.95, 113.865]
    std  = [62.9932, 62.0887, 66.7048]
    for i in range(3):
        x_train[:, :, :, i] = (x_train[:, :, :, i] - mean[i]) / std[i]
        x_test[:, :, :, i] = (x_test[:, :, :, i] - mean[i]) / std[i]

    return x_train, x_test

def scheduler(epoch):
    # step-decay learning-rate schedule: 0.05 -> 0.01 -> 0.002 -> 0.0004
    if epoch <= 60:
        return 0.05
    if epoch <= 120:
        return 0.01
    if epoch <= 160:
        return 0.002
    return 0.0004

def build_model():
    model = Sequential()

    # mlpconv block 1: 5x5 conv followed by two 1x1 convs, each with BN + ReLU
    model.add(Conv2D(192, (5, 5), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal", input_shape=x_train.shape[1:]))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(160, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(96, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same'))

    model.add(Dropout(dropout))

    # mlpconv block 2
    model.add(Conv2D(192, (5, 5), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(192, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(192, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same'))

    model.add(Dropout(dropout))

    # mlpconv block 3, ending with 10 feature maps (one per class)
    model.add(Conv2D(192, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(192, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(10, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    # global average pooling + softmax instead of fully connected layers
    model.add(GlobalAveragePooling2D())
    model.add(Activation('softmax'))

    sgd = optimizers.SGD(lr=0.1, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

if __name__ == '__main__':

    # load data
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    
    x_train, x_test = color_preprocessing(x_train, x_test)

    # build network
    model = build_model()
    print(model.summary())

    # set callback
    tb_cb = TensorBoard(log_dir=log_filepath, histogram_freq=0)
    change_lr = LearningRateScheduler(scheduler)
    cbks = [change_lr, tb_cb]

    # set data augmentation
    print('Using real-time data augmentation.')
    datagen = ImageDataGenerator(horizontal_flip=True,
                                 width_shift_range=0.125,
                                 height_shift_range=0.125,
                                 fill_mode='constant',
                                 cval=0.)
    datagen.fit(x_train)

    # start training
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        steps_per_epoch=iterations,
                        epochs=epochs,
                        callbacks=cbks,
                        validation_data=(x_test, y_test))
    model.save('nin_bn.h5')
Epoch 1/200
391/391 [==============================] - 28s 73ms/step - loss: 1.7259 - acc: 0.4661 - val_loss: 1.5627 - val_acc: 0.5164
Epoch 2/200
391/391 [==============================] - 27s 70ms/step - loss: 1.2940 - acc: 0.6205 - val_loss: 1.2845 - val_acc: 0.6258
Epoch 3/200
391/391 [==============================] - 27s 70ms/step - loss: 1.1351 - acc: 0.6760 - val_loss: 1.2144 - val_acc: 0.6544
Epoch 4/200
391/391 [==============================] - 27s 70ms/step - loss: 1.0269 - acc: 0.7157 - val_loss: 0.9367 - val_acc: 0.7466
Epoch 5/200
391/391 [==============================] - 27s 70ms/step - loss: 0.9579 - acc: 0.7374 - val_loss: 0.9483 - val_acc: 0.7403
Epoch 6/200
391/391 [==============================] - 27s 70ms/step - loss: 0.9060 - acc: 0.7555 - val_loss: 0.9904 - val_acc: 0.7349
Epoch 7/200
391/391 [==============================] - 27s 70ms/step - loss: 0.8567 - acc: 0.7707 - val_loss: 0.8772 - val_acc: 0.7666
Epoch 8/200
391/391 [==============================] - 27s 70ms/step - loss: 0.8219 - acc: 0.7834 - val_loss: 0.8766 - val_acc: 0.7682
Epoch 9/200
391/391 [==============================] - 27s 70ms/step - loss: 0.7990 - acc: 0.7900 - val_loss: 0.7812 - val_acc: 0.7979
Epoch 10/200
391/391 [==============================] - 27s 70ms/step - loss: 0.7694 - acc: 0.8016 - val_loss: 0.9660 - val_acc: 0.7459
Epoch 11/200
391/391 [==============================] - 27s 70ms/step - loss: 0.7473 - acc: 0.8084 - val_loss: 0.8327 - val_acc: 0.7792
Epoch 12/200
391/391 [==============================] - 27s 70ms/step - loss: 0.7313 - acc: 0.8142 - val_loss: 0.7896 - val_acc: 0.8009

...
...
391/391 [==============================] - 27s 70ms/step - loss: 0.2248 - acc: 0.9725 - val_loss: 0.4582 - val_acc: 0.9132
Epoch 128/200
391/391 [==============================] - 27s 70ms/step - loss: 0.2281 - acc: 0.9703 - val_loss: 0.4627 - val_acc: 0.9124
Epoch 129/200
391/391 [==============================] - 27s 70ms/step - loss: 0.2214 - acc: 0.9729 - val_loss: 0.4624 - val_acc: 0.9120
Epoch 130/200
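
By the way, if you want to sanity-check the saved model afterwards, here is a minimal evaluation sketch (assuming nin_bn.h5 was produced by the script above, and reusing the same per-channel normalization):

import keras
from keras.models import load_model
from keras.datasets import cifar10

# load the trained model and evaluate on the preprocessed test set
model = load_model('nin_bn.h5')
(_, _), (x_test, y_test) = cifar10.load_data()
x_test = x_test.astype('float32')
mean = [125.307, 122.95, 113.865]
std  = [62.9932, 62.0887, 66.7048]
for i in range(3):
    x_test[:, :, :, i] = (x_test[:, :, :, i] - mean[i]) / std[i]
y_test = keras.utils.to_categorical(y_test, 10)
loss, acc = model.evaluate(x_test, y_test, verbose=0)
print('test loss: %.4f, test acc: %.4f' % (loss, acc))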

I created a new pull request in 8cc556b, feel free to take a look.

@jiangshaoye For your second question:

This is an mlpconv in Network_in_Network_keras.py.

  model.add(Conv2D(192, (5, 5), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal", input_shape=x_train.shape[1:]))
  model.add(Activation('relu'))
  model.add(Conv2D(160, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
  model.add(Activation('relu'))
  model.add(Conv2D(96, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
  model.add(Activation('relu'))
  model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same'))

And this is the corresponding mlpconv with BN in Network_in_Network_bn_keras.py.

NIN was published in 2013, while BN was published in 2015, so the NIN paper doesn't use BN.
But we want to use BN to improve the model's performance, which is why my model differs somewhat from the original paper. I still think it is an mlpconv, though.

  model.add(Conv2D(192, (5, 5), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal", input_shape=x_train.shape[1:]))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Conv2D(160, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(Conv2D(96, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
  model.add(BatchNormalization())
  model.add(Activation('relu'))
  model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same'))
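
A note on why the 1x1 convolutions realize the "micro MLP" from the paper: a 1x1 conv applies one and the same fully connected mapping across channels at every spatial position. A small sketch with hypothetical shapes:

import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D

# A 1x1 conv is a per-pixel fully connected layer, which is exactly the
# "micro MLP" in mlpconv. Hypothetical shapes: map 160 channels -> 96
# channels on an 8x8 feature map.
x = np.random.randn(1, 8, 8, 160).astype('float32')

conv = Sequential([Conv2D(96, (1, 1), input_shape=(8, 8, 160))])
y_conv = conv.predict(x)

# Apply the same weights as a plain matrix multiply at one position (3, 5).
w, b = conv.layers[0].get_weights()      # w: (1, 1, 160, 96), b: (96,)
y_dense = x[0, 3, 5].dot(w[0, 0]) + b
print(np.allclose(y_conv[0, 3, 5], y_dense, atol=1e-5))  # True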

I'll just close this issue; feel free to reopen it if you need.