NIN training issue
FlyEgle opened this issue · 5 comments
FlyEgle commented
BIGBALLON commented
@jiangshaoye
Hello, can you paste your code for me? I want to check something.
FlyEgle commented
BIGBALLON commented
@jiangshaoye
That doesn't make sense; he_normal is a good choice for weight initialization.
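For reference, here is roughly what he_normal computes (a sketch of He et al. (2015) initialization; the actual Keras initializer uses a truncated normal, so this is only an approximation):

import numpy as np

# he_normal draws weights with stddev = sqrt(2 / fan_in), which keeps the
# variance of ReLU activations stable across layers (He et al., 2015)
fan_in = 5 * 5 * 3                          # first conv: 5x5 kernel, 3 input channels
stddev = np.sqrt(2.0 / fan_in)              # ~0.163
w = np.random.randn(5, 5, 3, 192) * stddev  # rough, untruncated approximation
print(w.std())                              # close to 0.163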
BIGBALLON commented
@jiangshaoye can you try this code?
import keras
import numpy as np
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, AveragePooling2D
from keras.initializers import RandomNormal
from keras.layers.normalization import BatchNormalization
from keras import optimizers
from keras.callbacks import LearningRateScheduler, TensorBoard
batch_size = 128
epochs = 200
iterations = 391        # ceil(50000 / 128): one full pass over the CIFAR-10 training set per epoch
num_classes = 10
dropout = 0.5
weight_decay = 0.0001
log_filepath = './nin_bn'
def color_preprocessing(x_train, x_test):
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    # per-channel mean/std of the raw CIFAR-10 training images (0-255 range)
    mean = [125.307, 122.95, 113.865]
    std = [62.9932, 62.0887, 66.7048]
    for i in range(3):
        x_train[:, :, :, i] = (x_train[:, :, :, i] - mean[i]) / std[i]
        x_test[:, :, :, i] = (x_test[:, :, :, i] - mean[i]) / std[i]
    return x_train, x_test
def scheduler(epoch):
    # step decay; Keras passes a 0-based epoch index
    if epoch <= 60:
        return 0.05
    if epoch <= 120:
        return 0.01
    if epoch <= 160:
        return 0.002
    return 0.0004
def build_model():
    model = Sequential()

    # mlpconv block 1
    model.add(Conv2D(192, (5, 5), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal", input_shape=x_train.shape[1:]))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(160, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(96, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same'))
    model.add(Dropout(dropout))

    # mlpconv block 2
    model.add(Conv2D(192, (5, 5), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(192, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(192, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same'))
    model.add(Dropout(dropout))

    # mlpconv block 3 + global average pooling head (one 10-channel map per class)
    model.add(Conv2D(192, (3, 3), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(192, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(10, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(GlobalAveragePooling2D())
    model.add(Activation('softmax'))

    # initial lr is overridden each epoch by the LearningRateScheduler callback
    sgd = optimizers.SGD(lr=.1, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model
if __name__ == '__main__':
    # load data
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    x_train, x_test = color_preprocessing(x_train, x_test)

    # build network
    model = build_model()
    print(model.summary())

    # set callback
    tb_cb = TensorBoard(log_dir=log_filepath, histogram_freq=0)
    change_lr = LearningRateScheduler(scheduler)
    cbks = [change_lr, tb_cb]

    # set data augmentation
    print('Using real-time data augmentation.')
    datagen = ImageDataGenerator(horizontal_flip=True, width_shift_range=0.125, height_shift_range=0.125, fill_mode='constant', cval=0.)
    datagen.fit(x_train)

    # start training
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size), steps_per_epoch=iterations, epochs=epochs, callbacks=cbks, validation_data=(x_test, y_test))
    model.save('nin_bn.h5')
Epoch 1/200
391/391 [==============================] - 28s 73ms/step - loss: 1.7259 - acc: 0.4661 - val_loss: 1.5627 - val_acc: 0.5164
Epoch 2/200
391/391 [==============================] - 27s 70ms/step - loss: 1.2940 - acc: 0.6205 - val_loss: 1.2845 - val_acc: 0.6258
Epoch 3/200
391/391 [==============================] - 27s 70ms/step - loss: 1.1351 - acc: 0.6760 - val_loss: 1.2144 - val_acc: 0.6544
Epoch 4/200
391/391 [==============================] - 27s 70ms/step - loss: 1.0269 - acc: 0.7157 - val_loss: 0.9367 - val_acc: 0.7466
Epoch 5/200
391/391 [==============================] - 27s 70ms/step - loss: 0.9579 - acc: 0.7374 - val_loss: 0.9483 - val_acc: 0.7403
Epoch 6/200
391/391 [==============================] - 27s 70ms/step - loss: 0.9060 - acc: 0.7555 - val_loss: 0.9904 - val_acc: 0.7349
Epoch 7/200
391/391 [==============================] - 27s 70ms/step - loss: 0.8567 - acc: 0.7707 - val_loss: 0.8772 - val_acc: 0.7666
Epoch 8/200
391/391 [==============================] - 27s 70ms/step - loss: 0.8219 - acc: 0.7834 - val_loss: 0.8766 - val_acc: 0.7682
Epoch 9/200
391/391 [==============================] - 27s 70ms/step - loss: 0.7990 - acc: 0.7900 - val_loss: 0.7812 - val_acc: 0.7979
Epoch 10/200
391/391 [==============================] - 27s 70ms/step - loss: 0.7694 - acc: 0.8016 - val_loss: 0.9660 - val_acc: 0.7459
Epoch 11/200
391/391 [==============================] - 27s 70ms/step - loss: 0.7473 - acc: 0.8084 - val_loss: 0.8327 - val_acc: 0.7792
Epoch 12/200
391/391 [==============================] - 27s 70ms/step - loss: 0.7313 - acc: 0.8142 - val_loss: 0.7896 - val_acc: 0.8009
...
...
391/391 [==============================] - 27s 70ms/step - loss: 0.2248 - acc: 0.9725 - val_loss: 0.4582 - val_acc: 0.9132
Epoch 128/200
391/391 [==============================] - 27s 70ms/step - loss: 0.2281 - acc: 0.9703 - val_loss: 0.4627 - val_acc: 0.9124
Epoch 129/200
391/391 [==============================] - 27s 70ms/step - loss: 0.2214 - acc: 0.9729 - val_loss: 0.4624 - val_acc: 0.9120
Epoch 130/200
I created a new pull request in 8cc556b; feel free to take a look.
BIGBALLON commented
@jiangshaoye For your second problem:
This is an mlpconv block in Network_in_Network_keras.py:
model.add(Conv2D(192, (5, 5), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal", input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(160, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
model.add(Activation('relu'))
model.add(Conv2D(96, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same'))
And this is the same mlpconv block with BN in Network_in_Network_bn_keras.py:
NIN was published in 2013 and BN in 2015, so the NIN paper doesn't use BN. But we want to use BN to improve the model's performance, which is why my model differs a bit from the original paper. I think it is still an mlpconv, though: BN is simply inserted between each convolution and its ReLU, following the placement in the BN paper.
model.add(Conv2D(192, (5, 5), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal", input_shape=x_train.shape[1:]))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(160, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(96, (1, 1), padding='same', kernel_regularizer=keras.regularizers.l2(weight_decay), kernel_initializer="he_normal"))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same'))
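By the way, since this Conv -> BN -> ReLU triple repeats for every layer of the BN variant, you could factor it into a small helper. Just a sketch (not in the repo; it assumes the same imports and weight_decay as the script above):

# hypothetical helper capturing the repeated Conv -> BN -> ReLU pattern
def conv_bn_relu(model, filters, kernel_size, **kwargs):
    model.add(Conv2D(filters, kernel_size, padding='same',
                     kernel_regularizer=keras.regularizers.l2(weight_decay),
                     kernel_initializer="he_normal", **kwargs))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

# usage: conv_bn_relu(model, 192, (5, 5), input_shape=x_train.shape[1:])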
I'll close this issue now; feel free to reopen it if you need to.