Bug: Incorrect Implementation of Inception_v4 Net in .prototxt file!
dprosperino opened this issue · 0 comments
Hey Guys,
Seems like you have made a mistake in the design of the Inception-C block in the Inception-v4 network. The attached picture should explain the mistake clearly: a 3x1 convolutional layer has been swapped with a 1x3 convolutional layer.
This should not be too big of a deal in terms of performance or functionality, however, it could lead to bugs, if you build your network from scratch and use the pretrained weights linked on this site.
Tips for other developers,
If you use this .prototxt file and these pretrained weights, you will be fine and your CNN will work perfectly. However, if you build your Inception-v4 network from scratch according to the paper and then use these pretrained weights, Caffe will not be able to match the weights and will throw the following error:
Cannot copy param 0 weights from layer 'inception_c1_3x1_2'; shape mismatch. Source param shape is 448 384 3 1 (516096); target param shape is 512 448 3 1 (688128). To learn this layer's parameters from scratch rather than copying from a saved net, rename the layer.
To fix this problem and still be able to use the pretrained weights, simply build the Inception-C part of your Inception-v4 network according to the picture on this post.
I hope I was able to save someone a few hours of debugging.
Cheers,
d3lt4-papa
Click to view actual code
Code of one Inception-C block in Inception-v4 according to original paper
# Inception-C, branch 1 (paper wiring):
# 3x3 average pooling with stride 1 and pad 1 (spatial size preserved),
# followed by a 1x1 convolution projecting to 256 channels,
# then BatchNorm + Scale + ReLU (the conv has bias_term: false because the
# Scale layer after BatchNorm supplies the bias).
layer {
name: "inception_c1_pool_ave"
type: "Pooling"
bottom: "reduction_b_concat"
top: "inception_c1_pool_ave"
pooling_param {
pool: AVE
kernel_size: 3
stride: 1
pad: 1
}
}
# 1x1 conv, 256 outputs, on the pooled feature map.
layer {
name: "inception_c1_1x1"
type: "Convolution"
bottom: "inception_c1_pool_ave"
top: "inception_c1_1x1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
}
}
# BatchNorm in training mode (use_global_stats: false); switch to true for
# pure inference deployment.
layer {
name: "inception_c1_1x1_bn"
type: "BatchNorm"
bottom: "inception_c1_1x1"
top: "inception_c1_1x1"
batch_norm_param {
use_global_stats: false
}
}
# Learned per-channel scale and bias after BatchNorm.
layer {
name: "inception_c1_1x1_scale"
type: "Scale"
bottom: "inception_c1_1x1"
top: "inception_c1_1x1"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_1x1_relu"
type: "ReLU"
bottom: "inception_c1_1x1"
top: "inception_c1_1x1"
}
# Inception-C, branch 2 (paper wiring): a plain 1x1 convolution (256 outputs)
# taken directly from the block input reduction_b_concat, followed by
# BatchNorm + Scale + ReLU.
layer {
name: "inception_c1_1x1_2"
type: "Convolution"
bottom: "reduction_b_concat"
top: "inception_c1_1x1_2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
}
}
layer {
name: "inception_c1_1x1_2_bn"
type: "BatchNorm"
bottom: "inception_c1_1x1_2"
top: "inception_c1_1x1_2"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_1x1_2_scale"
type: "Scale"
bottom: "inception_c1_1x1_2"
top: "inception_c1_1x1_2"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_1x1_2_relu"
type: "ReLU"
bottom: "inception_c1_1x1_2"
top: "inception_c1_1x1_2"
}
# Inception-C, branch 3 (paper wiring): 1x1 conv to 384 channels, then two
# PARALLEL asymmetric convolutions that both read inception_c1_1x1_3:
#   - 1x3 conv (kernel_h=1, kernel_w=3, pad 0/1) -> 256 channels
#   - 3x1 conv (kernel_h=3, kernel_w=1, pad 1/0) -> 256 channels
# The asymmetric pads keep the spatial size unchanged.
layer {
name: "inception_c1_1x1_3"
type: "Convolution"
bottom: "reduction_b_concat"
top: "inception_c1_1x1_3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 384
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
}
}
layer {
name: "inception_c1_1x1_3_bn"
type: "BatchNorm"
bottom: "inception_c1_1x1_3"
top: "inception_c1_1x1_3"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_1x1_3_scale"
type: "Scale"
bottom: "inception_c1_1x1_3"
top: "inception_c1_1x1_3"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_1x1_3_relu"
type: "ReLU"
bottom: "inception_c1_1x1_3"
top: "inception_c1_1x1_3"
}
# Parallel 1x3 arm: 384 -> 256 channels.
layer {
name: "inception_c1_1x3"
type: "Convolution"
bottom: "inception_c1_1x1_3"
top: "inception_c1_1x3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
pad_h: 0
pad_w: 1
kernel_h: 1
kernel_w: 3
}
}
layer {
name: "inception_c1_1x3_bn"
type: "BatchNorm"
bottom: "inception_c1_1x3"
top: "inception_c1_1x3"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_1x3_scale"
type: "Scale"
bottom: "inception_c1_1x3"
top: "inception_c1_1x3"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_1x3_relu"
type: "ReLU"
bottom: "inception_c1_1x3"
top: "inception_c1_1x3"
}
# Parallel 3x1 arm: 384 -> 256 channels (same bottom as the 1x3 arm).
layer {
name: "inception_c1_3x1"
type: "Convolution"
bottom: "inception_c1_1x1_3"
top: "inception_c1_3x1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
pad_h: 1
pad_w: 0
kernel_h: 3
kernel_w: 1
}
}
layer {
name: "inception_c1_3x1_bn"
type: "BatchNorm"
bottom: "inception_c1_3x1"
top: "inception_c1_3x1"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_3x1_scale"
type: "Scale"
bottom: "inception_c1_3x1"
top: "inception_c1_3x1"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_3x1_relu"
type: "ReLU"
bottom: "inception_c1_3x1"
top: "inception_c1_3x1"
}
# Inception-C, branch 4 (paper wiring):
#   1x1 (384) -> 1x3 (448) -> 3x1 (512) -> parallel {1x3 (256), 3x1 (256)}
# NOTE(review): this is the ordering the issue describes as "according to the
# paper". It is INCOMPATIBLE with the pretrained weights linked on the site:
# per the reported error, the pretrained 'inception_c1_3x1_2' has shape
# 448 384 3 1 (3x1 with 384 in / 448 out), i.e. the weights were trained with
# the 3x1 conv FIRST — see the weights-compatible snippet below.
layer {
name: "inception_c1_1x1_4"
type: "Convolution"
bottom: "reduction_b_concat"
top: "inception_c1_1x1_4"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 384
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
}
}
layer {
name: "inception_c1_1x1_4_bn"
type: "BatchNorm"
bottom: "inception_c1_1x1_4"
top: "inception_c1_1x1_4"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_1x1_4_scale"
type: "Scale"
bottom: "inception_c1_1x1_4"
top: "inception_c1_1x1_4"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_1x1_4_relu"
type: "ReLU"
bottom: "inception_c1_1x1_4"
top: "inception_c1_1x1_4"
}
# 1x3 conv: 384 -> 448 channels.
layer {
name: "inception_c1_1x3_2"
type: "Convolution"
bottom: "inception_c1_1x1_4"
top: "inception_c1_1x3_2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 448
bias_term: false
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
pad_h: 0
pad_w: 1
kernel_h: 1
kernel_w: 3
}
}
layer {
name: "inception_c1_1x3_2_bn"
type: "BatchNorm"
bottom: "inception_c1_1x3_2"
top: "inception_c1_1x3_2"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_1x3_2_scale"
type: "Scale"
bottom: "inception_c1_1x3_2"
top: "inception_c1_1x3_2"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_1x3_2_relu"
type: "ReLU"
bottom: "inception_c1_1x3_2"
top: "inception_c1_1x3_2"
}
# 3x1 conv: 448 -> 512 channels.
layer {
name: "inception_c1_3x1_2"
type: "Convolution"
bottom: "inception_c1_1x3_2"
top: "inception_c1_3x1_2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 512
bias_term: false
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
pad_h: 1
pad_w: 0
kernel_h: 3
kernel_w: 1
}
}
layer {
name: "inception_c1_3x1_2_bn"
type: "BatchNorm"
bottom: "inception_c1_3x1_2"
top: "inception_c1_3x1_2"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_3x1_2_scale"
type: "Scale"
bottom: "inception_c1_3x1_2"
top: "inception_c1_3x1_2"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_3x1_2_relu"
type: "ReLU"
bottom: "inception_c1_3x1_2"
top: "inception_c1_3x1_2"
}
# Parallel split off inception_c1_3x1_2: 1x3 arm, 512 -> 256 channels.
layer {
name: "inception_c1_1x3_3"
type: "Convolution"
bottom: "inception_c1_3x1_2"
top: "inception_c1_1x3_3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
pad_h: 0
pad_w: 1
kernel_h: 1
kernel_w: 3
}
}
layer {
name: "inception_c1_1x3_3_bn"
type: "BatchNorm"
bottom: "inception_c1_1x3_3"
top: "inception_c1_1x3_3"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_1x3_3_scale"
type: "Scale"
bottom: "inception_c1_1x3_3"
top: "inception_c1_1x3_3"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_1x3_3_relu"
type: "ReLU"
bottom: "inception_c1_1x3_3"
top: "inception_c1_1x3_3"
}
# Parallel split off inception_c1_3x1_2: 3x1 arm, 512 -> 256 channels.
layer {
name: "inception_c1_3x1_3"
type: "Convolution"
bottom: "inception_c1_3x1_2"
top: "inception_c1_3x1_3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
pad_h: 1
pad_w: 0
kernel_h: 3
kernel_w: 1
}
}
layer {
name: "inception_c1_3x1_3_bn"
type: "BatchNorm"
bottom: "inception_c1_3x1_3"
top: "inception_c1_3x1_3"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_3x1_3_scale"
type: "Scale"
bottom: "inception_c1_3x1_3"
top: "inception_c1_3x1_3"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_3x1_3_relu"
type: "ReLU"
bottom: "inception_c1_3x1_3"
top: "inception_c1_3x1_3"
}
# Channel-wise concatenation of all six branch outputs
# (256 x 6 = 1536 channels total).
layer {
name: "inception_c1_concat"
type: "Concat"
bottom: "inception_c1_1x1"
bottom: "inception_c1_1x1_2"
bottom: "inception_c1_1x3"
bottom: "inception_c1_3x1"
bottom: "inception_c1_1x3_3"
bottom: "inception_c1_3x1_3"
top: "inception_c1_concat"
}
Code of one Inception-C block, if you want to be able to use these pretrained weights
# Weights-compatible Inception-C, branch 1: identical to the paper snippet —
# 3x3 average pooling (stride 1, pad 1, spatial size preserved) followed by a
# 1x1 conv to 256 channels, then BatchNorm + Scale + ReLU.
layer {
name: "inception_c1_pool_ave"
type: "Pooling"
bottom: "reduction_b_concat"
top: "inception_c1_pool_ave"
pooling_param {
pool: AVE
kernel_size: 3
stride: 1
pad: 1
}
}
layer {
name: "inception_c1_1x1"
type: "Convolution"
bottom: "inception_c1_pool_ave"
top: "inception_c1_1x1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
}
}
layer {
name: "inception_c1_1x1_bn"
type: "BatchNorm"
bottom: "inception_c1_1x1"
top: "inception_c1_1x1"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_1x1_scale"
type: "Scale"
bottom: "inception_c1_1x1"
top: "inception_c1_1x1"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_1x1_relu"
type: "ReLU"
bottom: "inception_c1_1x1"
top: "inception_c1_1x1"
}
# Weights-compatible Inception-C, branch 2: plain 1x1 conv (256 outputs)
# directly from reduction_b_concat, then BatchNorm + Scale + ReLU.
# Identical to the paper snippet.
layer {
name: "inception_c1_1x1_2"
type: "Convolution"
bottom: "reduction_b_concat"
top: "inception_c1_1x1_2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
}
}
layer {
name: "inception_c1_1x1_2_bn"
type: "BatchNorm"
bottom: "inception_c1_1x1_2"
top: "inception_c1_1x1_2"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_1x1_2_scale"
type: "Scale"
bottom: "inception_c1_1x1_2"
top: "inception_c1_1x1_2"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_1x1_2_relu"
type: "ReLU"
bottom: "inception_c1_1x1_2"
top: "inception_c1_1x1_2"
}
# Weights-compatible Inception-C, branch 3: 1x1 conv to 384 channels, then two
# PARALLEL asymmetric convolutions reading inception_c1_1x1_3
# (1x3 -> 256 and 3x1 -> 256). Identical to the paper snippet.
layer {
name: "inception_c1_1x1_3"
type: "Convolution"
bottom: "reduction_b_concat"
top: "inception_c1_1x1_3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 384
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
}
}
layer {
name: "inception_c1_1x1_3_bn"
type: "BatchNorm"
bottom: "inception_c1_1x1_3"
top: "inception_c1_1x1_3"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_1x1_3_scale"
type: "Scale"
bottom: "inception_c1_1x1_3"
top: "inception_c1_1x1_3"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_1x1_3_relu"
type: "ReLU"
bottom: "inception_c1_1x1_3"
top: "inception_c1_1x1_3"
}
# Parallel 1x3 arm: 384 -> 256 channels (pad 0/1 preserves spatial size).
layer {
name: "inception_c1_1x3"
type: "Convolution"
bottom: "inception_c1_1x1_3"
top: "inception_c1_1x3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
pad_h: 0
pad_w: 1
kernel_h: 1
kernel_w: 3
}
}
layer {
name: "inception_c1_1x3_bn"
type: "BatchNorm"
bottom: "inception_c1_1x3"
top: "inception_c1_1x3"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_1x3_scale"
type: "Scale"
bottom: "inception_c1_1x3"
top: "inception_c1_1x3"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_1x3_relu"
type: "ReLU"
bottom: "inception_c1_1x3"
top: "inception_c1_1x3"
}
# Parallel 3x1 arm: 384 -> 256 channels (same bottom as the 1x3 arm).
layer {
name: "inception_c1_3x1"
type: "Convolution"
bottom: "inception_c1_1x1_3"
top: "inception_c1_3x1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
pad_h: 1
pad_w: 0
kernel_h: 3
kernel_w: 1
}
}
layer {
name: "inception_c1_3x1_bn"
type: "BatchNorm"
bottom: "inception_c1_3x1"
top: "inception_c1_3x1"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_3x1_scale"
type: "Scale"
bottom: "inception_c1_3x1"
top: "inception_c1_3x1"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_3x1_relu"
type: "ReLU"
bottom: "inception_c1_3x1"
top: "inception_c1_3x1"
}
# Weights-compatible Inception-C, branch 4 — the part that differs from the
# paper snippet above:
#   1x1 (384) -> 3x1 (448) -> 1x3 (512) -> parallel {1x3 (256), 3x1 (256)}
# i.e. the 3x1 conv comes FIRST here. This matches the pretrained weights:
# the reported shape-mismatch error shows the saved 'inception_c1_3x1_2'
# param is 448 384 3 1 (3x1 kernel, 384 in, 448 out).
layer {
name: "inception_c1_1x1_4"
type: "Convolution"
bottom: "reduction_b_concat"
top: "inception_c1_1x1_4"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 384
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
}
}
layer {
name: "inception_c1_1x1_4_bn"
type: "BatchNorm"
bottom: "inception_c1_1x1_4"
top: "inception_c1_1x1_4"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_1x1_4_scale"
type: "Scale"
bottom: "inception_c1_1x1_4"
top: "inception_c1_1x1_4"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_1x1_4_relu"
type: "ReLU"
bottom: "inception_c1_1x1_4"
top: "inception_c1_1x1_4"
}
# 3x1 conv first: 384 -> 448 channels (matches saved shape 448 384 3 1).
layer {
name: "inception_c1_3x1_2"
type: "Convolution"
bottom: "inception_c1_1x1_4"
top: "inception_c1_3x1_2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 448
bias_term: false
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
pad_h: 1
pad_w: 0
kernel_h: 3
kernel_w: 1
}
}
layer {
name: "inception_c1_3x1_2_bn"
type: "BatchNorm"
bottom: "inception_c1_3x1_2"
top: "inception_c1_3x1_2"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_3x1_2_scale"
type: "Scale"
bottom: "inception_c1_3x1_2"
top: "inception_c1_3x1_2"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_3x1_2_relu"
type: "ReLU"
bottom: "inception_c1_3x1_2"
top: "inception_c1_3x1_2"
}
# 1x3 conv second: 448 -> 512 channels.
layer {
name: "inception_c1_1x3_2"
type: "Convolution"
bottom: "inception_c1_3x1_2"
top: "inception_c1_1x3_2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 512
bias_term: false
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
pad_h: 0
pad_w: 1
kernel_h: 1
kernel_w: 3
}
}
layer {
name: "inception_c1_1x3_2_bn"
type: "BatchNorm"
bottom: "inception_c1_1x3_2"
top: "inception_c1_1x3_2"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_1x3_2_scale"
type: "Scale"
bottom: "inception_c1_1x3_2"
top: "inception_c1_1x3_2"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_1x3_2_relu"
type: "ReLU"
bottom: "inception_c1_1x3_2"
top: "inception_c1_1x3_2"
}
# Parallel split off inception_c1_1x3_2: 1x3 arm, 512 -> 256 channels.
layer {
name: "inception_c1_1x3_3"
type: "Convolution"
bottom: "inception_c1_1x3_2"
top: "inception_c1_1x3_3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
pad_h: 0
pad_w: 1
kernel_h: 1
kernel_w: 3
}
}
layer {
name: "inception_c1_1x3_3_bn"
type: "BatchNorm"
bottom: "inception_c1_1x3_3"
top: "inception_c1_1x3_3"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_1x3_3_scale"
type: "Scale"
bottom: "inception_c1_1x3_3"
top: "inception_c1_1x3_3"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_1x3_3_relu"
type: "ReLU"
bottom: "inception_c1_1x3_3"
top: "inception_c1_1x3_3"
}
# Parallel split off inception_c1_1x3_2: 3x1 arm, 512 -> 256 channels.
layer {
name: "inception_c1_3x1_3"
type: "Convolution"
bottom: "inception_c1_1x3_2"
top: "inception_c1_3x1_3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 256
bias_term: false
stride: 1
weight_filler {
type: "xavier"
std: 0.00999999977648
}
pad_h: 1
pad_w: 0
kernel_h: 3
kernel_w: 1
}
}
layer {
name: "inception_c1_3x1_3_bn"
type: "BatchNorm"
bottom: "inception_c1_3x1_3"
top: "inception_c1_3x1_3"
batch_norm_param {
use_global_stats: false
}
}
layer {
name: "inception_c1_3x1_3_scale"
type: "Scale"
bottom: "inception_c1_3x1_3"
top: "inception_c1_3x1_3"
scale_param {
bias_term: true
}
}
layer {
name: "inception_c1_3x1_3_relu"
type: "ReLU"
bottom: "inception_c1_3x1_3"
top: "inception_c1_3x1_3"
}
# Channel-wise concatenation of all six branch outputs
# (256 x 6 = 1536 channels total).
layer {
name: "inception_c1_concat"
type: "Concat"
bottom: "inception_c1_1x1"
bottom: "inception_c1_1x1_2"
bottom: "inception_c1_1x3"
bottom: "inception_c1_3x1"
bottom: "inception_c1_1x3_3"
bottom: "inception_c1_3x1_3"
top: "inception_c1_concat"
}