soeaver/caffe-model

Bug: Incorrect Implementation of Inception_v4 Net in .prototxt file!

dprosperino opened this issue · 0 comments

Hey Guys,

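It seems like you have made a mistake in the design of the Inception-C block in the Inception-v4 network. The attached picture should explain the mistake pretty clearly: in the deepest branch you swapped a 3x1 convolutional layer with a 1x3 convolutional layer (the 448-output layer is 3x1 and the 512-output layer is 1x3, whereas the paper has them the other way round).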

Inception-C_block_mistake

This should not be a big deal in terms of performance or functionality. However, it could lead to bugs if you build your network from scratch and use the pretrained weights linked on this site.

Tips for other developers,

If you use this .prototxt file and these pretrained weights, you will be fine and your CNN will work perfectly. However, if you build your Inception-v4 network from scratch according to the paper and then load these pretrained weights, Caffe will not be able to match the weights and will throw the following error:
Cannot copy param 0 weights from layer 'inception_c1_3x1_2'; shape mismatch. Source param shape is 448 384 3 1 (516096); target param shape is 512 448 3 1 (688128). To learn this layer's parameters from scratch rather than copying from a saved net, rename the layer.

To fix this problem and still be able to use the pretrained weights, simply build the Inception-C part of your Inception-v4 network according to the picture in this post (see the second code listing below).

I hope I was able to save someone a few hours of debugging.

Cheers,
d3lt4-papa

Click to view actual code

Code of one Inception-C block in Inception-v4 according to original paper

# Pooling branch, step 1: 3x3 average pooling over the block input
# ("reduction_b_concat"). kernel 3 / stride 1 / pad 1 leaves the spatial
# size unchanged.
layer {
  name: "inception_c1_pool_ave"
  type: "Pooling"
  bottom: "reduction_b_concat"
  top: "inception_c1_pool_ave"
  pooling_param {
    pool: AVE
    kernel_size: 3
    stride: 1
    pad: 1
  }
}
# Pooling branch, step 2: 1x1 convolution (256 outputs) on the pooled input,
# followed by in-place BatchNorm -> Scale -> ReLU.
layer {
  name: "inception_c1_1x1"
  type: "Convolution"
  bottom: "inception_c1_pool_ave"
  top: "inception_c1_1x1"
  # One param entry only: bias_term is false below, so the weights are the
  # sole learnable blob of this layer.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    # NOTE(review): `std` looks like a leftover; the xavier filler presumably
    # does not read it — confirm against the Caffe filler implementation.
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_1x1_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1"
  top: "inception_c1_1x1"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_1x1_scale"
  type: "Scale"
  bottom: "inception_c1_1x1"
  top: "inception_c1_1x1"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU; "inception_c1_1x1" is the first input of the final concat.
layer {
  name: "inception_c1_1x1_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1"
  top: "inception_c1_1x1"
}
# Plain 1x1 branch: 1x1 convolution (256 outputs) applied directly to the
# block input, followed by in-place BatchNorm -> Scale -> ReLU.
layer {
  name: "inception_c1_1x1_2"
  type: "Convolution"
  bottom: "reduction_b_concat"
  top: "inception_c1_1x1_2"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_1x1_2_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1_2"
  top: "inception_c1_1x1_2"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_1x1_2_scale"
  type: "Scale"
  bottom: "inception_c1_1x1_2"
  top: "inception_c1_1x1_2"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU; "inception_c1_1x1_2" is the second input of the final concat.
layer {
  name: "inception_c1_1x1_2_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1_2"
  top: "inception_c1_1x1_2"
}
# Split branch, stem: 1x1 convolution (384 outputs) on the block input.
# Its output feeds both "inception_c1_1x3" and "inception_c1_3x1".
layer {
  name: "inception_c1_1x1_3"
  type: "Convolution"
  bottom: "reduction_b_concat"
  top: "inception_c1_1x1_3"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 384
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_1x1_3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x1_3"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_1x1_3_scale"
  type: "Scale"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x1_3"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU.
layer {
  name: "inception_c1_1x1_3_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x1_3"
}
# Split branch, arm A: 1x3 convolution (256 outputs) on the 384-channel
# output of "inception_c1_1x1_3", then in-place BatchNorm -> Scale -> ReLU.
layer {
  name: "inception_c1_1x3"
  type: "Convolution"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x3"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    # 1 (height) x 3 (width) kernel; padding only along the width keeps the
    # spatial size unchanged at stride 1.
    pad_h: 0
    pad_w: 1
    kernel_h: 1
    kernel_w: 3
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_1x3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x3"
  top: "inception_c1_1x3"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_1x3_scale"
  type: "Scale"
  bottom: "inception_c1_1x3"
  top: "inception_c1_1x3"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU; "inception_c1_1x3" is the third input of the final concat.
layer {
  name: "inception_c1_1x3_relu"
  type: "ReLU"
  bottom: "inception_c1_1x3"
  top: "inception_c1_1x3"
}
# Split branch, arm B: 3x1 convolution (256 outputs) on the 384-channel
# output of "inception_c1_1x1_3", then in-place BatchNorm -> Scale -> ReLU.
layer {
  name: "inception_c1_3x1"
  type: "Convolution"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_3x1"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    # 3 (height) x 1 (width) kernel; padding only along the height keeps the
    # spatial size unchanged at stride 1.
    pad_h: 1
    pad_w: 0
    kernel_h: 3
    kernel_w: 1
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_3x1_bn"
  type: "BatchNorm"
  bottom: "inception_c1_3x1"
  top: "inception_c1_3x1"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_3x1_scale"
  type: "Scale"
  bottom: "inception_c1_3x1"
  top: "inception_c1_3x1"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU; "inception_c1_3x1" is the fourth input of the final concat.
layer {
  name: "inception_c1_3x1_relu"
  type: "ReLU"
  bottom: "inception_c1_3x1"
  top: "inception_c1_3x1"
}
# Deep branch, step 1: 1x1 convolution (384 outputs) on the block input.
# In this (paper) layout its output feeds "inception_c1_1x3_2".
layer {
  name: "inception_c1_1x1_4"
  type: "Convolution"
  bottom: "reduction_b_concat"
  top: "inception_c1_1x1_4"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 384
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_1x1_4_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_1x1_4"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_1x1_4_scale"
  type: "Scale"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_1x1_4"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU.
layer {
  name: "inception_c1_1x1_4_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_1x1_4"
}
# Deep branch, step 2 (paper layout): 1x3 convolution, 384 -> 448 channels,
# reading "inception_c1_1x1_4". In the layout that matches the pretrained
# weights this position is taken by the 3x1 layer "inception_c1_3x1_2".
layer {
  name: "inception_c1_1x3_2"
  type: "Convolution"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_1x3_2"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 448
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    # 1 (height) x 3 (width) kernel; padding only along the width keeps the
    # spatial size unchanged at stride 1.
    pad_h: 0
    pad_w: 1
    kernel_h: 1
    kernel_w: 3
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_1x3_2_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_1x3_2"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_1x3_2_scale"
  type: "Scale"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_1x3_2"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU.
layer {
  name: "inception_c1_1x3_2_relu"
  type: "ReLU"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_1x3_2"
}
# Deep branch, step 3 (paper layout): 3x1 convolution, 448 -> 512 channels,
# reading "inception_c1_1x3_2". This is the layer named in the quoted error:
# its weight shape here is 512 x 448 x 3 x 1 (the "target param shape"),
# while the pretrained model stores 448 x 384 x 3 x 1 under the same name.
layer {
  name: "inception_c1_3x1_2"
  type: "Convolution"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_3x1_2"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 512
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    # 3 (height) x 1 (width) kernel; padding only along the height keeps the
    # spatial size unchanged at stride 1.
    pad_h: 1
    pad_w: 0
    kernel_h: 3
    kernel_w: 1
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_3x1_2_bn"
  type: "BatchNorm"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_3x1_2"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_3x1_2_scale"
  type: "Scale"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_3x1_2"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU; the result feeds both final arms of the deep branch.
layer {
  name: "inception_c1_3x1_2_relu"
  type: "ReLU"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_3x1_2"
}
# Deep branch, final arm A (paper layout): 1x3 convolution, 512 -> 256
# channels, reading "inception_c1_3x1_2".
layer {
  name: "inception_c1_1x3_3"
  type: "Convolution"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_1x3_3"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    # 1 (height) x 3 (width) kernel; padding only along the width keeps the
    # spatial size unchanged at stride 1.
    pad_h: 0
    pad_w: 1
    kernel_h: 1
    kernel_w: 3
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_1x3_3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x3_3"
  top: "inception_c1_1x3_3"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_1x3_3_scale"
  type: "Scale"
  bottom: "inception_c1_1x3_3"
  top: "inception_c1_1x3_3"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU; "inception_c1_1x3_3" is the fifth input of the final concat.
layer {
  name: "inception_c1_1x3_3_relu"
  type: "ReLU"
  bottom: "inception_c1_1x3_3"
  top: "inception_c1_1x3_3"
}
# Deep branch, final arm B (paper layout): 3x1 convolution, 512 -> 256
# channels, reading "inception_c1_3x1_2".
layer {
  name: "inception_c1_3x1_3"
  type: "Convolution"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_3x1_3"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    # 3 (height) x 1 (width) kernel; padding only along the height keeps the
    # spatial size unchanged at stride 1.
    pad_h: 1
    pad_w: 0
    kernel_h: 3
    kernel_w: 1
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_3x1_3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_3x1_3"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_3x1_3_scale"
  type: "Scale"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_3x1_3"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU; "inception_c1_3x1_3" is the sixth input of the final concat.
layer {
  name: "inception_c1_3x1_3_relu"
  type: "ReLU"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_3x1_3"
}
# Block output: concatenates the six branch outputs, each produced by a
# 256-output convolution in this listing (6 x 256 = 1536 channels when
# concatenated along the channel axis, which is presumably the default here).
# The order of the `bottom` entries determines the channel order of the result.
layer {
  name: "inception_c1_concat"
  type: "Concat"
  bottom: "inception_c1_1x1"
  bottom: "inception_c1_1x1_2"
  bottom: "inception_c1_1x3"
  bottom: "inception_c1_3x1"
  bottom: "inception_c1_1x3_3"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_concat"
}

Code of one Inception-C block, if you want to be able to use these pretrained weights

# Pooling branch, step 1: 3x3 average pooling over the block input
# ("reduction_b_concat"). kernel 3 / stride 1 / pad 1 leaves the spatial
# size unchanged.
layer {
  name: "inception_c1_pool_ave"
  type: "Pooling"
  bottom: "reduction_b_concat"
  top: "inception_c1_pool_ave"
  pooling_param {
    pool: AVE
    kernel_size: 3
    stride: 1
    pad: 1
  }
}
# Pooling branch, step 2: 1x1 convolution (256 outputs) on the pooled input,
# followed by in-place BatchNorm -> Scale -> ReLU.
layer {
  name: "inception_c1_1x1"
  type: "Convolution"
  bottom: "inception_c1_pool_ave"
  top: "inception_c1_1x1"
  # One param entry only: bias_term is false below, so the weights are the
  # sole learnable blob of this layer.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    # NOTE(review): `std` looks like a leftover; the xavier filler presumably
    # does not read it — confirm against the Caffe filler implementation.
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_1x1_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1"
  top: "inception_c1_1x1"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_1x1_scale"
  type: "Scale"
  bottom: "inception_c1_1x1"
  top: "inception_c1_1x1"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU; "inception_c1_1x1" is the first input of the final concat.
layer {
  name: "inception_c1_1x1_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1"
  top: "inception_c1_1x1"
}
# Plain 1x1 branch: 1x1 convolution (256 outputs) applied directly to the
# block input, followed by in-place BatchNorm -> Scale -> ReLU.
layer {
  name: "inception_c1_1x1_2"
  type: "Convolution"
  bottom: "reduction_b_concat"
  top: "inception_c1_1x1_2"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_1x1_2_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1_2"
  top: "inception_c1_1x1_2"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_1x1_2_scale"
  type: "Scale"
  bottom: "inception_c1_1x1_2"
  top: "inception_c1_1x1_2"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU; "inception_c1_1x1_2" is the second input of the final concat.
layer {
  name: "inception_c1_1x1_2_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1_2"
  top: "inception_c1_1x1_2"
}
# Split branch, stem: 1x1 convolution (384 outputs) on the block input.
# Its output feeds both "inception_c1_1x3" and "inception_c1_3x1".
layer {
  name: "inception_c1_1x1_3"
  type: "Convolution"
  bottom: "reduction_b_concat"
  top: "inception_c1_1x1_3"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 384
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_1x1_3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x1_3"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_1x1_3_scale"
  type: "Scale"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x1_3"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU.
layer {
  name: "inception_c1_1x1_3_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x1_3"
}
# Split branch, arm A: 1x3 convolution (256 outputs) on the 384-channel
# output of "inception_c1_1x1_3", then in-place BatchNorm -> Scale -> ReLU.
layer {
  name: "inception_c1_1x3"
  type: "Convolution"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x3"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    # 1 (height) x 3 (width) kernel; padding only along the width keeps the
    # spatial size unchanged at stride 1.
    pad_h: 0
    pad_w: 1
    kernel_h: 1
    kernel_w: 3
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_1x3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x3"
  top: "inception_c1_1x3"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_1x3_scale"
  type: "Scale"
  bottom: "inception_c1_1x3"
  top: "inception_c1_1x3"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU; "inception_c1_1x3" is the third input of the final concat.
layer {
  name: "inception_c1_1x3_relu"
  type: "ReLU"
  bottom: "inception_c1_1x3"
  top: "inception_c1_1x3"
}
# Split branch, arm B: 3x1 convolution (256 outputs) on the 384-channel
# output of "inception_c1_1x1_3", then in-place BatchNorm -> Scale -> ReLU.
layer {
  name: "inception_c1_3x1"
  type: "Convolution"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_3x1"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    # 3 (height) x 1 (width) kernel; padding only along the height keeps the
    # spatial size unchanged at stride 1.
    pad_h: 1
    pad_w: 0
    kernel_h: 3
    kernel_w: 1
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_3x1_bn"
  type: "BatchNorm"
  bottom: "inception_c1_3x1"
  top: "inception_c1_3x1"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_3x1_scale"
  type: "Scale"
  bottom: "inception_c1_3x1"
  top: "inception_c1_3x1"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU; "inception_c1_3x1" is the fourth input of the final concat.
layer {
  name: "inception_c1_3x1_relu"
  type: "ReLU"
  bottom: "inception_c1_3x1"
  top: "inception_c1_3x1"
}
# Deep branch, step 1: 1x1 convolution (384 outputs) on the block input.
# In this (pretrained-weights) layout its output feeds "inception_c1_3x1_2".
layer {
  name: "inception_c1_1x1_4"
  type: "Convolution"
  bottom: "reduction_b_concat"
  top: "inception_c1_1x1_4"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 384
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_1x1_4_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_1x1_4"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_1x1_4_scale"
  type: "Scale"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_1x1_4"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU.
layer {
  name: "inception_c1_1x1_4_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_1x1_4"
}
# Deep branch, step 2 (pretrained-weights layout): 3x1 convolution,
# 384 -> 448 channels, reading "inception_c1_1x1_4" directly. This is the
# deviation from the paper, which places a 1x3 convolution here. The weight
# shape 448 x 384 x 3 x 1 matches the "Source param shape" in the quoted
# error message.
layer {
  name: "inception_c1_3x1_2"
  type: "Convolution"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_3x1_2"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 448
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    # 3 (height) x 1 (width) kernel; padding only along the height keeps the
    # spatial size unchanged at stride 1.
    pad_h: 1
    pad_w: 0
    kernel_h: 3
    kernel_w: 1
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_3x1_2_bn"
  type: "BatchNorm"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_3x1_2"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_3x1_2_scale"
  type: "Scale"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_3x1_2"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU.
layer {
  name: "inception_c1_3x1_2_relu"
  type: "ReLU"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_3x1_2"
}
# Deep branch, step 3 (pretrained-weights layout): 1x3 convolution,
# 448 -> 512 channels, reading "inception_c1_3x1_2". The paper places a 3x1
# convolution at this position instead.
layer {
  name: "inception_c1_1x3_2"
  type: "Convolution"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_1x3_2"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 512
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    # 1 (height) x 3 (width) kernel; padding only along the width keeps the
    # spatial size unchanged at stride 1.
    pad_h: 0
    pad_w: 1
    kernel_h: 1
    kernel_w: 3
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_1x3_2_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_1x3_2"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_1x3_2_scale"
  type: "Scale"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_1x3_2"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU; the result feeds both final arms of the deep branch.
layer {
  name: "inception_c1_1x3_2_relu"
  type: "ReLU"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_1x3_2"
}
# Deep branch, final arm A (pretrained-weights layout): 1x3 convolution,
# 512 -> 256 channels, reading "inception_c1_1x3_2".
layer {
  name: "inception_c1_1x3_3"
  type: "Convolution"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_1x3_3"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    # 1 (height) x 3 (width) kernel; padding only along the width keeps the
    # spatial size unchanged at stride 1.
    pad_h: 0
    pad_w: 1
    kernel_h: 1
    kernel_w: 3
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_1x3_3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x3_3"
  top: "inception_c1_1x3_3"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_1x3_3_scale"
  type: "Scale"
  bottom: "inception_c1_1x3_3"
  top: "inception_c1_1x3_3"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU; "inception_c1_1x3_3" is the fifth input of the final concat.
layer {
  name: "inception_c1_1x3_3_relu"
  type: "ReLU"
  bottom: "inception_c1_1x3_3"
  top: "inception_c1_1x3_3"
}
# Deep branch, final arm B (pretrained-weights layout): 3x1 convolution,
# 512 -> 256 channels, reading "inception_c1_1x3_2".
layer {
  name: "inception_c1_3x1_3"
  type: "Convolution"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_3x1_3"
  # Weights only (bias_term: false), hence a single param entry.
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    # 3 (height) x 1 (width) kernel; padding only along the height keeps the
    # spatial size unchanged at stride 1.
    pad_h: 1
    pad_w: 0
    kernel_h: 3
    kernel_w: 1
  }
}
# In-place normalization (bottom == top).
# NOTE(review): use_global_stats is pinned to false; for inference with the
# pretrained weights it likely has to be true or left unset — confirm.
layer {
  name: "inception_c1_3x1_3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_3x1_3"
  batch_norm_param {
    use_global_stats: false
  }
}
# In-place Scale with a bias term, applied right after BatchNorm.
layer {
  name: "inception_c1_3x1_3_scale"
  type: "Scale"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_3x1_3"
  scale_param {
    bias_term: true
  }
}
# In-place ReLU; "inception_c1_3x1_3" is the sixth input of the final concat.
layer {
  name: "inception_c1_3x1_3_relu"
  type: "ReLU"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_3x1_3"
}
# Block output: concatenates the six branch outputs, each produced by a
# 256-output convolution in this listing (6 x 256 = 1536 channels when
# concatenated along the channel axis, which is presumably the default here).
# The order of the `bottom` entries determines the channel order of the result.
layer {
  name: "inception_c1_concat"
  type: "Concat"
  bottom: "inception_c1_1x1"
  bottom: "inception_c1_1x1_2"
  bottom: "inception_c1_1x3"
  bottom: "inception_c1_3x1"
  bottom: "inception_c1_1x3_3"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_concat"
}