train slowly
wander2017 opened this issue · 12 comments
利用您的脚本,我把 VGG16 转换成了 MobileNet 的形式,Caffe 也重新编译了。但是训练速度很慢,大概每 20 次迭代需要 4 分钟。显卡是 TITAN X (Pascal),环境是 Ubuntu 14.04、CUDA 8.0、cuDNN 5.1。
这是我的训练设置:
test_iter: 5000
test_interval: 5000
base_lr: 0.01
display: 20
max_iter: 300000
lr_policy: "poly"
power: 1
momentum: 0.9
weight_decay: 0.0005
snapshot: 5000
snapshot_prefix: "models/vgg16_"
random_seed: 0
net: "mobilenet.prototxt"
test_initialization: false
iter_size: 16
solver_mode: GPU
把你的转换后的prototxt发一下吧。
layer {
name: "data"
type: "ImageData"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mirror: true
crop_size: 224
mean_value: 103.939
mean_value: 116.779
mean_value: 123.68
}
image_data_param {
source: "/mobinet/model/cnn-models/VGG19_cvgj/train.txt"
batch_size: 16
shuffle: true
root_folder: "/mobinet/model/cnn-models/VGG19_cvgj/pic/"
}
}
layer {
name: "data"
type: "ImageData"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
mirror: false
crop_size: 224
mean_value: 103.939
mean_value: 116.779
mean_value: 123.68
}
image_data_param {
source: "/mobinet/model/cnn-models/VGG19_cvgj/val.txt"
batch_size: 10
shuffle: false
root_folder: "/mobinet/model/cnn-models/VGG19_cvgj/pic/"
}
}
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2_depthwise"
type: "DepthwiseConvolution"
bottom: "conv1_1"
top: "conv1_2_3x3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
group: 64
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv1_2_3x3_relu"
type: "ReLU"
bottom: "conv1_2_3x3"
top: "conv1_2_3x3"
}
layer {
name: "conv1_2_1x1"
type: "Convolution"
bottom: "conv1_2_3x3"
top: "conv1_2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 64
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1_depthwise"
type: "DepthwiseConvolution"
bottom: "pool1"
top: "conv2_1_3x3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
group: 64
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv2_1_3x3_relu"
type: "ReLU"
bottom: "conv2_1_3x3"
top: "conv2_1_3x3"
}
layer {
name: "conv2_1_1x1"
type: "Convolution"
bottom: "conv2_1_3x3"
top: "conv2_1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2_depthwise"
type: "DepthwiseConvolution"
bottom: "conv2_1"
top: "conv2_2_3x3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
group: 128
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv2_2_3x3_relu"
type: "ReLU"
bottom: "conv2_2_3x3"
top: "conv2_2_3x3"
}
layer {
name: "conv2_2_1x1"
type: "Convolution"
bottom: "conv2_2_3x3"
top: "conv2_2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 128
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1_depthwise"
type: "DepthwiseConvolution"
bottom: "pool2"
top: "conv3_1_3x3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
group: 128
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv3_1_3x3_relu"
type: "ReLU"
bottom: "conv3_1_3x3"
top: "conv3_1_3x3"
}
layer {
name: "conv3_1_1x1"
type: "Convolution"
bottom: "conv3_1_3x3"
top: "conv3_1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2_depthwise"
type: "DepthwiseConvolution"
bottom: "conv3_1"
top: "conv3_2_3x3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 256
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv3_2_3x3_relu"
type: "ReLU"
bottom: "conv3_2_3x3"
top: "conv3_2_3x3"
}
layer {
name: "conv3_2_1x1"
type: "Convolution"
bottom: "conv3_2_3x3"
top: "conv3_2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3_depthwise"
type: "DepthwiseConvolution"
bottom: "conv3_2"
top: "conv3_3_3x3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 256
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv3_3_3x3_relu"
type: "ReLU"
bottom: "conv3_3_3x3"
top: "conv3_3_3x3"
}
layer {
name: "conv3_3_1x1"
type: "Convolution"
bottom: "conv3_3_3x3"
top: "conv3_3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 256
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1_depthwise"
type: "DepthwiseConvolution"
bottom: "pool3"
top: "conv4_1_3x3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 256
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv4_1_3x3_relu"
type: "ReLU"
bottom: "conv4_1_3x3"
top: "conv4_1_3x3"
}
layer {
name: "conv4_1_1x1"
type: "Convolution"
bottom: "conv4_1_3x3"
top: "conv4_1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 512
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2_depthwise"
type: "DepthwiseConvolution"
bottom: "conv4_1"
top: "conv4_2_3x3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
group: 512
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv4_2_3x3_relu"
type: "ReLU"
bottom: "conv4_2_3x3"
top: "conv4_2_3x3"
}
layer {
name: "conv4_2_1x1"
type: "Convolution"
bottom: "conv4_2_3x3"
top: "conv4_2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 512
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3_depthwise"
type: "DepthwiseConvolution"
bottom: "conv4_2"
top: "conv4_3_3x3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
group: 512
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv4_3_3x3_relu"
type: "ReLU"
bottom: "conv4_3_3x3"
top: "conv4_3_3x3"
}
layer {
name: "conv4_3_1x1"
type: "Convolution"
bottom: "conv4_3_3x3"
top: "conv4_3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 512
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1_depthwise"
type: "DepthwiseConvolution"
bottom: "pool4"
top: "conv5_1_3x3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
group: 512
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv5_1_3x3_relu"
type: "ReLU"
bottom: "conv5_1_3x3"
top: "conv5_1_3x3"
}
layer {
name: "conv5_1_1x1"
type: "Convolution"
bottom: "conv5_1_3x3"
top: "conv5_1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 512
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2_depthwise"
type: "DepthwiseConvolution"
bottom: "conv5_1"
top: "conv5_2_3x3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
group: 512
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv5_2_3x3_relu"
type: "ReLU"
bottom: "conv5_2_3x3"
top: "conv5_2_3x3"
}
layer {
name: "conv5_2_1x1"
type: "Convolution"
bottom: "conv5_2_3x3"
top: "conv5_2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 512
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3_depthwise"
type: "DepthwiseConvolution"
bottom: "conv5_2"
top: "conv5_3_3x3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
group: 512
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv5_3_3x3_relu"
type: "ReLU"
bottom: "conv5_3_3x3"
top: "conv5_3_3x3"
}
layer {
name: "conv5_3_1x1"
type: "Convolution"
bottom: "conv5_3_3x3"
top: "conv5_3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 512
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
std: 0.01
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5_3"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "fc6"
type: "InnerProduct"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "drop6"
type: "Dropout"
bottom: "fc6"
top: "fc6"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc7"
type: "InnerProduct"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "drop7"
type: "Dropout"
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc8_new"
type: "InnerProduct"
bottom: "fc7"
top: "fc8_new"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
inner_product_param {
num_output: 510
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 0.1
}
}
}
layer {
name: "accuracy_at_1"
type: "Accuracy"
bottom: "fc8_new"
bottom: "label"
top: "accuracy_at_1"
include {
phase: TEST
}
accuracy_param {
top_k: 1
}
}
layer {
name: "accuracy_at_5"
type: "Accuracy"
bottom: "fc8_new"
bottom: "label"
top: "accuracy_at_5"
include {
phase: TEST
}
accuracy_param {
top_k: 5
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc8_new"
bottom: "label"
top: "loss"
}
你的网络我在1080上测试了
I1121 13:50:07.143968 12978 caffe.cpp:417] Average Forward pass: 8.51048 ms.
I1121 13:50:07.143985 12978 caffe.cpp:419] Average Backward pass: 36.0526 ms.
你看看是不是数据读取的问题,
先用 caffe time --model xxxx.prototxt --gpu 0
看看结果
然后把前面的数据层替换成:
input: "data"
input_dim: 1
input_dim: 3
input_dim: 224
input_dim: 224
input: "label"
input_dim: 1
input_dim: 1
input_dim: 1
input_dim: 1
然后用caffe time --model xxxx.prototxt --gpu 0
这样再测试一下,把结果发出来吧。
好的,麻烦您了
这是直接测试的结果:
I1121 07:25:08.040560 16438 caffe.cpp:377] Average Forward pass: 49.4973 ms.
I1121 07:25:08.040570 16438 caffe.cpp:379] Average Backward pass: 512.248 ms.
I1121 07:25:08.040577 16438 caffe.cpp:381] Average Forward-Backward: 561.856 ms.
I1121 07:25:08.040585 16438 caffe.cpp:383] Total Time: 28092.8 ms.
这是修改数据层后的结果:
I1121 07:34:34.576349 16469 caffe.cpp:377] Average Forward pass: 6.74808 ms.
I1121 07:34:34.576362 16469 caffe.cpp:379] Average Backward pass: 37.1771 ms.
I1121 07:34:34.576378 16469 caffe.cpp:381] Average Forward-Backward: 44.0411 ms.
我查一下数据的问题(下面的结果是说读一张图片需要这么久吗,我的数据大概有10万张图片)
另外还想问一下,预训练的参数应该怎么获取,只能从头开始训练吗?
不是啊。你上面那次测试的 batch size 是 16,下面这次是 1。
你一个iter只需要500ms,为什么需要4分钟才能跑20个iter呢?
网络一开始训练会进行一次测试。 是不是这次测试影响了你的训练速度判断
I1121 07:49:12.278923 16482 solver.cpp:228] Iteration 0, loss = 8.3027
I1121 07:49:12.279098 16482 solver.cpp:244] Train net output #0: loss = 8.79598 (* 1 = 8.79598 loss)
I1121 07:49:12.279136 16482 sgd_solver.cpp:106] Iteration 0, lr = 0.01
I1121 07:52:31.570077 16482 solver.cpp:228] Iteration 20, loss = 6.2159
I1121 07:52:31.570235 16482 solver.cpp:244] Train net output #0: loss = 6.19742 (* 1 = 6.19742 loss)
I1121 07:52:31.570255 16482 sgd_solver.cpp:106] Iteration 20, lr = 0.00999933
I1121 07:55:51.234297 16482 solver.cpp:228] Iteration 40, loss = 6.22004
I1121 07:55:51.234452 16482 solver.cpp:244] Train net output #0: loss = 6.18713 (* 1 = 6.18713 loss)
I1121 07:55:51.234465 16482 sgd_solver.cpp:106] Iteration 40, lr = 0.00999867
感谢您的回复。我再查下原因,有结果会回复的。
另外还想问一下,预训练的模型和参数应该怎么获取?是可以做相应的转换,还是只能从头开始训练?
不可以,只能重新开始训练。普通网络转换成 MobileNet 之后,参数数量都不一致了。
不过如果你用一些训好的mobilenet的模型,替换其中的depthwise操作的层成为我的层,可以直接使用原来的参数。比如我的readme中的shicai的模型。
感谢。
display 显示速度慢的原因找到了:iter_size 的值我设置成了 16,导致每次迭代实际处理的是 16*16 张图片,而不是一个 batch_size(16 张)。
你们有没有比较过这两种情况的耗时——原始的 VGG 网络,以及把 VGG 的 convolution 转换为 depthwise convolution 之后的网络?有的话,希望能分享一下,thx @wander2017 @yonghenglh6
@Ai-is-light
如果你是跟原版做比较的话,确实会变慢,因为原版用的是 cublas(3x3 可能是 Winograd),而我的是自己手写的 CUDA 代码,所以即便运算量少了,速度反而会更慢。
但是如果你跟增加了 group 的 VGG 比较的话(即把 VGG 里面的所有 conv 都转换成两层:一层不改变 channel、只增加 group,另一层变换 channel、kernel 为 1),因为原版 Caffe 的 group 是串行实现的,所以原版速度极慢。
本来 depthwise 网络就是打算将来在 CPU 上跑的,所以只要有一个勉强可以训练的 Caffe 把模型训出来就行,实际使用的时候就和 GPU 无关了。