SigmoidCrossEntropyLoss
Opened this issue · 4 comments
Hi, I want to replace the SoftmaxWithLoss layer with a SigmoidCrossEntropyLoss layer. I understand that SigmoidCrossEntropyLoss doesn't accept a mask with multiple labels (e.g. 0, 1, 2, ...) but only a binary mask with values in {0, 1}. To pass a single binary mask to the sigmoid, I put a Python layer before it that selects the mask provided by the roi-data layer (as in Mask R-CNN). This layer is described below, followed by my training prototxt file (note that I'm using 3 classes only).
The problem I encounter is that the mask loss is very large (around 40,000) and doesn't drop. Any help would be greatly appreciated!
PYTHON LAYER

import caffe
import numpy as np
import yaml
from fast_rcnn.config import cfg  # provides cfg.TRAIN.MASK_SIZE

class BinaryMaskLayer(caffe.Layer):

    def setup(self, bottom, top):
        layer_params = yaml.load(self.param_str_)
        self._num_classes = layer_params['num_classes']
        top[0].reshape(1, 1, cfg.TRAIN.MASK_SIZE, cfg.TRAIN.MASK_SIZE)
        top[1].reshape(1, 1, cfg.TRAIN.MASK_SIZE, cfg.TRAIN.MASK_SIZE)

    def forward(self, bottom, top):
        mask_score = bottom[0].data
        mask_targets = bottom[1].data
        label_for_mask = bottom[2].data
        # Convert each multi-label mask into a binary mask.
        for i in xrange(mask_targets.shape[0]):
            mask = mask_targets[i, ...]
            # Zero out all values that are not this box's label ...
            mask[mask != label_for_mask[i]] = 0
            # ... and set the pixels of that label to 1.
            mask[mask == label_for_mask[i]] = 1
            mask_targets[i, ...] = mask
        # Select the score channel of the first box's label
        # (note: the same channel is used for every box in the batch).
        label = int(label_for_mask[0])
        mask_score = mask_score[:, label:label + 1, :, :]
        # Add a channel dimension: (N, H, W) -> (N, 1, H, W).
        mask_targets = mask_targets[np.newaxis, :]
        mask_targets = np.swapaxes(mask_targets, 0, 1)
        top[0].reshape(*mask_score.shape)
        top[0].data[...] = mask_score
        top[1].reshape(*mask_targets.shape)
        top[1].data[...] = mask_targets

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass
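For reference, the binarization step in forward can be sanity-checked in isolation; the toy mask and label below are made-up values, not taken from any dataset:

import numpy as np

# Hypothetical 4x4 multi-label mask target with classes {0, 1, 2},
# for a box whose label is class 2.
mask = np.array([[0., 0., 2., 2.],
                 [0., 1., 2., 2.],
                 [0., 1., 1., 0.],
                 [0., 0., 0., 0.]])
label = 2

mask[mask != label] = 0  # drop background and the other class
mask[mask == label] = 1  # keep only the box's own class as foreground

print(mask)
# [[0. 0. 1. 1.]
#  [0. 0. 1. 1.]
#  [0. 0. 0. 0.]
#  [0. 0. 0. 0.]]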
TRAIN.PROTOTXT FILE
name: "VGG_ILSVRC_16_layers"
layer {
name: 'input-data'
type: 'Python'
top: 'data'
top: 'im_info'
top: 'gt_boxes'
top: 'seg_mask_inds'
top: 'flipped'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 3" # 2 obj categories + 1 background
}
}
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4_3"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 2
stride: 2
}
}
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
#========= RPN ============
layer {
name: "rpn_conv/3x3"
type: "Convolution"
bottom: "conv5_3"
top: "rpn/output"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
num_output: 512
kernel_size: 3 pad: 1 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "rpn_relu/3x3"
type: "ReLU"
bottom: "rpn/output"
top: "rpn/output"
}
layer {
name: "rpn_cls_score"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_cls_score"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
#num_output: 24
num_output: 30 # 2(bg/fg) * 15(n_anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "rpn_bbox_pred"
type: "Convolution"
bottom: "rpn/output"
top: "rpn_bbox_pred"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
convolution_param {
#num_output: 48 # 4 * 12(anchors)
num_output: 60 # 4 * 15(anchors)
kernel_size: 1 pad: 0 stride: 1
weight_filler { type: "gaussian" std: 0.01 }
bias_filler { type: "constant" value: 0 }
}
}
layer {
bottom: "rpn_cls_score"
top: "rpn_cls_score_reshape"
name: "rpn_cls_score_reshape"
type: "Reshape"
reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }
}
layer {
name: 'rpn-data'
type: 'Python'
bottom: 'rpn_cls_score'
bottom: 'gt_boxes'
bottom: 'im_info'
bottom: 'data'
top: 'rpn_labels'
top: 'rpn_bbox_targets'
top: 'rpn_bbox_inside_weights'
top: 'rpn_bbox_outside_weights'
python_param {
module: 'rpn.anchor_target_layer'
layer: 'AnchorTargetLayer'
#param_str: "'feat_stride': 16 \n'scales': !!python/tuple [4, 8, 16, 32]"
param_str: "'feat_stride': 16 \n'scales': !!python/tuple [2, 4, 8, 16, 32]"
}
}
layer {
name: "rpn_loss_cls"
type: "SoftmaxWithLoss"
bottom: "rpn_cls_score_reshape"
bottom: "rpn_labels"
propagate_down: 1
propagate_down: 0
top: "rpn_cls_loss"
loss_weight: 1
loss_param {
ignore_label: -1
normalize: true
}
}
layer {
name: "rpn_loss_bbox"
type: "SmoothL1Loss"
bottom: "rpn_bbox_pred"
bottom: "rpn_bbox_targets"
bottom: 'rpn_bbox_inside_weights'
bottom: 'rpn_bbox_outside_weights'
top: "rpn_loss_bbox"
loss_weight: 1
smooth_l1_loss_param { sigma: 3.0 }
}
#========= RoI Proposal ============
layer {
name: "rpn_cls_prob"
type: "Softmax"
bottom: "rpn_cls_score_reshape"
top: "rpn_cls_prob"
}
layer {
name: 'rpn_cls_prob_reshape'
type: 'Reshape'
bottom: 'rpn_cls_prob'
top: 'rpn_cls_prob_reshape'
#reshape_param { shape { dim: 0 dim: 24 dim: -1 dim: 0 } }
reshape_param { shape { dim: 0 dim: 30 dim: -1 dim: 0 } }
}
layer {
name: 'proposal'
type: 'Python'
bottom: 'rpn_cls_prob_reshape'
bottom: 'rpn_bbox_pred'
bottom: 'im_info'
top: 'rpn_rois'
python_param {
module: 'rpn.proposal_layer'
layer: 'ProposalLayer'
#param_str: "'feat_stride': 16 \n'scales': !!python/tuple [4, 8, 16, 32]"
param_str: "'feat_stride': 16 \n'scales': !!python/tuple [2, 4, 8, 16, 32]"
}
}
layer {
name: 'roi-data'
type: 'Python'
bottom: 'rpn_rois'
bottom: 'gt_boxes'
bottom: 'im_info'
bottom: 'seg_mask_inds'
bottom: 'flipped'
top: 'rois'
top: 'labels'
top: 'bbox_targets'
top: 'bbox_inside_weights'
top: 'bbox_outside_weights'
top: 'mask_targets'
top: 'rois_pos'
top: 'label_for_mask'
python_param {
module: 'rpn.proposal_target_layer_ppsigmoid'
layer: 'ProposalTargetLayer'
param_str: "'num_classes': 3"
}
}
#========= RCNN ============
layer {
name: "roi_pool5"
#type: "ROIPooling"
#type: "ROIAlignment2"
type: "ROIAlignment"
bottom: "conv5_3" #bottom[0]
bottom: "rois" #bottom[1]
top: "pool5"
#roi_pooling_param {
#roi_alignment2_param {
roi_alignment_param {
pooled_w: 7
pooled_h: 7
spatial_scale: 0.0625
}
}
layer {
name: "fc6"
type: "InnerProduct"
bottom: "pool5"
top: "fc6"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 4096
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "fc6"
top: "fc6"
}
layer {
name: "fc7"
type: "InnerProduct"
bottom: "fc6"
top: "fc7"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 4096
}
}
layer {
name: "relu7"
type: "ReLU"
bottom: "fc7"
top: "fc7"
}
layer {
name: "cls_score"
type: "InnerProduct"
bottom: "fc7"
top: "cls_score"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "bbox_pred"
type: "InnerProduct"
bottom: "fc7"
top: "bbox_pred"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 12 # = 4 * 3, i.e., box coordinate for each class
weight_filler {
type: "gaussian"
std: 0.001
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "loss_cls"
type: "SoftmaxWithLoss"
bottom: "cls_score"
bottom: "labels"
propagate_down: 1
propagate_down: 0
top: "loss_cls"
loss_weight: 3
}
layer {
name: "loss_bbox"
type: "SmoothL1Loss"
bottom: "bbox_pred"
bottom: "bbox_targets"
bottom: "bbox_inside_weights"
bottom: "bbox_outside_weights"
top: "loss_bbox"
loss_weight: 2
}
##############Mask branch####################################
layer {
name: "roi_pool5_2"
#type: "ROIPooling"
#type: "ROIAlignment2"
type: "ROIAlignment"
bottom: "conv5_3"
bottom: "rois_pos"
top: "pool5_2"
#roi_pooling_param {
#roi_alignment2_param{
roi_alignment_param {
pooled_w: 7
pooled_h: 7
spatial_scale: 0.0625 # 1/16
}
}
## Conv-Relu 1
layer {
name: "pool5_2_conv"
type: "Convolution"
bottom: "pool5_2"
top: "pool5_2_conv"
param { lr_mult: 1.0 decay_mult: 1.0}
param { lr_mult: 2.0 decay_mult: 0}
convolution_param {
num_output: 512
kernel_size: 1 pad: 0 #kernel_size: 3 pad: 1 stride: 1
weight_filler { type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "pool5_2_conv_relu"
type: "ReLU"
bottom: "pool5_2_conv"
top: "pool5_2_conv_relu"
}
## Conv-Relu 2
layer {
name: "pool5_2_conv2"
type: "Convolution"
bottom: "pool5_2_conv_relu"
top: "pool5_2_conv2"
param { lr_mult: 1.0 decay_mult: 1.0}
param { lr_mult: 2.0 decay_mult: 0}
convolution_param {
num_output: 512
kernel_size: 3 pad: 1 stride: 1 #kernel_size: 1 pad: 0 #kernel_size: 3 pad: 1 stride: 1
weight_filler { type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "pool5_2_conv2_relu"
type: "ReLU"
bottom: "pool5_2_conv2"
top: "pool5_2_conv2_relu"
}
# Deconv 1
layer {
name: "mask_deconv1"
type: "Deconvolution"
#bottom: "pool5_2_conv_relu"
bottom: "pool5_2_conv2_relu"
top: "mask_deconv1"
param { lr_mult: 1 decay_mult: 1.0 }
param { lr_mult: 2 decay_mult: 0}
convolution_param {
num_output: 256
#pad: 1 stride: 2 kernel_size: 4 # 14x14
#pad: 1 stride: 3 kernel_size: 6 # 22x22
pad: 1 stride: 4 kernel_size: 8 # 30x30
group: 256 #apply independently
weight_filler { type: "bilinear" }
#bias_filler { type: "constant" value: 1 }
}
}
## Conv-Relu 3
layer {
name: "pool5_2_conv3"
type: "Convolution"
bottom: "mask_deconv1"
top: "pool5_2_conv3"
param { lr_mult: 1.0 decay_mult: 1.0}
param { lr_mult: 2.0 decay_mult: 0}
convolution_param {
num_output: 512
kernel_size: 3 pad: 1 stride: 1 #kernel_size: 1 pad: 0 #kernel_size: 3 pad: 1 stride: 1
weight_filler { type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "pool5_2_conv3_relu"
type: "ReLU"
bottom: "pool5_2_conv3"
top: "pool5_2_conv3_relu"
}
## Conv-Relu 4
layer {
name: "pool5_2_conv4"
type: "Convolution"
bottom: "pool5_2_conv3_relu"
top: "pool5_2_conv4"
param { lr_mult: 1.0 decay_mult: 1.0}
param { lr_mult: 2.0 decay_mult: 0}
convolution_param {
num_output: 512
kernel_size: 3 pad: 1 stride: 1 #kernel_size: 1 pad: 0 #kernel_size: 3 pad: 1 stride: 1
weight_filler { type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "pool5_2_conv4_relu"
type: "ReLU"
bottom: "pool5_2_conv4"
top: "pool5_2_conv4_relu"
}
# Deconv 2
layer {
name: "mask_deconv2"
type: "Deconvolution"
bottom: "pool5_2_conv4_relu"
top: "mask_deconv2"
param { lr_mult: 1 decay_mult: 1.0 }
param { lr_mult: 2 decay_mult: 0}
convolution_param {
num_output: 256
#pad: 1 stride: 2 kernel_size: 4 # 28x28
#pad: 1 stride: 8 kernel_size: 16 # 490x490
pad: 1 stride: 4 kernel_size: 8
group: 256 #apply independently
weight_filler { type: "bilinear" }
#bias_filler { type: "constant" value: 1 }
}
}
## Conv-Relu 5
layer {
name: "pool5_2_conv5"
type: "Convolution"
bottom: "mask_deconv2"
top: "pool5_2_conv5"
param { lr_mult: 1.0 decay_mult: 1.0}
param { lr_mult: 2.0 decay_mult: 0}
convolution_param {
num_output: 512
kernel_size: 3 pad: 1 stride: 1 #kernel_size: 1 pad: 0 #kernel_size: 3 pad: 1 stride: 1
weight_filler { type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "pool5_2_conv5_relu"
type: "ReLU"
bottom: "pool5_2_conv5"
top: "pool5_2_conv5_relu"
}
## Conv-Relu 6
layer {
name: "pool5_2_conv6"
type: "Convolution"
bottom: "pool5_2_conv5_relu"
top: "pool5_2_conv6"
param { lr_mult: 1.0 decay_mult: 1.0}
param { lr_mult: 2.0 decay_mult: 0}
convolution_param {
num_output: 512
kernel_size: 3 pad: 1 stride: 1 #kernel_size: 1 pad: 0 #kernel_size: 3 pad: 1 stride: 1
weight_filler { type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: "pool5_2_conv6_relu"
type: "ReLU"
bottom: "pool5_2_conv6"
top: "pool5_2_conv6_relu"
}
# Deconv 3
layer {
name: "mask_deconv3"
type: "Deconvolution"
bottom: "pool5_2_conv6_relu"
top: "mask_deconv3"
param { lr_mult: 1 decay_mult: 1.0 }
param { lr_mult: 2 decay_mult: 0}
convolution_param {
num_output: 256
pad: 1 stride: 2 kernel_size: 4
#pad: 1 stride: 8 kernel_size: 16
#pad: 1 stride: 4 kernel_size: 8
group: 256 #apply independently
weight_filler { type: "bilinear" }
#bias_filler { type: "constant" value: 1 }
}
}
layer {
name: "mask_score"
type: "Convolution"
bottom: "mask_deconv3" #
top: "mask_score"
param { lr_mult: 1.0 decay_mult: 1.0 }
param { lr_mult: 2.0 decay_mult: 0 }
convolution_param {
num_output: 3 # 2 classes + 1 background
kernel_size: 1 pad: 0
weight_filler {type: "gaussian" std: 0.01 } #weight_filler { type: "xavier" }
bias_filler { type: "constant" value: 0 }
}
}
layer {
name: 'binary-mask'
type: 'Python'
bottom: 'mask_score'
bottom: 'mask_targets' #from lib/rpn/proposal_target_layer.py roi-data
bottom: 'label_for_mask' #from lib/rpn/proposal_target_layer.py roi-data
top: 'mask_score2'
top: 'binary_mask'
python_param {
module: 'rpn.binary_mask'
layer: 'BinaryMaskLayer'
param_str: "'num_classes': 3"
}
}
layer {
name: "loss_mask"
type: "SigmoidCrossEntropyLoss"
bottom: 'mask_score2'
bottom: "binary_mask"
top: "loss_mask"
loss_weight: 1
loss_param {
ignore_label: -1
normalize: true
#normalize: false
}
propagate_down: true # backprop to prediction
propagate_down: false # don't backprop to labels
}
If you have 3 classes, then using Softmax is a good choice.
If you really need to use Sigmoid for some reason, then make sure you format the ground truths (in binary?) correctly, so that the loss is computed against the output of the network - which now outputs a binary mask. The total loss being very large may be because you forgot to divide by the total number of pixels.
Hope it helps!
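To see why dividing by the pixel count matters, here is a minimal NumPy sketch of sigmoid cross-entropy with and without per-pixel normalization (the mask size and logits are illustrative assumptions, not values from the network above):

import numpy as np

np.random.seed(0)
H = W = 224                      # assumed mask resolution, for illustration
logits = np.random.randn(H, W)   # fake pre-sigmoid network outputs
targets = (np.random.rand(H, W) > 0.5).astype(np.float64)  # fake binary mask

# Numerically stable per-pixel sigmoid cross-entropy:
# loss(x, t) = max(x, 0) - x * t + log(1 + exp(-|x|))
per_pixel = (np.maximum(logits, 0) - logits * targets
             + np.log1p(np.exp(-np.abs(logits))))

print(per_pixel.sum())   # summed over all pixels: on the order of 10^4
print(per_pixel.mean())  # normalized per pixel: on the order of 1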
Thanks for your reply @nqanh. I thought it might be the case that I would have to divide by the number of pixels (i.e. 244^2), as that is also the order of magnitude of the loss, but I didn't see that applied in the Softmax case.
Why is there no division by the number of pixels in the Softmax? Or am I missing it somewhere?
Thanks!
The Sigmoid loss in Caffe is already well implemented; if you use that one it should be OK. However, handling the groundtruth map in this case would be very complicated, since the groundtruth map can't contain {0, 1, 2}, only {0, 1} --> you will need more than 1 map, one per class?
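In other words, one option (different from the single-map selection done by the BinaryMaskLayer above) is to expand the multi-class target into one binary map per class. A minimal sketch, where to_per_class_binary_maps is a hypothetical helper name:

import numpy as np

def to_per_class_binary_maps(mask, num_classes):
    """Expand an (H, W) mask with labels {0, ..., K-1} into a
    (K, H, W) stack of binary maps, one per class."""
    return np.stack([(mask == c).astype(np.float32)
                     for c in range(num_classes)])

mask = np.array([[0, 1],
                 [2, 2]])
maps = to_per_class_binary_maps(mask, 3)
# maps[0] marks background pixels, maps[1] class 1, maps[2] class 2.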
Thanks for your comments @nqanh. I think I fixed the ground-truth map classes in the BinaryMaskLayer Python layer I described above. This gives me a mask loss on the order of 0.8 with the Sigmoid loss, which seems reasonable.