owruby/shake-drop_pytorch

Implementation on resnext50_32x4d

NightQing opened this issue · 0 comments

Hi,
I want to implement ShakeDrop on the official PyTorch ResNeXt architecture. The main code is as follows:

from models.shakedrop import ShakeDrop
from models.shakeshake import Shortcut

class ShakeBottleNeck(nn.Module):
    """Bottleneck residual block whose residual branch passes through ShakeDrop.

    The residual branch is 1x1 conv -> BN -> ReLU -> grouped 3x3 conv -> BN
    -> ReLU -> 1x1 conv -> BN.  Its output is fed to ``ShakeDrop`` and then
    added to the shortcut.  No activation is applied after the addition.

    Args:
        in_ch: Number of channels of the block input.
        mid_ch: Number of channels inside the bottleneck (used by the grouped
            3x3 convolution, so it must be divisible by ``cardinary``).
        out_ch: Number of channels of the block output.
        cardinary: Value passed as ``groups`` to the 3x3 convolution.
        stride: Stride of the 3x3 convolution and of the projection shortcut.
        p_shakedrop: Value forwarded unchanged to ``ShakeDrop``.
            NOTE(review): whether this is a drop or a keep probability
            depends on the ``ShakeDrop`` implementation -- confirm there.
    """

    def __init__(self, in_ch, mid_ch, out_ch, cardinary, stride=1, p_shakedrop=1.0):
        super(ShakeBottleNeck, self).__init__()
        # The identity shortcut is only valid when the branch changes neither
        # the channel count nor the spatial size.  Checking the channels
        # alone would add a strided (downsampled) branch output to an
        # un-downsampled input and fail in ``forward``.
        self.equal_io = in_ch == out_ch and stride == 1
        self.shortcut = None if self.equal_io else Shortcut(in_ch, out_ch, stride=stride)
        self.branch = self._make_branch(in_ch, mid_ch, out_ch, cardinary, stride)
        self.shake_drop = ShakeDrop(p_shakedrop)

    def forward(self, x):
        """Return ``shake_drop(branch(x)) + shortcut(x)``.

        The shortcut is ``x`` itself when the identity shortcut is usable,
        otherwise the output of the ``Shortcut`` module.
        """
        h = self.branch(x)
        h = self.shake_drop(h)
        h0 = x if self.equal_io else self.shortcut(x)
        return h + h0

    def _make_branch(self, in_ch, mid_ch, out_ch, cardinary, stride=1):
        """Build the residual branch as an ``nn.Sequential``.

        All convolutions are bias-free because each one is directly followed
        by a ``BatchNorm2d`` layer.  Only the grouped 3x3 convolution carries
        ``stride``.
        """
        return nn.Sequential(
            # 1x1 reduction to the bottleneck width.
            nn.Conv2d(in_ch, mid_ch, 1, padding=0, bias=False),
            nn.BatchNorm2d(mid_ch),
            nn.ReLU(inplace=False),
            # Grouped 3x3 convolution; this is where downsampling happens.
            nn.Conv2d(mid_ch, mid_ch, 3, padding=1, stride=stride, groups=cardinary, bias=False),
            nn.BatchNorm2d(mid_ch),
            nn.ReLU(inplace=False),
            # 1x1 expansion to the output width; no ReLU after the final BN.
            nn.Conv2d(mid_ch, out_ch, 1, padding=0, bias=False),
            nn.BatchNorm2d(out_ch))

The p_shakedrop value is in the range (0, 0.5) for the blocks in the ResNet.

However, the loss increases quickly and the accuracy drops close to 0.
So is there any drop-block implementation for ResNeXt/ResNet? Any advice?
Thanks.