# Stochastic Depth: residual blocks are randomly skipped during training,
# with a survival probability that decays linearly with block depth
# (hence "lineardecay"). Inference uses the full network.
from augmented.stodepth import resnet18_StoDepth_lineardecay
...
# CIFAR-100 has 100 classes.
model = resnet18_StoDepth_lineardecay(num_classes=100)
...
# Label smoothing: replaces one-hot targets with softened targets so the
# model is less over-confident; drop-in replacement for nn.CrossEntropyLoss.
from augmented.label_smoothing import LabelSmoothingCrossEntropy
...
# Uses the implementation's default smoothing factor.
criterion = LabelSmoothingCrossEntropy()
...
# Cutout: masks out random square regions of the input image(s).
from augmented.cutout import Cutout
# Option A: append Cutout to the torchvision train-transform pipeline:
# if args.cutout:
#     train_transform.transforms.append(Cutout(n_holes=args.n_holes, length=args.length))
# dataset = datasets.CIFAR100(args.cifarpath, train=True, download=True, transform=train_transform)
# Option B: wrap the dataset itself:
# dataset = datasets.CIFAR100(args.cifarpath, train=True, download=True, transform=transform_train)
# dataset = Cutout(dataset, n_holes=args.n_holes, length=args.length)
# Option C: apply Cutout to each batch inside the training loop:
cutout = Cutout(n_holes=args.n_holes, length=args.length)
...
for _ in range(num_epoch):
    for input, target in loader:
        # NOTE(review): assumes Cutout accepts a batched image tensor here,
        # not just a single image — confirm against the Cutout implementation.
        input = cutout(input)
...
# DropBlock: ResNet18 variant that drops contiguous feature-map regions.
from augmented.dropblock.resnet18_dropblock import ResNet18
...
# Excerpt from inside the model: the drop probability is linearly
# ramped by LinearScheduler from 0.0 up to 0.25 over 5000 scheduler
# steps, dropping 5x5 blocks of activations.
self.dropblock = LinearScheduler(
DropBlock2D(drop_prob=0., block_size=5),
start_value=0.0,
stop_value=0.25,
nr_steps=5e3
)
...
model = ResNet18(num_classes=100)
# Mixup: trains on convex combinations of image pairs and their labels.
from augmented.mixup import mixup_data, mixup_criterion
...
for batch_idx, (inputs, targets) in enumerate(trainloader):
    if use_cuda:
        inputs, targets = inputs.cuda(), targets.cuda()
    # lam ~ Beta(alpha, alpha); mixup_data returns the mixed inputs plus
    # both original label sets so the loss can be interpolated.
    inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, args.alpha, use_cuda)
    # NOTE(review): Variable is a no-op since PyTorch 0.4 (deprecated);
    # kept for compatibility with the original example.
    inputs, targets_a, targets_b = map(Variable, (inputs, targets_a, targets_b))
    outputs = net(inputs)
    # Equivalent to: lam * criterion(outputs, targets_a) + (1 - lam) * criterion(outputs, targets_b)
    loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
...
# Manifold Mixup: mixing is performed on hidden representations inside the
# model rather than on the raw inputs, so the model forward takes the
# mixing parameters as extra arguments.
from augmented.resnet18_manifold_mixup import ResNet18
model = ResNet18(num_classes=100)
...
lame = np.random.beta(1, 1)  # mixing coefficient lambda ~ Beta(1, 1)
rand_index = torch.randperm(b)  # shuffle indices to pair each sample with a random partner
target_a = batch_labels
target_b = batch_labels[rand_index]  # labels of the shuffled partners
r = np.random.rand(1)  # NOTE(review): presumably gates whether/where mixing is applied inside the model — confirm
...
# The model mixes hidden states internally using rand_index and lame.
predicted = model(inputs, rand_index, r,lame)
# Interpolate the loss between the two label sets with the same lambda.
loss = lame * self.myloss(predicted, target_a) + (1-lame) * self.myloss(predicted, target_b)
...
# ShakeDrop: perturbs residual-branch outputs with random scaling during
# training; the regularization lives inside this ResNet18 variant.
from augmented.resnet18_shakedrop import ResNet18
model = ResNet18(num_classes=100)
# CutMix: a patch from one image is pasted onto another and the labels are
# mixed in proportion to the patch area. Here it is used as a dataset
# wrapper that yields already-mixed images with soft labels.
from augmented.cutmix import CutMix, CutMixCrossEntropyLoss
...
dataset = datasets.CIFAR100(args.cifarpath, train=True, download=True, transform=transform_train)
# Paper's original CIFAR setting: Beta(1.0), applied with prob 0.5, up to 2 mixes.
dataset = CutMix(dataset, num_class=100, beta=1.0, prob=0.5, num_mix=2)
...
# CutMixCrossEntropyLoss(True): expects soft (mixed) label targets.
criterion = CutMixCrossEntropyLoss(True)
for _ in range(num_epoch):
    for input, target in loader:
        # input is the CutMix-ed normalized image tensor; target is the
        # soft label produced by mixing two or more labels.
        output = model(input)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
# Alternative: apply CutMix manually inside the training loop.
lame = np.random.beta(1, 1)  # CutMix mixing ratio ~ Beta(1, 1)
rand_index = torch.randperm(b)  # shuffle indices to pair each sample with a random partner
target_a = batch_labels
target_b = batch_labels[rand_index]
lam = 1.0  # default when CutMix is skipped: loss falls back to target_a only
r = np.random.rand(1)
# BUG FIX: the original compared `r < 0.0`, which is never true, so CutMix
# was never applied and `lam` was left undefined (NameError below).
# Use the paper's CIFAR probability of 0.5.
if r < 0.5:
    bbx1, bby1, bbx2, bby2 = rand_bbox(batch_imgs.size(), lame)
    # Paste the partner's patch over the same region of each image.
    batch_imgs[:, :, bbx1:bbx2, bby1:bby2] = batch_imgs[rand_index, :, bbx1:bbx2, bby1:bby2]
    # Adjust lam to the exact pixel ratio of the pasted region.
    # BUG FIX: H and W were undefined; take them from the image tensor.
    lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (batch_imgs.size()[-1] * batch_imgs.size()[-2]))
predicted = self.model(batch_imgs, rand_index, r)
loss = lam * self.myloss(predicted, target_a) + (1 - lam) * self.myloss(predicted, target_b)
## Results: ResNet18 + StochDepth / label_smoothing / Cutout / DropBlock / Mixup / Manifold Mixup / ShakeDrop / CutMix on CIFAR-100

| Model | Top-1 acc (@200 epoch) | Top-5 acc |
| --- | --- | --- |
| ResNet18 | 77.70 | 93.89 |
| + StochDepth | 77.85 | 94.93 |
| + label_smoothing | 79.11 | 94.42 |
| + Cutout | 78.22 | 94.41 |
| + DropBlock | 78.12 | 94.85 |
| + Mixup | 79.63 | 94.78 |
| + Manifold Mixup | 80.28 | 94.96 |
| + ShakeDrop | 78.98 | 95.00 |
| + CutMix | 80.72 | 95.86 |
## Official Papers
- Deep Networks with Stochastic Depth
- Rethinking the Inception Architecture for Computer Vision
- Improved Regularization of Convolutional Neural Networks with Cutout
- DropBlock: A Regularization Method for Convolutional Networks
- mixup: Beyond Empirical Risk Minimization
- Manifold Mixup: Better Representations by Interpolating Hidden States
- ShakeDrop Regularization for Deep Residual Learning
- CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features
  - Implementation: https://github.com/clovaai/CutMix-PyTorch