Saving state_dict into another CNN

Question

Saving state_dict into another CNN

florencejt opened this issue 4 years ago · 1 comments

Hi there, I'm thinking of using this model for some school work (I'll cite of course).
I'm trying to save the trained weights from the vgg16 model into an identical vgg16 Net(nn.Module) thing (all the same layers and sizes and I've changed the layer names to match) because I need access to only one of the convolutional layers.

However, when I do this, the new model with the weights from vgg16_bn is acting like it's never been trained.

# Build vgg16_bn with pretrained=True and switch it to evaluation mode.
my_model = vgg16_bn(pretrained=True)
my_model.eval()
# Write the model's state_dict to the file 'vgg16_cifar_weights'.
torch.save(my_model.state_dict(), 'vgg16_cifar_weights')

and then later in my "from scratch" vgg16
# NOTE(review): this path ends in 'vgg16_cifar_weights_scratch3', which is not
# the file name passed to torch.save above ('vgg16_cifar_weights') -- confirm
# that the intended checkpoint is the one being loaded.
vgg.load_state_dict(torch.load('PytorchCifarModel/vgg16_cifar_weights_scratch3'))

Can you see what I'm doing wrong? This trained model would be so perfect for my project, I'm hoping that it'll work.

I'll put my from-scratch vgg16 here as well in case there's an issue there. I know it's really long-winded, but it needs to be for my project. Thank you!!

class Net(nn.Module):
    """VGG16 with batch normalization, written out layer by layer.

    Every convolution and batch-norm layer is a separate named attribute
    (``conv<block>_<index>`` / ``batch<block>_<index>``) so that individual
    layers can be accessed directly. The classifier maps 512 features to
    10 outputs.

    Note: ``state_dict`` keys are derived from these attribute names (for
    example ``conv1_1.weight``). A checkpoint whose keys follow a different
    naming scheme will not match under ``load_state_dict``'s default strict
    key matching and must have its keys remapped first.
    """

    def __init__(self):
        """Create all layers; attribute names define the state_dict keys."""
        super().__init__()

        # Stateless layers shared by every block.
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Block 1: 3 -> 64 channels.
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.batch1_1 = nn.BatchNorm2d(64)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.batch1_2 = nn.BatchNorm2d(64)

        # Block 2: 64 -> 128 channels.
        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.batch2_1 = nn.BatchNorm2d(128)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.batch2_2 = nn.BatchNorm2d(128)

        # Block 3: 128 -> 256 channels.
        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.batch3_1 = nn.BatchNorm2d(256)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.batch3_2 = nn.BatchNorm2d(256)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.batch3_3 = nn.BatchNorm2d(256)

        # Block 4: 256 -> 512 channels.
        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.batch4_1 = nn.BatchNorm2d(512)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.batch4_2 = nn.BatchNorm2d(512)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.batch4_3 = nn.BatchNorm2d(512)

        # Block 5: 512 -> 512 channels.
        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.batch5_1 = nn.BatchNorm2d(512)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.batch5_2 = nn.BatchNorm2d(512)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.batch5_3 = nn.BatchNorm2d(512)

        # The adaptive average pool reduces each of the 512 channels to 1x1,
        # so the first linear layer takes 512 input features.
        self.classifier = nn.Sequential(
            nn.Linear(512 * 1 * 1, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 10),
        )

        self.relu_f = nn.ReLU(True)

    def forward(self, x):
        """Run the five conv blocks, pool, flatten, and classify.

        Args:
            x: input batch with 3 channels (first conv is ``Conv2d(3, 64)``).

        Returns:
            Tensor with one row per input sample and 10 columns.
        """
        # Block 1
        x = self.relu_f(self.batch1_1(self.conv1_1(x)))
        x = self.relu_f(self.batch1_2(self.conv1_2(x)))
        x = self.maxpool(x)

        # Block 2
        x = self.relu_f(self.batch2_1(self.conv2_1(x)))
        x = self.relu_f(self.batch2_2(self.conv2_2(x)))
        x = self.maxpool(x)

        # Block 3
        x = self.relu_f(self.batch3_1(self.conv3_1(x)))
        x = self.relu_f(self.batch3_2(self.conv3_2(x)))
        # Third conv of the block must be conv3_3 (not conv3_2 again).
        x = self.relu_f(self.batch3_3(self.conv3_3(x)))
        x = self.maxpool(x)

        # Block 4
        x = self.relu_f(self.batch4_1(self.conv4_1(x)))
        x = self.relu_f(self.batch4_2(self.conv4_2(x)))
        # Third conv of the block must be conv4_3 (not conv4_2 again).
        x = self.relu_f(self.batch4_3(self.conv4_3(x)))
        x = self.maxpool(x)

        # Block 5
        x = self.relu_f(self.batch5_1(self.conv5_1(x)))
        x = self.relu_f(self.batch5_2(self.conv5_2(x)))
        x = self.relu_f(self.batch5_3(self.conv5_3(x)))
        x = self.maxpool(x)

        x = self.avgpool(x)
        # Flatten everything except the batch dimension for the classifier.
        x = x.view(x.size(0), -1)
        x = self.classifier(x)

        return x

Answer 1 · 2021-06-24T04:19:41.000Z

The layer names are different, so you can't load the pretrained state_dict properly.