BloodAxe/pytorch-toolbelt

TypeError: object of type 'int' has no len()

sainatarajan opened this issue · 4 comments

I am unable to create a basic UNet model with the library, following the example in the README. Here's the code:

from torch import nn
from pytorch_toolbelt.modules import encoders as E
from pytorch_toolbelt.modules import decoders as D

class UNet(nn.Module):
    def __init__(self, input_channels, num_classes):
        super().__init__()
        self.encoder = E.UnetEncoder(in_channels=input_channels, out_channels=32, growth_factor=2)
        self.decoder = D.UNetDecoder(self.encoder.channels, decoder_features=32)
        self.logits = nn.Conv2d(self.decoder.channels[0], num_classes, kernel_size=1)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return self.logits(x[0])
    
model = UNet(input_channels=3, num_classes=1)

Error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-1-4e8064bebb83> in <module>
     15         return self.logits(x[0])
     16 
---> 17 model= UNet(input_channels= 3, num_classes= 1)

<ipython-input-1-4e8064bebb83> in __init__(self, input_channels, num_classes)
      7         super().__init__()
      8         self.encoder = E.UnetEncoder(in_channels=input_channels, out_channels=32, growth_factor=2)
----> 9         self.decoder = D.UNetDecoder(self.encoder.channels, decoder_features=32)
     10         self.logits = nn.Conv2d(self.decoder.channels[0], num_classes, kernel_size=1)
     11 

~/anaconda3/envs/dl_gpu/lib/python3.7/site-packages/pytorch_toolbelt/modules/decoders/unet.py in __init__(self, feature_maps, decoder_features, unet_block, upsample_block)
     38             decoder_features = [None] * num_blocks
     39         else:
---> 40             if len(decoder_features) != num_blocks:
     41                 raise ValueError(f"decoder_features must have length of {num_blocks}")
     42         in_channels_for_upsample_block = feature_maps[-1]

TypeError: object of type 'int' has no len()
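The traceback makes the cause clear: UNetDecoder expects decoder_features to be a sequence with one entry per decoder block, so the int 32 from the README-style snippet reaches len() and raises. Here is a minimal sketch of the failing check (the channel list is an assumption based on out_channels=32 and growth_factor=2):

feature_maps = [32, 64, 128, 256]    # assumed encoder channels
num_blocks = len(feature_maps) - 1   # 3 decoder blocks

decoder_features = 32                # the int passed in the snippet above
try:
    if len(decoder_features) != num_blocks:  # same check as unet.py line 40
        raise ValueError(f"decoder_features must have length of {num_blocks}")
except TypeError as e:
    print(e)  # object of type 'int' has no len()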
cswwp commented

same +1

I also have the same problem.

Correct me if I'm wrong...
This might be a documentation problem. Without changing the decoder module class, all one needs to do is pass the encoder channels, dropping the first element, as decoder_features=self.encoder.channels[1:], and this should hopefully solve the issue:

import torch
from torch import nn
from pytorch_toolbelt.modules import encoders as E
from pytorch_toolbelt.modules import decoders as D

class UNet(nn.Module):
    def __init__(self, input_channels, num_classes):
        super().__init__()
        self.encoder = E.UnetEncoder(in_channels=input_channels, out_channels=32, growth_factor=2)
        # One decoder feature size per decoder stage (one less than the encoder feature maps)
        self.decoder = D.UNetDecoder(self.encoder.channels, decoder_features=self.encoder.channels[1:])
        self.logits = nn.Conv2d(self.decoder.channels[0], num_classes, kernel_size=1)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return self.logits(x[0])

model = UNet(input_channels=3, num_classes=1)

out = model(torch.randn(2, 3, 512, 512))
print(out.shape)

Output:

torch.Size([2, 1, 512, 512])
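As a quick sanity check, the channel lists can also be inspected directly (the expected values are assumptions based on out_channels=32 and growth_factor=2):

print(model.encoder.channels)  # assumed [32, 64, 128, 256]
print(model.decoder.channels)  # per-stage decoder channels; channels[0] feeds the 1x1 logits conv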

Printing out the model gives the following output:

UNet(
  (encoder): UnetEncoder(
    (layer0): UnetBlock(
      (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (abn1): Sequential(
        (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
      (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (abn2): Sequential(
        (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
      )
    )
    (layer1): Sequential(
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (conv): UnetBlock(
        (conv1): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (abn1): Sequential(
          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (abn2): Sequential(
          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
      )
    )
    (layer2): Sequential(
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (conv): UnetBlock(
        (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (abn1): Sequential(
          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (abn2): Sequential(
          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
      )
    )
    (layer3): Sequential(
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (conv): UnetBlock(
        (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (abn1): Sequential(
          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (abn2): Sequential(
          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
      )
    )
  )
  (decoder): UNetDecoder(
    (blocks): ModuleList(
      (0): UnetBlock(
        (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (abn1): Sequential(
          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (abn2): Sequential(
          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
      )
      (1): UnetBlock(
        (conv1): Conv2d(192, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (abn1): Sequential(
          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (abn2): Sequential(
          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
      )
      (2): UnetBlock(
        (conv1): Conv2d(96, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (abn1): Sequential(
          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (abn2): Sequential(
          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
        )
      )
    )
    (upsamples): ModuleList(
      (0): ConvTranspose2d(256, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): ConvTranspose2d(256, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (2): ConvTranspose2d(128, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    )
  )
  (logits): Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1))
)

Please take a look at the working example of a segmentation model available here: https://github.com/BloodAxe/pytorch-toolbelt/blob/develop/pytorch_toolbelt/zoo/segmentation.py#L92
As a rule of thumb, the number of decoder stages is one less than the number of encoder feature maps.
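A minimal sketch of that rule, with the encoder channels assumed from this thread:

encoder_channels = [32, 64, 128, 256]    # N = 4 encoder feature maps (assumed)
decoder_features = encoder_channels[1:]  # N - 1 = 3 entries, one per decoder stage
assert len(decoder_features) == len(encoder_channels) - 1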