RuntimeError: The size of tensor a (319) must match the size of tensor b (639) at non-singleton dimension 3
Saahil18 opened this issue · 0 comments
Saahil18 commented
I am trying to train on my own custom data, which consists of PNG images. I have created my own custom_data.py and stored it in the datasets folder as well, but I always get this error and don't know why.
Error:
Training model named:
monodepth_custom5
Models and tensorboard events files are saved to:
./logs
Training is using:
cpu
/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py:558: UserWarning: This DataLoader will create 12 worker processes in total. Our suggested max number of worker in current system is 8, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
warnings.warn(_create_warning_msg(
Using split:
eigen_zhou
There are 9450 training items and 9450 validation items
/usr/local/lib/python3.10/dist-packages/torch/optim/lr_scheduler.py:143: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
warnings.warn("Detected call of `lr_scheduler.step()` before `optimizer.step()`. "
Training
/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py:4343: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.
warnings.warn(
Traceback (most recent call last):
File "/content/monodepth2/train.py", line 18, in <module>
trainer.train()
File "/content/monodepth2/trainer.py", line 211, in train
self.run_epoch()
File "/content/monodepth2/trainer.py", line 227, in run_epoch
outputs, losses = self.process_batch(inputs)
File "/content/monodepth2/trainer.py", line 429, in process_batch
losses = self.compute_losses(inputs, outputs)
File "/content/monodepth2/trainer.py", line 514, in compute_losses
smooth_loss = get_smooth_loss(norm_disp, color)
File "/content/monodepth2/layers.py", line 216, in get_smooth_loss
grad_disp_x *= torch.exp(-grad_img_x)
RuntimeError: The size of tensor a (319) must match the size of tensor b (639) at non-singleton dimension 3
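If I read layers.py right, get_smooth_loss pairs horizontal and vertical gradients of the mean-normalized disparity with gradients of the corresponding colour image, so the two tensors have to share the same height and width; the 319 vs 639 mismatch looks like the disparity is 320 pixels wide while the colour image at the source scale is 640. A rough sketch of what I think the function does (paraphrased from monodepth2's layers.py, argument names are mine, not copied verbatim):

import torch

def get_smooth_loss(disp, img):
    # horizontal / vertical gradients of the disparity map
    grad_disp_x = torch.abs(disp[:, :, :, :-1] - disp[:, :, :, 1:])
    grad_disp_y = torch.abs(disp[:, :, :-1, :] - disp[:, :, 1:, :])

    # image gradients, averaged over the colour channels
    grad_img_x = torch.mean(torch.abs(img[:, :, :, :-1] - img[:, :, :, 1:]), 1, keepdim=True)
    grad_img_y = torch.mean(torch.abs(img[:, :, :-1, :] - img[:, :, 1:, :]), 1, keepdim=True)

    # this elementwise product is where the size mismatch (319 vs 639) is raised,
    # so disp and img must share the same spatial resolution
    grad_disp_x *= torch.exp(-grad_img_x)
    grad_disp_y *= torch.exp(-grad_img_y)

    return grad_disp_x.mean() + grad_disp_y.mean()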
This is my custom_data.py:
# custom_dataset.py
import os

import torch
from PIL import Image
from torchvision import transforms


class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, root_dir, frame_ids, height, width, scales, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.frame_ids = frame_ids
        self.height = height
        self.width = width
        self.scales = scales
        self.image_files = [os.path.join(root_dir, f)
                            for f in os.listdir(root_dir) if f.endswith('png')]

        # Define intrinsic camera parameters
        self.K = torch.tensor([[0.58, 0,    0.5, 0],
                               [0,    1.92, 0.5, 0],
                               [0,    0,    1,   0],
                               [0,    0,    0,   1]], dtype=torch.float32)
        self.inv_K = torch.inverse(self.K)

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        image = Image.open(img_name).convert('RGB')  # ensure the image is in RGB format

        inputs = {}

        # Generate an image for each scale
        for scale in self.scales:
            scaled_image = image.resize((self.width // (2 ** scale),
                                         self.height // (2 ** scale)), Image.LANCZOS)
            if self.transform:
                scaled_image = self.transform(scaled_image)
            for i in self.frame_ids:
                inputs[("color", i, scale)] = scaled_image
                inputs[("color_aug", i, scale)] = scaled_image  # assuming no augmentation is applied

        # Add intrinsic camera parameters
        for scale in self.scales:
            K = self.K.clone()
            inv_K = self.inv_K.clone()
            K[0, :] *= self.width // (2 ** scale)
            K[1, :] *= self.height // (2 ** scale)
            inv_K[0, :] /= self.width // (2 ** scale)
            inv_K[1, :] /= self.height // (2 ** scale)
            inputs[("K", scale)] = K
            inputs[("inv_K", scale)] = inv_K

        return inputs
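For comparison, this is roughly how I understand the stock datasets/mono_dataset.py builds the per-scale intrinsics: the normalized K is scaled to each resolution first and only then inverted, rather than dividing a precomputed inverse the way I do above. The helper name below is just for illustration and the body is paraphrased from memory, not copied from the repo:

import numpy as np
import torch

def make_scaled_intrinsics(K_norm, width, height, num_scales):
    """Build per-scale K / inv_K the way monodepth2's MonoDataset (roughly) does:
    scale the normalized 4x4 intrinsics to each resolution, then invert."""
    intrinsics = {}
    for scale in range(num_scales):
        K = K_norm.copy()
        K[0, :] *= width // (2 ** scale)
        K[1, :] *= height // (2 ** scale)
        inv_K = np.linalg.pinv(K)  # invert the *scaled* matrix
        intrinsics[("K", scale)] = torch.from_numpy(K)
        intrinsics[("inv_K", scale)] = torch.from_numpy(inv_K)
    return intrinsics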