For a while I've been trying to run this model, and I'm getting this error.
cehguru opened this issue · 2 comments
This is the block of code where I'm getting the out-of-memory error:
def batch_gd(model, criterion, train_loader, test_loader, epochs):
    """Train `model` for `epochs` epochs and evaluate it after each one.

    Parameters
    ----------
    model : torch.nn.Module
        Model to optimize. Relies on module-level globals `device` and
        `optimizer` (as the original snippet did) — TODO: consider passing
        them in explicitly.
    criterion : callable
        Loss function `(output, targets) -> scalar tensor`.
    train_loader, test_loader : torch.utils.data.DataLoader
        Batched training / evaluation data. NOTE: the original body ignored
        this second loader (misspelled `test_laoder`) and read the global
        `validation_loader` instead; it now uses the argument it was given.
    epochs : int
        Number of passes over `train_loader`.

    Returns
    -------
    (np.ndarray, np.ndarray)
        Per-epoch mean train loss and per-epoch mean test loss.
        (The original returned an undefined name `validation_losses`,
        which raised NameError; the preallocated `test_losses` array is
        now actually used.)
    """
    train_losses = np.zeros(epochs)
    test_losses = np.zeros(epochs)
    for e in range(epochs):
        t0 = datetime.now()
        model.train()  # enable dropout/batchnorm training behavior
        train_loss = []
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            output = model(inputs)
            loss = criterion(output, targets)
            train_loss.append(loss.item())  # torch scalar -> Python float
            loss.backward()
            optimizer.step()
        train_losses[e] = np.mean(train_loss)
        model.eval()  # inference-mode dropout/batchnorm
        test_loss = []
        # no_grad() stops autograd from retaining activation graphs for the
        # evaluation batches — without it every eval batch holds extra CUDA
        # memory, which is a likely contributor to the reported OOM.
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                output = model(inputs)
                loss = criterion(output, targets)
                test_loss.append(loss.item())
        test_losses[e] = np.mean(test_loss)
        dt = datetime.now() - t0
        print(
            f"Epoch : {e+1}/{epochs} Train_loss:{train_losses[e]:.3f} Test_loss:{test_losses[e]:.3f} Duration:{dt}"
        )
    return train_losses, test_losses
# NOTE(review): if CUDA runs out of memory, the cheapest knob is a smaller
# batch size (e.g. 32 or 16) — activation memory scales with it.
batch_size = 64
# All three loaders share one `dataset` (defined elsewhere) and differ only
# in the sampler that selects which indices each split draws.
train_loader = torch.utils.data.DataLoader(
dataset, batch_size=batch_size, sampler=train_sampler
)
# NOTE(review): `test_loader` is built but never used below — the call passes
# `validation_loader` as the evaluation loader. Verify which split is intended.
test_loader = torch.utils.data.DataLoader(
dataset, batch_size=batch_size, sampler=test_sampler
)
validation_loader = torch.utils.data.DataLoader(
dataset, batch_size=batch_size, sampler=validation_sampler
)
# Train for 5 epochs, evaluating on the validation split each epoch.
train_losses, validation_losses = batch_gd(
model, criterion, train_loader, validation_loader, 5
)
As the error suggests, the memory allocation exceeds the available CUDA memory. Can you try using a reduced batch size, e.g. 32 or 16?