error in copy-task-plots.ipynb
Opened this issue · 2 comments
bw-xu commented
When I run the following code:
seq_len = 60
_, x, y = next(iter(dataloader(1, 1, 8, seq_len, seq_len)))
result = evaluate(model.net, model.criterion, x, y)
y_out = result['y_out']
I get the following error:
IndexError Traceback (most recent call last)
<ipython-input-41-127bd44fb490> in <module>()
1 seq_len = 60
2 _, x, y = next(iter(dataloader(1, 1, 8, seq_len, seq_len)))
----> 3 result = evaluate(model.net, model.criterion, x, y)
4 y_out = result['y_out']
D:\GithubProjs\pytorch-ntm-master\train.py in evaluate(net, criterion, X, Y)
151
152 result = {
--> 153 'loss': loss.data[0],
154 'cost': cost / batch_size,
155 'y_out': y_out,
IndexError: invalid index of a 0-dim tensor. Use tensor.item() to convert a 0-dim tensor to a Python number
How can I solve this?
muyi95 commented
Did you solve this problem?
hankyul2 commented
You should change `data[0]` to `item()`, as the error message itself suggests. You should also make the same change in `draw_sequence`. I tried it, and the code ran without that error.
def evaluate(net, criterion, X, Y):
    """Evaluate a single batch (without training).

    Args:
        net: Model exposing ``init_sequence(batch_size)``. It is called as
            ``net(x)`` once per input step and as ``net()`` once per output
            step; every call must return an ``(output, state)`` pair.
        criterion: Callable invoked as ``criterion(y_out, Y)``; its result
            must be a single-element tensor (``.item()`` is called on it).
        X: Input tensor, stepped through along dimension 0.
        Y: 3-D target tensor; dimensions 0 and 1 are read as the output
            sequence length and the batch size.

    Returns:
        dict with the keys:
            ``'loss'``: the criterion value as a Python number.
            ``'cost'``: number of mismatching binarized entries divided by
                the batch size (a 0-dim tensor).
            ``'y_out'``: the raw outputs collected during the read phase.
            ``'y_out_binarized'``: ``y_out`` thresholded to 0/1.
            ``'states'``: the state returned by every ``net`` call, input
                steps first, then output steps.
    """
    inp_seq_len = X.size(0)
    outp_seq_len, batch_size, _ = Y.size()

    # New sequence
    net.init_sequence(batch_size)

    # Feed the sequence + delimiter; only the state of each step is kept.
    states = []
    for i in range(inp_seq_len):
        _, state = net(X[i])
        states.append(state)

    # Read the output (no input given).
    # Allocate on Y's device so the criterion never sees mixed devices.
    y_out = torch.zeros(Y.size(), device=Y.device)
    for i in range(outp_seq_len):
        y_out[i], state = net()
        states.append(state)

    loss = criterion(y_out, Y)

    # Vectorized threshold: entries strictly below 0.5 become 0, everything
    # else becomes 1. Unlike Tensor.apply_, this works on any device.
    detached = y_out.detach()
    y_out_binarized = (~(detached < 0.5)).to(detached.dtype)

    # The cost is the number of error bits per sequence
    cost = torch.sum(torch.abs(y_out_binarized - Y.detach()))

    result = {
        'loss': loss.item(),
        'cost': cost / batch_size,
        'y_out': y_out,
        'y_out_binarized': y_out_binarized,
        'states': states,
    }

    return result
def draw_sequence(y, u=12):
    """Render ``y[:, 0, :]`` as a grayscale grid image and return it.

    Each element becomes one square cell: dimension 0 of ``y`` runs along
    the horizontal axis and dimension 2 along the vertical axis. Only index
    0 of dimension 1 is drawn.

    Args:
        y: 3-D tensor whose elements support ``.item()``.
        u: Edge length of one grid cell in pixels.

    Returns:
        The image created with ``Image.new('L', ...)``.
    """
    n_steps = y.size(0)
    n_bits = y.size(2)

    margin = u // 2   # blank border around the whole grid
    gap = u // 8      # shrink applied to every side of a cell

    width = n_steps * u + 2 * margin
    height = n_bits * u + 2 * margin

    im = Image.new('L', (width, height))
    draw = ImageDraw.ImageDraw(im)

    # Light background behind all cells.
    draw.rectangle([0, 0, width, height], fill=250)

    for col in range(n_steps):
        left = margin + col * u + gap
        right = margin + (col + 1) * u - gap
        for row in range(n_bits):
            top = margin + row * u + gap
            bottom = margin + (row + 1) * u - gap
            # The value is inverted before being handed to cmap.
            val = 1 - y[col, 0, row].item()
            draw.rectangle([left, top, right, bottom], fill=cmap(val))

    return im