Tried to update stack-rnn to Keras 1.0, but failed
dytmas opened this issue · 3 comments
Hi, @EderSantana
I tried to update stack-rnn, queue-rnn, and NTM to Keras 1.0, but got strange errors.
Below is the code as I modified it:
import numpy as np
import theano.tensor as T
from keras import backend as K
from keras.engine.topology import InputSpec
from keras.layers.recurrent import Recurrent, GRU, LSTM

class Stack(Recurrent):
    """Stack and queue network.

    output_dim = output dimension
    n_slots = number of memory slots
    m_length = dimension of the memory
    rnn_size = output length of the memory controller
    inner_rnn = "lstm" (only lstm is supported)
    stack = True to create a neural stack, False to create a neural queue

    From "Learning to Transduce with Unbounded Memory"
    http://arxiv.org/pdf/1506.02516.pdf
    """
    def __init__(self, output_dim, n_slots, m_length,
                 inner_rnn='lstm', rnn_size=64, stack=True,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots + 1  # because we start at time 1
        self.m_length = m_length
        self.init = init
        self.inner_init = inner_init
        if inner_rnn != 'lstm':
            raise ValueError('Only lstm is supported')
        self.inner_rnn = inner_rnn
        self.rnn_size = rnn_size
        self.stack = stack
        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(Stack, self).__init__(**kwargs)
    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
        input_leng, input_dim = input_shape[1:]
        inner_input_shape = list(input_shape)
        inner_input_shape[-1] = input_dim + self.m_length
        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.rnn_size, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')
        self.rnn.build(inner_input_shape)

        # controller-to-stack projections: push (d), pop (u), value (v), output (o)
        self.init_h = K.zeros((self.rnn_size,))
        self.W_d = self.rnn.init((self.rnn_size, 1))
        self.W_u = self.rnn.init((self.rnn_size, 1))
        self.W_v = self.rnn.init((self.rnn_size, self.m_length))
        self.W_o = self.rnn.init((self.rnn_size, self.output_dim))
        self.b_d = K.zeros((1,), name='b_d')
        self.b_u = K.zeros((1,), name='b_u')
        self.b_v = K.zeros((self.m_length,))
        self.b_o = K.zeros((self.output_dim,))

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_d, self.b_d,
            self.W_v, self.b_v,
            self.W_u, self.b_u,
            self.W_o, self.b_o, self.init_h]
        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size,))
            self.trainable_weights = self.trainable_weights + [self.init_c]
    def get_initial_states(self, X):
        batch_size = X.shape[0]
        init_r = K.zeros((self.m_length,)).dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_V = K.zeros((self.n_slots, self.m_length)).dimshuffle('x', 0, 1).repeat(batch_size, axis=0)
        init_S = K.zeros((self.n_slots,)).dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_h = self.init_h.dimshuffle('x', 0).repeat(batch_size, axis=0)
        itime = K.zeros((1,), dtype=np.int32)
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle('x', 0).repeat(batch_size, axis=0)
            return [init_r, init_V, init_S, itime, init_h, init_c]

    def get_output_shape_for(self, input_shape):
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim
    def step(self, x, states):
        r_tm1, V_tm1, s_tm1, time = states[:4]
        h_tm1 = states[4:]
        # one controller step on the concatenation of the input and the last read vector
        op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1], axis=-1),
                                       h_tm1)
        d_t = K.sigmoid(K.dot(op_t, self.W_d) + self.b_d)  # push strength
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)  # pop strength
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)     # value to push
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o)     # layer output
        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[:, 0],
                                             u_t[:, 0], v_t, time[0],
                                             stack=self.stack)
        return o_t, [r_t, V_t, s_t, time] + h_t
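For anyone reading along: step also calls two helpers that live elsewhere in seya and are not shown above. _update_controller just runs one step of the inner RNN on the concatenation of x and r_tm1, and _update_neural_stack applies the push/pop/read rules from the paper. For reference, here is a rough unbatched numpy sketch of the latter (the function name and the single-example form are mine, not the repo's):

import numpy as np

def neural_stack_step(V, s, d, u, v, t):
    # V: (n_slots, m_length) memory values, s: (n_slots,) strengths,
    # d/u: scalar push/pop signals, v: (m_length,) new value, t: current step.
    V = V.copy()
    s_old, s = s.copy(), s.copy()
    V[t] = v                     # write the new value into slot t
    for i in range(t):           # pop: weaken old strengths, newest slots first
        s[i] = max(0.0, s_old[i] - max(0.0, u - s_old[i + 1:t].sum()))
    s[t] = d                     # push the new value with strength d
    r = np.zeros_like(v)         # read: blend values by clipped strength, top down
    for i in range(t + 1):
        r += min(s[i], max(0.0, 1.0 - s[i + 1:t + 1].sum())) * V[i]
    return V, s, r

For the queue variant (stack=False), the pop and read loops traverse the slots from the oldest end instead of the newest.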
I keep getting error messages like this:
ValueError Traceback (most recent call last)
<ipython-input> in <module>()
----> 1 y = model.predict(data_x)
2 print(y.shape)
/usr/local/lib/python2.7/dist-packages/keras/engine/training.pyc in predict(self, x, batch_size, verbose)
1153 f = self.predict_function
1154 return self._predict_loop(f, ins,
-> 1155 batch_size=batch_size, verbose=verbose)
1156
1157 def train_on_batch(self, x, y,
...
/usr/local/lib/python2.7/dist-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
312 # extra long error message in that case.
313 pass
--> 314 reraise(exc_type, exc_value, exc_trace)
315
316
theano/scan_module/scan_perform.pyx in theano.scan_module.scan_perform.perform (/home/v-chshu/.theano/compiledir_Linux-3.13--generic-x86_64-with-Ubuntu-14.04-trusty-x86_64-2.7.6-64/scan_perform/mod.cpp:4193)()
ValueError: dimension mismatch in args to gemm (32,128)x(128,128)->(32,0)
Apply node that caused the error: GpuGemm{no_inplace}(GpuSubtensor{::, int64::}.0, TensorConstant{0.20000000298}, <CudaNdarrayType(float32, matrix)>, lstm_35_U_o_copy[cuda], TensorConstant{0.20000000298})
Toposort index: 11
Inputs types: [CudaNdarrayType(float32, matrix), TensorType(float32, scalar), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix), TensorType(float32, scalar)]
Inputs shapes: [(32, 0), (), (32, 128), (128, 128), ()]
Inputs strides: [(28, 1), (), (128, 1), (128, 1), ()]
Inputs values: [CudaNdarray([]), array(0.20000000298023224, dtype=float32), 'not shown', 'not shown', array(0.20000000298023224, dtype=float32)]
Outputs clients: [[GpuElemwise{Composite{(clip((i0 + i1), i2, i3) * tanh(i4))},no_inplace}(CudaNdarrayConstant{[[ 0.5]]}, GpuGemm{no_inplace}.0, CudaNdarrayConstant{[[ 0.]]}, CudaNdarrayConstant{[[ 1.]]}, GpuElemwise{Composite{((clip((i0 + i1), i2, i3) * i4) + (clip((i0 + i5), i2, i3) * tanh(i6)))},no_inplace}.0)]]
...
Please help me! Thanks!
Finally I solved this problem! It took me many hours.
Just add consume_less='gpu' to the inner LSTM, like this:
self.rnn = LSTM(
    input_dim=input_dim + self.m_length,
    input_length=input_leng,
    output_dim=self.rnn_size, init=self.init,
    forget_bias_init='zero',
    inner_init=self.inner_init, consume_less='gpu')
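For anyone curious why this fixes it, my reading of the Keras 1.0 recurrent source (treat it as an assumption, not gospel): with the default consume_less='cpu', LSTM.step assumes its input has already been run through preprocess_input, i.e. widened to 4 * output_dim columns that it then slices apart per gate. Feeding it the raw (input_dim + m_length)-wide concatenation leaves the last gate's slice x[:, 3 * output_dim:] empty, which is exactly the (32, 0) operand in the gemm error above. With consume_less='gpu' the step does the input projection itself, so the raw input is fine. A quick smoke test (shapes are made up for illustration, chosen so the controller input is 28 columns wide like in the traceback) now runs for me:

import numpy as np
from keras.models import Sequential

model = Sequential()
# input_dim + m_length = 28 and rnn_size = 128 match the failing shapes above
model.add(Stack(output_dim=8, n_slots=64, m_length=20, rnn_size=128,
                input_dim=8, input_length=10))
model.compile(optimizer='rmsprop', loss='mse')

data_x = np.random.random((32, 10, 8)).astype('float32')
print(model.predict(data_x).shape)   # (32, 8) since return_sequences is False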
Perfect! I didn't know about consume_less. Do you mind making a pull request with your modifications to the keras1 branch?
fixed by #42