EderSantana/seya

Tried to update stack RNN to Keras 1.0, but failed

dytmas opened this issue · 3 comments

Hi @EderSantana,

I tried to update stack-rnn, queue-rnn and ntm to Keras 1.0, but I get strange errors.
Below is the code I modified:

import numpy as np
import theano.tensor as T

from keras import backend as K
from keras.engine import InputSpec
from keras.layers.recurrent import Recurrent, GRU, LSTM
# _update_controller and _update_neural_stack are the helper functions that
# already ship with seya's stack layer; they are used unchanged below.


class Stack(Recurrent):
    """Stack and queue network.

    output_dim = output dimension
    n_slots    = number of memory slots
    m_length   = dimension of the memory
    rnn_size   = output length of the memory controller
    inner_rnn  = "lstm" (only lstm is supported)
    stack      = True to create a neural stack, False to create a neural queue

    From "Learning to Transduce with Unbounded Memory"
    http://arxiv.org/pdf/1506.02516.pdf
    """
    def __init__(self, output_dim, n_slots, m_length,
                 inner_rnn='lstm', rnn_size=64, stack=True,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots + 1  # because we start at time 1
        self.m_length = m_length
        self.init = init
        self.inner_init = inner_init
        if inner_rnn != 'lstm':
            raise ValueError('Only lstm is supported')
        self.inner_rnn = inner_rnn
        self.rnn_size = rnn_size
        self.stack = stack

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(Stack, self).__init__(**kwargs)

    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
        input_leng, input_dim = input_shape[1:]
        inner_input_shape = list(input_shape)
        inner_input_shape[-1] = input_dim + self.m_length
        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.rnn_size, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build(tuple(inner_input_shape))

        # Initial controller state (learned).
        self.init_h = K.zeros((self.rnn_size,))

        # Projections from the controller output: push strength (d), pop
        # strength (u), value written to memory (v) and the layer output (o).
        self.W_d = self.rnn.init((self.rnn_size, 1))
        self.W_u = self.rnn.init((self.rnn_size, 1))
        self.W_v = self.rnn.init((self.rnn_size, self.m_length))
        self.W_o = self.rnn.init((self.rnn_size, self.output_dim))

        self.b_d = K.zeros((1,), name='b_d')
        self.b_u = K.zeros((1,), name='b_u')
        self.b_v = K.zeros((self.m_length,))
        self.b_o = K.zeros((self.output_dim,))

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_d, self.b_d,
            self.W_v, self.b_v,
            self.W_u, self.b_u,
            self.W_o, self.b_o, self.init_h]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size,))
            self.trainable_weights = self.trainable_weights + [self.init_c]

    def get_initial_states(self, X):
        batch_size = X.shape[0]

        init_r = K.zeros((self.m_length,)).dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_V = K.zeros((self.n_slots, self.m_length)).dimshuffle('x', 0, 1).repeat(batch_size, axis=0)
        init_S = K.zeros((self.n_slots,)).dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)

        # Symbolic step counter, starts at 0 and is incremented in step().
        itime = K.zeros((1,), dtype=np.int32)

        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return [init_r, init_V, init_S, itime, init_h, init_c]

    def get_output_shape_for(self, input_shape):
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def step(self, x, states):
        r_tm1, V_tm1, s_tm1, time = states[:4]
        h_tm1 = states[4:]

        # Feed the input together with the last read vector to the controller.
        op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1], axis=-1),
                                       h_tm1)

        d_t = K.sigmoid(K.dot(op_t, self.W_d) + self.b_d)  # push strength
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)  # pop strength
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)     # value to write
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o)     # layer output

        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[:, 0],
                                             u_t[:, 0], v_t, time[0],
                                             stack=self.stack)

        return o_t, [r_t, V_t, s_t, time] + h_t
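
For context, here is roughly how I wire the layer into a model before calling predict (a minimal sketch, not my exact script; the sizes are placeholders picked to be consistent with the shapes in the traceback below):

import numpy as np
from keras.models import Sequential

seq_len, input_dim = 10, 8   # placeholder task sizes

model = Sequential()
model.add(Stack(output_dim=input_dim, n_slots=32, m_length=20,
                rnn_size=128, inner_rnn='lstm', stack=True,
                return_sequences=True,
                input_shape=(seq_len, input_dim)))
model.compile(loss='mse', optimizer='adam')

data_x = np.random.random((32, seq_len, input_dim)).astype('float32')
y = model.predict(data_x)   # this is the call that blows up
print(y.shape)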

I always get error messages like this:

ValueError Traceback (most recent call last)
in ()
----> 1 y = model.predict(data_x)
2 print(y.shape)

/usr/local/lib/python2.7/dist-packages/keras/engine/training.pyc in predict(self, x, batch_size, verbose)
1153 f = self.predict_function
1154 return self._predict_loop(f, ins,
-> 1155 batch_size=batch_size, verbose=verbose)
1156
1157 def train_on_batch(self, x, y,
...

/usr/local/lib/python2.7/dist-packages/theano/gof/link.pyc in raise_with_op(node, thunk, exc_info, storage_map)
312 # extra long error message in that case.
313 pass
--> 314 reraise(exc_type, exc_value, exc_trace)
315
316

theano/scan_module/scan_perform.pyx in theano.scan_module.scan_perform.perform (/home/v-chshu/.theano/compiledir_Linux-3.13--generic-x86_64-with-Ubuntu-14.04-trusty-x86_64-2.7.6-64/scan_perform/mod.cpp:4193)()

ValueError: dimension mismatch in args to gemm (32,128)x(128,128)->(32,0)
Apply node that caused the error: GpuGemm{no_inplace}(GpuSubtensor{::, int64::}.0, TensorConstant{0.20000000298}, <CudaNdarrayType(float32, matrix)>, lstm_35_U_o_copy[cuda], TensorConstant{0.20000000298})
Toposort index: 11
Inputs types: [CudaNdarrayType(float32, matrix), TensorType(float32, scalar), CudaNdarrayType(float32, matrix), CudaNdarrayType(float32, matrix), TensorType(float32, scalar)]
Inputs shapes: [(32, 0), (), (32, 128), (128, 128), ()]
Inputs strides: [(28, 1), (), (128, 1), (128, 1), ()]
Inputs values: [CudaNdarray([]), array(0.20000000298023224, dtype=float32), 'not shown', 'not shown', array(0.20000000298023224, dtype=float32)]
Outputs clients: [[GpuElemwise{Composite{(clip((i0 + i1), i2, i3) * tanh(i4))},no_inplace}(CudaNdarrayConstant{[[ 0.5]]}, GpuGemm{no_inplace}.0, CudaNdarrayConstant{[[ 0.]]}, CudaNdarrayConstant{[[ 1.]]}, GpuElemwise{Composite{((clip((i0 + i1), i2, i3) * i4) + (clip((i0 + i5), i2, i3) * tanh(i6)))},no_inplace}.0)]]
...

Please help me! Thanks!

Finally I solved this problem! It took me many hours.
Just add consume_less='gpu' to the inner LSTM, like this:
        self.rnn = LSTM(
            input_dim=input_dim + self.m_length,
            input_length=input_leng,
            output_dim=self.rnn_size, init=self.init,
            forget_bias_init='zero',
            inner_init=self.inner_init, consume_less='gpu')
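
If I am reading the Keras 1.0 LSTM code correctly, the problem seems to be that with the default consume_less='cpu' the step function expects an input that was already projected to 4*output_dim columns by preprocess_input(), and it slices the per-gate blocks out of it; since _update_controller calls the inner LSTM's step() directly, that projection never happens and the slices come out empty. A tiny sketch of the shape arithmetic (batch size 32 and rnn_size 128 are from the traceback; the inner input width of 28 is inferred from the strides, so treat it as an assumption):

import numpy as np

x = np.zeros((32, 28), dtype='float32')  # raw step input, never preprocessed
output_dim = 128

# slice the 'cpu' code path takes for the output gate
x_o = x[:, 3 * output_dim:]
print(x_o.shape)  # (32, 0) -- the empty operand in the gemm error above

With consume_less='gpu' the step computes the input projection itself (one big K.dot(x, self.W) + self.b), so the raw concatenated input works.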

Perfect! I didn't know about consume_less. Do you mind making a pull request with your modifications to the keras1 branch?

fixed by #42