nicodjimenez/lstm

Can't converge

fmscole opened this issue · 0 comments

I tested it, but it can't converge to the target:

when: y_list = [-0.8333333333, 0.33333, 0.166666667, -0.8]
result: iter 9999: y_pred = [-0.76083, -0.00007, -0.00007, -0.79996], loss: 1.442e-01

I added a linear layer after h; now it can converge to any target:

import numpy as np

from lstm import LstmParam, LstmNetwork


class ToyLossLayer:
    """Squared-error loss on a learned linear read-out of a hidden state.

    The scalar prediction for a hidden-state vector ``pred`` is the dot
    product ``v . pred``, where ``v`` is a weight vector owned by this layer.
    ``v`` starts at zero and is trained as a side effect of ``bottom_diff``.
    """

    def __init__(self, mem_cell_ct, lr=0.1):
        """Create the read-out layer.

        Args:
            mem_cell_ct: Length of the weight vector ``v``.
            lr: Step size applied to ``v`` on every ``bottom_diff`` call.
                Defaults to 0.1.
        """
        self.v = np.zeros(mem_cell_ct)
        self.lr = lr

    def value(self, pred):
        """Return the read-out ``v . pred`` for the vector ``pred``."""
        return self.v.dot(pred)

    def loss(self, pred, label):
        """Return the squared error between the read-out of ``pred`` and ``label``."""
        return (self.value(pred) - label) ** 2

    def bottom_diff(self, pred, label):
        """Return the gradient passed back to ``pred`` and update ``v`` in place.

        Both the returned gradient and the weight update use the error term
        ``2 * (out - label)`` divided by ``len(v)``, i.e. they are scaled down
        by the number of weights relative to the plain derivative of ``loss``.

        Args:
            pred: Hidden-state vector; must be dot-compatible with ``v``.
            label: Scalar target for the read-out.

        Returns:
            ``df * v`` computed with the weights as they were *before* this
            call's update.
        """
        df = 2 * (self.value(pred) - label) / self.v.shape[0]
        # Compute the gradient first so it reflects the pre-update weights.
        diff = df * self.v
        # In-place gradient step on the read-out weights.
        self.v -= self.lr * pred * df
        return diff


def example_0():
    """Train the LSTM plus linear read-out on a fixed four-step target sequence.

    Seeds NumPy's RNG, builds an LSTM with 1000 memory cells and 50-dimensional
    inputs, and runs 10000 iterations. Each iteration feeds the same random
    inputs, prints the current predictions and the loss, applies the
    accumulated parameter diff with ``lr=0.1`` and clears the input list.
    """
    np.random.seed(0)

    # Sizes of the LSTM state and of each input vector.
    mem_cell_ct = 1000
    x_dim = 50
    lstm_param = LstmParam(mem_cell_ct, x_dim)
    lstm_net = LstmNetwork(lstm_param)

    y_list = [-8.8, 80.2, 3.1, 8000.8]
    step_indices = range(len(y_list))

    # One random input vector per target value.
    input_val_arr = [np.random.random(x_dim) for _ in y_list]
    loss = ToyLossLayer(mem_cell_ct)

    for epoch in range(10000):
        print("iter", "%2s" % str(epoch), end=": ")

        for x_vec in input_val_arr:
            lstm_net.x_list_add(x_vec)

        formatted_preds = ", ".join(
            "% 2.5f" % loss.value(lstm_net.lstm_node_list[step].state.h)
            for step in step_indices
        )
        print("y_pred = [" + formatted_preds + "]", end=", ")

        total_loss = lstm_net.y_list_is(y_list, loss)
        print("loss:", "%.3e" % total_loss)

        lstm_param.apply_diff(lr=0.1)
        lstm_net.x_list_clear()


# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    example_0()

when: y_list = [-8.8, 80.2, 3.1, 8000.8]
result: iter 9999: y_pred = [-8.72629, 80.11621, 3.70385, 8000.36304], loss: 5.988e-01