Hezi-Resheff/Oreilly-Learning-TensorFlow

Multiple LSTM cells aren't working as per the code given in Chapter 5

GKarmakar opened this issue · 2 comments

In Chapter 5, an example shows how multiple LSTM blocks can be stacked to build an RNN classifier with multiple layers. But that code throws an error that I have failed to debug so far: a matrix dimension mismatch when multiplying a [128, 64] matrix with a [96, 128] matrix.
The code and the detailed error message are as follows:
Code:
num_LSTM_layers = 2
with tf.variable_scope("lstm"):

    lstm_cell = tf.contrib.rnn.BasicLSTMCell(hidden_layer_size,
                                             forget_bias=1.0)
    cell = tf.contrib.rnn.MultiRNNCell(cells=[lstm_cell]*num_LSTM_layers,
                                       state_is_tuple=True)
    outputs, states = tf.nn.dynamic_rnn(cell, embed,
                                        sequence_length=_seqlens,
                                        dtype=tf.float32)

Error:

InvalidArgumentError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
685 graph_def_version, node_def_str, input_shapes, input_tensors,
--> 686 input_tensors_as_shapes, status)
687 except errors.InvalidArgumentError as err:

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
472 compat.as_text(c_api.TF_Message(self.status.status)),
--> 473 c_api.TF_GetCode(self.status.status))
474 # Delete the underlying status object from memory otherwise it stays alive

InvalidArgumentError: Dimensions must be equal, but are 64 and 96 for 'lstm_1/rnn/while/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/MatMul_1' (op: 'MatMul') with input shapes: [128,64], [96,128].

During handling of the above exception, another exception occurred:

ValueError Traceback (most recent call last)
<ipython-input> in <module>()
8 outputs, states = tf.nn.dynamic_rnn(cell, embed,
9 sequence_length = _seqlens,
---> 10 dtype=tf.float32)

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py in dynamic_rnn(cell, inputs, sequence_length, initial_state, dtype, parallel_iterations, swap_memory, time_major, scope)
612 swap_memory=swap_memory,
613 sequence_length=sequence_length,
--> 614 dtype=dtype)
615
616 # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth].

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py in _dynamic_rnn_loop(cell, inputs, initial_state, parallel_iterations, swap_memory, sequence_length, dtype)
775 loop_vars=(time, output_ta, state),
776 parallel_iterations=parallel_iterations,
--> 777 swap_memory=swap_memory)
778
779 # Unpack final output if not using output tuples.

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name)
2814 loop_context = WhileContext(parallel_iterations, back_prop, swap_memory) # pylint: disable=redefined-outer-name
2815 ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
-> 2816 result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
2817 return result
2818

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants)
2638 self.Enter()
2639 original_body_result, exit_vars = self._BuildLoop(
-> 2640 pred, body, original_loop_vars, loop_vars, shape_invariants)
2641 finally:
2642 self.Exit()

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
2588 structure=original_loop_vars,
2589 flat_sequence=vars_for_body_with_tensor_arrays)
-> 2590 body_result = body(*packed_vars_for_body)
2591 if not nest.is_sequence(body_result):
2592 body_result = [body_result]

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py in _time_step(time, output_ta_t, state)
758 call_cell=call_cell,
759 state_size=state_size,
--> 760 skip_conditionals=True)
761 else:
762 (output, new_state) = call_cell()

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py in _rnn_step(time, sequence_length, min_sequence_length, max_sequence_length, zero_output, state, call_cell, state_size, skip_conditionals)
234 # steps. This is faster when max_seq_len is equal to the number of unrolls
235 # (which is typical for dynamic_rnn).
--> 236 new_output, new_state = call_cell()
237 nest.assert_same_structure(state, new_state)
238 new_state = nest.flatten(new_state)

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py in <lambda>()
746
747 input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t)
--> 748 call_cell = lambda: cell(input_t, state)
749
750 if sequence_length is not None:

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
181 with vs.variable_scope(vs.get_variable_scope(),
182 custom_getter=self._rnn_get_variable):
--> 183 return super(RNNCell, self).__call__(inputs, state)
184
185 def _rnn_get_variable(self, getter, *args, **kwargs):

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
573 if in_graph_mode:
574 self._assert_input_compatibility(inputs)
--> 575 outputs = self.call(inputs, *args, **kwargs)
576
577 if outputs is None:

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
1064 [-1, cell.state_size])
1065 cur_state_pos += cell.state_size
-> 1066 cur_inp, new_state = cell(cur_inp, cur_state)
1067 new_states.append(new_state)
1068

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
181 with vs.variable_scope(vs.get_variable_scope(),
182 custom_getter=self._rnn_get_variable):
--> 183 return super(RNNCell, self).__call__(inputs, state)
184
185 def _rnn_get_variable(self, getter, *args, **kwargs):

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
573 if in_graph_mode:
574 self._assert_input_compatibility(inputs)
--> 575 outputs = self.call(inputs, *args, **kwargs)
576
577 if outputs is None:

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
439 # i = input_gate, j = new_input, f = forget_gate, o = output_gate
440 i, j, f, o = array_ops.split(
--> 441 value=self._linear([inputs, h]), num_or_size_splits=4, axis=1)
442
443 new_c = (

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, args)
1187 res = math_ops.matmul(args[0], self._weights)
1188 else:
-> 1189 res = math_ops.matmul(array_ops.concat(args, 1), self._weights)
1190 if self._build_bias:
1191 res = nn_ops.bias_add(res, self._biases)

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py in matmul(a, b, transpose_a, transpose_b, adjoint_a, adjoint_b, a_is_sparse, b_is_sparse, name)
1889 else:
1890 return gen_math_ops._mat_mul(
-> 1891 a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
1892
1893

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py in _mat_mul(a, b, transpose_a, transpose_b, name)
2435 _, _, _op = _op_def_lib._apply_op_helper(
2436 "MatMul", a=a, b=b, transpose_a=transpose_a, transpose_b=transpose_b,
-> 2437 name=name)
2438 _result = _op.outputs[:]
2439 _inputs_flat = _op.inputs

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
785 op = g.create_op(op_type_name, inputs, output_types, name=scope,
786 input_types=input_types, attrs=attr_protos,
--> 787 op_def=op_def)
788 return output_structure, op_def.is_stateful, op
789

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device)
2956 op_def=op_def)
2957 if compute_shapes:
-> 2958 set_shapes_for_outputs(ret)
2959 self._add_op(ret)
2960 self._record_op_seen_by_control_dependencies(ret)

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in set_shapes_for_outputs(op)
2207 shape_func = _call_cpp_shape_fn_and_require_op
2208
-> 2209 shapes = shape_func(op)
2210 if shapes is None:
2211 raise RuntimeError(

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in call_with_requiring(op)
2157
2158 def call_with_requiring(op):
-> 2159 return call_cpp_shape_fn(op, require_shape_fn=True)
2160
2161 _call_cpp_shape_fn_and_require_op = call_with_requiring

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py in call_cpp_shape_fn(op, require_shape_fn)
625 res = _call_cpp_shape_fn_impl(op, input_tensors_needed,
626 input_tensors_as_shapes_needed,
--> 627 require_shape_fn)
628 if not isinstance(res, dict):
629 # Handles the case where _call_cpp_shape_fn_impl calls unknown_shape(op).

~/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
689 missing_shape_fn = True
690 else:
--> 691 raise ValueError(err.message)
692
693 if missing_shape_fn:

ValueError: Dimensions must be equal, but are 64 and 96 for 'lstm_1/rnn/while/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/MatMul_1' (op: 'MatMul') with input shapes: [128,64], [96,128].

Same problem here.

I didn't see this when I made my pull request, but the error and the solution are noted in the errata for the book.
You can find it at http://www.oreilly.com/catalog/errata.csp?isbn=0636920063698
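
For anyone hitting this: the mismatch comes from [lstm_cell]*num_LSTM_layers, which puts the same BasicLSTMCell object in every layer, so all layers share one weight matrix sized for the first layer's input (embedding dim + hidden size; that would match the 96 in the trace if the embedding is 64-dimensional and hidden_layer_size is 32, since the second layer's input is then only 32 + 32 = 64). A minimal sketch of the fix, in line with the errata, is to construct a fresh cell per layer (assuming the same TF 1.x tf.contrib.rnn API as the code above):

num_LSTM_layers = 2
with tf.variable_scope("lstm"):

    # Build a separate cell per layer so each layer creates its own
    # weights, sized for that layer's actual input dimension.
    cells = [tf.contrib.rnn.BasicLSTMCell(hidden_layer_size, forget_bias=1.0)
             for _ in range(num_LSTM_layers)]
    cell = tf.contrib.rnn.MultiRNNCell(cells=cells, state_is_tuple=True)
    outputs, states = tf.nn.dynamic_rnn(cell, embed,
                                        sequence_length=_seqlens,
                                        dtype=tf.float32)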