Course 3 - Week 2 - Lesson 3 model.fit raises ValueError
borundev opened this issue · 6 comments
During model fitting I get `ValueError: Shapes () and (None, 1) must have the same rank`, and while that exception is being handled further errors are raised; I am reproducing the full stack trace below. However, if I peek into the next lesson and add the following before the fit, it works:

```python
BATCH_SIZE = 64
train_data = train_data.padded_batch(BATCH_SIZE, train_data.output_shapes)
test_data = test_data.padded_batch(BATCH_SIZE, test_data.output_shapes)
```

I understand the issue is that the inputs do not all have the same shape, but I do not understand why this is not a problem in the course video, or what the best fix is.
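For what it's worth, the underlying cause appears to be that the dataset yields one example at a time, so each label is a scalar of shape `()`, while the model's sigmoid output has shape `(None, 1)`; `padded_batch` both adds the batch dimension and pads the variable-length sequences to a common length. Below is a minimal sketch with a toy dataset (the data and names are illustrative assumptions, not taken from the lesson):

```python
import tensorflow as tf

# Toy stand-in for the lesson's data (an assumption, for illustration
# only): variable-length token sequences with scalar 0/1 labels.
sequences = [[1, 2, 3], [4, 5], [6, 7, 8, 9]]
labels = [0, 1, 1]
ds = tf.data.Dataset.from_generator(
    lambda: zip(sequences, labels),
    output_types=(tf.int32, tf.int32),
    output_shapes=(tf.TensorShape([None]), tf.TensorShape([])),
)

# Unbatched, each element is ((None,), ()): the label is a scalar,
# which cannot be merged with the model's (None, 1) logits.
print(ds.element_spec)

# padded_batch adds the batch dimension and pads the sequences, so
# labels become (None,), which Keras can match against (None, 1).
padded = ds.padded_batch(2, padded_shapes=([None], []))
print(padded.element_spec)
```
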
```
Epoch 1/10
1/Unknown - 0s 61ms/step
ValueError Traceback (most recent call last)
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/framework/tensor_shape.py in merge_with(self, other)
927 try:
--> 928 self.assert_same_rank(other)
929 new_dims = []
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/framework/tensor_shape.py in assert_same_rank(self, other)
982 raise ValueError("Shapes %s and %s must have the same rank" %
--> 983 (self, other))
984
ValueError: Shapes () and (None, 1) must have the same rank
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/ops/nn_impl.py in sigmoid_cross_entropy_with_logits(_sentinel, labels, logits, name)
167 try:
--> 168 labels.get_shape().merge_with(logits.get_shape())
169 except ValueError:
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/framework/tensor_shape.py in merge_with(self, other)
933 except ValueError:
--> 934 raise ValueError("Shapes %s and %s are not compatible" % (self, other))
935
ValueError: Shapes () and (None, 1) are not compatible
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input> in <module>
3 model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
4
----> 5 history = model.fit(train_data, epochs=num_epochs, validation_data=test_data)
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
732 max_queue_size=max_queue_size,
733 workers=workers,
--> 734 use_multiprocessing=use_multiprocessing)
735
736 def evaluate(self,
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
322 mode=ModeKeys.TRAIN,
323 training_context=training_context,
--> 324 total_epochs=epochs)
325 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
326
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
121 step=step, mode=mode, size=current_batch_size) as batch_logs:
122 try:
--> 123 batch_outs = execution_function(iterator)
124 except (StopIteration, errors.OutOfRangeError):
125 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
84 # numpy translates Tensors to values in Eager mode.
85 return nest.map_structure(_non_none_constant_value,
---> 86 distributed_function(input_fn))
87
88 return execution_function
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
425 # This is the first call of __call__, so we have to initialize.
426 initializer_map = object_identity.ObjectIdentityDictionary()
--> 427 self._initialize(args, kwds, add_initializers_to=initializer_map)
428 if self._created_variables:
429 try:
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
368 self._concrete_stateful_fn = (
369 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 370 *args, **kwds))
371
372 def invalid_creator_scope(*unused_args, **unused_kwds):
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
1845 if self.input_signature:
1846 args, kwargs = None, None
-> 1847 graph_function, _, _ = self._maybe_define_function(args, kwargs)
1848 return graph_function
1849
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _maybe_define_function(self, args, kwargs)
2145 graph_function = self._function_cache.primary.get(cache_key, None)
2146 if graph_function is None:
-> 2147 graph_function = self._create_graph_function(args, kwargs)
2148 self._function_cache.primary[cache_key] = graph_function
2149 return graph_function, args, kwargs
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2036 arg_names=arg_names,
2037 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2038 capture_by_value=self._capture_by_value),
2039 self._function_attributes,
2040 # Tell the ConcreteFunction to clean up its graph once it goes out of
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
913 converted_func)
914
--> 915 func_outputs = python_func(*func_args, **func_kwargs)
916
917 # invariant: func_outputs contains only Tensors, CompositeTensors,
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in wrapped_fn(*args, **kwds)
318 # wrapped allows AutoGraph to swap in a converted function. We give
319 # the function a weak reference to itself to avoid a reference cycle.
--> 320 return weak_wrapped_fn().wrapped(*args, **kwds)
321 weak_wrapped_fn = weakref.ref(wrapped_fn)
322
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in distributed_function(input_iterator)
71 strategy = distribution_strategy_context.get_strategy()
72 outputs = strategy.experimental_run_v2(
---> 73 per_replica_function, args=(model, x, y, sample_weights))
74 # Out of PerReplica outputs reduce or pick values to return.
75 all_outputs = dist_utils.unwrap_output_dict(
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/distribute/distribute_lib.py in experimental_run_v2(self, fn, args, kwargs)
758 fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(),
759 convert_by_default=False)
--> 760 return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
761
762 def reduce(self, reduce_op, value, axis):
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/distribute/distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
1785 kwargs = {}
1786 with self._container_strategy().scope():
-> 1787 return self._call_for_each_replica(fn, args, kwargs)
1788
1789 def _call_for_each_replica(self, fn, args, kwargs):
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/distribute/distribute_lib.py in _call_for_each_replica(self, fn, args, kwargs)
2130 self._container_strategy(),
2131 replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)):
-> 2132 return fn(*args, **kwargs)
2133
2134 def _reduce_to(self, reduce_op, value, destinations):
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
290 def wrapper(*args, **kwargs):
291 with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.DISABLED):
--> 292 return func(*args, **kwargs)
293
294 if inspect.isfunction(func) or inspect.ismethod(func):
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics)
262 y,
263 sample_weights=sample_weights,
--> 264 output_loss_metrics=model._output_loss_metrics)
265
266 if reset_metrics:
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
309 sample_weights=sample_weights,
310 training=True,
--> 311 output_loss_metrics=output_loss_metrics))
312 if not isinstance(outs, list):
313 outs = [outs]
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
250 output_loss_metrics=output_loss_metrics,
251 sample_weights=sample_weights,
--> 252 training=training))
253 if total_loss is None:
254 raise ValueError('The model cannot be run '
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
164
165 if hasattr(loss_fn, 'reduction'):
--> 166 per_sample_losses = loss_fn.call(targets[i], outs[i])
167 weighted_losses = losses_utils.compute_weighted_loss(
168 per_sample_losses,
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/losses.py in call(self, y_true, y_pred)
214 Loss values per sample.
215 """
--> 216 return self.fn(y_true, y_pred, **self._fn_kwargs)
217
218 def get_config(self):
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/losses.py in binary_crossentropy(y_true, y_pred, from_logits, label_smoothing)
987 _smooth_labels, lambda: y_true)
988 return K.mean(
--> 989 K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1)
990
991
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/backend.py in binary_crossentropy(target, output, from_logits)
4471 assert len(output.op.inputs) == 1
4472 output = output.op.inputs[0]
-> 4473 return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
4474
4475
~/miniconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/ops/nn_impl.py in sigmoid_cross_entropy_with_logits(_sentinel, labels, logits, name)
169 except ValueError:
170 raise ValueError("logits and labels must have the same shape (%s vs %s)" %
--> 171 (logits.get_shape(), labels.get_shape()))
172
173 # The logistic loss formula from above is
ValueError: logits and labels must have the same shape ((None, 1) vs ())
```
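For reference, here is a minimal sketch of the fixed pipeline, assuming the lesson loads the subword-tokenized IMDB dataset (`imdb_reviews/subwords8k`) via `tensorflow_datasets` and uses an embedding-plus-pooling classifier; these details are assumptions based on the course, not taken from this thread. On TF 2.2+ the `padded_shapes` argument can be omitted and is inferred automatically.

```python
import tensorflow as tf
import tensorflow_datasets as tfds

# Assumption: the lesson's data and model look roughly like this.
dataset, info = tfds.load('imdb_reviews/subwords8k',
                          with_info=True, as_supervised=True)
train_data, test_data = dataset['train'], dataset['test']

BATCH_SIZE = 64
# Pad the variable-length reviews within each batch; labels are scalars.
train_data = train_data.shuffle(10000).padded_batch(
    BATCH_SIZE, padded_shapes=([None], []))
test_data = test_data.padded_batch(BATCH_SIZE, padded_shapes=([None], []))

encoder = info.features['text'].encoder
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(encoder.vocab_size, 64),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
history = model.fit(train_data, epochs=10, validation_data=test_data)
```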
Hi, how about posting the whole Google Colab notebook, or downloading it as an IPython notebook and uploading it here?
Actually, the notebook is first uploaded here and then accessed via Google Colab...
@imneonizer
Sorry, I thought borundev was talking about Course 4 and had edited it on his or her own.
I observed that adding these three lines:

```python
BATCH_SIZE = 64
train_data = train_data.padded_batch(BATCH_SIZE, train_data.output_shapes)
test_data = test_data.padded_batch(BATCH_SIZE, test_data.output_shapes)
```

solves the issue, and it also gave me about 87% validation accuracy after training for 10 epochs, which differs from the learning curve shown in the lecture video.
Any clues why this is happening?
I am also seeing this issue.
> I observed that adding these three lines:
>
> ```python
> BATCH_SIZE = 64
> train_data = train_data.padded_batch(BATCH_SIZE, train_data.output_shapes)
> test_data = test_data.padded_batch(BATCH_SIZE, test_data.output_shapes)
> ```
>
> solves the issue, and it also gave me about 87% validation accuracy after training for 10 epochs, which differs from the learning curve shown in the lecture video.
> Any clues why this is happening?
Same problem here. I added these three lines too, and after training for 10 epochs I get the following results:

```
loss: 0.1660 - accuracy: 0.9414 - val_loss: 0.3244 - val_accuracy: 0.8788
```
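For comparing against the learning curve in the lecture, plotting the fit history is straightforward. A minimal sketch, assuming the `history` object returned by `model.fit` above and the TF 2.x metric key `'accuracy'`:

```python
import matplotlib.pyplot as plt

# Plot training vs. validation accuracy per epoch from the fit history.
plt.plot(history.history['accuracy'], label='train accuracy')
plt.plot(history.history['val_accuracy'], label='val accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()
```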