vamp._loss_VAMP_sym error
euhruska opened this issue · 4 comments
euhruska commented
When using vamp._loss_VAMP_sym, in the following call:
hist = model.fit_generator(generator = vamp_data_loader.build_generator_on_source(train_data_source,
batch_size,
tau,
output_size),
steps_per_epoch = steps_per_train_epoch,
epochs = nb_epoch,
verbose = 0,
validation_data = vamp_data_loader.build_generator_on_source(valid_data_source,
batch_size,
tau,
output_size),
validation_steps = steps_per_valid_epoch,
shuffle = True
)
I get a fatal error:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-54-480bb4a81ec5> in <module>()
28 output_size),
29 validation_steps = steps_per_valid_epoch,
---> 30 shuffle = True
31 )
32
/scratch1/eh22/conda/envs/extasy13/lib/python3.6/site-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name +
90 '` call to the Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
/scratch1/eh22/conda/envs/extasy13/lib/python3.6/site-packages/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1413 use_multiprocessing=use_multiprocessing,
1414 shuffle=shuffle,
-> 1415 initial_epoch=initial_epoch)
1416
1417 @interfaces.legacy_generator_methods_support
/scratch1/eh22/conda/envs/extasy13/lib/python3.6/site-packages/keras/engine/training_generator.py in fit_generator(model, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
228 val_enqueuer_gen,
229 validation_steps,
--> 230 workers=0)
231 else:
232 # No need for try/except because
/scratch1/eh22/conda/envs/extasy13/lib/python3.6/site-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name +
90 '` call to the Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
/scratch1/eh22/conda/envs/extasy13/lib/python3.6/site-packages/keras/engine/training.py in evaluate_generator(self, generator, steps, max_queue_size, workers, use_multiprocessing, verbose)
1467 workers=workers,
1468 use_multiprocessing=use_multiprocessing,
-> 1469 verbose=verbose)
1470
1471 @interfaces.legacy_generator_methods_support
/scratch1/eh22/conda/envs/extasy13/lib/python3.6/site-packages/keras/engine/training_generator.py in evaluate_generator(model, generator, steps, max_queue_size, workers, use_multiprocessing, verbose)
341 'or (x, y). Found: ' +
342 str(generator_output))
--> 343 outs = model.test_on_batch(x, y, sample_weight=sample_weight)
344 outs = to_list(outs)
345 outs_per_batch.append(outs)
/scratch1/eh22/conda/envs/extasy13/lib/python3.6/site-packages/keras/engine/training.py in test_on_batch(self, x, y, sample_weight)
1252 ins = x + y + sample_weights
1253 self._make_test_function()
-> 1254 outputs = self.test_function(ins)
1255 return unpack_singleton(outputs)
1256
/scratch1/eh22/conda/envs/extasy13/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
2664 return self._legacy_call(inputs)
2665
-> 2666 return self._call(inputs)
2667 else:
2668 if py_any(is_tensor(x) for x in inputs):
/scratch1/eh22/conda/envs/extasy13/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py in _call(self, inputs)
2634 symbol_vals,
2635 session)
-> 2636 fetched = self._callable_fn(*array_vals)
2637 return fetched[:len(self.outputs)]
2638
/scratch1/eh22/conda/envs/extasy13/lib/python3.6/site-packages/tensorflow/python/client/session.py in __call__(self, *args, **kwargs)
1380 ret = tf_session.TF_SessionRunCallable(
1381 self._session._session, self._handle, args, status,
-> 1382 run_metadata_ptr)
1383 if run_metadata:
1384 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/scratch1/eh22/conda/envs/extasy13/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
517 None, None,
518 compat.as_text(c_api.TF_Message(self.status.status)),
--> 519 c_api.TF_GetCode(self.status.status))
520 # Delete the underlying status object from memory otherwise it stays alive
521 # as there is a reference to status from this from the traceback due to
InvalidArgumentError: Got info = 2 for batch index 0, expected info = 0. Debug_info = heevd
[[Node: metrics_4/metric_VAMP/SelfAdjointEigV2 = SelfAdjointEigV2[T=DT_FLOAT, compute_v=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](loss_4/concatenate_1_loss/mul_3)]]
[[Node: loss_4/mul/_603 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_450_loss_4/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
pasqualil commented
I don't get this error. Which TensorFlow version are you using? This issue sometimes appeared on TF 1.7-1.8.
euhruska commented
tf version 1.10.0
amardt commented
This works for us with this version of TensorFlow, so we are unable to reproduce the error. Are you working on a GPU? (Perhaps the CUDA version also plays a role...)
Can you tell us your CUDA version and, if applicable, which cuDNN version you are using?
Thx
Andreas
clonker commented
It might also be that it failed to compute the SVD for the first batch, so there could be a problem with your data. The documentation of heevd (which is an eigenvalue/eigenvector solver) says:
if INFO = i and JOBZ = 'N', then the algorithm failed
to converge; i off-diagonal elements of an intermediate
tridiagonal form did not converge to zero;
if INFO = i and JOBZ = 'V', then the algorithm failed
to compute an eigenvalue while working on the submatrix
lying in rows and columns INFO/(N+1) through
mod(INFO,N+1).