Using ResNet V2 50
adrianAzoitei opened this issue · 0 comments
Hello,
Training the model on my system always throws an OOM error, even with a batch size of 1, so I am trying to change the base architecture to ResNet V2 50, using the pretrained checkpoint from: https://github.com/tensorflow/models/tree/master/research/slim#Pretrained
However, I now get the following errors:
2020-05-15 15:33:17.492065: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1304] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 1343 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1050, pci bus id: 0000:01:00.0, compute capability: 6.1)
INFO:tensorflow:Restoring parameters from ./model\model.ckpt-30358
I0515 15:33:17.509414 5860 saver.py:1284] Restoring parameters from ./model\model.ckpt-30358
2020-05-15 15:33:18.676859: W tensorflow/core/framework/op_kernel.cc:1651] OP_REQUIRES failed at save_restore_v2_ops.cc:184 : Not found: Key resnet_v2_50/block1/unit_1/bottleneck_v2/conv1/BatchNorm/beta not found in checkpoint
Traceback (most recent call last):
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\client\session.py", line 1365, in _do_call
return fn(*args)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\client\session.py", line 1350, in _run_fn
target_list, run_metadata)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\client\session.py", line 1443, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.NotFoundError: Key resnet_v2_50/block1/unit_1/bottleneck_v2/conv1/BatchNorm/beta not found in checkpoint
[[{{node save/RestoreV2}}]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 1290, in restore
{self.saver_def.filename_tensor_name: save_path})
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\client\session.py", line 956, in run
run_metadata_ptr)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\client\session.py", line 1180, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\client\session.py", line 1359, in _do_run
run_metadata)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\client\session.py", line 1384, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.NotFoundError: Key resnet_v2_50/block1/unit_1/bottleneck_v2/conv1/BatchNorm/beta not found in checkpoint
[[node save/RestoreV2 (defined at C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\ops.py:1748) ]]
Original stack trace for 'save/RestoreV2':
File "train.py", line 285, in <module>
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\platform\app.py", line 40, in run
_run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\absl\app.py", line 299, in run
_run_main(main, args)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\absl\app.py", line 250, in _run_main
sys.exit(main(argv))
File "train.py", line 268, in main
steps=1 # For debug
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 370, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1161, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1195, in _train_model_default
saving_listeners)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1490, in _train_with_estimator_spec
log_step_count_steps=log_step_count_steps) as mon_sess:
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 584, in MonitoredTrainingSession
stop_grace_period_secs=stop_grace_period_secs)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 1014, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 725, in __init__
self._sess = _RecoverableSession(self._coordinated_creator)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 1207, in __init__
_WrappedSession.__init__(self, self._create_session())
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 1212, in _create_session
return self._sess_creator.create_session()
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 878, in create_session
self.tf_sess = self._session_creator.create_session()
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 638, in create_session
self._scaffold.finalize()
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 237, in finalize
self._saver.build()
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 840, in build
self._build(self._filename, build_save=True, build_restore=True)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 878, in _build
build_restore=build_restore)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 502, in _build_internal
restore_sequentially, reshape)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 381, in _AddShardedRestoreOps
name="restore_shard"))
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 328, in _AddRestoreOps
restore_sequentially)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 575, in bulk_restore
return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\ops\gen_io_ops.py", line 1696, in restore_v2
name=name)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 794, in _apply_op_helper
op_def=op_def)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\util\deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3357, in create_op
attrs, op_def, compute_device)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3426, in _create_op_internal
op_def=op_def)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1748, in __init__
self._traceback = tf_stack.extract_stack()
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 1300, in restore
names_to_keys = object_graph_key_mapping(save_path)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 1618, in object_graph_key_mapping
object_graph_string = reader.get_tensor(trackable.OBJECT_GRAPH_PROTO_KEY)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\pywrap_tensorflow_internal.py", line 915, in get_tensor
return CheckpointReader_GetTensor(self, compat.as_bytes(tensor_str))
tensorflow.python.framework.errors_impl.NotFoundError: Key _CHECKPOINTABLE_OBJECT_GRAPH not found in checkpoint
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "train.py", line 285, in <module>
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\platform\app.py", line 40, in run
_run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\absl\app.py", line 299, in run
_run_main(main, args)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\absl\app.py", line 250, in _run_main
sys.exit(main(argv))
File "train.py", line 268, in main
steps=1 # For debug
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 370, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1161, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1195, in _train_model_default
saving_listeners)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1490, in _train_with_estimator_spec
log_step_count_steps=log_step_count_steps) as mon_sess:
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 584, in MonitoredTrainingSession
stop_grace_period_secs=stop_grace_period_secs)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 1014, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 725, in __init__
self._sess = _RecoverableSession(self._coordinated_creator)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 1207, in __init__
_WrappedSession.__init__(self, self._create_session())
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 1212, in _create_session
return self._sess_creator.create_session()
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 878, in create_session
self.tf_sess = self._session_creator.create_session()
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 647, in create_session
init_fn=self._scaffold.init_fn)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\session_manager.py", line 290, in prepare_session
config=config)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\session_manager.py", line 220, in _restore_checkpoint
saver.restore(sess, ckpt.model_checkpoint_path)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 1306, in restore
err, "a Variable name or other graph key that is missing")
tensorflow.python.framework.errors_impl.NotFoundError: Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:
Key resnet_v2_50/block1/unit_1/bottleneck_v2/conv1/BatchNorm/beta not found in checkpoint
[[node save/RestoreV2 (defined at C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\ops.py:1748) ]]
Original stack trace for 'save/RestoreV2':
File "train.py", line 285, in <module>
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\platform\app.py", line 40, in run
_run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\absl\app.py", line 299, in run
_run_main(main, args)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\absl\app.py", line 250, in _run_main
sys.exit(main(argv))
File "train.py", line 268, in main
steps=1 # For debug
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 370, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1161, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1195, in _train_model_default
saving_listeners)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1490, in _train_with_estimator_spec
log_step_count_steps=log_step_count_steps) as mon_sess:
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 584, in MonitoredTrainingSession
stop_grace_period_secs=stop_grace_period_secs)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 1014, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 725, in __init__
self._sess = _RecoverableSession(self._coordinated_creator)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 1207, in __init__
_WrappedSession.__init__(self, self._create_session())
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 1212, in _create_session
return self._sess_creator.create_session()
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 878, in create_session
self.tf_sess = self._session_creator.create_session()
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 638, in create_session
self._scaffold.finalize()
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\monitored_session.py", line 237, in finalize
self._saver.build()
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 840, in build
self._build(self._filename, build_save=True, build_restore=True)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 878, in _build
build_restore=build_restore)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 502, in _build_internal
restore_sequentially, reshape)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 381, in _AddShardedRestoreOps
name="restore_shard"))
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 328, in _AddRestoreOps
restore_sequentially)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\training\saver.py", line 575, in bulk_restore
return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\ops\gen_io_ops.py", line 1696, in restore_v2
name=name)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\op_def_library.py", line 794, in _apply_op_helper
op_def=op_def)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\util\deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3357, in create_op
attrs, op_def, compute_device)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\ops.py", line 3426, in _create_op_internal
op_def=op_def)
File "C:\Users\Adrian\AppData\Local\Programs\Python\Python37\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1748, in __init__
self._traceback = tf_stack.extract_stack()
Any ideas?