ResourceExhaustedError when training with GPU
hi6pii opened this issue · 3 comments
Environment:
- OS Platform and Distribution: Linux Ubuntu 16.04
- TensorFlow installed from: pip
- TensorFlow version: tensorflow-gpu 1.4.0
- Python version: 3.6.4
- CUDA version: 8.0.61
- cuDNN version: 6.0.21
- GPU model and memory: two or eight Tesla K40c (11 GB memory each)
I built a Docker container and tested the training script on GPU and CPU as below:
python isl/launch.py --alsologtostderr --base_directory . --mode TRAIN --metric LOSS --master "" --restore_directory checkpoints2 --read_pngs --dataset_train_directory data_sample/condition_e_sample_B2 --preprocess_batch_size 2 --preprocess_shuffle_batch_num_threads 2 --preprocess_batch_capacity 8 --loss_crop_size 260
When I used the CPU, training ran with no issues.
When I used the GPU, training started but stopped with:
Traceback (most recent call last):
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1323, in _do_call
return fn(*args)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1302, in _run_fn
status, run_metadata)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 473, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[2,218,12,12]
[[Node: setup_losses/get_input_target_and_predicted/model/concordance_core/downscale_4_3/expand_rv2/Conv/Conv2D = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="VALID", strides=[1, 2, 2, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](setup_losses/get_input_target_and_predicted/model/concordance_core/downscale_4_3/expand_rv2/concat, downscale_4_3/expand_rv2/Conv/weights/read)]]
[[Node: Adam/update/_18502 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_40035_Adam/update", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/training/supervisor.py", line 954, in managed_session
yield sess
File "/opt/conda/lib/python3.6/site-packages/tensorflow/contrib/slim/python/slim/learning.py", line 763, in train
sess, train_op, global_step, train_step_kwargs)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/contrib/slim/python/slim/learning.py", line 487, in train_step
run_metadata=run_metadata)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 889, in run
run_metadata_ptr)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1120, in _run
feed_dict_tensor, options, run_metadata)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1317, in _do_run
options, run_metadata)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1336, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[2,218,12,12]
[[Node: setup_losses/get_input_target_and_predicted/model/concordance_core/downscale_4_3/expand_rv2/Conv/Conv2D = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="VALID", strides=[1, 2, 2, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](setup_losses/get_input_target_and_predicted/model/concordance_core/downscale_4_3/expand_rv2/concat, downscale_4_3/expand_rv2/Conv/weights/read)]]
[[Node: Adam/update/_18502 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_40035_Adam/update", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Caused by op 'setup_losses/get_input_target_and_predicted/model/concordance_core/downscale_4_3/expand_rv2/Conv/Conv2D', defined at:
File "isl/launch.py", line 624, in <module>
app.run()
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/platform/app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "isl/launch.py", line 606, in main
train(gitapp)
File "isl/launch.py", line 426, in train
total_loss_op, _, _ = total_loss(gitapp)
File "isl/launch.py", line 402, in total_loss
input_loss_lts, target_loss_lts = controller.setup_losses(gitapp)
File "/tmp/isl/controller.py", line 399, in setup_losses
predict_target_lt) = get_input_target_and_predicted(gitapp)
File "/tmp/isl/controller.py", line 263, in get_input_target_and_predicted
gitapp.core_model, gitapp.add_head, pp, gitapp.is_train, input_lt)
File "/tmp/isl/tensorcheck.py", line 179, in new_f
return f(*new_args, **kwds)
File "/tmp/isl/controller.py", line 173, in model
core_model_op = core_model(is_train=is_train, input_op=input_op, name=name)
File "/tmp/isl/tensorcheck.py", line 179, in new_f
return f(*new_args, **kwds)
File "/tmp/isl/models/concordance.py", line 155, in core
scale_op = foveate(lls[4][8], scale_op, 'downscale_4_3')
File "/tmp/isl/models/concordance.py", line 129, in foveate
op, name)
File "/tmp/isl/models/model_util.py", line 341, in learned_fovea
name=name)
File "/tmp/isl/tensorcheck.py", line 179, in new_f
return f(*new_args, **kwds)
File "/tmp/isl/models/model_util.py", line 270, in module
name='expand_rv2')
File "/tmp/isl/tensorcheck.py", line 179, in new_f
return f(*new_args, **kwds)
File "/tmp/isl/models/model_util.py", line 103, in residual_v2_conv
conv_op = slim.conv2d()
File "/opt/conda/lib/python3.6/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1033, in convolution
outputs = layer.apply(inputs)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 671, in apply
return self.__call__(inputs, *args, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 575, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/layers/convolutional.py", line 167, in call
outputs = self._convolution_op(inputs, self.kernel)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 835, in __call__
return self.conv_op(inp, filter)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 499, in __call__
return self.call(inp, filter)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 187, in __call__
name=self.name)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 631, in conv2d
data_format=data_format, name=name)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
op_def=op_def)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[2,218,12,12]
[[Node: setup_losses/get_input_target_and_predicted/model/concordance_core/downscale_4_3/expand_rv2/Conv/Conv2D = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="VALID", strides=[1, 2, 2, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](setup_losses/get_input_target_and_predicted/model/concordance_core/downscale_4_3/expand_rv2/concat, downscale_4_3/expand_rv2/Conv/weights/read)]]
[[Node: Adam/update/_18502 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_40035_Adam/update", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "isl/launch.py", line 624, in <module>
app.run()
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/platform/app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "isl/launch.py", line 606, in main
train(gitapp)
File "isl/launch.py", line 482, in train
saver=tf.train.Saver(keep_checkpoint_every_n_hours=2.0),
File "/opt/conda/lib/python3.6/site-packages/tensorflow/contrib/slim/python/slim/learning.py", line 775, in train
sv.stop(threads, close_summary_writer=True)
File "/opt/conda/lib/python3.6/contextlib.py", line 99, in __exit__
self.gen.throw(type, value, traceback)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/training/supervisor.py", line 964, in managed_session
self.stop(close_summary_writer=close_summary_writer)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/training/supervisor.py", line 792, in stop
stop_grace_period_secs=self._stop_grace_secs)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/training/coordinator.py", line 389, in join
six.reraise(*self._exc_info_to_raise)
File "/opt/conda/lib/python3.6/site-packages/six.py", line 693, in reraise
raise value
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/training/queue_runner_impl.py", line 238, in _run
enqueue_callable()
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1231, in _single_operation_run
target_list_as_strings, status, None)
File "/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 473, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InternalError: Dst tensor is not initialized.
[[Node: setup_losses/get_input_target_and_predicted/provide_preprocessed_data/cropped_input_and_target/load_image_set_as_tensor/PyFunc_86/_17999 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_723_setup_losses/get_input_target_and_predicted/provide_preprocessed_data/cropped_input_and_target/load_image_set_as_tensor/PyFunc_86", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
[[Node: setup_losses/get_input_target_and_predicted/provide_preprocessed_data/setup_losses/get_input_target_and_predicted/provide_preprocessed_data/target_1/_18083 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_1319_setup_losses/get_input_target_and_predicted/provide_preprocessed_data/setup_losses/get_input_target_and_predicted/provide_preprocessed_data/target_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
I think you simply don't have enough RAM on your GPU: you got the error "OOM when allocating tensor with shape...".
You can try reducing base_depth to reduce the model size. The default is 400, but you could try 100.
Oh, and of course you can also decrease your batch size. :)
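For example, a lower-memory variant of the training command above might look like the following. This is only a sketch: it assumes launch.py exposes the base_depth value as a --base_depth flag (which should be verified against the script); the remaining flags are unchanged from the original command.
python isl/launch.py --alsologtostderr --base_directory . --mode TRAIN --metric LOSS --master "" --restore_directory checkpoints2 --read_pngs --dataset_train_directory data_sample/condition_e_sample_B2 --base_depth 100 --preprocess_batch_size 2 --preprocess_shuffle_batch_num_threads 2 --preprocess_batch_capacity 8 --loss_crop_size 260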
Thank you for your reply. I'd like to use the CPU environment for now.
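As a general note (standard CUDA/TensorFlow behavior, not specific to this repository), the script can be forced onto the CPU by hiding the GPUs with the CUDA_VISIBLE_DEVICES environment variable, for example:
CUDA_VISIBLE_DEVICES="" python isl/launch.py --alsologtostderr --base_directory . --mode TRAIN --metric LOSS --master "" --restore_directory checkpoints2 --read_pngs --dataset_train_directory data_sample/condition_e_sample_B2 --preprocess_batch_size 2 --preprocess_shuffle_batch_num_threads 2 --preprocess_batch_capacity 8 --loss_crop_size 260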