CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: too many resources requested for launch
oahzxl opened this issue · 2 comments
Traceback (most recent call last):
File "tools/train.py", line 163, in
main()
File "tools/train.py", line 159, in main
meta=meta)
File "/home/zxl/mm/mmsegmentation/mmseg/apis/train.py", line 116, in train_segmentor
runner.run(data_loaders, cfg.workflow)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/mmcv/runner/iter_based_runner.py", line 131, in run
iter_runner(iter_loaders[i], **kwargs)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/mmcv/runner/iter_based_runner.py", line 66, in train
self.call_hook('after_train_iter')
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/mmcv/runner/base_runner.py", line 308, in call_hook
getattr(hook, fn_name)(self)
File "/home/zxl/mm/mmsegmentation/mmseg/core/evaluation/eval_hooks.py", line 89, in after_train_iter
gpu_collect=self.gpu_collect)
File "/home/zxl/mm/mmsegmentation/mmseg/apis/test.py", line 140, in multi_gpu_test
result = model(return_loss=False, rescale=True, **data)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/nn/parallel/distributed.py", line 619, in forward
output = self.module(*inputs[0], **kwargs[0])
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/mmcv/runner/fp16_utils.py", line 84, in new_func
return old_func(*args, **kwargs)
File "/home/zxl/mm/mmsegmentation/mmseg/models/segmentors/base.py", line 124, in forward
return self.forward_test(img, img_metas, **kwargs)
File "/home/zxl/mm/mmsegmentation/mmseg/models/segmentors/base.py", line 106, in forward_test
return self.simple_test(imgs[0], img_metas[0], **kwargs)
File "/home/zxl/mm/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py", line 265, in simple_test
seg_logit = self.inference(img, img_meta, rescale)
File "/home/zxl/mm/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py", line 250, in inference
seg_logit = self.whole_inference(img, img_meta, rescale)
File "/home/zxl/mm/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py", line 217, in whole_inference
seg_logit = self.encode_decode(img, img_meta)
File "/home/zxl/mm/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py", line 87, in encode_decode
x = self.extract_feat(img)
File "/home/zxl/mm/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py", line 79, in extract_feat
x = self.backbone(img)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/zxl/mm/mmsegmentation/mmseg/models/backbones/rednet.py", line 456, in forward
x = self.stem(x)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/nn/modules/container.py", line 117, in forward
input = module(input)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/zxl/mm/mmsegmentation/mmseg/models/utils/involution_cuda.py", line 281, in forward
out = _involution_cuda(x, weight, stride=self.stride, padding=(self.kernel_size-1)//2)
File "/home/zxl/mm/mmsegmentation/mmseg/models/utils/involution_cuda.py", line 238, in _involution_cuda
out = _involution.apply(input, weight, _pair(stride), _pair(padding), _pair(dilation))
File "/home/zxl/mm/mmsegmentation/mmseg/models/utils/involution_cuda.py", line 174, in forward
stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
File "cupy/cuda/function.pyx", line 182, in cupy.cuda.function.Function.call
File "cupy/cuda/function.pyx", line 164, in cupy.cuda.function._launch
File "cupy_backends/cuda/api/driver.pyx", line 299, in cupy_backends.cuda.api.driver.launchKernel
File "cupy_backends/cuda/api/driver.pyx", line 124, in cupy_backends.cuda.api.driver.check_status
cupy_backends.cuda.api.driver.CUDADriverError: CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: too many resources requested for launch
Traceback (most recent call last):
File "tools/train.py", line 163, in
main()
File "tools/train.py", line 159, in main
meta=meta)
File "/home/zxl/mm/mmsegmentation/mmseg/apis/train.py", line 116, in train_segmentor
runner.run(data_loaders, cfg.workflow)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/mmcv/runner/iter_based_runner.py", line 131, in run
iter_runner(iter_loaders[i], **kwargs)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/mmcv/runner/iter_based_runner.py", line 66, in train
self.call_hook('after_train_iter')
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/mmcv/runner/base_runner.py", line 308, in call_hook
getattr(hook, fn_name)(self)
File "/home/zxl/mm/mmsegmentation/mmseg/core/evaluation/eval_hooks.py", line 89, in after_train_iter
gpu_collect=self.gpu_collect)
File "/home/zxl/mm/mmsegmentation/mmseg/apis/test.py", line 140, in multi_gpu_test
result = model(return_loss=False, rescale=True, **data)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/nn/parallel/distributed.py", line 619, in forward
output = self.module(*inputs[0], **kwargs[0])
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/mmcv/runner/fp16_utils.py", line 84, in new_func
return old_func(*args, **kwargs)
File "/home/zxl/mm/mmsegmentation/mmseg/models/segmentors/base.py", line 124, in forward
return self.forward_test(img, img_metas, **kwargs)
File "/home/zxl/mm/mmsegmentation/mmseg/models/segmentors/base.py", line 106, in forward_test
return self.simple_test(imgs[0], img_metas[0], **kwargs)
File "/home/zxl/mm/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py", line 265, in simple_test
seg_logit = self.inference(img, img_meta, rescale)
File "/home/zxl/mm/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py", line 250, in inference
seg_logit = self.whole_inference(img, img_meta, rescale)
File "/home/zxl/mm/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py", line 217, in whole_inference
seg_logit = self.encode_decode(img, img_meta)
File "/home/zxl/mm/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py", line 87, in encode_decode
x = self.extract_feat(img)
File "/home/zxl/mm/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py", line 79, in extract_feat
x = self.backbone(img)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/zxl/mm/mmsegmentation/mmseg/models/backbones/rednet.py", line 456, in forward
x = self.stem(x)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/nn/modules/container.py", line 117, in forward
input = module(input)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/zxl/mm/mmsegmentation/mmseg/models/utils/involution_cuda.py", line 281, in forward
out = _involution_cuda(x, weight, stride=self.stride, padding=(self.kernel_size-1)//2)
File "/home/zxl/mm/mmsegmentation/mmseg/models/utils/involution_cuda.py", line 238, in _involution_cuda
out = _involution.apply(input, weight, _pair(stride), _pair(padding), _pair(dilation))
File "/home/zxl/mm/mmsegmentation/mmseg/models/utils/involution_cuda.py", line 174, in forward
stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))
File "cupy/cuda/function.pyx", line 182, in cupy.cuda.function.Function.call
File "cupy/cuda/function.pyx", line 164, in cupy.cuda.function._launch
File "cupy_backends/cuda/api/driver.pyx", line 299, in cupy_backends.cuda.api.driver.launchKernel
File "cupy_backends/cuda/api/driver.pyx", line 124, in cupy_backends.cuda.api.driver.check_status
cupy_backends.cuda.api.driver.CUDADriverError: CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: too many resources requested for launch
Traceback (most recent call last):
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"main", mod_spec)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/distributed/launch.py", line 260, in
main()
File "/home/zxl/anaconda3/envs/ms/lib/python3.7/site-packages/torch/distributed/launch.py", line 256, in main
cmd=cmd)
I encountered this problem during segmentation evaluation and detection training. It can be resolved by setting CUDA_NUM_THREADS in involution_cuda.py to 512 or smaller, which reduces the per-block resource request so the kernel launch fits within the GPU's limits.
We tested the released version on NVIDIA V100 GPUs, so the default thread-block resource budget may not be adequate on other GPU models; lowering CUDA_NUM_THREADS as described above is the recommended workaround.