RuntimeError: CUDA error: no kernel image is available for execution on the device
Manjuphoenix opened this issue · 1 comment
The following error occurred while reproducing the code on a machine with these specifications:
Ubuntu: 20.04
GPU: Nvidia A6000
python version: 3.8.0
pip list:
- pip:
- absl-py==1.4.0
- antlr4-python3-runtime==4.9.3
- appdirs==1.4.4
- black==22.3.0
- cachetools==5.3.1
- certifi==2023.5.7
- chardet==4.0.0
- charset-normalizer==3.1.0
- cityscapesscripts==2.2.2
- click==8.1.3
- cloudpickle==2.2.1
- coloredlogs==15.0.1
- contourpy==1.0.7
- cycler==0.11.0
- detectron2==0.6
- fairscale==0.4.13
- filelock==3.12.0
- fonttools==4.39.4
- future==0.18.3
- fvcore==0.1.5.post20221221
- google-auth==2.19.1
- google-auth-oauthlib==1.0.0
- grpcio==1.54.2
- humanfriendly==10.0
- hydra-core==1.3.2
- idna==3.4
- importlib-metadata==6.6.0
- importlib-resources==5.12.0
- iopath==0.1.9
- kiwisolver==1.4.4
- markdown==3.4.3
- markupsafe==2.1.3
- matplotlib==3.7.1
- mypy-extensions==1.0.0
- numpy==1.21.0
- oauthlib==3.2.2
- omegaconf==2.3.0
- packaging==23.1
- pathspec==0.11.1
- pillow==9.5.0
- portalocker==2.7.0
- protobuf==3.20.3
- pyasn1==0.5.0
- pyasn1-modules==0.3.0
- pycocotools==2.0.6
- pydot==1.4.2
- pyparsing==3.0.9
- pyquaternion==0.9.9
- python-dateutil==2.8.2
- pyyaml==6.0
- regex==2023.6.3
- requests==2.31.0
- requests-oauthlib==1.3.1
- rsa==4.9
- setuptools==67.8.0
- shapely==2.0.1
- six==1.16.0
- tabulate==0.9.0
- tensorboard==2.13.0
- tensorboard-data-server==0.7.0
- termcolor==2.3.0
- toml==0.10.2
- torch==1.8.0+cu101
- torchvision==0.9.0+cu101
- tqdm==4.65.0
- typing==3.7.4.3
- urllib3==1.26.16
- werkzeug==2.3.6
- wheel==0.40.0
- yacs==0.1.8
- zipp==3.15.0
nvcc --version:
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Mon_Oct_24_19:12:58_PDT_2022
Cuda compilation tools, release 12.0, V12.0.76
Build cuda_12.0.r12.0/compiler.31968024_0
I left the config for the Cityscapes → Foggy Cityscapes setup mostly unchanged.
Error message:
[06/13 22:42:17 d2.engine.defaults]: Model:
DAobjTwoStagePseudoLabGeneralizedRCNN(
(backbone): vgg_backbone(
(vgg0): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(vgg1): Sequential(
(0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(vgg2): Sequential(
(0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): ReLU(inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(vgg3): Sequential(
(0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): ReLU(inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(vgg4): Sequential(
(0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): ReLU(inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
)
(proposal_generator): PseudoLabRPN(
(rpn_head): StandardRPNHead(
(conv): Conv2d(
512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
(activation): ReLU()
)
(objectness_logits): Conv2d(512, 15, kernel_size=(1, 1), stride=(1, 1))
(anchor_deltas): Conv2d(512, 60, kernel_size=(1, 1), stride=(1, 1))
)
(anchor_generator): DefaultAnchorGenerator(
(cell_anchors): BufferList()
)
)
(roi_heads): StandardROIHeadsPseudoLab(
(box_pooler): ROIPooler(
(level_poolers): ModuleList(
(0): ROIAlign(output_size=(7, 7), spatial_scale=0.03125, sampling_ratio=0, aligned=True)
)
)
(box_head): FastRCNNConvFCHead(
(flatten): Flatten(start_dim=1, end_dim=-1)
(fc1): Linear(in_features=25088, out_features=1024, bias=True)
(fc_relu1): ReLU()
(fc2): Linear(in_features=1024, out_features=1024, bias=True)
(fc_relu2): ReLU()
)
(box_predictor): FastRCNNOutputLayers(
(cls_score): Linear(in_features=1024, out_features=9, bias=True)
(bbox_pred): Linear(in_features=1024, out_features=32, bias=True)
)
)
(D_img): FCDiscriminator_img(
(conv1): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv2): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(classifier): Conv2d(128, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(leaky_relu): LeakyReLU(negative_slope=0.2, inplace=True)
)
)
[06/13 22:42:17 fvcore.common.checkpoint]: No checkpoint found. Initializing model from scratch
Exception during training:
Traceback (most recent call last):
File "/four_tb/manjunath/adaptive_teacher/adapteacher/engine/trainer.py", line 404, in train_loop
self.run_step_full_semisup()
File "/four_tb/manjunath/adaptive_teacher/adapteacher/engine/trainer.py", line 512, in run_step_full_semisup
record_dict, _, _, _ = self.model(
File "/home/user/anaconda3/envs/fbadapt/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/four_tb/manjunath/adaptive_teacher/adapteacher/modeling/meta_arch/rcnn.py", line 207, in forward
images = self.preprocess_image(batched_inputs)
File "/home/user/anaconda3/envs/fbadapt/lib/python3.8/site-packages/detectron2/modeling/meta_arch/rcnn.py", line 225, in preprocess_image
images = [(x - self.pixel_mean) / self.pixel_std for x in images]
File "/home/user/anaconda3/envs/fbadapt/lib/python3.8/site-packages/detectron2/modeling/meta_arch/rcnn.py", line 225, in <listcomp>
images = [(x - self.pixel_mean) / self.pixel_std for x in images]
RuntimeError: CUDA error: no kernel image is available for execution on the device
[06/13 22:42:18 d2.engine.hooks]: Total training time: 0:00:00 (0:00:00 on hooks)
[06/13 22:42:18 d2.utils.events]: iter: 0 lr: N/A max_mem: 368M
Traceback (most recent call last):
File "train_net.py", line 73, in <module>
launch(
File "/home/user/anaconda3/envs/fbadapt/lib/python3.8/site-packages/detectron2/engine/launch.py", line 82, in launch
main_func(*args)
File "train_net.py", line 66, in main
return trainer.train()
File "/four_tb/manjunath/adaptive_teacher/adapteacher/engine/trainer.py", line 386, in train
self.train_loop(self.start_iter, self.max_iter)
File "/four_tb/manjunath/adaptive_teacher/adapteacher/engine/trainer.py", line 404, in train_loop
self.run_step_full_semisup()
File "/four_tb/manjunath/adaptive_teacher/adapteacher/engine/trainer.py", line 512, in run_step_full_semisup
record_dict, _, _, _ = self.model(
File "/home/user/anaconda3/envs/fbadapt/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/four_tb/manjunath/adaptive_teacher/adapteacher/modeling/meta_arch/rcnn.py", line 207, in forward
images = self.preprocess_image(batched_inputs)
File "/home/user/anaconda3/envs/fbadapt/lib/python3.8/site-packages/detectron2/modeling/meta_arch/rcnn.py", line 225, in preprocess_image
images = [(x - self.pixel_mean) / self.pixel_std for x in images]
File "/home/user/anaconda3/envs/fbadapt/lib/python3.8/site-packages/detectron2/modeling/meta_arch/rcnn.py", line 225, in <listcomp>
images = [(x - self.pixel_mean) / self.pixel_std for x in images]
RuntimeError: CUDA error: no kernel image is available for execution on the device
I thought it was a CUDA version issue, but running the same code in a Docker container (CUDA version 10.1) on the same machine gave the same error.
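This error is usually caused by an incompatibility between the CUDA version used to compile PyTorch and your GPU. The A6000 is an Ampere-architecture GPU (compute capability 8.6), and PyTorch builds compiled against CUDA 10.1 (such as torch==1.8.0+cu101) do not include kernels for that architecture, so the A6000 may not support torch with cu10.1. Note that the CUDA version bundled with the PyTorch wheel is what matters here, not the system `nvcc` version. Try installing a torch version built with cu11.x (CUDA 11.1 or newer, e.g. a +cu111 build).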