CUDA error: no kernel image
vinven7 opened this issue · 2 comments
I followed all the instructions in the repo to install it from the source.
Torch version - 1.10.2
torchtext - 0.11.2
cuda - 11.1
But when I run https://github.com/graph4ai/graph4nlp_demo/blob/main/AAAI2022_demo/semantic_parsing.ipynb
I get the error:
RuntimeError: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
See the full error here:
`[ Using CUDA ]
/home/synthesisproject/anaconda3/envs/g4nlp/lib/python3.8/site-packages/graph4nlp_cu111-0.5.5-py3.8.egg/graph4nlp/pytorch/modules/graph_embedding_learning/gat.py:259: UserWarning: The residual option must be False when num_heads > 1
warnings.warn("The residual option must be False when num_heads > 1")
/home/synthesisproject/anaconda3/envs/g4nlp/lib/python3.8/site-packages/torch-1.10.2-py3.8-linux-x86_64.egg/torch/cuda/__init__.py:143: UserWarning:
NVIDIA RTX A5000 with CUDA capability sm_86 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_70.
If you want to use the NVIDIA RTX A5000 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
warnings.warn(incompatible_device_warn.format(device_name, capability, " ".join(arch_list), device_name))
RuntimeError Traceback (most recent call last)
in
1 # run the model
----> 2 runner = Jobs(opt)
3 max_score = runner.train()
4 print("Train finish, best val score: {:.3f}".format(max_score))
5 test_score = runner.translate()
in __init__(self, opt)
7 self._build_device(self.opt)
8 self._build_dataloader()
----> 9 self._build_model()
10 self._build_optimizer()
11 self._build_evaluation()
in _build_model(self)
75
76 def _build_model(self):
---> 77 self.model = Graph2Seq.from_args(self.opt, self.vocab).to(self.device)
78
79 def _build_optimizer(self):
~/anaconda3/envs/g4nlp/lib/python3.8/site-packages/torch-1.10.2-py3.8-linux-x86_64.egg/torch/nn/modules/module.py in to(self, *args, **kwargs)
897 return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)
898
--> 899 return self._apply(convert)
900
901 def register_backward_hook(
~/anaconda3/envs/g4nlp/lib/python3.8/site-packages/torch-1.10.2-py3.8-linux-x86_64.egg/torch/nn/modules/module.py in _apply(self, fn)
568 def _apply(self, fn):
569 for module in self.children():
--> 570 module._apply(fn)
571
572 def compute_should_use_set_data(tensor, tensor_applied):
~/anaconda3/envs/g4nlp/lib/python3.8/site-packages/torch-1.10.2-py3.8-linux-x86_64.egg/torch/nn/modules/module.py in _apply(self, fn)
568 def _apply(self, fn):
569 for module in self.children():
--> 570 module._apply(fn)
571
572 def compute_should_use_set_data(tensor, tensor_applied):
~/anaconda3/envs/g4nlp/lib/python3.8/site-packages/torch-1.10.2-py3.8-linux-x86_64.egg/torch/nn/modules/module.py in _apply(self, fn)
568 def _apply(self, fn):
569 for module in self.children():
--> 570 module._apply(fn)
571
572 def compute_should_use_set_data(tensor, tensor_applied):
~/anaconda3/envs/g4nlp/lib/python3.8/site-packages/torch-1.10.2-py3.8-linux-x86_64.egg/torch/nn/modules/module.py in _apply(self, fn)
568 def _apply(self, fn):
569 for module in self.children():
--> 570 module._apply(fn)
571
572 def compute_should_use_set_data(tensor, tensor_applied):
~/anaconda3/envs/g4nlp/lib/python3.8/site-packages/torch-1.10.2-py3.8-linux-x86_64.egg/torch/nn/modules/rnn.py in _apply(self, fn)
187 self._flat_weights = [(lambda wn: getattr(self, wn) if hasattr(self, wn) else None)(wn) for wn in self._flat_weights_names]
188 # Flattens params (on CUDA)
--> 189 self.flatten_parameters()
190
191 return ret
~/anaconda3/envs/g4nlp/lib/python3.8/site-packages/torch-1.10.2-py3.8-linux-x86_64.egg/torch/nn/modules/rnn.py in flatten_parameters(self)
173 if self.proj_size > 0:
174 num_weights += 1
--> 175 torch._cudnn_rnn_flatten_weight(
176 self._flat_weights, num_weights,
177 self.input_size, rnn.get_cudnn_mode(self.mode),
RuntimeError: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.`
Please make sure that the CUDA version and PyTorch build in your environment are consistent.
A recommended setup is to use conda for managing them:
conda install cudatoolkit=11.1 -c conda-forge
conda install pytorch -c pytorch
Please try this and let us know whether the problem persists. Thanks.