CUDA Error: no kernel image is available for execution on the device
tuchuanbin opened this issue
A CUDA runtime error occurs whenever I ask a question in the web UI. My environment: CentOS 7.9, a single P100 GPU, NVIDIA-SMI 460.91.03, Driver Version: 460.91.03, CUDA Version: 11.2, Python 3.10.9. I tried both Torch 2.0.1 and 1.8.0 and got the same error with each.
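For reference, a minimal diagnostic sketch (not specific to this repo) that prints the GPU's compute capability and the kernel images bundled in the installed torch wheel; "no kernel image is available" generally means nothing compiled for this architecture (the P100 is sm_60) could be loaded:

```python
import torch

# Minimal diagnostic sketch: "no kernel image is available" usually means
# no compiled binary (cubin/PTX) matches this GPU's compute capability.
# The Tesla P100 is compute capability 6.0 (sm_60).
print("torch version:        ", torch.__version__)
print("torch built with CUDA:", torch.version.cuda)
print("device:               ", torch.cuda.get_device_name(0))
print("compute capability:   ", torch.cuda.get_device_capability(0))  # expect (6, 0) on a P100
print("arch list in torch:   ", torch.cuda.get_arch_list())
```

The full traceback from the web UI: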
```
2023-05-21 23:08:31,755 [WARNING] [logging.py:295] The dtype of attention mask (torch.int64) is not bool
Traceback (most recent call last):
  File "/opt/Python/lib/python3.10/site-packages/gradio/routes.py", line 422, in run_predict
    output = await app.get_blocks().process_api(
  File "/opt/Python/lib/python3.10/site-packages/gradio/blocks.py", line 1323, in process_api
    result = await self.call_function(
  File "/opt/Python/lib/python3.10/site-packages/gradio/blocks.py", line 1051, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "/opt/Python/lib/python3.10/site-packages/anyio/to_thread.py", line 31, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/opt/Python/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread
    return await future
  File "/opt/Python/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 867, in run
    result = context.run(func, *args)
  File "/root/Chinese-LangChain/main.py", line 82, in predict
    result = application.get_llm_answer(query=input, web_content=web_content)
  File "/root/Chinese-LangChain/clc/langchain_application.py", line 85, in get_llm_answer
    result = self.llm_service._call(prompt)
  File "/root/Chinese-LangChain/clc/gpt_service.py", line 41, in _call
    response, _ = self.model.chat(
  File "/opt/Python/lib/python3.10/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4-qe/977d9df4cfae6b7a756e07698483872c5c070eee/modeling_chatglm.py", line 1253, in chat
    outputs = self.generate(**inputs, **gen_kwargs)
  File "/opt/Python/lib/python3.10/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/opt/Python/lib/python3.10/site-packages/transformers/generation/utils.py", line 1565, in generate
    return self.sample(
  File "/opt/Python/lib/python3.10/site-packages/transformers/generation/utils.py", line 2612, in sample
    outputs = self(
  File "/opt/Python/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4-qe/977d9df4cfae6b7a756e07698483872c5c070eee/modeling_chatglm.py", line 1158, in forward
    transformer_outputs = self.transformer(
  File "/opt/Python/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4-qe/977d9df4cfae6b7a756e07698483872c5c070eee/modeling_chatglm.py", line 905, in forward
    inputs_embeds = self.word_embeddings(input_ids)
  File "/opt/Python/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4-qe/977d9df4cfae6b7a756e07698483872c5c070eee/quantization.py", line 384, in forward
    original_weight = extract_weight_to_half(weight=self.weight, scale_list=self.weight_scale, source_bit_width=self.weight_bit_width)
  File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4-qe/977d9df4cfae6b7a756e07698483872c5c070eee/quantization.py", line 239, in extract_weight_to_half
    func(
  File "/opt/Python/lib/python3.10/site-packages/cpm_kernels/kernels/base.py", line 48, in call
    func = self._prepare_func()
  File "/opt/Python/lib/python3.10/site-packages/cpm_kernels/kernels/base.py", line 40, in _prepare_func
    self._module.get_module(), self._func_name
  File "/opt/Python/lib/python3.10/site-packages/cpm_kernels/kernels/base.py", line 24, in get_module
    self._module[curr_device] = cuda.cuModuleLoadData(self._code)
  File "/opt/Python/lib/python3.10/site-packages/cpm_kernels/library/base.py", line 94, in wrapper
    return f(*args, **kwargs)
  File "/opt/Python/lib/python3.10/site-packages/cpm_kernels/library/cuda.py", line 233, in cuModuleLoadData
    checkCUStatus(cuda.cuModuleLoadData(ctypes.byref(module), data))
  File "/opt/Python/lib/python3.10/site-packages/cpm_kernels/library/cuda.py", line 216, in checkCUStatus
    raise RuntimeError("CUDA Error: %s" % cuGetErrorString(error))
RuntimeError: CUDA Error: no kernel image is available for execution on the device
```
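Note that the error is raised from cpm_kernels, not from torch: cuModuleLoadData fails to load the prebuilt int4 quantization kernels, which suggests they ship no binary for the P100's sm_60 architecture and the driver cannot JIT a compatible one; that would also explain why switching between Torch 2.0.1 and 1.8.0 changes nothing. If that is the case here, a commonly suggested workaround is to skip the int4 checkpoint and load the unquantized model in fp16, bypassing cpm_kernels entirely. A minimal sketch, assuming the ~13 GB fp16 weights of chatglm-6b fit on the 16 GB P100:

```python
from transformers import AutoModel, AutoTokenizer

# Workaround sketch: load the unquantized chatglm-6b checkpoint in fp16,
# avoiding the cpm_kernels int4 path that fails above. Assumes the ~13 GB
# fp16 weights fit on the 16 GB P100.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
model = model.eval()

response, history = model.chat(tokenizer, "Hello", history=[])
print(response)
```

In Chinese-LangChain the same change would presumably mean pointing the configured model name at THUDM/chatglm-6b instead of THUDM/chatglm-6b-int4-qe. Quantizing after loading (model.quantize(4)) still goes through cpm_kernels, so it would likely hit the same error on this GPU.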