I see Error after any first message into UI
bymihaj opened this issue · 1 comments
My PC: Windows-11, Python 3.10.10,
CPU i7 gen 12
python app.py decapoda-research/llama-7b-hf project-baize/baize-lora-7B
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization.
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'.
The class this function is called from is 'LlamaTokenizer'.
Loading checkpoint shards: 100%|█████████████████████████████████████| 33/33 [00:11<00:00, 2.93it/s]
C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\gradio\deprecation.py:43: UserWarning: You have unused kwarg parameters in Row, please remove them: {'scale': 1}
warnings.warn(
Reloading javascript...
Running on local URL: http://127.0.0.1:7860
To create a public link, set share=True
in launch()
.
Traceback (most recent call last):
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\gradio\routes.py", line 394, in run_predict
output = await app.get_blocks().process_api(
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\gradio\blocks.py", line 1075, in process_api
result = await self.call_function(
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\gradio\blocks.py", line 898, in call_function
prediction = await anyio.to_thread.run_sync(
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\anyio\to_thread.py", line 31, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\anyio_backends_asyncio.py", line 937, in run_sync_in_worker_thread
return await future
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\anyio_backends_asyncio.py", line 867, in run
result = context.run(func, *args)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\gradio\utils.py", line 549, in async_iteration
return next(iterator)
File "d:\tmp\ai\baize\demo\app.py", line 43, in predict
for x in greedy_search(input_ids,model,tokenizer,stop_words=["[|Human|]", "[|AI|]"],max_length=max_length_tokens,temperature=temperature,top_p=top_p):
File "d:\tmp\ai\baize\demo\app_modules\utils.py", line 253, in greedy_search
outputs = model(input_ids)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\accelerate\hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\peft\peft_model.py", line 575, in forward
return self.base_model(
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\accelerate\hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\transformers\models\llama\modeling_llama.py", line 687, in forward
outputs = self.model(
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\accelerate\hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\transformers\models\llama\modeling_llama.py", line 577, in forward
layer_outputs = decoder_layer(
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\accelerate\hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\transformers\models\llama\modeling_llama.py", line 292, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\accelerate\hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\transformers\models\llama\modeling_llama.py", line 196, in forward
query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\torch\nn\modules\module.py", line 1501, in call_impl
return forward_call(*args, **kwargs)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\accelerate\hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "C:\Users\mihaj\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\peft\tuners\lora.py", line 406, in forward
result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
RuntimeError: "addmm_impl_cpu" not implemented for 'Half'
Removing this part of code from app_modules\utils.py solved issue locally for me
if not load_8bit:
model.half() # seems to fix bugs for some users.