X-PLUG/mPLUG-Owl

Cannot run inference

findalexli opened this issue · 0 comments

I followed the instructions to create a new conda environment, then used the quick-start inference script to load the model.

(mplug_owl2) alex@HP-machine:~/mPLUG-Owl/mPLUG-Owl2$ python test_inference_mplugowl.py
[2024-02-10 16:54:58,039] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)
Traceback (most recent call last):
  File "/home/alex/mPLUG-Owl/mPLUG-Owl2/test_inference_mplugowl.py", line 15, in <module>
    tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, None, model_name, load_8bit=False, load_4bit=False, device="cuda")
  File "/home/alex/mPLUG-Owl/mPLUG-Owl2/mplug_owl2/model/builder.py", line 117, in load_pretrained_model
    model = AutoModelForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, **kwargs)
  File "/home/alex/mambaforge-pypy3/envs/mplug_owl2/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py", line 493, in from_pretrained
    return model_class.from_pretrained(
  File "/home/alex/mambaforge-pypy3/envs/mplug_owl2/lib/python3.10/site-packages/transformers/modeling_utils.py", line 2700, in from_pretrained
    model = cls(config, *model_args, **model_kwargs)
  File "/home/alex/mPLUG-Owl/mPLUG-Owl2/mplug_owl2/model/modeling_mplug_owl2.py", line 218, in __init__
    self.model = MPLUGOwl2LlamaModel(config)
  File "/home/alex/mPLUG-Owl/mPLUG-Owl2/mplug_owl2/model/modeling_mplug_owl2.py", line 205, in __init__
    super(MPLUGOwl2LlamaModel, self).__init__(config)
  File "/home/alex/mPLUG-Owl/mPLUG-Owl2/mplug_owl2/model/modeling_mplug_owl2.py", line 36, in __init__
    self.vision_model = MplugOwlVisionModel(
  File "/home/alex/mPLUG-Owl/mPLUG-Owl2/mplug_owl2/model/visual_encoder.py", line 403, in __init__
    self.embeddings = MplugOwlVisionEmbeddings(config)
  File "/home/alex/mPLUG-Owl/mPLUG-Owl2/mplug_owl2/model/visual_encoder.py", line 105, in __init__
    if self.cls_token:
RuntimeError: Boolean value of Tensor with more than one value is ambiguous
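For what it's worth, the last frame points at a plain truthiness check on self.cls_token in visual_encoder.py. In PyTorch, evaluating a tensor with more than one element in a boolean context raises exactly this RuntimeError, and the conventional guard for an optional parameter is `is not None`. Below is a minimal sketch reproducing the error class; the (1, 1, 1024) parameter shape is my own illustrative assumption, not the model's actual config, and I can't tell from here whether the repo intends cls_token to be a bool flag or a parameter at that point.

import torch
import torch.nn as nn

# A learnable CLS token, similar in spirit to MplugOwlVisionEmbeddings.
# The (1, 1, 1024) shape is an illustrative assumption, not the real config.
cls_token = nn.Parameter(torch.zeros(1, 1, 1024))

try:
    if cls_token:  # same pattern as `if self.cls_token:` in the traceback
        pass
except RuntimeError as e:
    print(e)  # "Boolean value of Tensor with more than one value is ambiguous"

# The usual guard only checks whether the optional attribute is set:
if cls_token is not None:
    print("cls_token present, shape:", tuple(cls_token.shape))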

Here is the script:

import torch
from PIL import Image
from transformers import TextStreamer

from mplug_owl2.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
from mplug_owl2.conversation import conv_templates, SeparatorStyle
from mplug_owl2.model.builder import load_pretrained_model
from mplug_owl2.mm_utils import process_images, tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria

image_file = '/home/alex/mPLUG-Owl/mPLUG-Owl2/assets/mplug_owl2_logo.png' # Image Path
model_path = 'MAGAer13/mplug-owl2-llama2-7b'
query = "Describe the image."

model_name = get_model_name_from_path(model_path)
tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, None, model_name, load_8bit=False, load_4bit=False, device="cuda")

conv = conv_templates["mplug_owl2"].copy()
roles = conv.roles

image = Image.open(image_file).convert('RGB')
max_edge = max(image.size)  # Resizing to a square image is recommended for best performance.
image = image.resize((max_edge, max_edge))

image_tensor = process_images([image], image_processor)
image_tensor = image_tensor.to(model.device, dtype=torch.float16)

inp = DEFAULT_IMAGE_TOKEN + query
conv.append_message(conv.roles[0], inp)
conv.append_message(conv.roles[1], None)
prompt = conv.get_prompt()

input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).to(model.device)
stop_str = conv.sep2
keywords = [stop_str]
stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

temperature = 0.7
max_new_tokens = 512

with torch.inference_mode():
    output_ids = model.generate(
        input_ids,
        images=image_tensor,
        do_sample=True,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        streamer=streamer,
        use_cache=True,
        stopping_criteria=[stopping_criteria])

outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()
print(outputs)