lyhue1991/torchkeras

I want to do multimodal machine translation. I added an image feature and fused it into the model, but after the fusion I cannot run inference. Does anyone have an idea what is going on?

ShaoDonCui opened this issue · 1 comment
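For context, the fusion step inside the custom LlamaModel.forward (Cell In[15] in the traceback below) looks roughly like the sketch here. It is reconstructed from the traceback only, so the concrete VGG variant and the projection from VGG's 1000-dim output to the LLaMA hidden size are assumptions:

```python
import torch
import torchvision

# Hypothetical reconstruction of the image branch seen in the traceback.
# vgg16 and the Linear projection to hidden_size=4096 are assumptions.
imgmodel = torch.nn.Sequential(
    torchvision.models.vgg16(weights=None),  # VGG.forward appears in the traceback
    torch.nn.Linear(1000, 4096),             # project to the LLaMA hidden size (assumed)
)

def fuse(inputs_embeds: torch.Tensor, image: torch.Tensor) -> torch.Tensor:
    # image: (batch, 3, H, W) -> (batch, hidden_size)
    imgfeature = imgmodel(image)
    # (batch, 1, hidden_size) so it broadcasts over the sequence dimension
    imgfeature = imgfeature.unsqueeze(1)
    # element-wise addition onto every token embedding
    return inputs_embeds + imgfeature
```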


TypeError Traceback (most recent call last)
Cell In[38], line 19
17 model.to('cuda:0')
18 input_ids = torch.tensor([[english_word2idx[i] for i in x.split(" ")if i != '']]).to('cuda:0')
---> 19 out = model.generate(inputs=input_ids,image=img)

File /opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
112 @functools.wraps(func)
113 def decorate_context(*args, **kwargs):
114 with ctx_factory():
--> 115 return func(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1681, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
1674 input_ids, model_kwargs = self._expand_inputs_for_generation(
1675 input_ids=input_ids,
1676 expand_size=generation_config.num_beams,
1677 is_encoder_decoder=self.config.is_encoder_decoder,
1678 **model_kwargs,
1679 )
1680 # 13. run beam search
-> 1681 return self.beam_search(
1682 input_ids,
1683 beam_scorer,
1684 logits_processor=logits_processor,
1685 stopping_criteria=stopping_criteria,
1686 pad_token_id=generation_config.pad_token_id,
1687 eos_token_id=generation_config.eos_token_id,
1688 output_scores=generation_config.output_scores,
1689 return_dict_in_generate=generation_config.return_dict_in_generate,
1690 synced_gpus=synced_gpus,
1691 **model_kwargs,
1692 )
1694 elif generation_mode == GenerationMode.BEAM_SAMPLE:
1695 # 11. prepare logits warper
1696 logits_warper = self._get_logits_warper(generation_config)

File /opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:3020, in GenerationMixin.beam_search(self, input_ids, beam_scorer, logits_processor, stopping_criteria, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, **model_kwargs)
3016 break
3018 model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
-> 3020 outputs = self(
3021 **model_inputs,
3022 return_dict=True,
3023 output_attentions=output_attentions,
3024 output_hidden_states=output_hidden_states,
3025 )
3027 if synced_gpus and this_peer_finished:
3028 cur_len = cur_len + 1

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:632, in convert_outputs_to_fp32.<locals>.forward(*args, **kwargs)
631 def forward(*args, **kwargs):
--> 632 return model_forward(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:620, in ConvertOutputsToFp32.__call__(self, *args, **kwargs)
619 def call(self, *args, **kwargs):
--> 620 return convert_to_fp32(self.model_forward(*args, **kwargs))

File /opt/conda/lib/python3.10/site-packages/torch/amp/autocast_mode.py:14, in autocast_decorator.<locals>.decorate_autocast(*args, **kwargs)
11 @functools.wraps(func)
12 def decorate_autocast(*args, **kwargs):
13 with autocast_instance:
---> 14 return func(*args, **kwargs)

Cell In[16], line 82, in LlamaForCausalLM.forward(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, image, use_cache, output_attentions, output_hidden_states, return_dict)
79 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
81 # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
---> 82 outputs = self.model(
83 input_ids=input_ids,
84 image=image,
85 attention_mask=attention_mask,
86 position_ids=position_ids,
87 past_key_values=past_key_values,
88 inputs_embeds=inputs_embeds,
89 use_cache=use_cache,
90 output_attentions=output_attentions,
91 output_hidden_states=output_hidden_states,
92 return_dict=return_dict,
93 )
95 hidden_states = outputs[0]
96 if self.config.pretraining_tp > 1:

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

Cell In[15], line 178, in LlamaModel.forward(self, input_ids, image, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict)
173 attention_mask = self._prepare_decoder_attention_mask(
174 attention_mask, (batch_size, seq_length), inputs_embeds, past_key_values_length
175 )
177 hidden_states = inputs_embeds
--> 178 imgfeature=self.imgmodel(image)
180 imgfeature = imgfeature.unsqueeze(1)
183 hidden_states = hidden_states + imgfeature

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/lib/python3.10/site-packages/torchvision/models/vgg.py:66, in VGG.forward(self, x)
65 def forward(self, x: torch.Tensor) -> torch.Tensor:
---> 66 x = self.features(x)
67 x = self.avgpool(x)
68 x = torch.flatten(x, 1)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/container.py:217, in Sequential.forward(self, input)
215 def forward(self, input):
216 for module in self:
--> 217 input = module(input)
218 return input

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/conv.py:463, in Conv2d.forward(self, input)
462 def forward(self, input: Tensor) -> Tensor:
--> 463 return self._conv_forward(input, self.weight, self.bias)

File /opt/conda/lib/python3.10/site-packages/torch/nn/modules/conv.py:459, in Conv2d._conv_forward(self, input, weight, bias)
455 if self.padding_mode != 'zeros':
456 return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
457 weight, bias, self.stride,
458 _pair(0), self.dilation, self.groups)
--> 459 return F.conv2d(input, weight, bias, self.stride,
460 self.padding, self.dilation, self.groups)

TypeError: conv2d() received an invalid combination of arguments - got (NoneType, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:

  • (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
    didn't match because some of the arguments have invalid types: (!NoneType!, !Parameter!, !Parameter!, !tuple of (int, int)!, !tuple of (int, int)!, !tuple of (int, int)!, int)
  • (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
    didn't match because some of the arguments have invalid types: (!NoneType!, !Parameter!, !Parameter!, !tuple of (int, int)!, !tuple of (int, int)!, !tuple of (int, int)!, int)

I found the cause: beam_search is a sealed method inside generate() and never passes the image feature along, so image arrives as None in forward(). How can I modify things so that the image feature gets passed through?
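One possible approach, as a minimal sketch rather than a verified fix for this exact notebook: generate() already forwards unrecognized keyword arguments such as image into model_kwargs, and the failure happens because prepare_inputs_for_generation only returns the keys the stock Llama implementation knows about, so image is dropped before beam_search calls forward(). Overriding (or editing) that method in the custom LlamaForCausalLM from Cell In[16] so it keeps image should let the feature reach forward() at every decoding step. The class name below is illustrative:

```python
from transformers import LlamaForCausalLM as HFLlamaForCausalLM


class MultimodalLlamaForCausalLM(HFLlamaForCausalLM):
    def prepare_inputs_for_generation(
        self, input_ids, past_key_values=None, attention_mask=None,
        inputs_embeds=None, **kwargs,
    ):
        # Let the stock implementation assemble input_ids / attention_mask /
        # past_key_values / position_ids / use_cache as usual.
        model_inputs = super().prepare_inputs_for_generation(
            input_ids,
            past_key_values=past_key_values,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            **kwargs,
        )
        # Keep the image tensor so that beam_search's call to forward() receives
        # it; without this it defaults to None, which is the NoneType that
        # reaches F.conv2d in the traceback above.
        model_inputs["image"] = kwargs.get("image")
        return model_inputs
```

With this in place, the original call `out = model.generate(inputs=input_ids, image=img)` should reach the custom forward() with image populated on each step; in this transformers version the tensor-valued entries of model_kwargs are also expanded across beams by _expand_inputs_for_generation, so beam search should work with a single image per input. If recomputing self.imgmodel(image) at every decoding step is too costly, caching the VGG feature once per generate() call is a further, optional optimization.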