epfLLM/Megatron-LLM

Error when finetuning llama2-7B with --seq_length 4096

13416157913 opened this issue · 1 comment

Traceback (most recent call last):
File "/home/dengkaibiao/Megatron-LLM/finetune.py", line 261, in
pretrain(args, data_provider, model_provider, ModelType.encoder_or_decoder,
File "/home/dengkaibiao/Megatron-LLM/megatron/training.py", line 139, in pretrain
iteration = _train(args,
File "/home/dengkaibiao/Megatron-LLM/megatron/training.py", line 685, in _train
train_step(forward_step_func,
File "/home/dengkaibiao/Megatron-LLM/megatron/training.py", line 412, in train_step
losses_reduced = forward_backward_func(
File "/home/dengkaibiao/Megatron-LLM/megatron/schedules.py", line 234, in forward_backward_no_pipelining
output_tensor = forward_step(forward_step_func, data_iterator,
File "/home/dengkaibiao/Megatron-LLM/megatron/schedules.py", line 117, in forward_step
output_tensor, loss_func = forward_step_func(data_iterator, model)
File "/home/dengkaibiao/Megatron-LLM/finetune.py", line 227, in forward_step
output_tensor = model(tokens, position_ids, attention_mask,
File "/home/dengkaibiao/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/dengkaibiao/Megatron-LLM/megatron/model/distributed.py", line 58, in forward
return self.module(*inputs, **kwargs)
File "/home/dengkaibiao/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/dengkaibiao/Megatron-LLM/megatron/model/module.py", line 186, in forward
outputs = self.module(*inputs, **kwargs)
File "/home/dengkaibiao/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/dengkaibiao/Megatron-LLM/megatron/model/gpt_model.py", line 87, in forward
lm_output = self.language_model(
File "/home/dengkaibiao/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/dengkaibiao/Megatron-LLM/megatron/model/language_model.py", line 512, in forward
encoder_output = self.encoder(
File "/home/dengkaibiao/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/dengkaibiao/Megatron-LLM/megatron/model/transformer.py", line 1239, in forward
hidden_states = layer(
File "/home/dengkaibiao/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/dengkaibiao/Megatron-LLM/megatron/model/transformer.py", line 757, in forward
attention_output, attention_bias = self.self_attention(layernorm_output,
File "/home/dengkaibiao/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/dengkaibiao/Megatron-LLM/megatron/model/transformer.py", line 510, in forward
context_layer = self._checkpointed_attention_forward(
File "/home/dengkaibiao/Megatron-LLM/megatron/model/transformer.py", line 397, in checkpointed_attention_forward
hidden_states = megatron.core.tensor_parallel.checkpoint(
File "/home/dengkaibiao/Megatron-LLM/megatron/core/tensor_parallel/random.py", line 251, in checkpoint
return CheckpointFunction.apply(function,
File "/home/dengkaibiao/Megatron-LLM/megatron/core/tensor_parallel/random.py", line 194, in forward
outputs = run_function(*args)
File "/home/dengkaibiao/Megatron-LLM/megatron/model/transformer.py", line 393, in custom_forward
output_ = self.core_attention(query_layer, key_layer,
File "/home/dengkaibiao/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/dengkaibiao/Megatron-LLM/megatron/model/transformer.py", line 231, in forward
attention_probs = self.scale_mask_softmax(attention_scores,
File "/home/dengkaibiao/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/home/dengkaibiao/Megatron-LLM/megatron/model/fused_softmax.py", line 148, in forward
return self.forward_fused_softmax(input, mask)
File "/home/dengkaibiao/Megatron-LLM/megatron/model/fused_softmax.py", line 183, in forward_fused_softmax
probs = ScaledUpperTriangMaskedSoftmax.apply(input, scale)
File "/home/dengkaibiao/Megatron-LLM/megatron/model/fused_softmax.py", line 22, in forward
softmax_results = scaled_upper_triang_masked_softmax_cuda.forward(
RuntimeError: seq_len <= 2048 INTERNAL ASSERT FAILED at "/home/llm-deploy/apex/csrc/megatron/scaled_upper_triang_masked_softmax_cuda.cu":38, please report a bug to PyTorch.

This is not enough information for us to help. Please put together an MWE (minimal working example) and submit a bug report to PyTorch directly, as the stack trace suggests, or give us more background along with the inputs you used.
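
For what it's worth, the assert is raised inside apex's fused megatron kernel (scaled_upper_triang_masked_softmax_cuda), which in the build referenced by the trace hard-caps seq_len at 2048, so an MWE that calls that extension directly, outside Megatron, should be enough to reproduce it. A minimal sketch, assuming apex was installed with --cpp_ext --cuda_ext and a CUDA device is available; the shape and dtype follow what the fused-softmax path feeds the kernel (a 3D fp16 tensor of [attn_batches, seq_len, seq_len]):

```python
# Minimal repro sketch (assumptions: apex built with --cpp_ext --cuda_ext,
# CUDA device available). The kernel takes a 3D fp16/bf16 tensor of shape
# [attn_batches, seq_len, seq_len] plus a softmax scale factor.
import torch
import scaled_upper_triang_masked_softmax_cuda  # apex megatron fused kernel

def run(seq_len, batches=4, scale=1.0):
    x = torch.randn(batches, seq_len, seq_len,
                    device="cuda", dtype=torch.float16)
    return scaled_upper_triang_masked_softmax_cuda.forward(x, scale)

run(2048)  # expected to succeed
run(4096)  # expected to fail with the same "seq_len <= 2048" internal assert
```

If that reproduces, the limit comes from the installed apex kernel rather than from Megatron-LLM itself; noting whether a newer apex build or a run with masked-softmax fusion disabled avoids the error would also be useful information to include in the report.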