Model architecture is modified when I use BitsAndBytesConfig with default params
yunhao-tech opened this issue · 0 comments
yunhao-tech commented
System Info
Ubuntu 20.04
CUDA 12.2.2
Python=3.11.9
transformers=4.44.2
bitsandbytes=0.43.3
GPU: A800
Reproduction
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


def format_size(size):
    K, M, B = 1e3, 1e6, 1e9
    if size == 0:
        return '0'
    elif size < M:
        return f"{size / K:.1f}K"
    elif size < B:
        return f"{size / M:.1f}M"
    else:
        return f"{size / B:.1f}B"

def get_pytorch_model_info(model: torch.nn.Module) -> tuple[dict, list]:
    params_list = []
    total_params = 0
    total_params_non_trainable = 0
    modules = dict(model.named_modules())
    for name, param in model.named_parameters():
        # Resolve the module that owns this parameter (the dotted prefix
        # before ".weight"/".bias"), so layer_class reflects the actual
        # layer type rather than the top-level container.
        layer_name = name.rsplit('.', 1)[0]
        layer_class = modules[layer_name].__class__.__name__
        params_count = param.numel()
        trainable = param.requires_grad
        params_list.append({
            'tensor': name,
            'layer_class': layer_class,
            'shape': str(list(param.size())),
            'precision': str(param.dtype).split('.')[-1],
            'params_count': str(params_count),
            'trainable': str(trainable),
        })
        total_params += params_count
        if not trainable:
            total_params_non_trainable += params_count
    total_params_trainable = total_params - total_params_non_trainable
    total_params_info = {
        'total_params': format_size(total_params),
        'total_params_trainable': format_size(total_params_trainable),
        'total_params_non_trainable': format_size(total_params_non_trainable)
    }
    return total_params_info, params_list

if __name__ == "__main__":
    base_model = "My fine tuned starcoder2 model path."
    quantization_config = BitsAndBytesConfig()
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=quantization_config,
        torch_dtype=torch.float16,  # torch.bfloat16
        device_map="auto",
        trust_remote_code=True
    )
    model.config.pad_token_id = tokenizer.pad_token_id
    model.eval()
    total_params_info, params_list = get_pytorch_model_info(model)
    print(total_params_info)
    for ele in params_list:
        print(ele)
Expected behavior
When I load the model without BitsAndBytes, using the following code, the architecture and parameter count are reported as expected:
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)
However, when I use BitsAndBytesConfig with default parameters, the model architecture is modified and the reported number of parameters is halved.
I expect that passing no parameters (i.e. the default configuration) should behave the same as the normal case: no quantization and no modification of the model architecture.
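For reference, here is a minimal sketch of how the loaded model could be inspected, run in the same session as the reproduction script above; the is_quantized attribute and the Linear8bitLt / Linear4bit class names are assumptions about the transformers/bitsandbytes internals at these versions:

# Hedged check: recent transformers versions reportedly set `is_quantized`
# on models loaded through a quantizer; getattr guards against its absence.
print("is_quantized:", getattr(model, "is_quantized", False))

# If bitsandbytes replaced the Linear layers, the class name changes to
# Linear8bitLt (8-bit) or Linear4bit (4-bit) and the stored weight dtype
# becomes int8/uint8 instead of float16. A 4-bit weight packs two values
# per byte, which by itself would halve the numel() counted above.
first_linear = next(m for m in model.modules() if isinstance(m, torch.nn.Linear))
print(type(first_linear).__name__, first_linear.weight.dtype, tuple(first_linear.weight.shape))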