N.B.: A Python version up to 3.10.0 is required.
!python --version
pip install astra-llm==0.1.19
import torch
# Prefer CUDA when torch reports it as available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
from astra.model import Model

# Load the model from the Kaggle input directory, passing the selected device
# as map_location.
model = Model("/kaggle/input/astra/pytorch/1.4b/2", map_location=device)

# Call generate with the prompt "hello" and the settings below; the result is
# kept in `response`.
response = model.generate(
    messages="hello",
    max_new_tokens=50,
    top_k=25,
    temperature=1.4,
)