pretrain_llama

inference:

    torchrun --nproc_per_node=1 --master-port=29501 inference.py --num_nodes=1

pretrain:

    torchrun --nproc_per_node=1 --master-port=29501 train.py --num_nodes=1