# Ensure pip is present and current, then install project dependencies.
# NOTE: the original `python3 -m install pip` is not a valid invocation —
# there is no stdlib module named `install`; the pip module spelling is below.
# Using `python3 -m pip` (rather than bare `pip`) guarantees the packages land
# in the same interpreter that will run the scripts.
python3 -m pip install --upgrade pip
python3 -m pip install -r requirements.txt
See tokenizing.py for the tokenizer-training script and its CLI options.
# Train a tokenizer over the train/val/test CSV splits.
# Placeholders — edit before running:
#   datapath    : directory containing data_train.csv / data_val.csv / data_test.csv
#   text_column : name of the CSV column holding the raw text
#   st          : output directory for the trained tokenizer
datapath=/path/to/data
text_column=text
st=my/save/path
mkdir -p "$st"
# Flags (as defined by tokenizing.py — confirm against its argparser):
#   -fe base tokenizer, -p CSV paths, -vs vocab size, -mf min frequency,
#   -st save path, -tc text column.
# All expansions are quoted so paths containing spaces survive word-splitting.
python tokenizing.py \
  -fe gpt2 \
  -p "${datapath}/data_train.csv,${datapath}/data_val.csv,${datapath}/data_test.csv" \
  -vs 20000 \
  -mf 2 \
  -st "$st" \
  -tc "$text_column"
See trainer.py for the training logic and train.sh for the launch configuration.
# Run the training launcher in the current shell so any variables it sets persist.
source train.sh
# Run these inside a Jupyter/IPython notebook (they are cell magics, not shell).
# Point --logdir at the Lightning log directory created by training; replace
# my/log/dir with your actual log root and ${task} with (or keep as) the task name.
# NOTE: the original `${my/log/dir}` was bash pattern-substitution on an unset
# variable `my` and expands to an empty string — a plain placeholder path is used.
%load_ext tensorboard
%tensorboard --logdir my/log/dir/${task}/lightning_logs
# TODO: add the command for this step (the original line `. TODO` would try to source a file named TODO).