#BatchFormerV2
The hyperparameters:
--add_global 2 --insert_idx 8 --bt_lr 0.5 # larger models may need extra care to avoid overfitting, e.g. a smaller lr for bt
--add_global 2 --start_idx 0 # CIFAR
#BatchFormerV2
The hyperparameters:
--add_global 2 --insert_idx 8 --bt_lr 0.5 # larger models may need extra care to avoid overfitting, e.g. a smaller lr for bt
--add_global 2 --start_idx 0 # CIFAR