A deep learning training template constructed as a minimal working MNIST example. Utilizes dataclasses as flexible train configs and mlflow for analytics and artifact logging.
# create `schema` conda environment
conda create -n schema python=3.9 pip
conda activate schema
# install torch and dependencies, assumes cuda version >= 11.0
pip install -U pip
pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
pip install mlflow pyrallis pandas tqdm pillow
# install hyperparameter search dependencies
pip install ray[tune] hyperopt
# install dl-schema repo
git clone https://github.com/phelps-matthew/dl-schema.git
cd dl-schema
pip install -e .
- Download and extract the MNIST dataset
cd data
python create_mnist_dataset.py
- Train small CNN model
python train.py
- View train configuration options
python train.py --help
- Train from yaml configuration, with CLI override
python train.py --config_path train_cfg.yaml --lr 0.001 --gpus [7]
- Start mlflow ui to visualize results
# navigate to dl_schema root directory containing `mlruns`
mlflow ui
# to set host and port
mlflow ui --host 0.0.0.0 --port 8080
- Serialize dataclass train config to yaml, outputting `train_cfg.yaml`
python cfg.py
- Use ray tune to perform multi-gpu hyperparameter search
CUDA_VISIBLE_DEVICES=0,1,2,3 python tune.py --exp_name hyper_search