docker:

make build
make run-dl0
make exec

run:

cd /DeepSpeedImagenet
deepspeed src/cifar10.py --deepspeed --deepspeed_config configs/imagenet_config.json

Benchmark results on ResNet-50 @ 224, 2x 1080 Ti

speed:

  • pytorch-lightning = 395 samples/sec
  • nvidia-apex = 448 samples/sec
  • DeepSpeed = 398 samples/sec

memory:

  • pytorch-lightning = 7632 MB per GPU
  • nvidia-apex = 7650 MB per GPU
  • DeepSpeed = 6930 MB per GPU