This repository contains code for kick-starting data version control with DVC.
chmod +x ./initial_setup.sh
./initial_setup.sh
cd dvc_tutorial
cat data/evaluation.txt
nano code/featurization.py
bag_of_words = CountVectorizer(stop_words='english', max_features=6000, ngram_range=(1, 3))
git commit -am "Add bigram features"
dvc repro evaluation.txt.dvc
cat data/evaluation.txt