# COMP_598_MiniProjectOne
Linear Regression

Known to work on OS X 10.9

DEPENDENCIES

- python
- numpy
- pandas
- beautifulsoup

SETUP

In terminal:

1. $ mkdir foo
2. $ cd foo
3. $ git clone https://github.com/unheimlich/COMP_598_MiniProjectOne.git
4. $ cd COMP_598_MiniProjectOne
5. $ python

PART ONE

1. Create test, validation, and training sets
    
    import src.CrossValidate as cv
    
    filename = 'data/PartOne/OnlineNewsPopularity.csv'
    cv.separateTestSet(filename, 5)                       # Set aside 20% for test
    cv.kfoldlabel(filename, 5)                          # Create 5-fold cross validation indices
    
2. Run a cross validation set
    
    optimization = 'LeastSquares'     # 'LeastSquares', 'GradientDescent'
    lambda_ = 0.0                     # L2 regularization constant: float >= 0 ('lambda' is a reserved word in Python)
    alpha = 0.0                       # Learning Rate: float >= 0
    preprocess = ''                   # '', 'center', 'standardize', 'whiten'

    train_rmse, validation_rmse, y_train, t_train = cv.runcrossval(optimization, lambda_, alpha, preprocess)
    
3. Evaluate Test Error

    test_rmse, w, y_test, t_test, w = cv.test(optimization, lambda_, alpha, preprocess)
    
PART TWO

1. Create test, validation, and training sets
    
    import src.CrossValidate2 as cv2
    
    filename = 'data/PartTwo/DailyMailArticlePopularity.csv'
    cv2.separateTestSet(filename, 5)                       # Set aside 20% for test
    cv2.kfoldlabel(filename, 5)                          # Create 5-fold cross validation indices
    
2. Run a cross validation set
    
    optimization = 'LeastSquares'     # 'LeastSquares', 'GradientDescent'
    lambda_ = 0.0                     # L2 regularization constant: float >= 0 ('lambda' is a reserved word in Python)
    alpha = 0.0                       # Learning Rate: float >= 0
    preprocess = ''                   # '', 'center', 'standardize', 'whiten'

    train_rmse, validation_rmse, y_train, t_train = cv2.runcrossval(optimization, lambda_, alpha, preprocess)
    
3. Evaluate Test Error

    test_rmse, w, y_test, t_test, w = cv2.test(optimization, lambda_, alpha, preprocess)