This assessment covers building and training a tf.keras Sequential model, then applying regularization. The dataset comes from a "don't overfit" Kaggle competition. There are 300 features labeled 0-299, and a binary target called "target". There are only 250 records total, meaning this is a very small dataset to be used with a neural network.
You can assume that the dataset has already been scaled.
N.B. You may get comments from Keras/TensorFlow regarding your kernel and runtime. These are completely normal; they are informational messages rather than warnings.
# Run this cell without changes
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import Sequential, regularizers
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
- Using pandas, open the file data.csv as a DataFrame
- Drop the "id" column, since this is a unique identifier and not a feature
- Separate the data into X (a DataFrame with all columns except "target") and y (a Series with just the "target" column)
- The train-test split should work as-is once you create these variables
# Replace None with appropriate code

# Read in the data as a pandas DataFrame
df = None

# Drop the "id" column (a unique identifier, not a feature)
None

# Separate into X (every column except "target") and y (the "target" column)
X = None
y = None

# Test/train split (set the random state to 2021) and check the X_Train shape
# Later cells expect the names X_train, X_test, y_train and y_test to exist
None
# Run this code block without any changes

# Assert that the loaded objects have the expected pandas types.
# isinstance() is used instead of comparing type() so that subclasses of
# DataFrame/Series are accepted as well.
assert isinstance(df, pd.DataFrame)
assert isinstance(X, pd.DataFrame)
assert isinstance(y, pd.Series)

# The training split should hold 187 rows and all 300 feature columns
assert X_train.shape == (187, 300)
assert y_train.shape == (187,)
In the cell below, create an instance of a Sequential model (documentation here) called dense_model with a name of "dense" and otherwise default arguments.
In other words, create a model without any layers. We will add layers in a future step.
# Replace None with appropriate code
# Create an empty Sequential model (no layers yet) whose name is "dense"
dense_model = None
# Run this code without change

# Assert
# The type is checked first so that an unfilled placeholder fails with a
# clear AssertionError instead of an AttributeError on `.layers`.
assert isinstance(dense_model, Sequential)
# The model must start out without any layers
assert len(dense_model.layers) == 0
assert dense_model.name == "dense"
How many input and output nodes should this model have?
Feel free to explore the attributes of X and y to determine this answer, or just enter numbers based on the problem description above.
# Replace None with appropriate code
# Both values must be plain Python ints (the check below compares type() to int)
num_input_nodes = None  # number of input nodes, one per feature
num_output_nodes = None  # number of nodes in the output layer
# Run this code without change

# Both values should be integers
assert type(num_input_nodes) == int
assert type(num_output_nodes) == int

# 300 features, so 300 input nodes
score = 0.5 if num_input_nodes == 300 else 0

# binary output, so 1 output node earns the remaining half point.
# Partial credit (0.25) for 2 output nodes, since it's technically
# possible to use 2 output nodes for this, although it's
# confusingly redundant. Any other value earns nothing.
score += {1: 0.5, 2: 0.25}.get(num_output_nodes, 0)

score
The code below will use the input and output shapes you specified to add Dense layers to the model:
# Run this cell without changes

# Add input layer: 64 units, expecting one value per input node
# NOTE(review): no `activation` is passed to any of these Dense layers, so
# they presumably fall back to the Keras default — confirm this is intended
# for the exercise.
dense_model.add(Dense(units=64, input_shape=(num_input_nodes,)))

# Add hidden layers: two further 64-unit Dense layers
dense_model.add(Dense(units=64))
dense_model.add(Dense(units=64))

# Display the model's layer list so the additions can be checked
dense_model.layers
Specify an appropriate activation function (documentation here).
We'll simplify the problem by specifying that you should use the string identifier for the function, and it should be one of these options:
sigmoid
softmax
Hint: is this a binary or a multi-class problem? This should guide your choice of activation function.
# Replace None with appropriate code
# Must be a string identifier: either "sigmoid" or "softmax"
activation_function = None
# Run this cell without changes

# activation_function should be a string
assert type(activation_function) == str

# A single output node pairs with "sigmoid". The number of output nodes
# _should_ be 1, but we'll give credit for a matching function ("softmax")
# even if the previous answer was incorrect.
assert activation_function == (
    "sigmoid" if num_output_nodes == 1 else "softmax"
)
Now we'll use that information to finalize the model.
If this code produces an error, consider restarting the kernel and re-running the code above. If it still produces an error, that is an indication that one or more of your answers above is incorrect.
# Run this cell without changes

# Add output layer: one unit per output node, using the chosen activation
dense_model.add(
    Dense(units=num_output_nodes, activation=activation_function)
)

# Determine appropriate loss function.
# With a single output node the target is compared directly against one
# probability. With more than one output node the target is still a single
# column of integer class labels (it is never one-hot encoded in this
# notebook), so the sparse variant of categorical cross-entropy is required;
# plain "categorical_crossentropy" expects one-hot targets.
loss = (
    "binary_crossentropy"
    if num_output_nodes == 1
    else "sparse_categorical_crossentropy"
)

# Compile model
dense_model.compile(optimizer="adam", loss=loss, metrics=["accuracy"])

dense_model.summary()
# Replace None as necessary
# Fit the model to the training data, using a subset of the
# training data as validation data
# The returned results object is later passed to plot_loss_and_accuracy,
# which reads its `history` and `epoch` attributes.
dense_model_results = dense_model.fit(
    x=None,  # training features
    y=None,  # training labels
    batch_size=None,
    epochs=None,
    verbose=None,
    validation_split=0.4,  # fraction of the training data used for validation
    shuffle=None
)
# Run this cell without changes
def plot_loss_and_accuracy(results, final=False):
    """
    Draw side-by-side loss and accuracy curves for a fitted model.

    Args:
        results: Object returned by model.fit(); its `history` dict must
            hold 'loss', 'val_loss', 'accuracy' and 'val_accuracy', and
            its `epoch` attribute supplies the x-axis values.
        final: When True the second curve is labelled "test"; otherwise it
            is labelled "validation".
    """
    val_label = "test" if final else "validation"

    # Pull every metric up front so a missing key fails before any plotting
    history = results.history
    curves = (
        ("Loss", history['loss'], history['val_loss']),
        ("Accuracy", history['accuracy'], history['val_accuracy']),
    )

    # One panel per metric, laid out left to right
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))

    for ax, (title, train_values, val_values) in zip(axes, curves):
        ax.set_title(title)
        sns.lineplot(x=results.epoch, y=train_values, ax=ax, label="train")
        sns.lineplot(x=results.epoch, y=val_values, ax=ax, label=val_label)
        ax.legend()
# Plot the train and validation curves recorded while fitting dense_model
plot_loss_and_accuracy(dense_model_results)
The model appears to be overfitting. To deal with this overfitting, modify the code below to include regularization in the model. You can add L1, L2, both L1 and L2, or dropout regularization.
Hint: these might be helpful
(EarlyStopping is a type of regularization that is not applicable to this problem framing, since it's a callback and not a layer.)
# Complete the following code
def build_model_with_regularization(n_input, n_output, activation, loss):
    """
    Creates and compiles a tf.keras Sequential model with two hidden layers
    This time regularization has been added

    NOTE: this is an exercise template. As written, only the input layer is
    added; the hidden layers, the output layer and the regularization still
    have to be filled in at the marked comments below.

    Args:
        n_input: Number of input nodes; used for the input layer's
            input_shape.
        n_output: Number of output nodes. Not used yet; presumably meant
            for the output layer's units.
        activation: Activation identifier. Not used yet; presumably meant
            for the output layer.
        loss: Loss passed to compile().

    Returns:
        The compiled Sequential model, named "regularized".
    """
    # create classifier
    classifier = Sequential(name="regularized")
    # add input layer
    classifier.add(Dense(units=64, input_shape=(n_input,)))
    # add hidden layers
    # add output layer
    classifier.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
    return classifier
# Build the regularized model from the same node counts, activation and
# loss that were chosen for dense_model above
model_with_regularization = build_model_with_regularization(
    num_input_nodes, num_output_nodes, activation_function, loss
)
model_with_regularization.summary()
# Run the code below without change

# Testing function to build model
# isinstance() is used instead of comparing type() so that a subclass of
# Sequential is accepted as well.
assert isinstance(model_with_regularization, Sequential)
def check_regularization(model):
    """
    Report whether a model uses any form of regularization.

    A layer counts as regularized when it is a Dropout layer, or when its
    config carries a non-empty kernel, bias or activity regularizer. All
    three regularizer slots are checked so that L1/L2 penalties are detected
    wherever they were attached, not only on the kernel.

    Args:
        model: Object whose get_config() returns a dict with a "layers"
            list; each entry has a "class_name" and a "config" dict.

    Returns:
        bool: True if at least one layer is regularized, otherwise False.
    """
    regularizer_keys = (
        "kernel_regularizer",
        "bias_regularizer",
        "activity_regularizer",
    )

    for layer in model.get_config()["layers"]:
        # Checking if layer is dropout layer
        if layer.get("class_name") == "Dropout":
            return True

        # Checking if any regularizer was specified; .get() treats a
        # missing key and an explicit None the same way
        layer_config = layer.get("config") or {}
        if any(layer_config.get(key) for key in regularizer_keys):
            return True

    return False
# Base credit for building the model, plus the remainder when at least one
# form of regularization is detected
score = .3
score += .7 if check_regularization(model_with_regularization) else 0
score
Now we'll evaluate the new model on the training set as well:
# Run this cell without changes

# Fit the model to the training data, using a subset of the
# training data as validation data
reg_model_results = model_with_regularization.fit(
    x=X_train,
    y=y_train,
    batch_size=None,
    epochs=20,
    verbose=0,
    validation_split=0.4,  # fraction of the training data used for validation
    shuffle=False
)

# Plot the train and validation curves for the regularized model
plot_loss_and_accuracy(reg_model_results)
(Whether or not your regularization made a difference will partially depend on how strong a regularization you applied, as well as some random elements of your current TensorFlow configuration.)
Now we evaluate both models on the holdout set:
# Run this cell without changes

# Fit dense_model on the full training split, this time passing the holdout
# split as validation_data so the "val_*" metrics are measured on test data.
# NOTE(review): dense_model was already fitted in an earlier cell; this
# second fit() call presumably continues from those weights rather than
# starting from a fresh model — confirm that is intended.
final_dense_model_results = dense_model.fit(
    x=X_train,
    y=y_train,
    batch_size=None,
    epochs=20,
    verbose=0,
    validation_data=(X_test, y_test),
    shuffle=False
)

# final=True labels the second curve "test" instead of "validation"
plot_loss_and_accuracy(final_dense_model_results, final=True)
Plot the loss and accuracy of your final regularized model.
# Replace None, as necessary
# Fit the regularized model, supplying the holdout data as validation_data
# (the preceding dense_model cell shows the same call shape)
final_reg_model_results = model_with_regularization.fit(
    x=None,  # training features
    y=None,  # training labels
    batch_size=None,
    epochs=None,
    verbose=None,
    validation_data=(None, None),  # (holdout features, holdout labels)
    shuffle=None
)

# final=True labels the second curve "test" instead of "validation"
plot_loss_and_accuracy(final_reg_model_results, final=True)