Customer churn rate is an important performance metric in the Telecoms industry
due to the highly competitive markets. The churn rate enables
companies to understand why their customers are leaving. You
are hereby provided with the churn dataset containing randomly collected
data from a telecom company’s database. Develop ML models that can help
the retention team predict high-risk churn customers before they leave
Project structure
All the required library
# Install packages: needed just for the first run
set.seed(42)  # fix the RNG so train/test splits and model fits are reproducible
install.packages("caret")
install.packages("e1071")
install.packages("dplyr")
install.packages("tidyr")
install.packages("readr")
install.packages("corrplot")
install.packages("ggplot2")
install.packages("MASS")
install.packages("rms")
install.packages("ggpubr")
install.packages("gplots")
# Random Forest
install.packages("randomForest")
# Decision tree
install.packages("rpart")
install.packages("rpart.plot")
# Xgboost
install.packages("xgboost")
# Neural Network: reticulate, keras and tensorflow must be installed and the
# tensorflow package loaded BEFORE install_tensorflow() can be called
install.packages("reticulate")
install.packages("keras")
install.packages("tensorflow")
library(tensorflow)   # provides install_tensorflow()
install_tensorflow()  # downloads the Python TensorFlow backend
#ROC curve
install.packages("ROCR")
install.packages("pROC")
Applied feature engineering to the "tenure" feature: it was converted
from a numeric count of months into categories representing years of tenure.
Convert categorical (character) features to factors
# Inspect feature types before conversion.
# NOTE: str() prints as a side effect and returns NULL, so it must not be
# passed as an argument to cat() — doing so garbles/drops the output.
cat("\nThe features before converting\n")
str(data)
# Convert every character column to a factor so modelling functions
# treat them as categorical predictors
data <- data %>% mutate_if(is.character, as.factor)
cat("\nThe features after converting\n")
str(data)
Split the data into 80% for training and 20% for testing.
# For the Training Set
# FIX: predict.rpart takes `newdata=`, not `data=`. With `data=` the argument
# is silently swallowed by `...` and predictions fall back to the fitted
# training data — coincidentally the same here, but fragile and misleading.
train_pred_gini <- predict(DT_Model_gini, newdata = train_data, type = "class")
train_prob_gini <- predict(DT_Model_gini, newdata = train_data, type = "prob")
# Encode actual labels as 0/1 because pROC::roc() expects a numeric response
train_actual <- ifelse(train_data$Churn == "Yes", 1, 0)
print('the confusion matrix of the decision tree with Gini on the training set')
print(confusionMatrix(data = train_pred_gini, mode = "everything", reference = train_data$Churn))
# Column 2 of the probability matrix is P(Churn == "Yes")
roc <- roc(train_actual, train_prob_gini[, 2], plot = TRUE, print.auc = TRUE, main ="ROC Decision Tree for Training set with Gini splitting")
Confusion Matrix
ROC Curve
# For the Test Set
# Hard class predictions and class probabilities on held-out data
test_pred_gini <- predict(DT_Model_gini, newdata = test_data, type = "class")
test_prob_gini <- predict(DT_Model_gini, newdata = test_data, type = "prob")
# 0/1 encoding of the true labels, as required by pROC::roc()
test_actual <- ifelse(test_data$Churn == "Yes", 1, 0)
print('the confusion matrix of the decision tree with Gini on the testing set')
print(confusionMatrix(data = test_pred_gini, mode = "everything", reference = test_data$Churn))
# Second probability column corresponds to the positive ("Yes") class
roc <- roc(test_actual, test_prob_gini[, 2], plot = TRUE, print.auc = TRUE, main ="ROC Decision Tree for Testing set with Gini splitting")
#For the Training Set
# FIX: use `newdata=` — predict.rpart has no `data=` argument; with `data=`
# the predictions silently come from the stored training fit instead.
train_pred_info <- predict(DT_Model_info, newdata = train_data, type = "class")
train_prob_info <- predict(DT_Model_info, newdata = train_data, type = "prob")
# 0/1 encoding of the true labels for the ROC curve
train_actual <- ifelse(train_data$Churn == "Yes", 1, 0)
print('the confusion matrix of the decision tree with information gain on the training set')
print(confusionMatrix(data = train_pred_info, mode = "everything", reference = train_data$Churn))
# Column 2 holds P(Churn == "Yes")
roc <- roc(train_actual, train_prob_info[, 2], plot = TRUE, print.auc = TRUE, main ="ROC Decision Tree for Training set with information splitting")
Confusion Matrix
ROC Curve
#For the Test Set:
# Idiom fix: use left assignment `<-` instead of right assignment `->`
# and `=`, consistent with R style conventions
test_pred_info <- predict(DT_Model_info, newdata = test_data, type = "class")
test_prob_info <- predict(DT_Model_info, newdata = test_data, type = "prob")
# 0/1 encoding of the true labels for the ROC curve
test_actual <- ifelse(test_data$Churn == "Yes", 1, 0)
print('the confusion matrix of the decision tree with information gain on the testing set')
print(confusionMatrix(data = test_pred_info, mode = "everything", reference = test_data$Churn))
# Column 2 holds P(Churn == "Yes")
roc <- roc(test_actual, test_prob_info[, 2], plot = TRUE, print.auc = TRUE, main ="ROC Decision Tree for Testing set with information splitting")
Confusion Matrix
ROC Curve
Conclusion: There is no difference with changing the splitting strategies only.
Prune the Decision Tree by reducing the maxdepth from 3 to 2
#For the Training Set
# FIX: `newdata=`, not `data=` (predict.rpart silently ignores `data=`)
train_pred <- predict(DT_Model_information, newdata = train_data, type = "class")
train_prob <- predict(DT_Model_information, newdata = train_data, type = "prob")
# 0/1 encoding of the true labels for the ROC curve
train_actual <- ifelse(train_data$Churn == "Yes", 1, 0)
print('the confusion matrix of the decision tree with maxlength = 2 on the training set')
# FIX: wrap in print() — a bare confusionMatrix() call is silent when the
# script is sourced, unlike the sibling blocks
print(confusionMatrix(data = train_pred, mode = "everything", reference = train_data$Churn))
# FIX: plot title had a duplicated "with" and misnamed the rpart
# parameter (it is maxdepth, not maxlength)
roc <- roc(train_actual, train_prob[, 2], plot = TRUE, print.auc = TRUE, main ="ROC Decision Tree for Training set with maxdepth = 2")
Confusion Matrix
ROC Curve
#For the Test Set:
test_pred <- predict(DT_Model_information, newdata = test_data, type = "class")
test_prob <- predict(DT_Model_information, newdata = test_data, type = "prob")
# 0/1 encoding of the true labels for the ROC curve
test_actual <- ifelse(test_data$Churn == "Yes", 1, 0)
print('the confusion matrix of the decision tree with maxlength = 2 on the testing set')
print(confusionMatrix(data = test_pred, mode = "everything", reference = test_data$Churn))
# FIX: plot title had a duplicated "with" and misnamed the rpart
# parameter (it is maxdepth, not maxlength)
roc <- roc(test_actual, test_prob[, 2], plot = TRUE, print.auc = TRUE, main ="ROC Decision Tree for Testing set with maxdepth = 2")
#For the Training Set
# FIX: `newdata=`, not `data=` (predict.rpart silently ignores `data=`)
train_pred_cp1 <- predict(DT_Model_cp1, newdata = train_data, type = "class")
train_prob_cp1 <- predict(DT_Model_cp1, newdata = train_data, type = "prob")
# 0/1 encoding of the true labels for the ROC curve
train_actual <- ifelse(train_data$Churn == "Yes", 1, 0)
print("confusion Matrix for Decision Tree on training set with cp = 0")
# FIX: wrap in print() so the matrix is shown when the script is sourced
print(confusionMatrix(data = train_pred_cp1, mode = "everything", reference = train_data$Churn))
roc <- roc(train_actual, train_prob_cp1[, 2], plot = TRUE, print.auc = TRUE, main ="ROC for Decision Tree on training set with cp = 0")
Confusion Matrix
ROC Curve
#For the Test Set:
test_pred_cp1 <- predict(DT_Model_cp1, newdata = test_data, type = "class")
test_prob_cp1 <- predict(DT_Model_cp1, newdata = test_data, type = "prob")
# 0/1 encoding of the true labels for the ROC curve
test_actual <- ifelse(test_data$Churn == "Yes", 1, 0)
print("confusion Matrix for Decision Tree on testing set with cp = 0")
# FIX: wrap in print() so the matrix is shown when the script is sourced
print(confusionMatrix(data = test_pred_cp1, mode = "everything", reference = test_data$Churn))
roc <- roc(test_actual, test_prob_cp1[, 2], plot = TRUE, print.auc = TRUE, main ="ROC for Decision Tree on testing set with cp = 0")
Confusion Matrix
ROC Curve
Control the Decision Tree by changing the Cp value to .01
# the Decision Tree by changing the Cp value [.01]
# FIX: spell out `cp` — the original `c = .01` only worked through R's
# partial argument matching in rpart.control()
DT_Model_cp2 <- rpart(formula = Churn ~., data = train_data,
method = "class", parms = list(split = "gini"), control = rpart.control(cp = .01))
Confusion Matrix And ROC Curve
#For the Training Set
# FIX: `newdata=`, not `data=` (predict.rpart silently ignores `data=`)
train_pred_cp2 <- predict(DT_Model_cp2, newdata = train_data, type = "class")
train_prob_cp2 <- predict(DT_Model_cp2, newdata = train_data, type = "prob")
# 0/1 encoding of the true labels for the ROC curve
train_actual <- ifelse(train_data$Churn == "Yes", 1, 0)
print("confusion Matrix for Decision Tree on training set with cp = .01")
# FIX: wrap in print() so the matrix is shown when the script is sourced
print(confusionMatrix(data = train_pred_cp2, mode = "everything", reference = train_data$Churn))
roc <- roc(train_actual, train_prob_cp2[, 2], plot = TRUE, print.auc = TRUE, main ="ROC for Decision Tree on training set with cp = .01")
Confusion Matrix
ROC Curve
#For the Test Set:
test_pred_cp2 <- predict(DT_Model_cp2, newdata = test_data, type = "class")
test_prob_cp2 <- predict(DT_Model_cp2, newdata = test_data, type = "prob")
# 0/1 encoding of the true labels for the ROC curve
test_actual <- ifelse(test_data$Churn == "Yes", 1, 0)
print("confusion Matrix for Decision Tree on testing set with cp = 0.01")
# FIX: wrap in print() so the matrix is shown when the script is sourced
print(confusionMatrix(data = test_pred_cp2, mode = "everything", reference = test_data$Churn))
roc <- roc(test_actual, test_prob_cp2[, 2], plot = TRUE, print.auc = TRUE, main ="ROC for Decision Tree on testing set with cp = .01")
Confusion Matrix
ROC Curve
Control the Decision Tree by changing the Cp value to .001
#For the Training Set
# FIX: `newdata=`, not `data=` (predict.rpart silently ignores `data=`)
train_pred_cp3 <- predict(DT_Model_cp3, newdata = train_data, type = "class")
train_prob_cp3 <- predict(DT_Model_cp3, newdata = train_data, type = "prob")
# 0/1 encoding of the true labels for the ROC curve
train_actual <- ifelse(train_data$Churn == "Yes", 1, 0)
print("confusion Matrix for Decision Tree on training set with cp = 0.001")
# FIX: wrap in print() so the matrix is shown when the script is sourced
print(confusionMatrix(data = train_pred_cp3, mode = "everything", reference = train_data$Churn))
roc <- roc(train_actual, train_prob_cp3[, 2], plot = TRUE, print.auc = TRUE, main ="ROC for Decision Tree on training set with cp = .001")
Confusion Matrix
ROC Curve
#For the Test Set:
test_pred_cp3 <- predict(DT_Model_cp3, newdata = test_data, type = "class")
test_prob_cp3 <- predict(DT_Model_cp3, newdata = test_data, type = "prob")
# 0/1 encoding of the true labels for the ROC curve
test_actual <- ifelse(test_data$Churn == "Yes", 1, 0)
print("confusion Matrix for Decision Tree on testing set with cp = 0.001")
# FIX: wrap in print() so the matrix is shown when the script is sourced
print(confusionMatrix(data = test_pred_cp3, mode = "everything", reference = test_data$Churn))
roc <- roc(test_actual, test_prob_cp3[, 2], plot = TRUE, print.auc = TRUE, main ="ROC for Decision Tree on testing set with cp = .001")
Confusion Matrix
ROC Curve
Decision Tree Conclusion
Different splitting strategies
Parameters
Train accuracy
Test accuracy
Split = ”gini”
0.7894
0.7905
Split = ”information”
0.7894
0.7905
There is no difference with changing the splitting strategies only.
Change the cp
Parameters
Train accuracy
Test accuracy
cp = 0
0.8619
0.7517
cp = 0.01 (default)
0.7894
0.7905
cp = 0.001
0.8493
0.7823
cp = 0 clearly suffers from overfitting: its training accuracy (0.8619) far exceeds its test accuracy (0.7517).
Change the maxdepth
Parameters
Train accuracy
Test accuracy
maxdepth = 3 (default)
0.7894
0.7905
maxdepth = 2
0.7603
0.7694
Pruning reduces the complexity of the tree and reduces the over-fitting, but it does not improve the overall accuracy: after pruning the tree by reducing maxdepth from 3 to 2, the test accuracy dropped from 0.7905 to 0.7694.
#For the Training Set:
# Build the numeric feature matrix once: unclass() converts factor columns to
# their integer codes, sapply() assembles the columns, and Churn is dropped
train_matrix_xgb <- as.matrix(subset(sapply(train_data, unclass), select = -Churn))
# NOTE: predict() on an xgb.Booster ignores type = "class"/"prob" and always
# returns P(Churn = "Yes"), so a single call serves both purposes
train_prob_xgb <- predict(xgb, train_matrix_xgb)
train_prod_xgb <- train_prob_xgb  # kept under its original name for compatibility
train_actual <- ifelse(train_data$Churn == "Yes", 1, 0)
# Threshold the probabilities at 0.5 to obtain hard class predictions;
# wrap in print() so the matrix is shown when the script is sourced
print(confusionMatrix(data = factor(ifelse(train_prod_xgb >= .5, 1, 0), 0:1), mode = "everything", reference = as.factor(train_actual)))
roc <- roc(train_actual, train_prob_xgb, plot = TRUE, print.auc = TRUE, main ="ROC for XGBoost on training set")
Confusion Matrix
ROC Curve
#For the Test Set:
# Build the numeric feature matrix once (factors -> integer codes, drop Churn)
test_matrix_xgb <- as.matrix(subset(sapply(test_data, unclass), select = -Churn))
# predict() on an xgb.Booster returns P(Churn = "Yes") regardless of type=,
# so one call provides both the "class" and "prob" results
test_prob_xgb <- predict(xgb, newdata = test_matrix_xgb)
test_pred_xgb <- test_prob_xgb  # kept under its original name for compatibility
test_actual <- ifelse(test_data$Churn == "Yes", 1, 0)
# Threshold at 0.5 for hard predictions; print() so output appears when sourced
print(confusionMatrix(data = factor(ifelse(test_pred_xgb >= .5, 1, 0), 0:1), mode = "everything", reference = as.factor(test_actual)))
roc <- roc(test_actual, test_prob_xgb, plot = TRUE, print.auc = TRUE, main ="ROC for XGBoost on testing set")
Confusion Matrix
ROC Curve
Note: The XGBoost training accuracy is 84.06% and the testing accuracy is 80.07%. The roughly 4%
gap between the training and testing accuracies is not large, so the model exhibits only slight
overfitting.
Neural Network
Build a DNN using Keras with 3 dense layers and the ReLU activation function