Source data and R programs. The programs read in the data, manipulate it, then output visualisations and statistics.
Source files containing toy data. The goal advancing visualisations. So some could be percceived as progressive or impractical. Some report ready.
The cumulative mean session length metric prominently displays AB test variant session length estimates trending over time to what appear to be true central tendencies.
data %>%
arrange(SessionStart) %>%
group_by(Prototype) %>%
mutate(cumulative_avg_session_length_mins = cummean(sessionLength)) %>%
ggplot(
aes(
SessionStart,
cumulative_avg_session_length_mins,
group = Prototype,
col = Prototype
)
) +
geom_point(size = .5) +
xlab("Session Start Time") +
ylab("Session Length (Min.)") +
ggtitle("Cumulative Mean Session Length") +
viridis::scale_color_viridis(
option = "D",
discrete = TRUE,
begin = 0,
end = 0.6
) +
theme(
panel.background = element_rect(fill = 'grey97'),
text = element_text(family = 'Segoe UI'),
plot.title = element_text(size = 9),
axis.title.x = element_text(size = 9),
axis.title.y = element_text(size = 9),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
legend.title = element_text(size = 9),
legend.text = element_text(size = 9)
)
Prototype A (proto_A) consistently outperforms Prototype B (proto_B).
Proto_A outperformance of proto_B has been statistically significant at 95% confidence for the majority of the test. (Overlapping confidence intervals represent statistical significance. Non overlapping intervals represent non statistically significant difference.) Narrowing confidence intervals and a persistent difference represent a high probability that proto_A maximizes session length.
data %>%
mutate(date = as.Date(SessionStart)) %>%
arrange(Prototype, date) %>%
group_by(Prototype) %>%
mutate(
observations = 1,
cumulative_avg_session_length = cummean(sessionLength),
cumulative_obs = cumsum(observations)
) %>%
group_by(Prototype, date) %>%
mutate(
cumulative_avg_session_length = last(cumulative_avg_session_length),
cumulative_obs = max(cumulative_obs),
cumulative_session_length_squared_errors = cumsum(as.numeric(sessionLength - cumulative_avg_session_length)^2)
) %>%
group_by(Prototype, date) %>%
summarise(
cumulative_session_length_ci_lower = max(cumulative_avg_session_length) - 1.39 * (sqrt(max(cumulative_session_length_squared_errors) / max(cumulative_obs)) / sqrt(max(cumulative_obs))),
cumulative_avg_session_length = max(cumulative_avg_session_length),
cumulative_session_length_ci_upper = max(cumulative_avg_session_length) + 1.39 * (sqrt(max(cumulative_session_length_squared_errors) / max(cumulative_obs)) / sqrt(max(cumulative_obs)))
) %>%
filter(date != "2018-07-21") %>%
ggplot(
aes(
date,
cumulative_avg_session_length,
group = Prototype,
col = Prototype
)
) +
geom_errorbar(
aes(
ymin = cumulative_session_length_ci_lower,
ymax = cumulative_session_length_ci_upper
),
col = 'black',
alpha = 0.5,
width = 0.35
) +
geom_line(alpha = 0.2) +
geom_linerange(
aes(
ymin = cumulative_session_length_ci_lower,
ymax = cumulative_session_length_ci_upper
),
size = 5
) +
xlab("Session Start Date") +
ylab("Session Length (Min.)") +
ggtitle("Cumulative Mean Session Length: 83.4% Confidence Intervals") +
viridis::scale_color_viridis(
option = "D",
discrete = TRUE,
begin = 0,
end = 0.6
) +
theme(
panel.background = element_rect(fill = 'grey97'),
text = element_text(family = 'Segoe UI'),
plot.title = element_text(size = 9),
axis.title.x = element_text(size = 9),
axis.title.y = element_text(size = 9),
axis.text.x = element_text(size = 8),
axis.text.y = element_text(size = 8),
legend.title = element_text(size = 9),
legend.text = element_text(size = 9)
)
This shows the retention rate for each marketing channel.
data %>%
group_by(channel) %>%
mutate(segment_size = n_distinct(player_id)) %>%
mutate(months_since_ftd = floor(as.numeric(difftime(activity_date, ftd_date, units = "days")) / (365.25 / 12))) %>%
filter(
ftd_date <= "2017-12-31",
ftd_date != activity_date,
months_since_ftd %in% seq(1, 12, by = 1)
) %>%
group_by(channel, months_since_ftd) %>%
summarise(rate = n_distinct(player_id) / max(segment_size)) %>%
ggplot(
aes(
as.factor(months_since_ftd),
as.factor(channel),
fill = rate
)
) +
geom_tile() +
xlab("Months Since 1st Transaction") +
ggtitle("Retention Rate") +
viridis::scale_fill_viridis(
option = "inferno",
discrete = FALSE,
name = "Rate",
labels = scales::percent
) +
theme(
text = element_text(family = 'Segoe UI'),
plot.background = element_rect(colour = "black", fill = "black"),
plot.title = element_text(colour = "gray 40", size = 9),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
legend.background = element_blank(),
legend.text = element_text(colour = "gray40", size = 8),
legend.title = element_text(colour = "gray40", size = 9),
legend.box.background = element_blank(),
legend.key.width = unit(2, "mm"),
axis.title.x = element_text(colour = "gray40", size = 9),
axis.text.y = element_text(colour = "gray40", size = 8),
axis.text.x = element_text(colour = 'gray40', size = 8),
axis.ticks.y = element_blank()
)
Lifetime revenue per monthly cohort.
data %>%
filter(activity_date >= ftd_date) %>%
arrange(ftd_date, activity_date) %>%
mutate(yearmon_fd = as.yearmon(ftd_date)) %>%
group_by(yearmon_fd, activity_date) %>%
summarise(total_daily_deposits = sum(deposits)) %>%
mutate(
life_time_revenue = cumsum(total_daily_deposits),
activity_date = if_else(
yearmon_fd == as.yearmon(activity_date),
as.Date(yearmon_fd + 0.1),
as.Date(activity_date)
)
) %>%
ggplot(
aes(
activity_date,
life_time_revenue,
col = as.factor(yearmon_fd)
)
) +
geom_line() +
ggtitle("Cumulative Lifetime Revenue") +
xlab('Date') +
ylab('Revenue') +
scale_y_continuous(labels = scales::dollar_format(prefix = "£")) +
viridis::scale_color_viridis(option = "viridis", discrete = TRUE) +
theme(
text = element_text(family = 'Segoe UI'),
plot.background = element_rect(colour = "black", fill = "black"),
plot.title = element_text(color = "gray40", size = 9),
panel.grid = element_blank(),
panel.background = element_blank(),
axis.ticks.y = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_text(angle = 40, size = 8),
axis.text.y = element_text(size = 8),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
legend.position = "none"
)