This repository contains solutions for #8WeekSQLChallenge, they are interesting real-world case studies that will allow you to apply and enhance your SQL skills in many use cases.
I used Microsoft SQL Server in writing SQL queries to solve these case studies.
- Data cleaning & transformation
- Aggregations
- Joins
- CTEs
- Variables
- Window functions
- Ranking (ROW_NUMBER, DENSE_RANK)
- Analytics (LEAD, LAG)
- CASE WHEN statements
- Subqueries
- UNION & INTERSECT
- DATETIME functions
- Data type conversion
- TEXT functions, text and string manipulation
Case Study #1 : Danny's Diner
Case Study #2 : Pizza Runner
Case Study #3 : Foodie-Fi
Case Study #4 : Data Bank
-- Case study 1, question 10
-- Question: In the first week after a customer joins the program (including their join date) they earn 2x points on all items, not just sushi - how many points do customer A and B have at the end of January?
WITH dates_cte AS(
SELECT *,
DATEADD(DAY, 6, join_date) AS valid_date,
EOMONTH('2021-01-1') AS last_date
FROM members
)
SELECT
s.customer_id,
sum(CASE
WHEN s.product_id = 1 THEN price*20
WHEN s.order_date between d.join_date and d.valid_date THEN price*20
ELSE price*10
END) as total_points
FROM
dates_cte d,
sales s,
menu m
WHERE
d.customer_id = s.customer_id
AND
m.product_id = s.product_id
AND
s.order_date <= d.last_date
GROUP BY s.customer_id;
-- Case study 2, data cleaning
SELECT
order_id,
runner_id,
cast(CASE
WHEN pickup_time = 'null' THEN null
ELSE pickup_time
END as datetime) as pickup_time,
cast(CASE
WHEN distance = 'null' THEN null
ELSE TRIM('km' from distance)
END as float) as distance,
cast(CASE
WHEN duration = 'null' THEN null
ELSE SUBSTRING(duration, 1, 2)
END as int)as duration,
CASE
WHEN cancellation in ('null', '') THEN null
ELSE cancellation
END as cancellation
INTO #cleaned_runner_orders
FROM runner_orders;
-- Case study 2, c) ingredient optimisation data cleaning
-- Question: The ingredients of each pizza a stored at pizza_recipes table as a comma separated string including ids of all its toppings, change this string to multiple rows
SELECT
p.pizza_id,
TRIM(t.value) AS topping_id,
pt.topping_name
INTO #cleaned_toppings
FROM
pizza_recipes as p
CROSS APPLY string_split(p.toppings, ',') as t
JOIN pizza_toppings as pt
ON TRIM(t.value) = pt.topping_id
;
-- Case study 2, c) ingredient optimisation question 3
-- Question: Generate an order item for each record in the customers_orders table in the format of one of the following:
-- Meat Lovers
-- Meat Lovers - Exclude Beef
-- Meat Lovers - Extra Bacon
-- Meat Lovers - Exclude Cheese, Bacon - Extra Mushroom, Peppers
WITH extras_cte AS
(
SELECT
record_id,
'Extra ' + STRING_AGG(t.topping_name, ', ') as record_options
FROM
#extras e,
pizza_toppings t
WHERE e.topping_id = t.topping_id
GROUP BY record_id
),
exclusions_cte AS
(
SELECT
record_id,
'Exclude ' + STRING_AGG(t.topping_name, ', ') as record_options
FROM
#exclusions e,
pizza_toppings t
WHERE e.topping_id = t.topping_id
GROUP BY record_id
),
union_cte AS
(
SELECT * FROM extras_cte
UNION
SELECT * FROM exclusions_cte
)
SELECT
c.record_id,
CONCAT_WS(' - ', p.pizza_name, STRING_AGG(cte.record_options, ' - '))
FROM
#cleaned_customer_orders c
JOIN pizza_names p
ON c.pizza_id = p.pizza_id
LEFT JOIN union_cte cte
ON c.record_id = cte.record_id
GROUP BY
c.record_id,
p.pizza_name
ORDER BY 1;
-- Case study 3, b) data analysis question 5
-- Question: How many customers have churned straight after their initial free trial - what percentage is this rounded to the nearest whole number?
WITH churned_after_trial AS(
SELECT
customer_id,
CASE
WHEN
plan_id = 4 --churn plan
AND
LAG(plan_id) OVER (PARTITION BY customer_id ORDER BY start_date) = 0 --trial plan
THEN 1
ELSE 0
END as is_churned
FROM subscriptions
)
SELECT
SUM(is_churned) as churned_customers,
FLOOR(SUM(is_churned) / CAST(COUNT(DISTINCT customer_id) AS float) * 100) as churn_perct
FROM churned_after_trial;
-- Case study 3, b) data analysis question 10
-- Question: Can you further breakdown this average value into 30 day periods (i.e. 0-30 days, 31-60 days etc)
WITH trial_plan AS(
SELECT
customer_id,
start_date AS join_date
FROM subscriptions
WHERE plan_id = 0
),
annual_plan AS(
SELECT
customer_id,
start_date AS annual_start_date
FROM subscriptions
WHERE plan_id = 3
),
buckets AS(
SELECT
tp.customer_id,
join_date,
annual_start_date,
-- create buckets of 30 days period from 1 to 12 (i.e monthly buckets)
DATEDIFF(DAY, join_date, annual_start_date)/30 + 1 AS bucket
FROM
trial_plan tp
JOIN annual_plan ap
ON tp.customer_id = ap.customer_id
)
SELECT
CASE
WHEN bucket = 1 THEN CONCAT(bucket-1, ' - ', bucket*30, ' days')
ELSE CONCAT((bucket-1)*30 + 1, ' - ', bucket*30, ' days')
END AS period,
COUNT(customer_id) AS total_customers,
CAST(AVG(DATEDIFF(DAY, join_date, annual_start_date)*1.0) AS decimal(5, 2)) AS average_days
FROM buckets
GROUP BY bucket;