Hey all, I am new to the forum. Just completed the projects on the data science path in 5th section entitled Analyze Real data with SQL. I’m looking for feedback regarding the last project about Marketing Attribution Queries.
- Are my queries correct? Is there a better way to construct the queries?
- Does the presentation reflect what would be expected in the real world?
Thanks all for any help!
Presentation slides follow here:
Note: I am having trouble uploading the powerpoint presentation that the directions recommended using. Any suggestions???
SQL follows here:
/*Count for the number of distinct campaigns*/
SELECT DISTINCT utm_campaign
FROM page_visits
GROUP BY 1;
/*Count for the number of distinct sources*/
SELECT DISTINCT utm_source
FROM page_visits
GROUP BY 1;
/*Relationship of campaigns and sources*/
SELECT utm_source,
utm_campaign
FROM page_visits
GROUP BY 1, 2;
/*Identification of the website pages*/
SELECT DISTINCT page_name
FROM page_visits;
/*First touches query*/
WITH first_touch AS (
SELECT user_id,
MIN(timestamp) as first_touch_at
FROM page_visits
GROUP BY user_id),
ft_attr AS (
SELECT ft.user_id,
ft.first_touch_at,
pv.utm_source,
pv.utm_campaign
FROM first_touch ft
JOIN page_visits pv
ON ft.user_id = pv.user_id
AND ft.first_touch_at = pv.timestamp
)
SELECT ft_attr.utm_source,
ft_attr.utm_campaign,
COUNT(*)
FROM ft_attr
GROUP BY 1,2
ORDER BY 3 DESC;
/*Last touch query*/
WITH last_touch AS (
SELECT user_id,
MAX(timestamp) as last_touch_at
FROM page_visits
GROUP BY user_id),
lt_attr AS (
SELECT lt.user_id,
lt.last_touch_at,
pv.utm_source,
pv.utm_campaign
FROM last_touch lt
JOIN page_visits pv
ON lt.user_id = pv.user_id
AND lt.last_touch_at = pv.timestamp
)
SELECT lt_attr.utm_source,
lt_attr.utm_campaign,
COUNT(*)
FROM lt_attr
GROUP BY 1,2
ORDER BY 3 DESC;
/*Count of users that land on the purchase page of the wesite*/
SELECT COUNT (DISTINCT user_id)
FROM page_visits
WHERE page_name LIKE '4%';
/*Last touch query selected for purchase page of the webiste*/
WITH last_touch AS (
SELECT user_id,
MAX(timestamp) as last_touch_at
FROM page_visits
WHERE page_name LIKE '4%'
GROUP BY user_id),
lt_attr AS (
SELECT lt.user_id,
lt.last_touch_at,
pv.utm_source,
pv.utm_campaign
FROM last_touch lt
JOIN page_visits pv
ON lt.user_id = pv.user_id
AND lt.last_touch_at = pv.timestamp
)
SELECT lt_attr.utm_source,
lt_attr.utm_campaign,
COUNT(*)
FROM lt_attr
GROUP BY 1,2
ORDER BY 3 DESC;
/*Query for Funnel with campaign included*/
SELECT DATE(timestamp, 'start of month') AS month,
utm_campaign,
SUM(CASE
WHEN page_name LIKE '1%'
THEN 1
ELSE 0
END) AS landing_page,
SUM(CASE
WHEN page_name LIKE '2%'
THEN 1
ELSE 0
END) AS shopping_cart,
SUM(CASE
WHEN page_name LIKE '3%'
THEN 1
ELSE 0
END) AS checkout,
SUM(CASE
WHEN page_name LIKE '4%'
THEN 1
ELSE 0
END) AS purchase
FROM page_visits
GROUP BY DATE(timestamp, 'start of month'), utm_campaign
ORDER BY DATE(timestamp, 'start of month'), utm_campaign;