작성
·
67
0
1번 과제
#weekly retention
WITH base AS(
SELECT
DISTINCT
user_id,
event_name,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime,
DATE(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_date,
user_pseudo_id
FROM advanced.app_logs
WHERE event_date BETWEEN '2022-08-01' AND '2022-11-03'
)
, dates AS(
SELECT DISTINCT user_pseudo_id,
DATE_TRUNC(MIN(event_date) OVER (PARTITION BY user_pseudo_id), WEEK(MONDAY)) AS first_week,
DATE_TRUNC(event_date, WEEK(MONDAY)) AS event_week
FROM base
)
, first_week_and_diff AS (
SELECT *,
DATE_DIFF(event_week, first_week, week) AS diff_of_week
FROM dates
)
, user_counts AS (
SELECT diff_of_week,
COUNT(DISTINCT user_pseudo_id) AS user_cnt
FROM first_week_and_diff
GROUP BY diff_of_week
ORDER BY diff_of_week
)
, first_week_user_count AS (
SELECT diff_of_week,
user_cnt,
FIRST_VALUE(user_cnt) OVER(ORDER BY diff_of_week) AS first_week_user_cnt
FROM user_counts
WHERE diff_of_week = 0
)
SELECT *, SAFE_DIVIDE(user_cnt, first_week_user_cnt) AS retain_rate
FROM first_week_user_count
2번 과제
WITH base AS (
-- 날짜 데이터 전처리
-- DATE도 실제 날짜와 일치하지 않을 수 있으니, 다시 확인하기
-- DATETIME(TIMESTAMP_MICROS(timestamp데이터),'대륙/국가') 기억하고 사용하기
SELECT
DATETIME(TIMESTAMP_MICROS(event_timestamp),'Asia/Seoul') AS event_datetime,
DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp),'Asia/Seoul')) AS event_date,
event_name,
user_pseudo_id
FROM `bigquery-432401.avdanced.app_logs_temp`
WHERE
event_date BETWEEN "2022-08-01" AND "2022-11-03"
)
, cal_diff AS (
-- 날짜데이터 간 차이 구하기
SELECT
*,
DATE_DIFF("2022-11-03", event_date,DAY) AS diff_date
FROM base
), user_type AS (
-- user type 구하기
-- 0. 해당없음
-- 1. new : 첫번째 접속
--2. current : 최근 30일내에 접속
-- 3. resurrected : 30일 이상 활동이 없고, 최근 7일 이내 다시 접속한 사용자
-- 4. dormant : 최근 60일 간 접속기록 없음
SELECT
*,
CASE
-- 첫 번째 접속 => 'new'
WHEN diff_date = FIRST_VALUE(diff_date) OVER(PARTITION BY user_pseudo_id ORDER BY event_date) THEN 'new'
-- 최근 30일 내에 접속한 사용자 => 'current'
WHEN diff_date <= 30 THEN 'current'
-- 30일 이상 활동이 없고, 최근 7일 이내 다시 접속한 사용자 => 'resurrected'
WHEN diff_date > 30 AND diff_date <= 37 THEN 'resurrected'
-- 60일 이상 접속하지 않은 사용자 => 'dormant'
WHEN diff_date > 60 THEN 'dormant'
ELSE 'none'
END AS type
FROM cal_diff
GROUP BY all
ORDER BY user_pseudo_id,diff_date
)
SELECT
*
FROM user_type
3번과제
WITH base AS (
-- 날짜 데이터 전처리
SELECT
DATETIME(TIMESTAMP_MICROS(event_timestamp),'Asia/Seoul') AS event_datetime,
DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp),'Asia/Seoul')) AS event_date,
event_name,
user_pseudo_id
FROM `bigquery-432401.avdanced.app_logs_temp`
WHERE
event_date BETWEEN "2022-08-01" AND "2022-11-03"
),
cal_diff AS (
-- 날짜데이터 간 차이 구하기
SELECT
*,
DATE_DIFF("2022-11-03", event_date, DAY) AS diff_date
FROM base
),
user_type AS (
-- user type 구하기
SELECT
distinct user_pseudo_id,
CASE
WHEN diff_date = FIRST_VALUE(diff_date) OVER(PARTITION BY user_pseudo_id ORDER BY event_date) THEN 'new'
WHEN diff_date <= 30 THEN 'current'
WHEN diff_date > 30 AND diff_date <= 37 THEN 'resurrected'
WHEN diff_date > 60 THEN 'dormant'
ELSE 'none'
END AS type
FROM cal_diff
), cal_cnt AS (
-- 각 type별 사용자 수 계산
SELECT
type,
COUNT(user_pseudo_id) AS user_cnt
FROM user_type
GROUP BY type
)
SELECT
type,
user_cnt,
ROUND(user_cnt * 100.0 / (SELECT COUNT(*) FROM user_type), 2) AS ratio
FROM cal_cnt
ORDER BY ratio DESC
4번과제
# click_payment event를 경험한 유저들의 weekly retention
WITH base AS(
SELECT
DISTINCT
user_id,
event_name,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime,
DATE(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_date,
user_pseudo_id
FROM advanced.app_logs
WHERE event_date BETWEEN '2022-08-01' AND '2023-08-31'
AND user_pseudo_id IN (SELECT user_pseudo_id FROM advanced.app_logs WHERE event_name = 'click_payment')
)
, dates AS(
SELECT DISTINCT user_pseudo_id,
DATE_TRUNC(MIN(event_date) OVER (PARTITION BY user_pseudo_id), WEEK(MONDAY)) AS first_week,
DATE_TRUNC(event_date, WEEK(MONDAY)) AS event_week
FROM base
)
, first_week_and_diff AS (
SELECT *,
DATE_DIFF(event_week, first_week, week) AS diff_of_week
FROM dates
)
, user_counts AS (
SELECT diff_of_week,
COUNT(DISTINCT user_pseudo_id) AS user_cnt
FROM first_week_and_diff
GROUP BY diff_of_week
ORDER BY diff_of_week
)
, first_week_user_count AS (
SELECT diff_of_week,
user_cnt,
FIRST_VALUE(user_cnt) OVER(ORDER BY diff_of_week) AS first_week_user_cnt
FROM user_counts
)
SELECT *, SAFE_DIVIDE(user_cnt, first_week_user_cnt) AS retain_rate
FROM first_week_user_count
답변