강의

멘토링

로드맵

인프런 커뮤니티 질문&답변

cg0630님의 프로필 이미지
cg0630

작성한 질문수

[인프런 빅쿼리 빠짝스터디 3주차] 리텐션 연습문제

작성

·

67

0

1번 과제

#weekly retention
WITH base AS(
  SELECT
    DISTINCT
    user_id,
    event_name,
    DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime, 
    DATE(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_date,
    user_pseudo_id
FROM advanced.app_logs
WHERE event_date BETWEEN '2022-08-01' AND '2022-11-03'
)
, dates AS(
  SELECT DISTINCT user_pseudo_id,
    DATE_TRUNC(MIN(event_date) OVER (PARTITION BY user_pseudo_id), WEEK(MONDAY)) AS first_week,
    DATE_TRUNC(event_date, WEEK(MONDAY)) AS event_week
FROM base
)
, first_week_and_diff AS (
  SELECT *,
    DATE_DIFF(event_week, first_week, week) AS diff_of_week
FROM dates
)
, user_counts AS (
	SELECT diff_of_week,
    COUNT(DISTINCT user_pseudo_id) AS user_cnt
	FROM first_week_and_diff
	GROUP BY diff_of_week
	ORDER BY diff_of_week
)
, first_week_user_count AS (
  SELECT diff_of_week,
    user_cnt,
    FIRST_VALUE(user_cnt) OVER(ORDER BY diff_of_week) AS first_week_user_cnt
  FROM user_counts
  WHERE diff_of_week = 0
)
SELECT *, SAFE_DIVIDE(user_cnt, first_week_user_cnt) AS retain_rate
FROM first_week_user_count

2번 과제

WITH base AS (
  -- 날짜 데이터 전처리 
  -- DATE도 실제 날짜와 일치하지 않을 수 있으니, 다시 확인하기 
  -- DATETIME(TIMESTAMP_MICROS(timestamp데이터),'대륙/국가') 기억하고 사용하기 

  SELECT 
    DATETIME(TIMESTAMP_MICROS(event_timestamp),'Asia/Seoul') AS event_datetime, 
    DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp),'Asia/Seoul')) AS event_date,
    event_name, 
    user_pseudo_id
  FROM `bigquery-432401.avdanced.app_logs_temp` 
  WHERE
    event_date BETWEEN "2022-08-01" AND "2022-11-03"
)
, cal_diff AS (
  -- 날짜데이터 간 차이 구하기
  SELECT
    *,
    DATE_DIFF("2022-11-03", event_date,DAY) AS diff_date
  FROM base
), user_type AS (

  -- user type 구하기
  -- 0. 해당없음
  -- 1. new : 첫번째 접속
  --2. current : 최근 30일내에 접속
  -- 3. resurrected : 30일 이상 활동이 없고, 최근 7일 이내 다시 접속한 사용자
  -- 4. dormant : 최근 60일 간 접속기록 없음  
  SELECT
    *,
    CASE
      -- 첫 번째 접속 => 'new'
      WHEN diff_date = FIRST_VALUE(diff_date) OVER(PARTITION BY user_pseudo_id ORDER BY event_date) THEN 'new' 
      -- 최근 30일 내에 접속한 사용자 => 'current'
      WHEN diff_date <= 30 THEN 'current'
      -- 30일 이상 활동이 없고, 최근 7일 이내 다시 접속한 사용자 => 'resurrected'
      WHEN diff_date > 30 AND diff_date <= 37 THEN 'resurrected'
      -- 60일 이상 접속하지 않은 사용자 => 'dormant'
      WHEN diff_date > 60 THEN 'dormant'
      ELSE 'none'
    END AS type
  FROM cal_diff
  GROUP BY all
  ORDER BY user_pseudo_id,diff_date
)
SELECT 
  *
FROM user_type

3번과제

WITH base AS (
  -- 날짜 데이터 전처리
  SELECT 
    DATETIME(TIMESTAMP_MICROS(event_timestamp),'Asia/Seoul') AS event_datetime, 
    DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp),'Asia/Seoul')) AS event_date,
    event_name, 
    user_pseudo_id
  FROM `bigquery-432401.avdanced.app_logs_temp` 
  WHERE
    event_date BETWEEN "2022-08-01" AND "2022-11-03"
),
cal_diff AS (
  -- 날짜데이터 간 차이 구하기
  SELECT
    *,
    DATE_DIFF("2022-11-03", event_date, DAY) AS diff_date
  FROM base
),
user_type AS (
  -- user type 구하기
  SELECT
    distinct user_pseudo_id,
    CASE
      WHEN diff_date = FIRST_VALUE(diff_date) OVER(PARTITION BY user_pseudo_id ORDER BY event_date) THEN 'new'
      WHEN diff_date <= 30 THEN 'current'
      WHEN diff_date > 30 AND diff_date <= 37 THEN 'resurrected'
      WHEN diff_date > 60 THEN 'dormant'
      ELSE 'none'
    END AS type
  FROM cal_diff
), cal_cnt AS (
  -- 각 type별 사용자 수 계산
  SELECT 
    type, 
    COUNT(user_pseudo_id) AS user_cnt
  FROM user_type
  GROUP BY type
)
SELECT
  type,
  user_cnt,
  ROUND(user_cnt * 100.0 / (SELECT COUNT(*) FROM user_type), 2) AS ratio
FROM cal_cnt
ORDER BY ratio DESC

4번과제

# click_payment event를 경험한 유저들의 weekly retention
WITH base AS(
  SELECT
    DISTINCT
    user_id,
    event_name,
    DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime, 
    DATE(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_date,
    user_pseudo_id
FROM advanced.app_logs
WHERE event_date BETWEEN '2022-08-01' AND '2023-08-31'
AND user_pseudo_id IN (SELECT user_pseudo_id FROM advanced.app_logs WHERE event_name = 'click_payment')
)
, dates AS(
  SELECT DISTINCT user_pseudo_id,
    DATE_TRUNC(MIN(event_date) OVER (PARTITION BY user_pseudo_id), WEEK(MONDAY)) AS first_week,
    DATE_TRUNC(event_date, WEEK(MONDAY)) AS event_week
FROM base
)
, first_week_and_diff AS (
  SELECT *,
    DATE_DIFF(event_week, first_week, week) AS diff_of_week
FROM dates
)
, user_counts AS (
	SELECT diff_of_week,
    COUNT(DISTINCT user_pseudo_id) AS user_cnt
	FROM first_week_and_diff
	GROUP BY diff_of_week
	ORDER BY diff_of_week
)
, first_week_user_count AS (
  SELECT diff_of_week,
    user_cnt,
    FIRST_VALUE(user_cnt) OVER(ORDER BY diff_of_week) AS first_week_user_cnt
  FROM user_counts
)
SELECT *, SAFE_DIVIDE(user_cnt, first_week_user_cnt) AS retain_rate
FROM first_week_user_count

답변

답변을 기다리고 있는 질문이에요
첫번째 답변을 남겨보세요!
cg0630님의 프로필 이미지
cg0630

작성한 질문수

질문하기