작성
·
65
0
주요 학습 Point
기존 SQL 작성할 때 없던 개념: Array와 Struct 및 Unnest(Flatten) 개념파악
Pivot 등 퍼널 분석 기본 쿼리 작성 ✍🏼 및 목적에 맞는 퍼널분석 방법론 비교
연습문제 1-4
/* 연습문제 1
- 테이블: array_exercises
- Row 기준: 각 영화 Title 별
- Column 기준: 장르 Unnest
*/
select title
, genre
from advanced.array_exercises
cross join unnest(genres) as genre -- genre가 array 형태임
;
/* 연습문제 2
- 테이블: array_exercises
- Row 기준: 각 영화 Title 별
- Column 기준: actor, character
*/
select title
, actor
from advanced.array_exercises
cross join unnest(actors) as actor
;
/* 연습문제 3
- 테이블: array_exercises
- Column 기준: title, actor, character, genre
*/
select title
, actor
, genre
from advanced.array_exercises
cross join unnest(actors) as actor
cross join unnest(genres) as genre
limit 100
;
/* 연습문제 4
- 문제: app_logs 배열 풀기
- 주어진 배열: event_params(>key, string_value or int_value)
*/
select event_date
, event_timestamp
, event_name
, param.key
, param.value.string_value
, param.value.int_value
, user_id
, user_pseudo_id
, platform
from advanced.app_logs
cross join unnest(event_params) as param
limit 10
;
Pivot 연습문제
/* 연습문제 1
- 테이블: orders
- Row 기준: order_date
- Column 기준: user_id 별
- value 기준: sum(amount)
*/
select order_date
, sum(if(user_id = 1, amount, 0)) as user_1
, sum(if(user_id = 2, amount, 0)) as user_2
, sum(if(user_id = 3, amount, 0)) as user_3
from advanced.orders
group by order_date
order by order_date
;
/* 연습문제 2
- 테이블: orders
- Row 기준: user_id
- Column 기준: order_date 날짜 별
- value 기준: sum(amount)
*/
select user_id
, sum(if(order_date = '2023-05-01', amount, 0)) as `2023-05-01`
, sum(if(order_date = '2023-05-02', amount, 0)) as `2023-05-02`
, sum(if(order_date = '2023-05-03', amount, 0)) as `2023-05-03`
, sum(if(order_date = '2023-05-04', amount, 0)) as `2024-05-04`
, sum(if(order_date = '2023-05-05', amount, 0)) as `2023-05-05`
from advanced.orders
group by user_id
;
/* 연습문제 3
- 테이블: orders
- Row 기준: user_id
- Column 기준: order_date 날짜 별
- value 기준: 주문 있으면 1 없으면 0 (Indicator)
*/
select user_id
, max(if(order_date = '2023-05-01', 1, 0)) as `2023-05-01`
, max(if(order_date = '2023-05-02', 1, 0)) as `2023-05-02`
, max(if(order_date = '2023-05-03', 1, 0)) as `2023-05-03`
, max(if(order_date = '2023-05-04', 1, 0)) as `2024-05-04`
, max(if(order_date = '2023-05-05', 1, 0)) as `2023-05-05`
from advanced.orders
group by user_id
;
/* 연습문제4
- 테이블: app_logs
- Pivot
- where 기준: user_id = 32888, event_name: click_cart
*/
with tmp as (
select user_id
, event_date
, event_name
, user_pseudo_id
, param.key
, param.value.string_value
, param.value.int_value
from advanced.app_logs
cross join unnest(event_params) as param
where 1=1
and user_id = 32888
and event_name = 'click_cart'
and key = 'food_id'
)
select int_value
from tmp
limit 10
;
퍼널 연습문제
/* 문제1
- 테이블: app_logs
- 문제: 각 퍼널 유저수 집계
- where 기준: 2022-08-01 ~ 2022-08-18
*/
WITH tmp AS (
SELECT
event_date,
event_timestamp,
event_name,
user_id,
user_pseudo_id,
MAX(IF(param.key = 'firebase_screen', param.value.string_value, null)) AS screen_name,
CONCAT(event_name, '-', MAX(IF(param.key = 'firebase_screen', param.value.string_value, null))) AS event_name_with_screen
FROM advanced.app_logs
cross join UNNEST(event_params) AS param
WHERE event_date BETWEEN '2022-08-01' AND '2022-08-18'
GROUP BY 1,2,3,4,5
)
SELECT
event_name_with_screen,
CASE WHEN event_name_with_screen = 'screen_view-welcome' THEN 1
WHEN event_name_with_screen = 'screen_view-home' THEN 2
WHEN event_name_with_screen = 'screen_view-food_category' THEN 3
WHEN event_name_with_screen = 'screen_view-restaurant' THEN 4
WHEN event_name_with_screen = 'screen_view-cart' THEN 5
WHEN event_name_with_screen = 'click_payment-cart' THEN 6 END AS step_number,
COUNT(DISTINCT user_pseudo_id) AS cnt
FROM tmp
WHERE event_name IN ('screen_view', 'click_payment')
AND screen_name IN ('welcome', 'home', 'food_category', 'restaurant', 'cart')
GROUP BY 1,2
ORDER by 2
;
/* 문제2
- 테이블: app_logs
- 문제: 일자별 퍼널 유저수 집계
- where 기준: 2022-08-01 ~ 2022-08-18
*/
WITH tmp AS (
SELECT
event_date,
event_timestamp,
event_name,
user_id,
user_pseudo_id,
MAX(IF(param.key = 'firebase_screen', param.value.string_value, null)) AS screen_name,
CONCAT(event_name, '-', MAX(IF(param.key = 'firebase_screen', param.value.string_value, null))) AS event_name_with_screen
FROM advanced.app_logs
cross join UNNEST(event_params) AS param
WHERE event_date BETWEEN '2022-08-01' AND '2022-08-18'
GROUP BY 1,2,3,4,5
)
SELECT event_date
, event_name_with_screen
, CASE WHEN event_name_with_screen = 'screen_view-welcome' THEN 1
WHEN event_name_with_screen = 'screen_view-home' THEN 2
WHEN event_name_with_screen = 'screen_view-food_category' THEN 3
WHEN event_name_with_screen = 'screen_view-restaurant' THEN 4
WHEN event_name_with_screen = 'screen_view-cart' THEN 5
WHEN event_name_with_screen = 'click_payment-cart' THEN 6 END AS step_number
, COUNT(DISTINCT user_pseudo_id) AS cnt
FROM tmp
WHERE event_name IN ('screen_view', 'click_payment')
AND screen_name IN ('welcome', 'home', 'food_category', 'restaurant', 'cart')
GROUP BY 1,2,3
ORDER by 1, 3
;
/* 문제3
- 테이블: app_logs
- 문제: 집계한 데이터 pivot
- where 기준: 2022-08-01 ~ 2022-08-18
*/
WITH tmp AS (
SELECT
event_date,
event_timestamp,
event_name,
user_id,
user_pseudo_id,
MAX(IF(param.key = 'firebase_screen', param.value.string_value, null)) AS screen_name,
CONCAT(event_name, '-', MAX(IF(param.key = 'firebase_screen', param.value.string_value, null))) AS event_name_with_screen
FROM advanced.app_logs
cross join UNNEST(event_params) AS param
WHERE event_date BETWEEN '2022-08-01' AND '2022-08-18'
GROUP BY 1,2,3,4,5
), tmp2 as (
SELECT event_date
, event_name_with_screen
, CASE WHEN event_name_with_screen = 'screen_view-welcome' THEN 1
WHEN event_name_with_screen = 'screen_view-home' THEN 2
WHEN event_name_with_screen = 'screen_view-food_category' THEN 3
WHEN event_name_with_screen = 'screen_view-restaurant' THEN 4
WHEN event_name_with_screen = 'screen_view-cart' THEN 5
WHEN event_name_with_screen = 'click_payment-cart' THEN 6 END AS step_number
, COUNT(DISTINCT user_pseudo_id) AS cnt
FROM tmp
WHERE event_name IN ('screen_view', 'click_payment')
AND screen_name IN ('welcome', 'home', 'food_category', 'restaurant', 'cart')
GROUP BY 1,2,3
ORDER by 1, 3
)
select event_date
, sum(if(step_number = 1, cnt, 0)) as `screen_view-welcome`
, sum(if(step_number = 2, cnt, 0)) as `screen_view-home`
, sum(if(step_number = 3, cnt, 0)) as `screen_view-food_category`
, sum(if(step_number = 4, cnt, 0)) as `screen_view-restaurant`
, sum(if(step_number = 5, cnt, 0)) as `screen_view-cart`
, sum(if(step_number = 6, cnt, 0)) as `click_payment-cart`
from tmp2
group by event_date
order by 1
;
pivot 쿼리 연습문제 2번에서, 모든 날짜를 기입하는 방식으로만 Pivot화 할 수 있는지 궁금합니다. 예를 들어, 날짜가 20개 있는 경우라면 일일이 날짜 기입하지 않고 일반화해서 pivot할 수 있는 방법이 있을지 여쭙습니다.
답변 1
0
안녕하세요, 인프런 AI 인턴입니다.
jihyun lee님, SQL에서 다수의 날짜를 간편하게 Pivot 처리를 하려면 CASE
문을 일일이 사용하는 대신 동적인 피벗을 사용할 수 있습니다. 일반적으로 RDBMS에 따라 접근 방식이 조금씩 다를 수 있지만, 예를 들어 MySQL이나 PostgreSQL에서는 동적 SQL을 사용하거나, CROSS JOIN
을 통해 다중 날짜를 포함하도록 설계할 수 있습니다.
홉을 알기 위해 다음과 같은 접근 방식을 고려해 보세요:
동적 SQL 사용(주로 프로시저에서 사용 가능): SQL을 문자열로 구성한 뒤 해당 문자열을 실행하여 동적으로 열을 생성합니다.
CROSS JOIN을 활용한 Grouping: SQL을 조금 변형하여 각 날짜에 대해 그룹별로 데이터를 집계할 수 있습니다.
그러나, 주어진 데이터베이스의 기능에 따라 구체적인 방법이 달라질 수 있습니다. 따라서 각 데이터베이스의 문서를 참고하거나 Stack Overflow와 같은 커뮤니티에서 질문하여 더 많은 도움을 받을 수 있습니다.
Stack Overflow 링크: Spring Data on Stack Overflow
추가로 도움이 필요하시면 언제든지 문의해 주세요.