온라인 쇼핑몰의 월 별 매출액 집계
SELECT STRFTIME('%Y-%m', DATE(A.order_date)) AS `order_month`
, SUM(CASE WHEN B.order_id NOT LIKE 'C%' THEN B.price * B.quantity ELSE 0 END) AS `ordered_amount`
, SUM(CASE WHEN B.order_id LIKE 'C%' THEN B.price * B.quantity ELSE 0 END) AS `canceled_amount`
, SUM(B.price * B.quantity) AS `total_amount`
FROM orders A
JOIN order_items B
ON A.order_id = B.order_id
GROUP BY STRFTIME('%Y-%m', DATE(A.order_date))
ORDER BY `order_month` ASC
게임 평점 예측하기 1
누락된 평점 정보를 찾기 위해서는 critic_score나 user_score가 NULL인 데이터를 먼저 거른다.
그런 다음, critic_score, critic_count, user_score, user_count 각각 NULL일 때에만 같은 장르의 게임들에 대한 데이터로 대체한다.
대체할 데이터는, score라면 평균을 소수점 넷째 자리에서 반올림한 수치이며, count는 평균을 올림한 수치이다.
SELECT G.game_id
, G.name
, (
CASE WHEN G.critic_score IS NULL
THEN (
SELECT ROUND(AVG(A.critic_score), 3)
FROM games A
WHERE G.genre_id = A.genre_id
)
ELSE G.critic_score
END
) AS `critic_score`
, (
CASE WHEN G.critic_count IS NULL
THEN (
SELECT CAST(AVG(B.critic_count) + 0.999999 AS INTEGER)
FROM games B
WHERE G.genre_id = B.genre_id
)
ELSE G.critic_count
END
) AS `critic_count`
, (
CASE WHEN G.user_score IS NULL
THEN (
SELECT ROUND(AVG(C.user_score), 3)
FROM games C
WHERE G.genre_id = C.genre_id
)
ELSE G.user_score
END
) AS `user_score`
, (
CASE WHEN G.user_count IS NULL
THEN (
SELECT CAST(AVG(D.user_count) + 0.999999 AS INTEGER)
FROM games D
WHERE G.genre_id = D.genre_id
)
ELSE G.user_count
END
) AS `user_count`
FROM games AS G
WHERE G.year >= 2015 AND (G.critic_score IS NULL OR G.user_score IS NULL)
서울숲 요일별 대기오염도 계산하기
날짜 정보에서 요일을 구하는데, 기본적으로 일요일이 0이다. 따라서 6을 더하고 7로 나눈 나머지로 바꾼다. 이렇게 되면 월요일부터 정렬되게 된다.
SELECT
CASE
WHEN (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 = 0 THEN '월요일'
WHEN (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 = 1 THEN '화요일'
WHEN (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 = 2 THEN '수요일'
WHEN (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 = 3 THEN '목요일'
WHEN (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 = 4 THEN '금요일'
WHEN (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 = 5 THEN '토요일'
WHEN (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 = 6 THEN '일요일'
END AS `weekday`
, ROUND(AVG(no2), 4) AS `no2`
, ROUND(AVG(o3), 4) AS `o3`
, ROUND(AVG(co), 4) AS `co`
, ROUND(AVG(so2), 4) AS `so2`
, ROUND(AVG(pm10), 4) AS `pm10`
, ROUND(AVG(pm2_5), 4) AS `pm2_5`
FROM measurements
GROUP BY (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7
ORDER BY (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 ASC
폐쇄할 따릉이 정류소 찾기 2
대여 건수와 반납 건수를 구한 테이블을 따로 생성하고, 두 테이블을 JOIN하고 대여 건수 합과 반납 건수 합 둘 다 0보다 큰 경우에 한해서 2018년 대비 2019년 데이터의 비율을 구한다.
WITH
A AS (
SELECT
rent_station_id,
SUM(
CASE
WHEN STRFTIME ('%Y-%m', DATE(rent_at)) = '2018-10' THEN 1
ELSE 0
END
) AS `rent_2018`,
SUM(
CASE
WHEN STRFTIME ('%Y-%m', DATE(rent_at)) = '2019-10' THEN 1
ELSE 0
END
) AS `rent_2019`
FROM
rental_history
GROUP BY
rent_station_id
),
B AS (
SELECT
return_station_id,
SUM(
CASE
WHEN STRFTIME ('%Y-%m', DATE(return_at)) = '2018-10' THEN 1
ELSE 0
END
) AS `return_2018`,
SUM(
CASE
WHEN STRFTIME ('%Y-%m', DATE(return_at)) = '2019-10' THEN 1
ELSE 0
END
) AS `return_2019`
FROM
rental_history
GROUP BY
return_station_id
),
C AS (
SELECT
A.rent_station_id,
ROUND(
100.0 * (A.`rent_2019` + B.`return_2019`) / (A.`rent_2018` + B.`return_2018`),
2
) AS `usage_pct`
FROM
A
JOIN B ON A.rent_station_id = B.return_station_id
WHERE
(A.`rent_2018` + B.`return_2018`) > 0
AND (A.`rent_2019` + B.`return_2019`) > 0
)
SELECT
D.station_id,
D.name,
D.local,
C.`usage_pct`
FROM
station D
JOIN C ON D.station_id = C.rent_station_id
WHERE
C.`usage_pct` <= 50
멀티 플랫폼 게임 찾기
IN을 활용해서 3가지 메이저 플랫폼 계열에서 출시한 게임들을 분류한 테이블을 생성하고, UNION ALL로 3개의 테이블에 있는 모든 데이터를 합한다. 그리고 게임 이름을 기준으로 GROUP BY를 하면 계열사의 개수를 셀 수 있다. 계열사의 개수가 2개 이상인 게임들의 이름을 오름차순으로 출력한다.
WITH A AS (
SELECT DISTINCT(G.name), 'Sony' AS `platform`, G.year AS `year`
FROM games G
JOIN platforms P
ON G.platform_id = P.platform_id
WHERE G.year >= 2012
AND P.name IN ('PS3', 'PS4', 'PSP', 'PSV')
GROUP BY G.name
),
B AS (
SELECT DISTINCT(G.name), 'Nintendo' AS `platform`, G.year AS `year`
FROM games G
JOIN platforms P
ON G.platform_id = P.platform_id
WHERE G.year >= 2012
AND P.name IN ('Wii', 'WiiU', 'DS', '3DS')
GROUP BY G.name
),
C AS (
SELECT DISTINCT(G.name), 'Microsoft' AS `platform`, G.year AS `year`
FROM games G
JOIN platforms P
ON G.platform_id = P.platform_id
WHERE G.year >= 2012
AND P.name IN ('X360', 'XONE')
GROUP BY G.name
),
D AS (
SELECT * FROM A
UNION ALL
SELECT * FROM B
UNION ALL
SELECT * FROM C
)
SELECT name FROM D
GROUP BY name
HAVING COUNT(*) >= 2
ORDER BY name
전국 카페 주소 데이터 정제하기
SPLIT() 함수가 없기 때문에, 다른 방법을 사용해야 한다.
SELECT SUBSTR(address, 1, INSTR(address, ' ') - 1) AS `sido`
, SUBSTR(address, INSTR(address, ' ') + 1, INSTR(SUBSTR(address, INSTR(address, ' ') + 1), ' ') - 1) AS `sigungu`
, COUNT(*) AS `cnt`
FROM cafes
GROUP BY `sido`, `sigungu`
ORDER BY `cnt` DESC
미세먼지 수치의 계절간 차이
WITH A AS (
SELECT station, pm10
, (
CASE
WHEN (DATE(measured_at) >= '2022-03-01') AND (DATE(measured_at) <= '2022-05-31')
THEN 'spring'
WHEN (DATE(measured_at) >= '2022-06-01') AND (DATE(measured_at) <= '2022-08-31')
THEN 'summer'
WHEN (DATE(measured_at) >= '2022-09-01') AND (DATE(measured_at) <= '2022-11-30')
THEN 'autumn'
ELSE 'winter'
END
) AS `season`
FROM measurements
)
SELECT A.`season`
, MEDIAN(A.pm10) AS `pm10_median`
, ROUND(AVG(A.pm10), 2) AS `pm10_average`
FROM A
GROUP BY A.`season`
ORDER BY A.pm10
친구 수 집계하기
문제에서는 edges 테이블만 설명해주었지만 유효한 데이터를 파악하기 위해서는 users 테이블도 활용해야 한다.
WITH A AS (
SELECT C.user_id AS `a_id`
, COUNT(E.user_b_id) AS `a_friends`
FROM users C
LEFT JOIN edges E
ON C.user_id = E.user_a_id
GROUP BY `a_id`
),
B AS (
SELECT D.user_id AS `b_id`
, COUNT(F.user_a_id) AS `b_friends`
FROM users D
LEFT JOIN edges F
ON D.user_id = F.user_b_id
GROUP BY `b_id`
)
SELECT A.`a_id` AS `user_id`
, (A.`a_friends` + B.`b_friends`) AS `num_friends`
FROM A
JOIN B ON A.`a_id` = B.`b_id`
GROUP BY `user_id`
ORDER BY `num_friends` DESC, `user_id` ASC
'SolveSQL > 난이도 3' 카테고리의 다른 글
[SolveSQL/난이도 3] 데이터리안 SQL 데이터 분석 캠프 실전반(MySQL, SQLite) (0) | 2024.12.28 |
---|---|
[SolveSQL/난이도 3] 데이터리안 SQL 데이터 분석 캠프 입문반(MySQL, SQLite) (1) | 2024.12.28 |