SolveSQL/난이도 3

[SolveSQL/난이도 3] Advent of SQL 2024(MySQL, SQLite)

보단잉 2024. 12. 29. 21:07

온라인 쇼핑몰의 월 별 매출액 집계

SELECT STRFTIME('%Y-%m', DATE(A.order_date)) AS `order_month`
, SUM(CASE WHEN B.order_id NOT LIKE 'C%' THEN B.price * B.quantity ELSE 0 END) AS `ordered_amount`
, SUM(CASE WHEN B.order_id LIKE 'C%' THEN B.price * B.quantity ELSE 0 END) AS `canceled_amount`
, SUM(B.price * B.quantity) AS `total_amount`
FROM orders A
JOIN order_items B
ON A.order_id = B.order_id
GROUP BY STRFTIME('%Y-%m', DATE(A.order_date))
ORDER BY `order_month` ASC

 

게임 평점 예측하기 1

누락된 평점 정보를 찾기 위해서는 critic_score나 user_score가 NULL인 데이터를 먼저 거른다.

그런 다음, critic_score, critic_count, user_score, user_count 각각 NULL일 때에만 같은 장르의 게임들에 대한 데이터로 대체한다.

대체할 데이터는, score라면 평균을 소수점 넷째 자리에서 반올림한 수치이며, count는 평균을 올림한 수치이다.

SELECT G.game_id
, G.name
, (
  CASE WHEN G.critic_score IS NULL
  THEN (
    SELECT ROUND(AVG(A.critic_score), 3)
    FROM games A
    WHERE G.genre_id = A.genre_id
  )
  ELSE G.critic_score
  END
) AS `critic_score`
, (
  CASE WHEN G.critic_count IS NULL
  THEN (
    SELECT CAST(AVG(B.critic_count) + 0.999999 AS INTEGER)
    FROM games B
    WHERE G.genre_id = B.genre_id
  )
  ELSE G.critic_count
  END
) AS `critic_count`
, (
  CASE WHEN G.user_score IS NULL
  THEN (
    SELECT ROUND(AVG(C.user_score), 3)
    FROM games C
    WHERE G.genre_id = C.genre_id
  )
  ELSE G.user_score
  END
) AS `user_score`
, (
  CASE WHEN G.user_count IS NULL
  THEN (
    SELECT CAST(AVG(D.user_count) + 0.999999 AS INTEGER)
    FROM games D
    WHERE G.genre_id = D.genre_id
  )
  ELSE G.user_count
  END
) AS `user_count`
FROM games AS G
WHERE G.year >= 2015 AND (G.critic_score IS NULL OR G.user_score IS NULL)

 

서울숲 요일별 대기오염도 계산하기

날짜 정보에서 요일을 구하는데, 기본적으로 일요일이 0이다. 따라서 6을 더하고 7로 나눈 나머지로 바꾼다. 이렇게 되면 월요일부터 정렬되게 된다.

SELECT
CASE
  WHEN (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 = 0 THEN '월요일'
  WHEN (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 = 1 THEN '화요일'
  WHEN (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 = 2 THEN '수요일'
  WHEN (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 = 3 THEN '목요일'
  WHEN (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 = 4 THEN '금요일'
  WHEN (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 = 5 THEN '토요일'
  WHEN (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 = 6 THEN '일요일'
  END AS `weekday`
, ROUND(AVG(no2), 4) AS `no2`
, ROUND(AVG(o3), 4) AS `o3`
, ROUND(AVG(co), 4) AS `co`
, ROUND(AVG(so2), 4) AS `so2`
, ROUND(AVG(pm10), 4) AS `pm10`
, ROUND(AVG(pm2_5), 4) AS `pm2_5`
FROM measurements
GROUP BY (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7
ORDER BY (CAST(STRFTIME('%w', DATE(measured_at)) AS INTEGER) + 6) % 7 ASC

 

폐쇄할 따릉이 정류소 찾기 2

대여 건수와 반납 건수를 구한 테이블을 따로 생성하고, 두 테이블을 JOIN하고 대여 건수 합과 반납 건수 합 둘 다 0보다 큰 경우에 한해서 2018년 대비 2019년 데이터의 비율을 구한다.

WITH
  A AS (
    SELECT
      rent_station_id,
      SUM(
        CASE
          WHEN STRFTIME ('%Y-%m', DATE(rent_at)) = '2018-10' THEN 1
          ELSE 0
        END
      ) AS `rent_2018`,
      SUM(
        CASE
          WHEN STRFTIME ('%Y-%m', DATE(rent_at)) = '2019-10' THEN 1
          ELSE 0
        END
      ) AS `rent_2019`
    FROM
      rental_history
    GROUP BY
      rent_station_id
  ),
  B AS (
    SELECT
      return_station_id,
      SUM(
        CASE
          WHEN STRFTIME ('%Y-%m', DATE(return_at)) = '2018-10' THEN 1
          ELSE 0
        END
      ) AS `return_2018`,
      SUM(
        CASE
          WHEN STRFTIME ('%Y-%m', DATE(return_at)) = '2019-10' THEN 1
          ELSE 0
        END
      ) AS `return_2019`
    FROM
      rental_history
    GROUP BY
      return_station_id
  ),
  C AS (
    SELECT
      A.rent_station_id,
      ROUND(
        100.0 * (A.`rent_2019` + B.`return_2019`) / (A.`rent_2018` + B.`return_2018`),
        2
      ) AS `usage_pct`
    FROM
      A
      JOIN B ON A.rent_station_id = B.return_station_id
    WHERE
      (A.`rent_2018` + B.`return_2018`) > 0
      AND (A.`rent_2019` + B.`return_2019`) > 0
  )
SELECT
  D.station_id,
  D.name,
  D.local,
  C.`usage_pct`
FROM
  station D
  JOIN C ON D.station_id = C.rent_station_id
WHERE
  C.`usage_pct` <= 50

 

멀티 플랫폼 게임 찾기

IN을 활용해서 3가지 메이저 플랫폼 계열에서 출시한 게임들을 분류한 테이블을 생성하고, UNION ALL로 3개의 테이블에 있는 모든 데이터를 합한다. 그리고 게임 이름을 기준으로 GROUP BY를 하면 계열사의 개수를 셀 수 있다. 계열사의 개수가 2개 이상인 게임들의 이름을 오름차순으로 출력한다.

WITH A AS (
  SELECT DISTINCT(G.name), 'Sony' AS `platform`, G.year AS `year`
  FROM games G
  JOIN platforms P
  ON G.platform_id = P.platform_id
  WHERE G.year >= 2012
  AND P.name IN ('PS3', 'PS4', 'PSP', 'PSV')
  GROUP BY G.name
),
B AS (
  SELECT DISTINCT(G.name), 'Nintendo' AS `platform`, G.year AS `year`
  FROM games G
  JOIN platforms P
  ON G.platform_id = P.platform_id
  WHERE G.year >= 2012
  AND P.name IN ('Wii', 'WiiU', 'DS', '3DS')
  GROUP BY G.name
),
C AS (
  SELECT DISTINCT(G.name), 'Microsoft' AS `platform`, G.year AS `year`
  FROM games G
  JOIN platforms P
  ON G.platform_id = P.platform_id
  WHERE G.year >= 2012
  AND P.name IN ('X360', 'XONE')
  GROUP BY G.name
),
D AS (
  SELECT * FROM A
  UNION ALL
  SELECT * FROM B
  UNION ALL
  SELECT * FROM C
)
SELECT name FROM D
GROUP BY name
HAVING COUNT(*) >= 2
ORDER BY name

 

전국 카페 주소 데이터 정제하기

SPLIT() 함수가 없기 때문에, 다른 방법을 사용해야 한다.

문자열 자르는 방법

SELECT SUBSTR(address, 1, INSTR(address, ' ') - 1) AS `sido`
, SUBSTR(address, INSTR(address, ' ') + 1, INSTR(SUBSTR(address, INSTR(address, ' ') + 1), ' ') - 1) AS `sigungu`
, COUNT(*) AS `cnt`
FROM cafes
GROUP BY `sido`, `sigungu`
ORDER BY `cnt` DESC

 

미세먼지 수치의 계절간 차이

WITH A AS (
  SELECT station, pm10
  , (
    CASE
      WHEN (DATE(measured_at) >= '2022-03-01') AND (DATE(measured_at) <= '2022-05-31')
      THEN 'spring'
      WHEN (DATE(measured_at) >= '2022-06-01') AND (DATE(measured_at) <= '2022-08-31')
      THEN 'summer'
      WHEN (DATE(measured_at) >= '2022-09-01') AND (DATE(measured_at) <= '2022-11-30')
      THEN 'autumn'
      ELSE 'winter'
      END
  ) AS `season`
  FROM measurements
)
SELECT A.`season`
, MEDIAN(A.pm10) AS `pm10_median`
, ROUND(AVG(A.pm10), 2) AS `pm10_average`
FROM A
GROUP BY A.`season`
ORDER BY A.pm10

 

친구 수 집계하기

문제에서는 edges 테이블만 설명해주었지만 유효한 데이터를 파악하기 위해서는 users 테이블도 활용해야 한다.

WITH A AS (
  SELECT C.user_id AS `a_id`
  , COUNT(E.user_b_id) AS `a_friends`
  FROM users C
  LEFT JOIN edges E
  ON C.user_id = E.user_a_id
  GROUP BY `a_id`
),
B AS (
  SELECT D.user_id AS `b_id`
  , COUNT(F.user_a_id) AS `b_friends`
  FROM users D
  LEFT JOIN edges F
  ON D.user_id = F.user_b_id
  GROUP BY `b_id`
)
SELECT A.`a_id` AS `user_id`
, (A.`a_friends` + B.`b_friends`) AS `num_friends`
FROM A
JOIN B ON A.`a_id` = B.`b_id`
GROUP BY `user_id`
ORDER BY `num_friends` DESC, `user_id` ASC