『SQLパズル 第2版』の別解など その1
SQLパズルの主にWindow関数を用いた別解やその他備忘録
実行環境はPostgreSQL13
パズル2
■Window関数を用いた別解
DELETE FROM Personnel WHERE emp_id IN (SELECT emp_id FROM (SELECT emp_id ,absent_date ,severity_points ,MIN(absent_date) OVER (PARTITION BY emp_id ORDER BY absent_date ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS pre_date FROM Absenteeism) AS A2 WHERE (absent_date <> pre_date + interval '1 day' OR pre_date IS NULL) AND absent_date BETWEEN current_date - interval '1 year' AND current_date GROUP BY emp_id HAVING SUM(severity_points) >= 6);
パズル3
■Window関数を用いた別解
SELECT proc_id ,anest_name ,start_time ,end_time ,COUNT(CASE start_time) OVER (PARTITION BY anest_name) FROM Procs;
パズル9
■連続する番号をグループ化
SELECT MIN(seat) ,'~' AS ~ ,MAX(seat) FROM (SELECT seat ,seat - ROW_NUMBER() OVER(ORDER BY seat) AS cgroup FROM Restaurant) AS GR GROUP BY cgroup ORDER BY cgroup;
■連続した欠番をグループ化
SELECT seat - (cgroup - precgroup) AS from ,'~' AS ~ ,seat - 1 AS to FROM (SELECT seat ,cgroup ,MAX(cgroup) OVER(ORDER BY seat ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS precgroup FROM (SELECT seat ,seat - ROW_NUMBER() OVER(ORDER BY seat) AS cgroup FROM Restaurant) AS GR1) AS GR2 WHERE cgroup > precgroup;
または欠番の補集合に連続した番号のグループ化を適用してもいい
パズル10
■Window関数を用いた別解
month_cnt = 0 の年のときはインクリメントしない行番号を生成し、これを通常の行番号から引き算することで month_cnt = 0 に挟まれた行で同じ値になる(GROUPにできる)要素を引き出せる
SELECT sin ,MAX(pen_year) AS end_year ,SUM(month_cnt) AS total_cnt ,SUM(earnings) AS total_earn FROM (SELECT sin ,pen_year ,month_cnt ,earnings ,ROW_NUMBER() OVER(PARTITION BY sin ORDER BY pen_year DESC) - accum_year_cnt AS cgroup FROM (SELECT sin ,pen_year ,month_cnt ,earnings ,COUNT(CASE WHEN month_cnt > 0 THEN month_cnt ELSE NULL END) OVER(PARTITION BY sin ORDER BY pen_year DESC) AS accum_year_cnt FROM Pensions) AS P1) AS P2 WHERE month_cnt > 0 GROUP BY sin, cgroup HAVING SUM(month_cnt) >= 60 ORDER BY sin, end_year;
集計関数を入れ子にできるなら(たしかOracleは1段だけ入れ子にできた)このようなSQLもいけるはず
SELECT sin ,MAX(CASE WHEN accum_cnt_traceback - MIN(accum_cnt_traceback) >= 60 THEN pen_year ELSE NULL END) AS start_year ,MAX(pen_year) AS end_year ,SUM(month_cnt) AS total_cnt ,SUM(earnings) AS total_earn FROM (SELECT sin ,pen_year ,month_cnt ,earnings ,accum_cnt_traceback ,ROW_NUMBER() OVER(PARTITION BY sin ORDER BY pen_year DESC) - accum_year_cnt AS cgroup FROM (SELECT sin ,pen_year ,month_cnt ,earnings ,SUM(month_cnt) OVER(PARTITION BY sin ORDER BY pen_year DESC) AS accum_cnt_traceback ,COUNT(CASE WHEN month_cnt > 0 THEN month_cnt ELSE NULL END) OVER(PARTITION BY sin ORDER BY pen_year DESC) AS accum_year_cnt FROM Pensions) AS P1) AS P2 WHERE month_cnt > 0 GROUP BY sin, cgroup HAVING SUM(month_cnt) >= 60 ORDER BY sin, end_year;
最後の別解は複雑なのでまずSQLについて連続を扱う方法について、思いついた方法について説明します。 今回のように month_cnt = 0 があるとリセットされるような「連続であることが前提として求められる」ようなものはSQLは苦手としています。 そこでつぎのテーブルを見てみましょう。
pen_year | month_cnt ----------+----------- 2001 | 1 2002 | 2 2003 | 3 2004 | 4 2005 | 0 2006 | 3 2007 | 3 2008 | 3 2009 | 0 2010 | 2 2011 | 0
このテーブルにたいして以下のCase式とWindow関数を組み合わせたもの
CASE WHEN month_cnt = 0 THEN -SUM(month_cnt) OVER(PARTITION BY sin ORDER BY pen_year) ELSE month_cnt END accum_cnt
を使って列を生成すると以下のテーブルになります。
pen_year | month_cnt | accum_cnt ----------+-----------+----------- 2001 | 1 | 1 2002 | 2 | 2 2003 | 3 | 3 2004 | 4 | 4 2005 | 0 | -10 2006 | 3 | 3 2007 | 3 | 3 2008 | 3 | 3 2009 | 0 | -9 2010 | 2 | 2 2011 | 0 | -2
さらにこのテーブルにたいして以下のWindow関数を使って列を生成すると
SUM(accum_cnt) OVER(PARTITION BY sin ORDER BY pen_year) AS part_sum
こうなります
pen_year | month_cnt | accum_cnt | part_sum ----------+-----------+-----------+---------- 2001 | 1 | 1 | 1 2002 | 2 | 2 | 3 2003 | 3 | 3 | 6 2004 | 4 | 4 | 10 2005 | 0 | -10 | 0 <-- month_cnt = 0 のときに累計値がリセットされる 2006 | 3 | 3 | 3 2007 | 3 | 3 | 6 2008 | 3 | 3 | 9 2009 | 0 | -9 | 0 <-- month_cnt = 0 のときに累計値がリセットされる 2010 | 2 | 2 | 2 2011 | 0 | -2 | 0 <-- month_cnt = 0 のときに累計値がリセットされる
まるで month_cnt = 0 の行でパーティションを切ってSUM(month_cnt) しているように見えるでしょう。 これをつかって年金の「連続」の方の条件を満たしていることを先に保証しちゃいます。
条件を満たす期間で最も直近の期間がほしいため逆方向からのカウントも用意する必要があります。 条件を満たす最も直近の期間を求めるビューは以下のようになって
CREATE VIEW MOST_RECENT_VALID_PENSION_RANGE AS SELECT sin ,MAX(CASE WHEN accum_cnt_trace_desc_in_group >= 60 THEN pen_year ELSE NULL END) AS start_year ,MAX(CASE WHEN accum_cnt_trace_asc_in_group >= 60 THEN pen_year ELSE NULL END) AS end_year FROM (SELECT sin ,pen_year ,month_cnt ,SUM(accum_cnt_trace_desc) OVER(PARTITION BY sin ORDER BY pen_year DESC) AS accum_cnt_trace_desc_in_group ,SUM(accum_cnt_trace_asc) OVER(PARTITION BY sin ORDER BY pen_year) AS accum_cnt_trace_asc_in_group FROM (SELECT sin ,pen_year ,month_cnt ,earnings ,CASE WHEN month_cnt = 0 THEN -SUM(month_cnt) OVER(PARTITION BY sin ORDER BY pen_year DESC) ELSE month_cnt END accum_cnt_trace_desc ,CASE WHEN month_cnt = 0 THEN -SUM(month_cnt) OVER(PARTITION BY sin ORDER BY pen_year) ELSE month_cnt END accum_cnt_trace_asc FROM Pensions) AS P1) AS P2 WHERE month_cnt > 0 GROUP BY sin;
その期間に対してearningの合計をサブクエリで出します。
SELECT sin ,(SELECT SUM(month_cnt) FROM Pensions WHERE M.sin = Pensions.sin AND pen_year BETWEEN start_year AND end_year) ,(SELECT SUM(earnings) FROM Pensions WHERE M.sin = Pensions.sin AND pen_year BETWEEN start_year AND end_year) FROM MOST_RECENT_VALID_PENSION_RANGE AS M ORDER BY sin;