『SQLパズル 第2版』の別解など その3
『SQLパズル 第2版』の別解など その2 - 不思議の備忘録
の続き
パズル45
■別解 GROUP BY句にCASE式
SELECT句の特性関数はCASE式を用いたGROUPBY句と相互に変換できます
SELECT cust_id ,CASE WHEN bill_date BETWEEN CURRENT_DATE - INTERVAL '30' DAY AND CURRENT_DATE THEN '0-30days_ago' WHEN bill_date BETWEEN CURRENT_DATE - INTERVAL '60' DAY AND CURRENT_DATE - INTERVAL '31' DAY THEN '31-60days_ago' WHEN bill_date BETWEEN CURRENT_DATE - INTERVAL '90' DAY AND CURRENT_DATE - INTERVAL '61' DAY THEN '61-90days_ago' ELSE '91-days_ago' END AS span ,SUM(pizza_amt) AS total_pizza_amt FROM FriendsofPepperoni GROUP BY cust_id, span ORDER BY cust_id, span;
パズル46
答え1について、スカラサブクエリをP1.start_dateにするとなぜ動かないのか。それは、集約キーじゃないので集合として存在してるから。 ゆえに、GROUP BY 句に列を追加するか、下のようにMAXでとりだす必要があります。
SELECT S1.clerk_name, P1.promo_name, SUM(S1.sale_amt) AS sales_tot FROM Sales AS S1, Promotions AS P1 WHERE S1.sale_date BETWEEN P1.start_date AND P1.end_date GROUP BY S1.clerk_name, P1.promo_name HAVING SUM(sale_amt) >= ALL ( SELECT SUM(sale_amt) FROM Sales AS S2 WHERE S2.clerk_name <> S1.clerk_name AND S2.sale_date BETWEEN MAX(P1.start_date) AND MAX(P1.end_date) GROUP BY S2.clerk_name);
パズル52
集約関数の中に集約関数は入れられないが、Window関数の中に集約関数をいれることができる。(類:パズル33の別解) これで疑似的に入れ子の集約関数(2レベルまで)を実現できる
SELECT DISTINCT AVG(COUNT(*)) OVER() FROM Personnel GROUP BY dept_id; INSERT INTO Personnel VALUES('A');
パズル53
■Window関数を用いた別解
SELECT MIN(CASE WHEN cc = 1 THEN color ELSE NULL END) AS color ,MIN(CASE WHEN cl = 1 THEN length ELSE NULL END) AS length ,MIN(CASE WHEN cw = 1 THEN width ELSE NULL END) AS width ,MIN(CASE WHEN ch = 1 THEN hgt ELSE NULL END) AS hgt FROM (SELECT lvl ,color ,length ,width ,hgt ,COUNT(color) OVER(ORDER BY lvl DESC) cc ,COUNT(length) OVER(ORDER BY lvl DESC) AS cl ,COUNT(width) OVER(ORDER BY lvl DESC) AS cw ,COUNT(hgt) OVER(ORDER BY lvl DESC) AS ch FROM Foobar) AS TMP;
パズル55
RacingResultsテーブルしかない場合について、UNIONをCASE式で置き換える方法
CREATE TABLE OneTwoThree(num INTEGER); INSERT INTO OneTwoThree VALUES(1); INSERT INTO OneTwoThree VALUES(2); INSERT INTO OneTwoThree VALUES(3); SELECT horse_name, COUNT(*) FROM (SELECT track_id ,race_date ,race_nbr ,OTT.num AS rank ,CASE OTT.num WHEN 1 THEN win_name WHEN 2 THEN place_name WHEN 3 THEN show_name END AS horse_name FROM RacingResults CROSS JOIN OneTwoThree AS OTT) AS TMP GROUP BY horse_name;
順位ごとにも
SELECT horse_name, rank, COUNT(*) FROM (SELECT track_id ,race_date ,race_nbr ,OTT.num AS rank ,CASE OTT.num WHEN 1 THEN win_name WHEN 2 THEN place_name WHEN 3 THEN show_name END AS horse_name FROM RacingResults CROSS JOIN OneTwoThree AS OTT) AS TMP GROUP BY horse_name, rank;
HorseNamesがあるとき何着に何回なったかのクエリ(これが一番シンプルですかね)
SELECT H1.horse ,CASE WHEN H1.horse = win_name THEN 1 WHEN H1.horse = place_name THEN 2 WHEN H1.horse = show_name THEN 3 ELSE NULL END AS rank ,COUNT(*) FROM HorseNames AS H1, RacingResults AS R1 WHERE H1.horse IN (R1.win_name, R1.place_name, R1.show_name) GROUP BY H1.horse, rank; --単純CASE式のバージョン SELECT H1.horse ,CASE H1.horse WHEN win_name THEN 1 WHEN place_name THEN 2 WHEN show_name THEN 3 ELSE NULL END AS rank ,COUNT(*) FROM HorseNames AS H1, RacingResults AS R1 WHERE H1.horse IN (R1.win_name, R1.place_name, R1.show_name) GROUP BY H1.horse, rank; SELECT H1.horse ,CASE H1.horse WHEN win_name THEN 1 WHEN place_name THEN 2 WHEN show_name THEN 3 ELSE NULL END AS rank ,COUNT(win_name) FROM HorseNames AS H1 LEFT OUTER JOIN RacingResults AS R1 ON H1.horse IN (R1.win_name, R1.place_name, R1.show_name) GROUP BY H1.horse, rank;
パズル59
問題設定としてはパズル3と似ていますが、5日と6日の間が切れるっていうのが最大の違いであり、そこが難しくしている要因ですね。
パターンの網羅のためにもこれも入れておきます
INSERT INTO Timesheets VALUES (11, '1997-01-13', '1997-01-13');
■1日を3つに分割する別解
この別解は非常に複雑で図を用いた詳細な解説が必要だと思います。(このクエリから意図を読むのは難しいのではないでしょうか?) 解説が完成したら別記事で解説します。
CREATE TABLE Days199701(date DATE NOT NULL); INSERT INTO Days199701 VALUES ('1997-01-01'); INSERT INTO Days199701 VALUES ('1997-01-02'); INSERT INTO Days199701 VALUES ('1997-01-03'); INSERT INTO Days199701 VALUES ('1997-01-04'); INSERT INTO Days199701 VALUES ('1997-01-05'); INSERT INTO Days199701 VALUES ('1997-01-06'); INSERT INTO Days199701 VALUES ('1997-01-07'); INSERT INTO Days199701 VALUES ('1997-01-08'); INSERT INTO Days199701 VALUES ('1997-01-09'); INSERT INTO Days199701 VALUES ('1997-01-10'); INSERT INTO Days199701 VALUES ('1997-01-11'); INSERT INTO Days199701 VALUES ('1997-01-12'); INSERT INTO Days199701 VALUES ('1997-01-13'); INSERT INTO Days199701 VALUES ('1997-01-14'); INSERT INTO Days199701 VALUES ('1997-01-15'); INSERT INTO Days199701 VALUES ('1997-01-16'); INSERT INTO Days199701 VALUES ('1997-01-17'); INSERT INTO Days199701 VALUES ('1997-01-18'); INSERT INTO Days199701 VALUES ('1997-01-19'); INSERT INTO Days199701 VALUES ('1997-01-20'); INSERT INTO Days199701 VALUES ('1997-01-21'); INSERT INTO Days199701 VALUES ('1997-01-22'); INSERT INTO Days199701 VALUES ('1997-01-23'); INSERT INTO Days199701 VALUES ('1997-01-24'); INSERT INTO Days199701 VALUES ('1997-01-25'); INSERT INTO Days199701 VALUES ('1997-01-26'); INSERT INTO Days199701 VALUES ('1997-01-27'); INSERT INTO Days199701 VALUES ('1997-01-28'); INSERT INTO Days199701 VALUES ('1997-01-29'); INSERT INTO Days199701 VALUES ('1997-01-30'); INSERT INTO Days199701 VALUES ('1997-01-31');
SELECT MIN(date) AS start_date ,MAX(date) AS end_date FROM (SELECT date ,CEIL(SUM(COUNT(CASE WHEN cnt = 0 THEN 1 ELSE NULL END)) OVER(ORDER BY date) / 2 ) AS pair_num FROM (SELECT date ,(SELECT COUNT(*) FROM Timesheets WHERE date BETWEEN start_date + 1 AND end_date) AS cnt FROM Days199701 UNION ALL SELECT date ,(SELECT COUNT(*) FROM Timesheets WHERE date BETWEEN start_date AND end_date) AS cnt FROM Days199701 UNION ALL SELECT date ,(SELECT COUNT(*) FROM Timesheets WHERE date BETWEEN start_date AND end_date - 1) AS cnt FROM Days199701) AS TMP GROUP BY date HAVING MIN(cnt) = 0 AND MAX(cnt) > 0 ORDER BY date) AS TMP2 GROUP BY pair_num;
■答え1の補足
AND T3.end_date <= T2.end_date --T3.end_date < T4.start_dateがあるから不要 AND T4.start_date >= T1.start_date --T3.end_date < T4.start_dateがあるから不要
■答え2の補足 答え2は以下のクエリと同じ結果を得ることができます。
SELECT start_date, end_date FROM (SELECT T1.start_date ,ROW_NUMBER() OVER(ORDER BY T1.start_date) FROM Timesheets AS T1 LEFT OUTER JOIN Timesheets AS T2 ON T1.start_date > T2.start_date AND T1.start_date <= T2.end_date GROUP BY T1.start_date HAVING COUNT(T2.start_date) = 0 ) AS X(start_date, rn) INNER JOIN (SELECT T3.end_date ,ROW_NUMBER() OVER(ORDER BY T3.end_date) FROM Timesheets AS T3 LEFT OUTER JOIN Timesheets AS T4 ON T3.end_date < T4.end_date AND T3.end_date >= T4.start_date GROUP BY T3.end_date HAVING COUNT(T4.start_date) = 0 ) AS Y(end_date, rn) ON X.rn = Y.rn;
結局何がしたいのかというと2つの行数の同じテーブルを横にならべて合体させることですね。
また、答え2の
SELECT T1.start_date FROM Timesheets AS T1 LEFT OUTER JOIN Timesheets AS T2 ON T1.start_date > T2.start_date AND T1.start_date <= T2.end_date GROUP BY T1.start_date HAVING COUNT(T2.start_date) = 0
この部分は以下と同じ
SELECT T1.start_date FROM Timesheets AS T1 LEFT OUTER JOIN Timesheets AS T2 ON T1.start_date > T2.start_date AND T1.start_date <= T2.end_date WHERE T2.start_date IS NULL
さらに、これとも同じ
SELECT T1.start_date FROM Timesheets AS T1 WHERE NOT EXISTS (SELECT * FROM Timesheets AS T2 WHERE T1.start_date > T2.start_date AND T1.start_date <= T2.end_date)
EXISTS(NOT EXISTS)述語は1行見つかった時点で判定を打ち切るため、これが一番速いのではないでしょうか
■別視点での別解
この問題を解いていて思ったのが「歯抜け」から探したほうが手っ取り早いのではないかということです。具体的には
SELECT DISTINCT T1.end_date, T2.start_date FROM Timesheets AS T1 INNER JOIN Timesheets AS T2 ON T1.end_date < T2.start_date AND NOT EXISTS (SELECT * FROM Timesheets AS T3 WHERE T1.end_date + time '00:01' BETWEEN T3.start_date AND T3.end_date OR T2.start_date - time '00:01' BETWEEN T3.start_date AND T3.end_date OR T3.start_date BETWEEN T1.end_date + time '00:01' AND T2.start_date - time '00:01' OR T3.end_date BETWEEN T1.end_date + time '00:01' AND T2.start_date - time '00:01' );
または
SELECT DISTINCT T1.end_date, T2.start_date FROM Timesheets AS T1 INNER JOIN Timesheets AS T2 ON T1.end_date < T2.start_date AND NOT EXISTS (SELECT * FROM Timesheets AS T3 WHERE NOT( T1.end_date + time '00:01' >= T3.end_date OR T2.start_date - time '00:01' <= T3.start_date ) );
ほかには
WITH TMP AS (SELECT DISTINCT T1.end_date AS gap_start_date ,T2.start_date AS gap_end_date FROM Timesheets AS T1 INNER JOIN Timesheets AS T2 ON T1.end_date < T2.start_date AND NOT EXISTS (SELECT * FROM Timesheets AS T3 WHERE NOT( T1.end_date + time '00:01' >= T3.end_date OR T2.start_date - time '00:01' <= T3.start_date ) ) ) SELECT (SELECT MIN(start_date) FROM Timesheets) AS start_date, (SELECT MIN(gap_start_date) FROM TMP) AS end_date UNION ALL SELECT gap_end_date AS start_date ,COALESCE(MAX(gap_start_date) OVER(ORDER BY gap_end_date ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING), (SELECT MAX(end_date) FROM Timesheets)) FROM TMP;