『SQLパズル 第2版』の別解など その3

『SQLパズル 第2版』の別解など その2 - 不思議の備忘録

の続き

パズル45

■別解 GROUP BY句にCASE式

SELECT句の特性関数はCASE式を用いたGROUPBY句と相互に変換できます

SELECT cust_id
      ,CASE WHEN bill_date BETWEEN CURRENT_DATE - INTERVAL '30' DAY
                               AND CURRENT_DATE
            THEN '0-30days_ago'
            WHEN bill_date BETWEEN CURRENT_DATE - INTERVAL '60' DAY
                               AND CURRENT_DATE - INTERVAL '31' DAY
            THEN '31-60days_ago'
            WHEN bill_date BETWEEN CURRENT_DATE - INTERVAL '90' DAY
                               AND CURRENT_DATE - INTERVAL '61' DAY
            THEN '61-90days_ago'
            ELSE '91-days_ago' END AS span
      ,SUM(pizza_amt) AS total_pizza_amt
  FROM FriendsofPepperoni
 GROUP BY cust_id, span
 ORDER BY cust_id, span;

パズル46

答え1について、スカラサブクエリをP1.start_dateにするとなぜ動かないのか。それは、集約キーじゃないので集合として存在してるから。 ゆえに、GROUP BY 句に列を追加するか、下のようにMAXでとりだす必要があります。

SELECT S1.clerk_name, P1.promo_name,
       SUM(S1.sale_amt) AS sales_tot
  FROM Sales AS S1, Promotions AS P1
 WHERE S1.sale_date BETWEEN P1.start_date AND P1.end_date
 GROUP BY S1.clerk_name, P1.promo_name
HAVING SUM(sale_amt) >= ALL (
         SELECT SUM(sale_amt)
           FROM Sales AS S2
          WHERE S2.clerk_name <> S1.clerk_name
            AND S2.sale_date BETWEEN MAX(P1.start_date)
                                 AND MAX(P1.end_date)
          GROUP BY S2.clerk_name);

パズル52

集約関数の中に集約関数は入れられないが、Window関数の中に集約関数をいれることができる。(類:パズル33の別解) これで疑似的に入れ子の集約関数(2レベルまで)を実現できる

SELECT DISTINCT AVG(COUNT(*)) OVER()
  FROM Personnel
 GROUP BY dept_id;

INSERT INTO Personnel VALUES('A');

パズル53

■Window関数を用いた別解

SELECT MIN(CASE WHEN cc = 1 THEN color ELSE NULL END) AS color
      ,MIN(CASE WHEN cl = 1 THEN length ELSE NULL END) AS length
      ,MIN(CASE WHEN cw = 1 THEN width ELSE NULL END) AS width
      ,MIN(CASE WHEN ch = 1 THEN hgt ELSE NULL END) AS hgt
  FROM (SELECT lvl
              ,color
              ,length
              ,width
              ,hgt
              ,COUNT(color) OVER(ORDER BY lvl DESC) cc
              ,COUNT(length) OVER(ORDER BY lvl DESC) AS cl
              ,COUNT(width) OVER(ORDER BY lvl DESC) AS cw
              ,COUNT(hgt) OVER(ORDER BY lvl DESC) AS ch
          FROM Foobar) AS TMP;

パズル55

RacingResultsテーブルしかない場合について、UNIONをCASE式で置き換える方法

CREATE TABLE OneTwoThree(num INTEGER);
INSERT INTO OneTwoThree VALUES(1);
INSERT INTO OneTwoThree VALUES(2);
INSERT INTO OneTwoThree VALUES(3);

SELECT horse_name, COUNT(*)
  FROM (SELECT track_id
              ,race_date
              ,race_nbr
              ,OTT.num AS rank
              ,CASE OTT.num WHEN 1 THEN win_name
                            WHEN 2 THEN place_name
                            WHEN 3 THEN show_name 
               END AS horse_name
          FROM RacingResults 
               CROSS JOIN 
               OneTwoThree AS OTT) AS TMP
 GROUP BY horse_name;

順位ごとにも

SELECT horse_name, rank, COUNT(*)
  FROM (SELECT track_id
              ,race_date
              ,race_nbr
              ,OTT.num AS rank
              ,CASE OTT.num WHEN 1 THEN win_name
                            WHEN 2 THEN place_name
                            WHEN 3 THEN show_name 
               END AS horse_name
          FROM RacingResults 
               CROSS JOIN 
               OneTwoThree AS OTT) AS TMP
 GROUP BY horse_name, rank;

HorseNamesがあるとき何着に何回なったかのクエリ(これが一番シンプルですかね)

SELECT H1.horse
      ,CASE WHEN H1.horse = win_name THEN 1
            WHEN H1.horse = place_name THEN 2
            WHEN H1.horse = show_name THEN 3
            ELSE NULL
       END AS rank
      ,COUNT(*)
  FROM HorseNames AS H1, RacingResults AS R1
 WHERE H1.horse IN (R1.win_name, R1.place_name, R1.show_name)
 GROUP BY H1.horse, rank;

--単純CASE式のバージョン
SELECT H1.horse
      ,CASE H1.horse WHEN win_name THEN 1
                     WHEN place_name THEN 2
                     WHEN show_name THEN 3
            ELSE NULL
       END AS rank
      ,COUNT(*)
  FROM HorseNames AS H1, RacingResults AS R1
 WHERE H1.horse IN (R1.win_name, R1.place_name, R1.show_name)
 GROUP BY H1.horse, rank;


SELECT H1.horse
      ,CASE H1.horse WHEN win_name THEN 1
                     WHEN place_name THEN 2
                     WHEN show_name THEN 3
            ELSE NULL
       END AS rank
      ,COUNT(win_name)
  FROM HorseNames AS H1 
       LEFT OUTER JOIN
       RacingResults AS R1
    ON H1.horse IN (R1.win_name, R1.place_name, R1.show_name)
 GROUP BY H1.horse, rank;

パズル59

問題設定としてはパズル3と似ていますが、5日と6日の間が切れるっていうのが最大の違いであり、そこが難しくしている要因ですね。

パターンの網羅のためにもこれも入れておきます

INSERT INTO Timesheets VALUES (11, '1997-01-13', '1997-01-13');

■1日を3つに分割する別解

この別解は非常に複雑で図を用いた詳細な解説が必要だと思います。(このクエリから意図を読むのは難しいのではないでしょうか?) 解説が完成したら別記事で解説します。

CREATE TABLE Days199701(date DATE NOT NULL);
INSERT INTO Days199701 VALUES ('1997-01-01');
INSERT INTO Days199701 VALUES ('1997-01-02');
INSERT INTO Days199701 VALUES ('1997-01-03');
INSERT INTO Days199701 VALUES ('1997-01-04');
INSERT INTO Days199701 VALUES ('1997-01-05');
INSERT INTO Days199701 VALUES ('1997-01-06');
INSERT INTO Days199701 VALUES ('1997-01-07');
INSERT INTO Days199701 VALUES ('1997-01-08');
INSERT INTO Days199701 VALUES ('1997-01-09');
INSERT INTO Days199701 VALUES ('1997-01-10');
INSERT INTO Days199701 VALUES ('1997-01-11');
INSERT INTO Days199701 VALUES ('1997-01-12');
INSERT INTO Days199701 VALUES ('1997-01-13');
INSERT INTO Days199701 VALUES ('1997-01-14');
INSERT INTO Days199701 VALUES ('1997-01-15');
INSERT INTO Days199701 VALUES ('1997-01-16');
INSERT INTO Days199701 VALUES ('1997-01-17');
INSERT INTO Days199701 VALUES ('1997-01-18');
INSERT INTO Days199701 VALUES ('1997-01-19');
INSERT INTO Days199701 VALUES ('1997-01-20');
INSERT INTO Days199701 VALUES ('1997-01-21');
INSERT INTO Days199701 VALUES ('1997-01-22');
INSERT INTO Days199701 VALUES ('1997-01-23');
INSERT INTO Days199701 VALUES ('1997-01-24');
INSERT INTO Days199701 VALUES ('1997-01-25');
INSERT INTO Days199701 VALUES ('1997-01-26');
INSERT INTO Days199701 VALUES ('1997-01-27');
INSERT INTO Days199701 VALUES ('1997-01-28');
INSERT INTO Days199701 VALUES ('1997-01-29');
INSERT INTO Days199701 VALUES ('1997-01-30');
INSERT INTO Days199701 VALUES ('1997-01-31');
SELECT MIN(date) AS start_date
      ,MAX(date) AS end_date
  FROM (SELECT date
              ,CEIL(SUM(COUNT(CASE WHEN cnt = 0 THEN 1 ELSE NULL END)) OVER(ORDER BY date) / 2 ) AS pair_num
          FROM (SELECT date
                      ,(SELECT COUNT(*)
                          FROM Timesheets
                         WHERE date BETWEEN start_date + 1 AND end_date) AS cnt
                  FROM Days199701
                UNION ALL
                SELECT date
                      ,(SELECT COUNT(*)
                          FROM Timesheets
                         WHERE date BETWEEN start_date AND end_date) AS cnt
                  FROM Days199701
                UNION ALL
                SELECT date
                      ,(SELECT COUNT(*)
                          FROM Timesheets
                         WHERE date BETWEEN start_date AND end_date - 1) AS cnt
                  FROM Days199701) AS TMP
         GROUP BY date
        HAVING MIN(cnt) = 0 
           AND MAX(cnt) > 0
         ORDER BY date) AS TMP2
 GROUP BY pair_num;

■答え1の補足

 AND T3.end_date <= T2.end_date  --T3.end_date < T4.start_dateがあるから不要
 AND T4.start_date >= T1.start_date  --T3.end_date < T4.start_dateがあるから不要

■答え2の補足 答え2は以下のクエリと同じ結果を得ることができます。

SELECT start_date, end_date
  FROM (SELECT T1.start_date
              ,ROW_NUMBER() OVER(ORDER BY T1.start_date)
          FROM Timesheets AS T1
               LEFT OUTER JOIN
               Timesheets AS T2
               ON T1.start_date > T2.start_date
              AND T1.start_date <= T2.end_date
            GROUP BY T1.start_date
           HAVING COUNT(T2.start_date) = 0
       ) AS X(start_date, rn)
       INNER JOIN
       (SELECT T3.end_date
              ,ROW_NUMBER() OVER(ORDER BY T3.end_date)
          FROM Timesheets AS T3
               LEFT OUTER JOIN
               Timesheets AS T4
               ON T3.end_date < T4.end_date
              AND T3.end_date >= T4.start_date
            GROUP BY T3.end_date
           HAVING COUNT(T4.start_date) = 0
       ) AS Y(end_date, rn)
       ON X.rn = Y.rn;

結局何がしたいのかというと2つの行数の同じテーブルを横にならべて合体させることですね。

また、答え2の

SELECT T1.start_date
  FROM Timesheets AS T1
       LEFT OUTER JOIN
       Timesheets AS T2
       ON T1.start_date > T2.start_date
      AND T1.start_date <= T2.end_date
    GROUP BY T1.start_date
   HAVING COUNT(T2.start_date) = 0

この部分は以下と同じ

SELECT T1.start_date
  FROM Timesheets AS T1
       LEFT OUTER JOIN
       Timesheets AS T2
       ON T1.start_date > T2.start_date
      AND T1.start_date <= T2.end_date
 WHERE T2.start_date IS NULL

さらに、これとも同じ

SELECT T1.start_date
  FROM Timesheets AS T1
 WHERE NOT EXISTS
       (SELECT * 
          FROM Timesheets AS T2
         WHERE T1.start_date > T2.start_date
           AND T1.start_date <= T2.end_date)

EXISTS(NOT EXISTS)述語は1行見つかった時点で判定を打ち切るため、これが一番速いのではないでしょうか

■別視点での別解

この問題を解いていて思ったのが「歯抜け」から探したほうが手っ取り早いのではないかということです。具体的には

SELECT DISTINCT T1.end_date, T2.start_date
  FROM Timesheets AS T1
       INNER JOIN 
       Timesheets AS T2
       ON T1.end_date < T2.start_date
      AND NOT EXISTS
          (SELECT *
             FROM Timesheets AS T3
            WHERE T1.end_date + time '00:01' BETWEEN T3.start_date AND T3.end_date
               OR T2.start_date - time '00:01' BETWEEN T3.start_date AND T3.end_date
               OR T3.start_date BETWEEN T1.end_date + time '00:01' AND T2.start_date - time '00:01'
               OR T3.end_date BETWEEN T1.end_date + time '00:01' AND T2.start_date - time '00:01'
          );

または

SELECT DISTINCT T1.end_date, T2.start_date
  FROM Timesheets AS T1
       INNER JOIN 
       Timesheets AS T2
       ON T1.end_date < T2.start_date
      AND NOT EXISTS
          (SELECT *
             FROM Timesheets AS T3
            WHERE NOT( T1.end_date + time '00:01' >= T3.end_date
               OR T2.start_date - time '00:01' <=  T3.start_date )
          );

ほかには

WITH TMP AS
(SELECT DISTINCT
        T1.end_date AS gap_start_date
       ,T2.start_date AS gap_end_date
   FROM Timesheets AS T1
        INNER JOIN 
        Timesheets AS T2
        ON T1.end_date < T2.start_date
       AND NOT EXISTS
           (SELECT *
              FROM Timesheets AS T3
             WHERE NOT( T1.end_date + time '00:01' >= T3.end_date
                     OR T2.start_date - time '00:01' <=  T3.start_date )
           )
) 

SELECT (SELECT MIN(start_date) FROM Timesheets) AS start_date, (SELECT MIN(gap_start_date) FROM TMP) AS end_date
UNION ALL
SELECT gap_end_date AS start_date
      ,COALESCE(MAX(gap_start_date) OVER(ORDER BY gap_end_date ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING),
                (SELECT MAX(end_date) FROM Timesheets))
  FROM TMP;