Illustration of a (possible) SQLite bug by especkman@gmail.com
Code & Data: notebook_and_database.zip
%%capture
%load_ext sql
%sql sqlite:///chinook-test.db
%%sql
SELECT sqlite_version();
%%sql
SELECT NULL
I believe the SQL library juypyter is using is translating NULL
into the pythonic equivalent None
when displaying results
%%sql
WITH
tracks_purchased AS
(
SELECT *
FROM invoice_line as il
INNER JOIN track tr on il.track_id = tr.track_id
),
candidates AS
(
SELECT invoice_id, album_id
FROM tracks_purchased
GROUP BY invoice_id,album_id
),
single_album_purchases AS
(
SELECT
invoice_id,
album_id
FROM tracks_purchased
GROUP BY invoice_id
HAVING count(DISTINCT(album_id)) = 1
),
whole_album_purchases AS
(
SELECT
s.invoice_id,
(
(
SELECT track_id
FROM tracks_purchased tp
WHERE tp.invoice_id = s.invoice_id
EXCEPT
SELECT tr.track_id
FROM track tr
WHERE tr.album_id = (SELECT c.album_id FROM candidates c WHERE c.invoice_id=s.invoice_id)
) IS NULL
AND
(
SELECT track_id
FROM track tr
WHERE tr.album_id = (SELECT c.album_id FROM candidates c WHERE c.invoice_id=s.invoice_id)
EXCEPT
SELECT tp.track_id
FROM tracks_purchased tp
WHERE tp.invoice_id = s.invoice_id
)IS NULL
) AS whole_album
FROM single_album_purchases s
)
SELECT
w.whole_album,
CASE
WHEN w.whole_album IS NULL THEN "Nope"
WHEN w.whole_album IS TRUE then "Yes"
ELSE "No"
END is_album,
w.whole_album IS NULL,
i.invoice_id,
i.total
FROM invoice i
LEFT JOIN whole_album_purchases w ON i.invoice_id = w.invoice_id
LIMIT 10;
The whole_album
column contains a mix of True & False values from the right table in a LEFT JOIN, as well as NULL values where no rows existed matching the join condition.
The is_album
column uses CASE statements to translate the contents of the whole_album
column. It was originally intended to have just "Yes" and "No" values, but a third case was added because NULL's were being Traslated as YES, rather than falling through to the ELSE catchall.
w.whole_album IS NULL
further demonstrates that the DB engine does not evaluate the NULLs from the query as NULL.
%%sql
WITH
tracks_purchased AS
(
SELECT *
FROM invoice_line as il
INNER JOIN track tr on il.track_id = tr.track_id
),
candidates AS
(
SELECT invoice_id, album_id
FROM tracks_purchased
GROUP BY invoice_id,album_id
),
single_album_purchases AS
(
SELECT
invoice_id,
album_id
FROM tracks_purchased
GROUP BY invoice_id
HAVING count(DISTINCT(album_id)) = 1
),
whole_album_purchases AS
(
SELECT
s.invoice_id,
(
(
SELECT track_id
FROM tracks_purchased tp
WHERE tp.invoice_id = s.invoice_id
EXCEPT
SELECT tr.track_id
FROM track tr
WHERE tr.album_id = (SELECT c.album_id FROM candidates c WHERE c.invoice_id=s.invoice_id)
) IS NULL
AND
(
SELECT track_id
FROM track tr
WHERE tr.album_id = (SELECT c.album_id FROM candidates c WHERE c.invoice_id=s.invoice_id)
EXCEPT
SELECT tp.track_id
FROM tracks_purchased tp
WHERE tp.invoice_id = s.invoice_id
)IS NULL
) AS whole_album
FROM single_album_purchases s
)
SELECT
w.whole_album,
CASE
WHEN w.whole_album IS NULL THEN "Nope"
WHEN w.whole_album IS TRUE then "Yes"
ELSE "No"
END is_album,
w.whole_album IS NULL,
i.invoice_id,
i.total
FROM invoice i
LEFT JOIN whole_album_purchases w ON i.invoice_id = w.invoice_id
WHERE w.whole_album IS NULL;
This query replicates the previous query and adds a WHERE condition that is expected to select only rows where the whole_album
column is NULL. Instead it produces an empty result.
%%sql
WITH
tracks_purchased AS
(
SELECT *
FROM invoice_line as il
INNER JOIN track tr on il.track_id = tr.track_id
),
candidates AS
(
SELECT invoice_id, album_id
FROM tracks_purchased
GROUP BY invoice_id,album_id
),
single_album_purchases AS
(
SELECT
invoice_id,
album_id
FROM tracks_purchased
GROUP BY invoice_id
HAVING count(DISTINCT(album_id)) = 1
)
SELECT t.album_id, s.album_id
FROM track t
LEFT JOIN single_album_purchases s ON s.album_id = t.album_id
WHERE s.album_id IS NULL
LIMIT 10;
The above is a simpler case. The NULLs resulting from a LEFT JOIN are properly evaluated as NULL.
The following queries are further attempts to provoke the problem behavior. Instead they work as one would expect.
%%sql
SELECT
tr.track_id,
il.invoice_id
FROM track tr
LEFT JOIN invoice_line il ON il.track_id = tr.track_id
WHERE il.invoice_id IS NULL
LIMIT 10;
%%sql
WITH
tracks_purchased AS
(
SELECT *
FROM invoice_line as il
INNER JOIN track tr on il.track_id = tr.track_id
),
candidates AS
(
SELECT invoice_id, album_id
FROM tracks_purchased
GROUP BY invoice_id,album_id
),
single_album_purchases AS
(
SELECT
invoice_id,
album_id
FROM tracks_purchased
GROUP BY invoice_id
HAVING count(DISTINCT(album_id)) = 1
),
whole_album_purchases AS
(
SELECT
s.invoice_id
/*,
(
(
SELECT track_id
FROM invoice_line il
WHERE il.invoice_id = s.invoice_id
EXCEPT
SELECT tr.track_id
FROM track tr
WHERE tr.album_id = (SELECT c.album_id FROM candidates c WHERE c.invoice_id=s.invoice_id)
) IS NULL
AND
(
SELECT track_id
FROM track tr
WHERE tr.album_id = (SELECT c.album_id FROM candidates c WHERE c.invoice_id=s.invoice_id)
EXCEPT
SELECT track_id
FROM invoice_line il
WHERE il.invoice_id = s.invoice_id
)IS NULL
) AS whole_album */
FROM single_album_purchases s
)
SELECT
/* w.whole_album,
w.whole_album IS NULL,
CASE
WHEN w.whole_album IS NULL THEN "Nope"
WHEN w.whole_album IS TRUE then "Yes"
ELSE "No"
END is_album,
*/
i.invoice_id,
w.invoice_id,
i.total
FROM invoice i
LEFT JOIN whole_album_purchases w ON i.invoice_id = w.invoice_id
WHERE w.invoice_id IS NULL
LIMIT 10;
%%sql
WITH
tracks_purchased AS
(
SELECT *,
CASE
WHEN RANDOM() > 0 THEN NULL
ELSE 0
END this_is_null
FROM invoice_line as il
INNER JOIN track tr on il.track_id = tr.track_id
),
candidates AS
(
SELECT invoice_id, album_id, this_is_null
FROM tracks_purchased
GROUP BY invoice_id, album_id
),
single_album_purchases AS
(
SELECT
invoice_id,
album_id,
this_is_null
FROM tracks_purchased
GROUP BY invoice_id
HAVING count(DISTINCT(album_id)) = 1
),
whole_album_purchases AS
(
SELECT
s.invoice_id,
CASE
WHEN s.this_is_null IS NULL THEN NULL
ELSE True
END this_is_null,
(
(
SELECT track_id
FROM tracks_purchased tp
WHERE tp.invoice_id = s.invoice_id
EXCEPT
SELECT tr.track_id
FROM track tr
WHERE tr.album_id = (SELECT c.album_id FROM candidates c WHERE c.invoice_id=s.invoice_id)
) IS NULL
AND
(
SELECT track_id
FROM track tr
WHERE tr.album_id = (SELECT c.album_id FROM candidates c WHERE c.invoice_id=s.invoice_id)
EXCEPT
SELECT tp.track_id
FROM tracks_purchased tp
WHERE tp.invoice_id = s.invoice_id
)IS NULL
) AS whole_album
FROM single_album_purchases s
)
SELECT
w.whole_album,
w.whole_album IS NULL,
w.this_is_null,
w.this_is_null IS NULL,
CASE
WHEN w.whole_album IS NULL THEN "Nope"
WHEN w.whole_album IS TRUE then "Yes"
ELSE "No"
END is_album,
i.invoice_id,
i.total
FROM invoice i
LEFT JOIN whole_album_purchases w ON i.invoice_id = w.invoice_id
LIMIT 10;