But what if we need to identify possibly new records added to the set?
Then we need to use a bit different form of the queries:
SET STATISTICS IO, TIME, PROFILE ON
SELECT *
FROM @Set1 AS t
WHERE
NOT EXISTS (SELECT s.* FROM @Set2 AS s INTERSECT SELECT t.*)
SELECT *
FROM @Set1 AS t
WHERE
NOT EXISTS
(
SELECT 1
FROM @Set2 s
WHERE
t.pk = s.pk
AND (t.ival = s.ival OR (t.ival IS NULL AND s.ival IS NULL))
AND (t.cval = s.cval OR (t.cval IS NULL AND s.cval IS NULL))
AND (t.mval = s.mval OR (t.mval IS NULL AND s.mval IS NULL))
)
SELECT *
FROM @Set1 AS t
EXCEPT
SELECT *
FROM @Set2 s
SET STATISTICS IO, TIME, PROFILE OFF
If you run it you'll see:
- all 3 queries generate exactly the same output;
- all 3 queries result in exactly the same IO stats;
- time stats are different from time to time, and there is no consistent winner or loser amongst these 3 queries;
- execution plans are almost identical for all 3 queries, except INTERSECT query has an extra TOP 1 operation in it. It may be still below 1% overhead, but anyway - it's there. An overhead.
RowsExecutesStmtTextStmtIdNodeIdParentPhysicalOpLogicalOpArgumentDefinedValuesEstimateRowsEstimateIOEstimateCPUAvgRowSizeTotalSubtreeCostOutputListWarningsTypeParallelEstimateExecutions
41SELECT *
FROM @Set1 AS t
WHERE
NOT EXISTS (SELECT s.* FROM @Set2 AS s INTERSECT SELECT t.*)1110NULLNULLNULLNULL1NULLNULLNULL0.00657166NULLNULLSELECT0NULL
41 |--Nested Loops(Left Anti Semi Join, OUTER REFERENCES:([t].[pk], [t].[ival], [t].[cval], [t].[mval]))1121Nested LoopsLeft Anti Semi JoinOUTER REFERENCES:([t].[pk], [t].[ival], [t].[cval], [t].[mval])NULL104.18E-06280.00657166[t].[pk], [t].[ival], [t].[cval], [t].[mval]NULLPLAN_ROW01
61 |--Index Scan(OBJECT:(@Set1 AS [t]))1132Index ScanIndex ScanOBJECT:(@Set1 AS [t])[t].[pk], [t].[ival], [t].[cval], [t].[mval]10.0031250.0001581280.0032831[t].[pk], [t].[ival], [t].[cval], [t].[mval]NULLPLAN_ROW01
26 |--Top(TOP EXPRESSION:((1)))1142TopTopTOP EXPRESSION:((1))NULL101E-0790.00328438NULLNULLPLAN_ROW01
26 |--Clustered Index Seek(OBJECT:(@Set2 AS ), SEEK:(.[pk]=@Set1.[pk] as [t].[pk]), WHERE:(@Set2.[ival] as .[ival] = @Set1.[ival] as [t].[ival] AND @Set2.[mval] as .[mval] = @Set1.[mval] as [t].[mval] AND @Set2.[cval] as .[cval] = @Set1.[cval] as [t].[cval]) ORDERED FORWARD)1154Clustered Index SeekClustered Index SeekOBJECT:(@Set2 AS ), SEEK:(.[pk]=@Set1.[pk] as [t].[pk]), WHERE:(@Set2.[ival] as .[ival] = @Set1.[ival] as [t].[ival] AND @Set2.[mval] as .[mval] = @Set1.[mval] as [t].[mval] AND @Set2.[cval] as .[cval] = @Set1.[cval] as [t].[cval]) ORDERED FORWARDNULL10.0031250.0001581200.0032831NULLNULLPLAN_ROW01
RowsExecutesStmtTextStmtIdNodeIdParentPhysicalOpLogicalOpArgumentDefinedValuesEstimateRowsEstimateIOEstimateCPUAvgRowSizeTotalSubtreeCostOutputListWarningsTypeParallelEstimateExecutions
41SELECT *
FROM @Set1 AS t
WHERE
NOT EXISTS
(
SELECT 1
FROM @Set2 s
WHERE
t.pk = s.pk
AND (t.ival = s.ival OR (t.ival IS NULL AND s.ival IS NULL))
AND (t.cval = s.cval OR (t.cval IS NULL AND s.cval IS NULL))
AND (t.mval = s.mval OR (t.mval IS NULL AND s.mval IS NULL))
)1310NULLNULLNULLNULL1NULLNULLNULL0.00657156NULLNULLSELECT0NULL
41 |--Nested Loops(Left Anti Semi Join, OUTER REFERENCES:([t].[pk], [t].[ival], [t].[cval], [t].[mval]))1321Nested LoopsLeft Anti Semi JoinOUTER REFERENCES:([t].[pk], [t].[ival], [t].[cval], [t].[mval])NULL104.18E-06280.00657156[t].[pk], [t].[ival], [t].[cval], [t].[mval]NULLPLAN_ROW01
61 |--Index Scan(OBJECT:(@Set1 AS [t]))1332Index ScanIndex ScanOBJECT:(@Set1 AS [t])[t].[pk], [t].[ival], [t].[cval], [t].[mval]10.0031250.0001581280.0032831[t].[pk], [t].[ival], [t].[cval], [t].[mval]NULLPLAN_ROW01
26 |--Clustered Index Seek(OBJECT:(@Set2 AS ), SEEK:(.[pk]=@Set1.[pk] as [t].[pk]), WHERE:(@Set1.[ival] as [t].[ival] = @Set2.[ival] as .[ival] AND @Set1.[mval] as [t].[mval] = @Set2.[mval] as .[mval] AND @Set1.[cval] as [t].[cval] = @Set2.[cval] as .[cval]) ORDERED FORWARD)1342Clustered Index SeekClustered Index SeekOBJECT:(@Set2 AS ), SEEK:(.[pk]=@Set1.[pk] as [t].[pk]), WHERE:(@Set1.[ival] as [t].[ival] = @Set2.[ival] as .[ival] AND @Set1.[mval] as [t].[mval] = @Set2.[mval] as .[mval] AND @Set1.[cval] as [t].[cval] = @Set2.[cval] as .[cval]) ORDERED FORWARDNULL10.0031250.0001581200.0032831NULLNULLPLAN_ROW01
RowsExecutesStmtTextStmtIdNodeIdParentPhysicalOpLogicalOpArgumentDefinedValuesEstimateRowsEstimateIOEstimateCPUAvgRowSizeTotalSubtreeCostOutputListWarningsTypeParallelEstimateExecutions
41SELECT *
FROM @Set1 AS t
EXCEPT
SELECT *
FROM @Set2 s1510NULLNULLNULLNULL1NULLNULLNULL0.00657156NULLNULLSELECT0NULL
41 |--Nested Loops(Left Anti Semi Join, OUTER REFERENCES:([t].[pk], [t].[ival], [t].[cval], [t].[mval]))1521Nested LoopsLeft Anti Semi JoinOUTER REFERENCES:([t].[pk], [t].[ival], [t].[cval], [t].[mval])NULL104.18E-06280.00657156[t].[pk], [t].[ival], [t].[cval], [t].[mval]NULLPLAN_ROW01
61 |--Index Scan(OBJECT:(@Set1 AS [t]))1532Index ScanIndex ScanOBJECT:(@Set1 AS [t])[t].[pk], [t].[ival], [t].[cval], [t].[mval]10.0031250.0001581280.0032831[t].[pk], [t].[ival], [t].[cval], [t].[mval]NULLPLAN_ROW01
26 |--Clustered Index Seek(OBJECT:(@Set2 AS ), SEEK:(.[pk]=@Set1.[pk] as [t].[pk]), WHERE:(@Set1.[ival] as [t].[ival] = @Set2.[ival] as .[ival] AND @Set1.[mval] as [t].[mval] = @Set2.[mval] as .[mval] AND @Set1.[cval] as [t].[cval] = @Set2.[cval] as .[cval]) ORDERED FORWARD)1542Clustered Index SeekClustered Index SeekOBJECT:(@Set2 AS ), SEEK:(.[pk]=@Set1.[pk] as [t].[pk]), WHERE:(@Set1.[ival] as [t].[ival] = @Set2.[ival] as .[ival] AND @Set1.[mval] as [t].[mval] = @Set2.[mval] as .[mval] AND @Set1.[cval] as [t].[cval] = @Set2.[cval] as .[cval]) ORDERED FORWARDNULL10.0031250.0001581200.0032831NULLNULLPLAN_ROW01
I also tested with several extra rows added to table @Set1:
INSERT @Set1
(pk, ival, cval, mval)
VALUES
(1, 1000, 'a', $1),
(2, NULL, 'b', $2),
(3, 3000, 'c', NULL),
(4, NULL, 'd', $4),
(5, 5000, 'e', $5),
(6, 6000, 'e', $6),
(7, 7000, 'e', $7),
(8, 8000, 'e', $8),
(9, 9000, 'e', $9),
(10, 10000, 'e', $10),
(11, 11000, 'e', $11),
(12, 12000, 'e', $12);
;
Conclusion remains the same:
1. NOT EXISTS ... INTERSECT does not provide any advantage comparing to NOT EXISTS ...EQUAL or EXCEPT forms of queries;
2. It adds a small overhead of an additional TOP 1 operation when looking for newly added rows.
Which is a bit different from what Paul has come up with.
You may with to run those tests for yourself.
See which conclusions you'll support afterwards.
_____________
Code for TallyGenerator