Home Forums SQL Server 2008 T-SQL (SS2K8) Improving query performance to detect first duplicate RE: Improving query performance to detect first duplicate

  • Here is another potential solution. Would like to have others give it a test.

    Against the small original test set it worked. Unfortunately, I think it may be wrong in other ways. This would probably work better if there were a fixed test suite of values with a validated results set to test against.

    with BaseData as (

    select

    s.SaleID,

    s.SaleStart,

    s.SaleEnd,

    slx.LocationID,

    six.ItemID

    from

    Sale s

    inner join SaleLocationXref slx

    on (s.SaleID = slx.SaleID)

    inner join SaleItemXref six

    on (s.SaleID = six.SaleID)

    )

    select

    bd.SaleID,

    bd.SaleStart,

    bd.SaleEnd,

    bd.LocationID,

    bd.ItemID

    from

    BaseData bd

    where

    exists(select 1

    from BaseData bd1

    where

    bd.SaleID <> bd1.SaleID and

    bd.LocationID = bd1.LocationID and

    bd.ItemID = bd1.ItemID and

    bd.SaleStart <= bd1.SaleEnd and

    bd.SaleEnd >= bd1.SaleStart)

    order by

    bd.ItemID,

    bd.LocationID,

    bd.SaleID;

    Modified to work using the test suite above (I commented 3 lines in the final select statement and added the holder variables) and include a run using MAXDOP(1) as well:

    set nocount on;

    go

    create table #SaleLocationXref (

    SaleID int not null,

    LocationID int not null,

    constraint SaleLocationXref_PK primary key (SaleID,LocationID)

    )

    create table #Sale (

    SaleID int not null,

    SaleStart datetime not null,

    SaleEnd datetime not null,

    constraint Sale_PK primary key (SaleID)

    )

    create table #SaleItemXref (

    SaleID int not null,

    ItemID int not null,

    constraint SaleItemXref_PK primary key (SaleID,ItemID)

    )

    insert into #SaleLocationXref values (1,1),(2,1),(4,1),(5,1),

    (3,2)

    insert into #Sale values (1,'01/01/2012','01/07/2012'),

    (2,'01/06/2012','01/10/2012'),

    (3,'01/01/2012','02/07/2012'),

    (4,'02/08/2012','02/15/2012'),

    (5,'01/09/2012','01/16/2012')

    insert into #SaleItemXref values (1,1),(1,2),(1,3),

    (2,1),(2,4),(2,5),

    (3,5),(3,6),(3,7),

    (4,5),(4,6),(4,7),

    (5,7),(5,8),(5,1)

    ;WITH Tally (n) AS (

    SELECT TOP 10000 10+ROW_NUMBER() OVER (ORDER BY (SELECT NULL))

    FROM sys.all_columns t1 CROSS JOIN sys.all_columns t2)

    INSERT INTO #SaleLocationXref

    SELECT DISTINCT SaleID * n, LocationID * n

    FROM #SaleLocationXref

    CROSS APPLY Tally

    ;WITH Tally (n) AS (

    SELECT TOP 10000 10+ROW_NUMBER() OVER (ORDER BY (SELECT NULL))

    FROM sys.all_columns t1 CROSS JOIN sys.all_columns t2)

    INSERT INTO #Sale

    SELECT SaleID, SaleStart, SaleEnd

    FROM (

    SELECT SaleID=SaleID * n, SaleStart, SaleEnd

    ,n=ROW_NUMBER() OVER (PARTITION BY SaleID * n ORDER BY (SELECT NULL))

    FROM #Sale

    CROSS APPLY Tally) a

    WHERE n = 1

    ;WITH Tally (n) AS (

    SELECT TOP 10000 10+ROW_NUMBER() OVER (ORDER BY (SELECT NULL))

    FROM sys.all_columns t1 CROSS JOIN sys.all_columns t2)

    INSERT INTO #SaleItemXref

    SELECT DISTINCT SaleID * n, ItemID * n

    FROM #SaleItemXref

    CROSS APPLY Tally

    DECLARE @Holder1 INT, @Holder2 INT

    PRINT '';

    PRINT '---- Revised query'

    SET STATISTICS TIME ON

    ;WITH Sale AS (

    SELECT a.SaleID, a.SaleStart, a.SaleEnd, b.LocationID, c.ItemID

    FROM #Sale a

    JOIN #SaleLocationXref b ON a.SaleID = b.SaleID

    JOIN #SaleItemXref c ON c.SaleID = a.SaleID

    )

    SELECT @Holder1=SaleID, @Holder2=LocationID

    FROM (

    SELECT a.SaleID, a.SaleStart, a.SaleEnd, a.LocationID, a.ItemID

    , SaleID2=b.SaleID, LocationID2=b.LocationID, ItemID2=b.ItemID

    ,n=ROW_NUMBER() OVER (PARTITION BY a.SaleID, a.LocationID ORDER BY (SELECT NULL))

    FROM Sale a

    CROSS APPLY (

    SELECT SaleID, LocationID, ItemID

    FROM Sale b

    WHERE (b.SaleStart BETWEEN a.SaleStart AND b.SaleEnd OR

    b.SaleEnd BETWEEN a.SaleStart AND b.SaleEnd) AND a.SaleID <> b.SaleID) b

    WHERE a.LocationID = b.LocationID AND a.ItemID = b.ItemID) c

    WHERE n=1

    SET STATISTICS TIME OFF

    PRINT '';

    PRINT '---- Original query'

    SET STATISTICS TIME ON

    select distinct @Holder1=SaleID, @Holder2=LocationID

    from (

    select SIX.SaleID, LocationID, ItemID,

    Count(*) over (partition by LocationID, ItemID) as Cnt

    from #SaleItemXref SIX

    join (

    select distinct S1.SaleID, SLX1.LocationID

    from #Sale S1

    join #Sale S2 -- Compare sales

    on S1.SaleID <> S2.SaleID

    and (S1.SaleStart <= S2.SaleEnd -- Compare dates overlap

    and S2.SaleStart <= S1.SaleEnd)

    join #SaleLocationXref SLX1

    on S1.SaleID = SLX1.SaleID

    join #SaleLocationXREF SLX2

    on S2.SaleID = SLX2.SaleID

    and SLX1.LocationID = SLX2.LocationID -- Compare location overlap

    ) #SaleOverlap

    on #SaleOverlap.SaleID = SIX.SaleID

    ) ItemOverlap

    where ItemOverlap.Cnt > 1 -- Compare item overlap

    SET STATISTICS TIME OFF

    PRINT '';

    PRINT '---- Original query with MAXDOP 1'

    SET STATISTICS TIME ON

    select distinct @Holder1=SaleID, @Holder2=LocationID

    from (

    select SIX.SaleID, LocationID, ItemID,

    Count(*) over (partition by LocationID, ItemID) as Cnt

    from #SaleItemXref SIX

    join (

    select distinct S1.SaleID, SLX1.LocationID

    from #Sale S1

    join #Sale S2 -- Compare sales

    on S1.SaleID <> S2.SaleID

    and (S1.SaleStart <= S2.SaleEnd -- Compare dates overlap

    and S2.SaleStart <= S1.SaleEnd)

    join #SaleLocationXref SLX1

    on S1.SaleID = SLX1.SaleID

    join #SaleLocationXREF SLX2

    on S2.SaleID = SLX2.SaleID

    and SLX1.LocationID = SLX2.LocationID -- Compare location overlap

    ) #SaleOverlap

    on #SaleOverlap.SaleID = SIX.SaleID

    ) ItemOverlap

    where ItemOverlap.Cnt > 1 -- Compare item overlap

    OPTION (MAXDOP 1)

    SET STATISTICS TIME OFF

    PRINT '';

    PRINT '---- Lynn''s Query';

    SET STATISTICS TIME ON;

    with BaseData as (

    select

    s.SaleID,

    s.SaleStart,

    s.SaleEnd,

    slx.LocationID,

    six.ItemID

    from

    #Sale s

    inner join #SaleLocationXref slx

    on (s.SaleID = slx.SaleID)

    inner join #SaleItemXref six

    on (s.SaleID = six.SaleID)

    )

    select

    --bd.SaleStart,

    --bd.SaleEnd,

    --bd.ItemID,

    @Holder1 = bd.SaleID,

    @Holder2 = bd.LocationID

    from

    BaseData bd

    where

    exists(select 1

    from BaseData bd1

    where

    bd.SaleID <> bd1.SaleID and

    bd.LocationID = bd1.LocationID and

    bd.ItemID = bd1.ItemID and

    bd.SaleStart <= bd1.SaleEnd and

    bd.SaleEnd >= bd1.SaleStart)

    order by

    bd.ItemID,

    bd.LocationID,

    bd.SaleID

    ;

    SET STATISTICS TIME OFF

    PRINT '';

    PRINT '---- Lynn''s Query with MAXDOP(1)';

    SET STATISTICS TIME ON;

    with BaseData as (

    select

    s.SaleID,

    s.SaleStart,

    s.SaleEnd,

    slx.LocationID,

    six.ItemID

    from

    #Sale s

    inner join #SaleLocationXref slx

    on (s.SaleID = slx.SaleID)

    inner join #SaleItemXref six

    on (s.SaleID = six.SaleID)

    )

    select

    --bd.SaleStart,

    --bd.SaleEnd,

    --bd.ItemID,

    @Holder1 = bd.SaleID,

    @Holder2 = bd.LocationID

    from

    BaseData bd

    where

    exists(select 1

    from BaseData bd1

    where

    bd.SaleID <> bd1.SaleID and

    bd.LocationID = bd1.LocationID and

    bd.ItemID = bd1.ItemID and

    bd.SaleStart <= bd1.SaleEnd and

    bd.SaleEnd >= bd1.SaleStart)

    order by

    bd.ItemID,

    bd.LocationID,

    bd.SaleID

    OPTION (MAXDOP 1);

    SET STATISTICS TIME OFF

    go

    drop table #SaleLocationXref

    drop table #Sale

    drop table #SaleItemXref

    And the results when run on a test VM:

    ---- Revised query

    SQL Server Execution Times:

    CPU time = 2768 ms, elapsed time = 1646 ms.

    ---- Original query

    SQL Server Execution Times:

    CPU time = 2609 ms, elapsed time = 1372 ms.

    ---- Original query with MAXDOP 1

    SQL Server Execution Times:

    CPU time = 2312 ms, elapsed time = 2337 ms.

    ---- Lynn's Query

    SQL Server Execution Times:

    CPU time = 1141 ms, elapsed time = 641 ms.

    ---- Lynn's Query with MAXDOP(1)

    SQL Server Execution Times:

    CPU time = 969 ms, elapsed time = 969 ms.