Can I pull DISTINCT customers with SQL based on most recent row record only?


  • set statistics io,time off

    DROP TABLE latestrecords ;
    GO
    CREATE TABLE latestrecords
    (
    Id int,
    Email varchar(200),
    Customer_FirstName varchar(200),
    Status varchar(200),
    RecordTimestamp DATETIME
    )
    GO
    DECLARE @RowsToInsert as int = 1000000
    DECLARE @RowsPerId as int = 4 /*!Modified*/
    DECLARE @DistinctIds as int = @RowsToInsert /@RowsPerId
    INSERT INTO latestrecords
    SELECT t.N%@DistinctIds ,CONCAT('test', t.N%@DistinctIds,'@test.com'),CONCAT('Name',t.N%@DistinctIds),'New',DATEADD(minute,t.n,'July 15, 2018 10:08 PM')
    from dbo.tally t
    where t.N <= @RowsToInsert

    CREATE INDEX IX_latestrecords_1 ON latestrecords(Id,RecordTimestamp) INCLUDE (Email,Customer_FirstName,Status) ;

    SELECT COUNT(*) Count FROM latestrecords
    SELECT COUNT(distinct id) [distinct id] FROM latestrecords

    IF OBJECT_ID('tempdb..#t1') IS NOT NULL
    DROP TABLE #t1
    IF OBJECT_ID('tempdb..#t2') IS NOT NULL
    DROP TABLE #t2

    set statistics io,time on
    PRINT '************* row_number, with index *************'
    select Id,Email,Customer_FirstName,Status,RecordTimestamp
    into #t1
    from
    (
    select *,
    row_number()over(partition by Id /*!Modified*/ order by RecordTimestamp desc) as rnk
    from latestrecords
    )ranked_records
    where rnk=1
    PRINT '************* CTE, with index *************'
    ;WITH CTE AS (select distinct Id from latestrecords)
    SELECT T.*
    into #t2
    FROM CTE
    CROSS APPLY(SELECT TOP(1) *
    FROM latestrecords X
    WHERE X.Id =CTE.Id
    ORDER BY X.RecordTimestamp desc) as T

    set statistics io,time off
    go

    drop INDEX IX_latestrecords_1 ON latestrecords;

    IF OBJECT_ID('tempdb..#t1') IS NOT NULL
    DROP TABLE #t1
    IF OBJECT_ID('tempdb..#t2') IS NOT NULL
    DROP TABLE #t2

    set statistics io,time on
    PRINT '************* row_number, NO index *************'
    select Id,Email,Customer_FirstName,Status,RecordTimestamp
    into #t1
    from
    (
    select *,
    row_number()over(partition by Id /*!Modified*/ order by RecordTimestamp desc) as rnk
    from latestrecords
    )ranked_records
    where rnk=1
    PRINT '************* CTE, NO index *************'
    ;WITH CTE AS (select distinct Id from latestrecords)
    SELECT T.*
    into #t2
    FROM CTE
    CROSS APPLY(SELECT TOP(1) *
    FROM latestrecords X
    WHERE X.Id =CTE.Id
    ORDER BY X.RecordTimestamp desc) as T

    set statistics io,time off

    SQL DBA,SQL Server MVP(07, 08, 09) "It's a dog-eat-dog world, and I'm wearing Milk-Bone underwear." "Norm", on "Cheers". Also from "Cheers", from "Carla": "You need to know 3 things about Tortelli men: Tortelli men draw women like flies; Tortelli men treat women like flies; Tortelli men's brains are in their flies".

  • Thank you everyone for the help - Everyone's response was very helpful!

  • mikewill - Tuesday, July 24, 2018 9:33 PM

    Thank you everyone for the help - Everyone's response was very helpful!

    In summary, you will probably get better performance using the ROW_NUMBER method if you have less than 5 rows per Id. If you have more rows per Id than this you will probably get better performance using the CTE CROSS APPLY TOP(1) method with the additional requirement that you have an index on (Id, RecordTimestamp). As always, it needs to be tested on your data.

Viewing 3 posts - 31 through 33 (of 33 total)

You must be logged in to reply to this topic. Login to reply