check 2 date columns for same date

  • Here is the situation:

    I have multiple events with start and end dates.

    I've been asked to find the difference between the start and end dates for each event for each clientID.

    Simple enough using datediff.

    BUT.....

    The user wants to treat 2 (or more) records as ONE if the end date of the first record is the same as the start date of the next.

    use master

    --drop table #temp

    create table #temp(

    pk_id int,

    clientID int,

    lastName char(20),

    firstName char(20),

    startDate datetime,

    endDate datetime

    )

    insert into #temp(pk_id, clientID, lastName, firstName, startDate, endDate)

    select 1, 1, 'Jones', 'Mary', '2011-01-01', '2011-01-05' union all

    select 2, 1, 'Jones', 'Mary', '2011-01-05', '2011-01-10' union all

    select 3, 5, 'Barnes', 'Victor', '2011-01-05', '2011-02-10' union all

    select 4, 1, 'Jones', 'Mary', '2011-01-10', '2011-01-15' union all

    select 5, 4, 'Green', 'Sue', '2011-01-04', '2011-01-15' union all

    select 6, 1, 'Jones', 'Mary', '2011-01-16', '2011-01-31' union all

    select 7, 2, 'Smith', 'John', '2011-01-02', '2011-01-31' union all

    select 8, 3, 'Brown', 'Fred', '2011-02-02', '2011-02-15' union all

    select 9, 4, 'Green', 'Sue', '2011-01-15', '2011-01-31' union all

    select 10, 4, 'Green', 'Sue', '2011-02-15', '2011-02-21'

    Using the code below we get 10 rows returned from the temp table.

    select clientID, lastName, firstName, startDate, endDate, datediff(dd, startDate, enddate) as dtDiff

    from #temp

    order by clientID, startDate, endDate

    What I need to have is 7 rows:

    clientID lastName firstName startDate endDate dtDiff

    1 Jones Mary 2011-01-01 00:00:00.000 2011-01-15 00:00:00.000 14

    1 Jones Mary 2011-01-16 00:00:00.000 2011-01-31 00:00:00.000 15

    2 Smith John 2011-01-02 00:00:00.000 2011-01-31 00:00:00.000 29

    3 Brown Fred 2011-02-02 00:00:00.000 2011-02-15 00:00:00.000 13

    4 Green Sue 2011-01-04 00:00:00.000 2011-01-31 00:00:00.000 27

    4 Green Sue 2011-02-15 00:00:00.000 2011-02-21 00:00:00.000 6

    5 Barnes Victor 2011-01-05 00:00:00.000 2011-02-10 00:00:00.000 36

    As you can see Mary Jones and Sue Green each have 2 events even though there are 4 and 3 records respectively in the source table. There will never be overlapping date ranges for a clientID.

    I know there are many cursor based options for this problem, but I'm looking for a simple SQL solution as we may need to transfer the SQL code into BusinessObjects.

    Thanks in advance for any assistance.

    Tim Hansen


    Living in Paradise: N 34°16'07.99" W 119°12'17.58"

  • Based on what you have provided, here is what I came up with to solve your problem.

    --drop table #temp

    create table #TimeTemp(

    pk_id int,

    clientID int,

    lastName char(20),

    firstName char(20),

    startDate datetime,

    endDate datetime

    )

    insert into #TimeTemp(pk_id, clientID, lastName, firstName, startDate, endDate)

    select 1, 1, 'Jones', 'Mary', '2011-01-01', '2011-01-05' union all

    select 2, 1, 'Jones', 'Mary', '2011-01-05', '2011-01-10' union all

    select 3, 5, 'Barnes', 'Victor', '2011-01-05', '2011-02-10' union all

    select 4, 1, 'Jones', 'Mary', '2011-01-10', '2011-01-15' union all

    select 5, 4, 'Green', 'Sue', '2011-01-04', '2011-01-15' union all

    select 6, 1, 'Jones', 'Mary', '2011-01-16', '2011-01-31' union all

    select 7, 2, 'Smith', 'John', '2011-01-02', '2011-01-31' union all

    select 8, 3, 'Brown', 'Fred', '2011-02-02', '2011-02-15' union all

    select 9, 4, 'Green', 'Sue', '2011-01-15', '2011-01-31' union all

    select 10, 4, 'Green', 'Sue', '2011-02-15', '2011-02-21';

    with TimeTemp (

    pk_id,

    clientID,

    lastName,

    firstName,

    startDate,

    endDate,

    rn

    ) as (

    select

    pk_id,

    clientID,

    lastName,

    firstName,

    startDate,

    endDate,

    ROW_NUMBER() over (partition by clientID order by startDate)

    from

    #TimeTemp

    )

    , TestDate (

    pk_id,

    clientID,

    lastName,

    firstName,

    startDate,

    endDate,

    groupid

    ) as (

    select

    tt1.pk_id,

    tt1.clientID,

    tt1.lastName,

    tt1.firstName,

    tt1.startDate,

    tt1.endDate,

    tt1.rn - ISNULL(tt2.rn,0)

    from

    TimeTemp tt1

    left outer join TimeTemp tt2

    on (tt1.clientID = tt2.clientID

    and tt1.startDate = tt2.endDate)

    )

    select

    clientID,

    lastName,

    firstName,

    min(startDate) as startDate,

    max(endDate) as endDate,

    datediff(dd,min(startDate),max(endDate)) as DtDiff

    from

    TestDate td

    group by

    clientID,

    lastName,

    firstName,

    groupid

    order by

    td.clientID;

    drop table #TimeTemp;

    Hope this helps, and thank you for providing us with the DDL, sample data, and expected results. It helped a lot.

  • Another way

    WITH Starts AS (

    SELECT a.clientID, a.lastName, a.firstName, a.startDate

    FROM #temp a

    WHERE NOT EXISTS (SELECT * FROM #temp b

    WHERE b.clientID=a.clientID

    AND b.endDate=a.startDate)),

    Ends AS (

    SELECT a.clientID, a.endDate

    FROM #temp a

    WHERE NOT EXISTS (SELECT * FROM #temp b

    WHERE b.clientID=a.clientID

    AND b.startDate=a.endDate))

    SELECT s.clientID, s.lastName, s.firstName, s.startDate,

    MIN(e.endDate) AS endDate,

    DATEDIFF(day,s.startDate,MIN(e.endDate)) AS dtDiff

    FROM Starts s

    INNER JOIN Ends e ON e.clientID=s.clientID

    AND s.startDate<e.endDate

    GROUP BY s.clientID, s.lastName, s.firstName, s.startDate

    ORDER BY s.clientID, s.startDate;

    ____________________________________________________

    Deja View - The strange feeling that somewhere, sometime you've optimised this query before

    How to get the best help on a forum

    http://www.sqlservercentral.com/articles/Best+Practices/61537
  • Thanks Lynn.

    I will give this solution to the SQL developer working on this project and let him see if it will work for him.

    Tim Hansen


    Living in Paradise: N 34°16'07.99" W 119°12'17.58"

  • NOTE: The first code is much more efficient. Just based on the sample data the first had a scan count of 2 and logical reads of 2, while the second had 4 scan counts and 30 logical reads.

    For better, quicker answers, click on the following...
    http://www.sqlservercentral.com/articles/Best+Practices/61537/

    For better answers on performance questions, click on the following...
    http://www.sqlservercentral.com/articles/SQLServerCentral/66909/

  • Hate to burst my own bubble, but there is a bug in my code. The code as posted will only work if there is no gap in the first set of records. If there is a gap between the first and second record, it fails.

    Here is some new code. To fully understand what is happening, you need to read the article I reference below regarding Running Totals. You want the article written my Jeff Moden, not mine, as he explains how it works. Then you need to read the discussion as well. If I recall there has been verification code written, and that is discussed in the discuss thread as well.

    create table dbo.TimeData( -- Represents source data

    pk_id int,

    clientID int,

    lastName char(20),

    firstName char(20),

    startDate datetime,

    endDate datetime

    );

    insert into dbo.TimeData(pk_id, clientID, lastName, firstName, startDate, endDate)

    select 1, 1, 'Jones', 'Mary', '2011-01-01', '2011-01-05' union all

    select 2, 1, 'Jones', 'Mary', '2011-01-06', '2011-01-10' union all

    select 3, 5, 'Barnes', 'Victor', '2011-01-05', '2011-02-10' union all

    select 4, 1, 'Jones', 'Mary', '2011-01-10', '2011-01-15' union all

    select 5, 4, 'Green', 'Sue', '2011-01-04', '2011-01-15' union all

    select 6, 1, 'Jones', 'Mary', '2011-01-15', '2011-01-31' union all

    select 7, 2, 'Smith', 'John', '2011-01-02', '2011-01-31' union all

    select 8, 3, 'Brown', 'Fred', '2011-02-02', '2011-02-15' union all

    select 9, 4, 'Green', 'Sue', '2011-01-15', '2011-01-31' union all

    select 10, 4, 'Green', 'Sue', '2011-02-15', '2011-02-21';

    create table #TimeTemp( -- Work table

    pk_id int,

    clientID int,

    lastName char(20),

    firstName char(20),

    startDate datetime,

    endDate datetime,

    groupid int null,

    primary key clustered (clientID, startDate)

    );

    insert into #TimeTemp (

    pk_id,

    clientID,

    lastName,

    firstName,

    startDate,

    endDate

    )

    select

    pk_id,

    clientID,

    lastName,

    firstName,

    startDate,

    endDate

    from

    dbo.TimeData;

    declare @currentDate datetime,

    @groupid int,

    @clientID int;

    set @currentDate = '19000101'; -- zero date, hopefully not equal to any date in your system

    set @groupid = 0;

    set @clientID = 0;

    update #TimeTemp set

    @groupid = groupid = @groupid + case when clientID <> @clientID then 1

    when startDate <> @currentDate then 1

    else 0 end,

    @currentDate = endDate,

    @clientID = clientID

    from

    #TimeTemp with (tablockx)

    option

    (maxdop 1);

    select * from #TimeTemp; -- Show what is in the work table after the update

    select

    clientID,

    lastName,

    firstName,

    min(startDate) as startDate,

    max(endDate) as endDate,

    datediff(dd,min(startDate),max(endDate)) as DtDiff

    from

    #TimeTemp td

    group by

    clientID,

    lastName,

    firstName,

    groupid

    order by

    td.clientID;

    drop table #TimeTemp;

    drop table dbo.TimeData;

Viewing 6 posts - 1 through 5 (of 5 total)

You must be logged in to reply to this topic. Login to reply