• Hi All,

    Thanks to SSC Veteran's assistance, I am close to the correct result set.

    During testing, I found that when 2 rows overlap, duplicate rows appear. An example is:

    John Smith, Vacation, 12/02/2014, 25/02/2014

    John Smith, Natwest, 14/02/2014, 28/02/2014

    The expected result should be the Natwest job start date is the next day after vacation:

    John Smith, Vacation, 12/02/2014, 25/02/2014

    John Smith, Natwest, 26/02/2014, 28/02/2014

    However, I see the following:

    John Smith, Vacation, 12/02/2014, 25/02/2014

    John Smith, Natwest, 14/02/2014, 28/02/2014

    John Smith, Natwest, 26/02/2014, 28/02/2014

    Any pointers would be appreciated. Here is the DDL.

    set dateformat dmy

    if OBJECT_ID('tempdb..#Something') is not null

    drop table #Something

    create table #Something

    (

    ID int,

    Employee varchar(20),

    Job varchar(20),

    StartDate datetime,

    EndDate datetime,

    Workload int

    )

    insert #Something

    select *

    from (Values

    (1, 'John Doe', 'HSBC', '01/01/2013', '31/12/2013', 100)

    ,(2, 'John Doe', 'Vacation', '17/06/2013', '21/06/2013', 100)

    ,(3, 'John Doe', 'Vacation', '19/08/2013', '23/08/2013', 100)

    ,(4, 'John Doe', 'Barclays', '01/01/2014', '31/01/2014', 50)

    ,(5, 'John Doe', 'Santander', '06/01/2014', '25/01/2014', 50)

    ,(6, 'John Doe', 'Vacation', '13/01/2014', '17/01/2014', 100)

    ,(7, 'John Doe', 'Natwest', '14/02/2014', '28/02/2014', 100)

    ,(8, 'John Doe', 'Vacation', '12/02/2014', '25/02/2014', 100)

    ) x(a,b,c,d,e,f)

    ;

    WITH VacationCTE AS (--This CTE grabs just the Vacation rows so we can compare and split dates from them

    SELECT ID,

    Employee,

    Job,

    StartDate,

    EndDate,

    Workload

    FROM #Something

    WHERE Job = 'Vacation'

    ),

    NewRowsCTE AS ( --This CTE creates just new rows starting after the vacations for each banking job

    SELECT a.ID,

    a.Employee,

    a.Job,

    DATEADD (d,1,b.EndDate) AS StartDate,

    a.EndDate,

    a.Workload

    FROM #Something a

    INNER JOIN VacationCTE b

    ON a.StartDate <= b.EndDate

    AND a.EndDate > b.StartDate

    AND a.EndDate > b.EndDate -- This is needed because if the vacation ends when the project does, there is no split row after

    ),

    UnionCTE AS ( -- This CTE merges the new rows with the existing ones

    SELECT ID,

    Employee,

    Job,

    StartDate,

    EndDate,

    Workload

    FROM #Something

    UNION ALL

    SELECT ID,

    Employee,

    Job,

    StartDate,

    EndDate,

    Workload

    FROM NewRowsCTE

    ),

    FixEndDateCTE as (

    SELECT CONVERT (CHAR,c.ID)+CONVERT (CHAR,c.StartDate) AS FixID, min(d.StartDate) as StartDate

    FROM UnionCTE c

    LEFT OUTER JOIN VacationCTE d

    ON c.StartDate < d.StartDate

    AND c.EndDate >= d.StartDate

    WHERE c.Job <> 'Vacation'

    GROUP BY CONVERT (CHAR,c.ID)+CONVERT (CHAR,c.StartDate)

    )

    SELECT row_number() OVER (ORDER BY e.Startdate),

    e.Employee,

    e.Job,

    e.StartDate,

    CASE WHEN f.StartDate IS NULL

    THEN e.EndDate

    ELSE DATEADD (d,-1,f.StartDate)

    END,

    e.Workload

    FROM UnionCTE e

    LEFT OUTER JOIN FixEndDateCTE f

    ON (CONVERT (CHAR,e.ID)+CONVERT (CHAR,e.StartDate)) = f.FixID

    ORDER BY e.StartDate