Hi All,
Thanks to SSC Veteran's assistance, I am close to the correct result set.
During testing, I found that when 2 rows overlap, duplicate rows appear. An example is:
John Smith, Vacation, 12/02/2014, 25/02/2014
John Smith, Natwest, 14/02/2014, 28/02/2014
The expected result should be the Natwest job start date is the next day after vacation:
John Smith, Vacation, 12/02/2014, 25/02/2014
John Smith, Natwest, 26/02/2014, 28/02/2014
However, I see the following:
John Smith, Vacation, 12/02/2014, 25/02/2014
John Smith, Natwest, 14/02/2014, 28/02/2014
John Smith, Natwest, 26/02/2014, 28/02/2014
Any pointers would be appreciated. Here is the DDL.
set dateformat dmy
if OBJECT_ID('tempdb..#Something') is not null
drop table #Something
create table #Something
(
ID int,
Employee varchar(20),
Job varchar(20),
StartDate datetime,
EndDate datetime,
Workload int
)
insert #Something
select *
from (Values
(1, 'John Doe', 'HSBC', '01/01/2013', '31/12/2013', 100)
,(2, 'John Doe', 'Vacation', '17/06/2013', '21/06/2013', 100)
,(3, 'John Doe', 'Vacation', '19/08/2013', '23/08/2013', 100)
,(4, 'John Doe', 'Barclays', '01/01/2014', '31/01/2014', 50)
,(5, 'John Doe', 'Santander', '06/01/2014', '25/01/2014', 50)
,(6, 'John Doe', 'Vacation', '13/01/2014', '17/01/2014', 100)
,(7, 'John Doe', 'Natwest', '14/02/2014', '28/02/2014', 100)
,(8, 'John Doe', 'Vacation', '12/02/2014', '25/02/2014', 100)
) x(a,b,c,d,e,f)
;
WITH VacationCTE AS (--This CTE grabs just the Vacation rows so we can compare and split dates from them
SELECT ID,
Employee,
Job,
StartDate,
EndDate,
Workload
FROM #Something
WHERE Job = 'Vacation'
),
NewRowsCTE AS ( --This CTE creates just new rows starting after the vacations for each banking job
SELECT a.ID,
a.Employee,
a.Job,
DATEADD (d,1,b.EndDate) AS StartDate,
a.EndDate,
a.Workload
FROM #Something a
INNER JOIN VacationCTE b
ON a.StartDate <= b.EndDate
AND a.EndDate > b.StartDate
AND a.EndDate > b.EndDate -- This is needed because if the vacation ends when the project does, there is no split row after
),
UnionCTE AS ( -- This CTE merges the new rows with the existing ones
SELECT ID,
Employee,
Job,
StartDate,
EndDate,
Workload
FROM #Something
UNION ALL
SELECT ID,
Employee,
Job,
StartDate,
EndDate,
Workload
FROM NewRowsCTE
),
FixEndDateCTE as (
SELECT CONVERT (CHAR,c.ID)+CONVERT (CHAR,c.StartDate) AS FixID, min(d.StartDate) as StartDate
FROM UnionCTE c
LEFT OUTER JOIN VacationCTE d
ON c.StartDate < d.StartDate
AND c.EndDate >= d.StartDate
WHERE c.Job <> 'Vacation'
GROUP BY CONVERT (CHAR,c.ID)+CONVERT (CHAR,c.StartDate)
)
SELECT row_number() OVER (ORDER BY e.Startdate),
e.Employee,
e.Job,
e.StartDate,
CASE WHEN f.StartDate IS NULL
THEN e.EndDate
ELSE DATEADD (d,-1,f.StartDate)
END,
e.Workload
FROM UnionCTE e
LEFT OUTER JOIN FixEndDateCTE f
ON (CONVERT (CHAR,e.ID)+CONVERT (CHAR,e.StartDate)) = f.FixID
ORDER BY e.StartDate