SQL to combine rows based on dates and other common factors

Question

Post reply

SQL to combine rows based on dates and other common factors

SQLStalker

Mr or Mrs. 500

Points: 557
More actions
July 3, 2013 at 11:40 am

#279944

Hi All,
We have a database for employees, jobs and work allocation. I am trying to find an efficient way to combine rows based on date and work load but excluding vacation allocations.
Here is an example of the current data set:
IDEmployeeJobStartDateEndDateWorkload
1John DoeHSBC01/01/201331/12/2013100
2John DoeVacation17/06/201321/06/2013100
3John DoeHSBC01/01/201431/12/2014100
4John DoeVacation19/08/201323/08/2013100
5John DoeBarclays01/01/201431/01/201450
6John DoeBarclays01/01/201531/01/201550
7John DoeSantander06/01/201425/01/201450
8John DoeSantander05/03/201417/07/201480
9John DoeSantander05/03/201417/07/201450
10John DoeVacation13/01/201417/01/2014100
The final result should be as follows (bold indicates the changed rows excluding header). Note that the IDs are not important at this point and also the work allocation to the job 'Santander' are separate rows as one is at 50% workload and the other is 80. The rows should only be combined if the employee, job and workload is the same. 'Vacations' are not combined at all.
IDEmployeeJobStartDateEndDateWorkload
1John DoeHSBC01/01/201331/12/2014100
2John DoeVacation17/06/201321/06/2013100
4John DoeVacation19/08/201323/08/2013100
5John DoeBarclays01/01/201431/01/201550
7John DoeSantander06/01/201417/07/201450
8John DoeSantander05/03/201417/07/201480
10John DoeVacation13/01/201417/01/2014100
Here is the DDL which another forumite kindly provided for an earlier query. Any suggestions would be greatly appreciated.
set dateformat dmy
if OBJECT_ID('tempdb..#Something') is not null
drop table #Something
create table #Something
(
ID int,
Employee varchar(20),
Job varchar(20),
StartDate datetime,
EndDate datetime,
Workload int
)
insert #Something
select *
from (Values
(1, 'John Doe', 'HSBC', '01/01/2013', '31/12/2013', 100)
,(2, 'John Doe', 'Vacation', '17/06/2013', '21/06/2013', 100)
,(3, 'John Doe', 'HSBC', '01/01/2014', '31/12/2014', 100)
,(4, 'John Doe', 'Vacation', '19/08/2013', '23/08/2013', 100)
,(5, 'John Doe', 'Barclays', '01/01/2014', '31/01/2014', 50)
,(6, 'John Doe', 'Barclays', '01/01/2015', '31/01/2015', 50)
,(7, 'John Doe', 'Santander', '06/01/2014', '25/01/2014', 50)
,(8, 'John Doe', 'Santander', '05/03/2014', '17/07/2014', 80)
,(9, 'John Doe', 'Santander', '05/03/2014', '17/07/2014', 50)
,(10, 'John Doe', 'Vacation', '13/01/2014', '17/01/2014', 100)
) x(a,b,c,d,e,f)
select *
from #Something
Forum Etiquette: How to post Reporting Services problems

[/url]

Forum Etiquette: How to post data/code on a forum to get the best help - by Jeff Moden
[/url]

How to Post Performance Problems - by Gail Shaw
[/url]

mister.magoo SSC-Forever Points: 47068 More actions · Answer 1

How about this

select min(ID) ID,

Employee,

Job,

min(StartDate),

max(EndDate),

Workload

from #Something

where Job<>'Vacation'

group by Employee,

Job,

Workload

union all

select ID,

Employee,

Job,

StartDate,

EndDate,

Workload

from #Something

where Job='Vacation'

order by ID

MM

select geometry::STGeomFromWKB(0x0106000000020000000103000000010000000B0000001000000000000840000000000000003DD8CCCCCCCCCC0840000000000000003DD8CCCCCCCCCC08408014AE47E17AFC3F040000000000104000CDCCCCCCCCEC3F9C999999999913408014AE47E17AFC3F9C99999999991340000000000000003D0000000000001440000000000000003D000000000000144000000000000000400400000000001040000000000000F03F100000000000084000000000000000401000000000000840000000000000003D0103000000010000000B000000000000000000143D000000000000003D009E99999999B93F000000000000003D009E99999999B93F8014AE47E17AFC3F400000000000F03F00CDCCCCCCCCEC3FA06666666666FE3F8014AE47E17AFC3FA06666666666FE3F000000000000003D1800000000000040000000000000003D18000000000000400000000000000040400000000000F03F000000000000F03F000000000000143D0000000000000040000000000000143D000000000000003D, 0);

LinksUp SSCertifiable Points: 6526 More actions · Answer 2

Here is another possible solution using cte's. While it makes seeing what the query does a bit easier, they are usually not as efficient as a solution without one!

;with cte as

(

select ID,

ROW_NUMBER() OVER(PARTITION by employee, job, [workload] order by ID) rowNum

from #Something

where Job <> 'Vacation'

),

cte1 as

(

select ID,

ROW_NUMBER() OVER(Partition by ID order by ID) rowNum

from #Something

where Job = 'Vacation'

)

select s.ID, s.Employee, s.job, s.StartDate, s.EndDate, s.workload

from #Something s

left outer join cte c on c.id = s.ID

left outer join cte1 n on n.ID = s.ID

where n.RowNum > 0 or c.RowNum = 1

order by s.ID

__________________________________________________________________________________________________________
How to Post to get the most: http://www.sqlservercentral.com/articles/Best+Practices/61537/

Steven Willis SSCrazy Eights Points: 9893 More actions · Answer 3

IF OBJECT_ID('tempdb..#Something') is not null

DROP TABLE #Something

CREATE TABLE #Something

(

ID int,

Employee varchar(20),

Job varchar(20),

StartDate datetime,

EndDate datetime,

Workload int

)

INSERT #Something

SELECT *

FROM (VALUES

(1, 'John Doe', 'HSBC', '01/01/2013', '31/12/2013', 100)

,(2, 'John Doe', 'Vacation', '17/06/2013', '21/06/2013', 100)

,(3, 'John Doe', 'HSBC', '01/01/2014', '31/12/2014', 100)

,(4, 'John Doe', 'Vacation', '19/08/2013', '23/08/2013', 100)

,(5, 'John Doe', 'Barclays', '01/01/2014', '31/01/2014', 50)

,(6, 'John Doe', 'Barclays', '01/01/2015', '31/01/2015', 50)

,(7, 'John Doe', 'Santander', '06/01/2014', '25/01/2014', 50)

,(8, 'John Doe', 'Santander', '05/03/2014', '17/07/2014', 80)

,(9, 'John Doe', 'Santander', '05/03/2014', '17/07/2014', 50)

,(10, 'John Doe', 'Vacation', '13/01/2014', '17/01/2014', 100)

) x(a,b,c,d,e,f)

SELECT

ROW_NUMBER() OVER (ORDER BY StartDate, EndDate) AS ID,

r.*

FROM

(

SELECT DISTINCT

Employee,

Job,

MIN(StartDate) OVER (PARTITION BY Employee, Job, Workload) AS StartDate,

MIN(EndDate) OVER (PARTITION BY Employee, Job, Workload) AS EndDate,

Workload

FROM

#Something

WHERE

Job <> 'vacation'

) r

IDEmployeeJobStartDateEndDateWorkload

1John DoeHSBC2013-01-01 00:00:00.0002013-12-31 00:00:00.000100

2John DoeBarclays2014-01-01 00:00:00.0002014-01-31 00:00:00.00050

3John DoeSantander2014-01-06 00:00:00.0002014-01-25 00:00:00.00050

4John DoeSantander2014-03-05 00:00:00.0002014-07-17 00:00:00.00080

SQLStalker Mr or Mrs. 500 Points: 557 More actions · Answer 4

Thank you all for your responses.

mister.magoo, your query appears to be giving the correct results.

Another question. Is there a way of merging rows only if they are separated by vacation? So the criteria is the rows must have the same employee, same job, same workload and separated by vacation.

The current data set is:

IDEmployeeJobStartDateEndDateWorkload

1John DoeHSBC2013-08-012013-08-14100

2John DoeVacation2013-08-152013-08-20100

3John DoeHSBC2013-08-212013-08-31100

4John DoeVacation2013-09-202013-09-26100

5John DoeHSBC2013-09-272013-09-30100

6John DoeBarclays2013-10-012013-10-1050

7John DoeVacation2013-10-112013-10-17100

8John DoeBarclays2013-10-182013-10-2450

9John DoeBarclays2013-10-262013-10-3150

10John DoeSantander2013-11-012013-11-0750

11John DoeVacation2013-11-082013-11-15100

12John DoeSantander2013-11-162013-11-2450

13John DoeSantander2013-11-252013-11-3080

The expected results (rows merged are in bold):

IDEmployeeJob StartDate EndDate Workload

1John DoeHSBC 2013-08-01 2013-08-31100

2John DoeVacation2013-08-15 2013-08-20100

4John DoeVacation2013-09-20 2013-09-26100

5John DoeHSBC 2013-09-27 2013-09-30100

6John DoeBarclays2013-10-01 2013-10-2450

7John DoeVacation2013-10-11 2013-10-17100

9John DoeBarclays2013-10-26 2013-10-3150

10John DoeSantander2013-11-01 2013-11-2450

11John DoeVacation2013-11-08 2013-11-15100

13John DoeSantander2013-11-25 2013-11-3080

Please find below the DDL. Any help is appreciated.

set dateformat dmy

if OBJECT_ID('tempdb..#Something') is not null

drop table #Something

create table #Something

(

ID int,

Employee varchar(20),

Job varchar(20),

StartDate datetime,

EndDate datetime,

[Workload] int

)

insert #Something

select *

from (Values

(1, 'John Doe', 'HSBC', '01/08/2013', '14/08/2013', 100)

,(2, 'John Doe', 'Vacation', '15/08/2013', '20/08/2013', 100)

,(3, 'John Doe', 'HSBC', '21/08/2013', '31/08/2013', 100)

,(4, 'John Doe', 'Vacation', '20/09/2013', '26/09/2013', 100)

,(5, 'John Doe', 'HSBC', '27/09/2013', '30/09/2013', 100)

,(6, 'John Doe', 'Barclays', '01/10/2013', '10/10/2013', 50)

,(7, 'John Doe', 'Vacation', '11/10/2013', '17/10/2013', 100)

,(8, 'John Doe', 'Barclays', '18/10/2013', '24/10/2013', 50)

,(9, 'John Doe', 'Barclays', '26/10/2013', '31/10/2013', 50)

,(10, 'John Doe', 'Santander', '01/11/2013', '07/11/2013', 50)

,(11, 'John Doe', 'Vacation', '08/11/2013', '15/11/2013', 100)

,(12, 'John Doe', 'Santander', '16/11/2013', '24/11/2013', 50)

,(13, 'John Doe', 'Santander', '25/11/2013', '30/11/2013', 80)

) x(a,b,c,d,e,f)

select *

from #Something

order by StartDate