Since there's very few rate codes present, I thought of a way you can have your cake (return delimited strings) and eat it too (get relatively good performance).
1. The all in one query.
WITH lu AS
(
SELECT rate_code
,combo=POWER(2, ROW_NUMBER() OVER (ORDER BY rate_code)-1)
FROM
(
SELECT rate_code
FROM #worker_rate
GROUP BY rate_code
) a
),
UNIQUEnTuples (n, Tuples, ID) AS (
SELECT combo, CAST(rate_code AS VARCHAR(8000)), rate_code
FROM lu
UNION ALL
SELECT n.n+t.combo, CAST(t.rate_code AS VARCHAR(8000)) + ',' + n.Tuples, rate_code
FROM UNIQUEnTuples n
CROSS APPLY (
SELECT combo, rate_code
FROM lu t
WHERE t.rate_code < n.ID) t
)
SELECT grp, worker_id, combo=Tuples
FROM
(
SELECT grp=COUNT(*), worker_id, combo=SUM(combo)
FROM #worker_rate a
JOIN lu b ON a.rate_code = b.rate_code
GROUP BY worker_id
) a
JOIN UNIQUEnTuples b ON a.combo = b.n
ORDER BY 1, 2;
2. Divide and conquer (split above into 3 steps)
WITH lu AS
(
SELECT rate_code
,combo=POWER(2, ROW_NUMBER() OVER (ORDER BY rate_code)-1)
FROM
(
SELECT rate_code
FROM #worker_rate
GROUP BY rate_code
) a
)
SELECT rate_code, combo, combo2=CAST(NULL AS VARCHAR(8000))
INTO #lookup2
FROM lu;
WITH UNIQUEnTuples (n, Tuples, ID) AS
(
SELECT combo, CAST(rate_code AS VARCHAR(8000)), rate_code
FROM #lookup2
UNION ALL
SELECT n.n+t.combo, CAST(t.rate_code AS VARCHAR(8000)) + ',' + n.Tuples, rate_code
FROM UNIQUEnTuples n
CROSS APPLY (
SELECT combo, rate_code
FROM #lookup2 t
WHERE t.rate_code < n.ID) t
)
MERGE #lookup2 t
USING UNIQUEnTuples s
ON t.combo = s.n
WHEN MATCHED THEN
UPDATE SET combo2 = s.Tuples
WHEN NOT MATCHED THEN
INSERT (rate_code, combo, combo2)
VALUES (s.Tuples, s.n, s.Tuples);
SELECT grp, worker_id, combo=combo2
FROM
(
SELECT grp=COUNT(*), worker_id, combo=SUM(combo)
FROM #worker_rate a
JOIN #lookup2 b ON a.rate_code = b.rate_code
GROUP BY worker_id
) a
JOIN #lookup2 b ON a.combo = b.combo
ORDER BY 1, 2;
Timings (just my 4 queries at 10M rows):
Method ElapsedMS
Improved concat of rate codes as chars 105503
Divide and Conquer but in one step 24816
Use nTuples to create strings 46926
Divide and Conquer 30913
And the test harness:
create table #worker_rate (
worker_id int
,rate_code nvarchar(10) collate database_default
primary key (rate_code,worker_id)
)
declare @rows int = 10000000 --How many rows of data to create
--Create test data
;with tt(x) as (
select row_number() over (order by (select null)) from
(values (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) x10 (n),
(values (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) x100 (n),
(values (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) x1000 (n),
(values (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) x10000 (n),
(values (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) x100000 (n),
(values (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) x1000000 (n),
(values (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) x10000000 (n)
)
insert into #worker_rate (worker_id, rate_code)
select top(@rows) tt.x, rc.code
from tt
inner join (values ('a'),('b'),('c')) rc(code)
on tt.x % 4 <> 0
or rc.code <> 'c'
DECLARE @StartDT DATETIME = GETDATE();
--Original example
--;with c as (
-- select distinct w.worker_id, r.combo
-- from #worker_rate w
-- cross apply (
-- select combo = stuff((select ',' + r.rate_code
-- from #worker_rate r
-- where w.worker_id = r.worker_id
-- order by r.rate_code
-- for xml path('')),1,1,'')
-- ) r
--)
--select grp = dense_rank() over (order by c.combo), c.worker_id, c.combo
--from c
--order by 1, 2;
--SELECT [Method]='Original concat of rate codes as chars'
-- ,[ElapsedMS]=DATEDIFF(millisecond, @StartDT, GETDATE());
--SELECT @StartDT = GETDATE();
SELECT grp=COUNT(*), worker_id, combo=STUFF(
(
SELECT ',' + rate_code
FROM #worker_rate b
WHERE a.worker_id = b.worker_id
ORDER BY rate_code
FOR XML PATH('')
), 1, 1, '')
FROM #worker_rate a
GROUP BY worker_id
ORDER BY 1, 2;
SELECT [Method]='Improved concat of rate codes as chars'
,[ElapsedMS]=DATEDIFF(millisecond, @StartDT, GETDATE());
SELECT @StartDT = GETDATE();
--Optimal example
--create table #lookup (
-- rate_code nvarchar(10) collate database_default primary key
-- ,combo smallint
--)
--insert into #lookup (rate_code,combo)
--select q.rate_code, power(2, q.combo-1)
--from (
-- select distinct rate_code, combo=dense_rank() over (order by rate_code)
-- from #worker_rate
--) q
--select grp=dense_rank() over (order by combo), worker_id, combo
--from (
-- select p.worker_id, combo=sum(l.combo)
-- from #worker_rate p
-- inner join #lookup l on l.rate_code = p.rate_code
-- group by p.worker_id
--) q
--order by 1, 2
--SELECT [Method]='''Optimal'' Example of Divide and Conquer'
-- ,[ElapsedMS]=DATEDIFF(millisecond, @StartDT, GETDATE());
--SELECT @StartDT = GETDATE();
WITH lu AS
(
SELECT rate_code
,combo=POWER(2, ROW_NUMBER() OVER (ORDER BY rate_code)-1)
FROM
(
SELECT rate_code
FROM #worker_rate
GROUP BY rate_code
) a
)
SELECT grp=COUNT(*), worker_id, combo=SUM(combo)
FROM #worker_rate a
JOIN lu b ON a.rate_code = b.rate_code
GROUP BY worker_id
ORDER BY 1, 2;
SELECT [Method]='Divide and Conquer but in one step'
,[ElapsedMS]=DATEDIFF(millisecond, @StartDT, GETDATE());
SELECT @StartDT = GETDATE();
WITH lu AS
(
SELECT rate_code
,combo=POWER(2, ROW_NUMBER() OVER (ORDER BY rate_code)-1)
FROM
(
SELECT rate_code
FROM #worker_rate
GROUP BY rate_code
) a
),
UNIQUEnTuples (n, Tuples, ID) AS (
SELECT combo, CAST(rate_code AS VARCHAR(8000)), rate_code
FROM lu
UNION ALL
SELECT n.n+t.combo, CAST(t.rate_code AS VARCHAR(8000)) + ',' + n.Tuples, rate_code
FROM UNIQUEnTuples n
CROSS APPLY (
SELECT combo, rate_code
FROM lu t
WHERE t.rate_code < n.ID) t
)
SELECT grp, worker_id, combo=Tuples
FROM
(
SELECT grp=COUNT(*), worker_id, combo=SUM(combo)
FROM #worker_rate a
JOIN lu b ON a.rate_code = b.rate_code
GROUP BY worker_id
) a
JOIN UNIQUEnTuples b ON a.combo = b.n
ORDER BY 1, 2;
SELECT [Method]='Use nTuples to create strings'
,[ElapsedMS]=DATEDIFF(millisecond, @StartDT, GETDATE());
SELECT @StartDT = GETDATE();
WITH lu AS
(
SELECT rate_code
,combo=POWER(2, ROW_NUMBER() OVER (ORDER BY rate_code)-1)
FROM
(
SELECT rate_code
FROM #worker_rate
GROUP BY rate_code
) a
)
SELECT rate_code, combo, combo2=CAST(NULL AS VARCHAR(8000))
INTO #lookup2
FROM lu;
WITH UNIQUEnTuples (n, Tuples, ID) AS
(
SELECT combo, CAST(rate_code AS VARCHAR(8000)), rate_code
FROM #lookup2
UNION ALL
SELECT n.n+t.combo, CAST(t.rate_code AS VARCHAR(8000)) + ',' + n.Tuples, rate_code
FROM UNIQUEnTuples n
CROSS APPLY (
SELECT combo, rate_code
FROM #lookup2 t
WHERE t.rate_code < n.ID) t
)
MERGE #lookup2 t
USING UNIQUEnTuples s
ON t.combo = s.n
WHEN MATCHED THEN
UPDATE SET combo2 = s.Tuples
WHEN NOT MATCHED THEN
INSERT (rate_code, combo, combo2)
VALUES (s.Tuples, s.n, s.Tuples);
SELECT grp, worker_id, combo=combo2
FROM
(
SELECT grp=COUNT(*), worker_id, combo=SUM(combo)
FROM #worker_rate a
JOIN #lookup2 b ON a.rate_code = b.rate_code
GROUP BY worker_id
) a
JOIN #lookup2 b ON a.combo = b.combo
ORDER BY 1, 2;
SELECT [Method]='Divide and Conquer'
,[ElapsedMS]=DATEDIFF(millisecond, @StartDT, GETDATE());
SELECT @StartDT = GETDATE();
GO
--drop table #lookup;
drop table #lookup2;
drop table #worker_rate;
Note that neither of these new queries will do well if you have scores of rate codes.
Links to explain UNIQUEnTuples[/url] and how I made it faster.
My thought question: Have you ever been told that your query runs too fast?
My advice:
INDEXing a poor-performing query is like putting sugar on cat food. Yeah, it probably tastes better but are you sure you want to eat it?
The path of least resistance can be a slippery slope. Take care that fixing your fixes of fixes doesn't snowball and end up costing you more than fixing the root cause would have in the first place.
Need to UNPIVOT? Why not CROSS APPLY VALUES instead?[/url]
Since random numbers are too important to be left to chance, let's generate some![/url]
Learn to understand recursive CTEs by example.[/url]
[url url=http://www.sqlservercentral.com/articles/St