Skipping group with FOR XML Path('')

  • pdanes (5/20/2015)


    Thanks, I've looked at them. Interesting results - an order of magnitude difference from the slowest to fastest.

    This might be caused by the safer method, as Dwain already pointed out. That method will allow to use characters that the others won't, but it will hurt performance.

    Removing that option, my computer won't give a clear winner because, after several runs, the winner wasn't consistent even if the performance was.

    Here's a test using the same concatenation method for the 3 solutions (apples to apples) and cleaning the buffers to avoid advantages.

    CREATE TABLE #Test

    (

    id INT NOT NULL

    ,value VARCHAR(5) NOT NULL

    );

    SET NOCOUNT ON;

    WITH Tally (n) AS

    (

    -- 10,000 rows

    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL))

    FROM (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) a(n)

    CROSS JOIN (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) b(n)

    CROSS JOIN (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) c(n)

    CROSS JOIN (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d(n)

    )

    INSERT INTO #Test

    SELECT n, c

    -- 10,000 IDs

    FROM Tally a

    -- 25 rows to each ID

    CROSS APPLY

    (

    SELECT SUBSTRING('ABCDFGHIJKLMNOPQRSTUVWXYZ', n, 1)

    FROM Tally

    WHERE n <= 25

    ) b (c)

    UNION ALL

    SELECT n, 'E'

    -- E in only the even IDs

    FROM Tally a

    WHERE n % 2 = 0;

    DECLARE @ID INT, @Value VARCHAR(5);

    DBCC FREEPROCCACHE WITH NO_INFOMSGS

    DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS

    PRINT '+++ Dwain''s solution (heap):';

    SET STATISTICS TIME ON;

    SELECT @ID=id

    ,@Value=STUFF(

    (

    SELECT '-' + b.value

    FROM #Test b

    WHERE a.id = b.ID

    ORDER BY b.value

    FOR XML PATH('')

    ), 1, 1, '')

    FROM #Test a

    GROUP BY id

    HAVING COUNT(CASE WHEN value = 'E' THEN 1 END) > 0;

    SET STATISTICS TIME OFF;

    DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS

    PRINT '+++ Luis''s solution (heap):';

    SET STATISTICS TIME ON;

    SELECT @ID=id,

    @Value=STUFF((SELECT '-' + value FROM #Test it WHERE t.id = it.id

    ORDER BY value

    FOR XML PATH('')), 1, 1, '')

    FROM #Test t

    WHERE id IN (SELECT id FROM #Test WHERE value = 'E')

    GROUP BY id;

    SET STATISTICS TIME OFF;

    DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS

    PRINT '+++ Lynn''s solution (heap):';

    SET STATISTICS TIME ON;

    with BaseIds as (

    select distinct tt.Id from #Test tt where not exists(select 1 from #Test tt1 where tt.Id = tt1.Id and tt1.value in ('E'))

    )

    select @ID=bi.Id, @Value=stuff((select '-' + tt.value from #Test tt where bi.Id = tt.Id order by tt.value for xml path('')),1,1,'')

    from BaseIds bi;

    SET STATISTICS TIME OFF;

    ALTER TABLE #Test

    ADD CONSTRAINT t_pk PRIMARY KEY (id, value);

    DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS

    PRINT '+++ Dwain''s solution (PRIMARY KEY):';

    SET STATISTICS TIME ON;

    SELECT @ID=id

    ,@Value=STUFF(

    (

    SELECT '-' + b.value

    FROM #Test b

    WHERE a.id = b.ID

    ORDER BY b.value

    FOR XML PATH('')

    ), 1, 1, '')

    FROM #Test a

    GROUP BY id

    HAVING COUNT(CASE WHEN value = 'E' THEN 1 END) > 0;

    SET STATISTICS TIME OFF;

    DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS

    PRINT '+++ Luis''s solution (PRIMARY KEY):';

    SET STATISTICS TIME ON;

    SELECT @ID=id,

    @Value=STUFF((SELECT '-' + value FROM #Test it WHERE t.id = it.id

    ORDER BY value

    FOR XML PATH('')), 1, 1, '')

    FROM #Test t

    WHERE id IN (SELECT id FROM #Test WHERE value = 'E')

    GROUP BY id;

    SET STATISTICS TIME OFF;

    DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS

    PRINT '+++ Lynn''s solution (PRIMARY KEY):';

    SET STATISTICS TIME ON;

    with BaseIds as (

    select distinct tt.Id from #Test tt where not exists(select 1 from #Test tt1 where tt.Id = tt1.Id and tt1.value in ('E'))

    )

    select @ID=bi.Id, @Value=stuff((select '-' + tt.value from #Test tt where bi.Id = tt.Id order by tt.value for xml path('')),1,1,'')

    from BaseIds bi;

    SET STATISTICS TIME OFF;

    DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS

    ALTER TABLE #Test

    DROP CONSTRAINT t_pk;

    CREATE UNIQUE NONCLUSTERED INDEX t_ix1 ON #Test (id, value);

    DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS

    PRINT '+++ Dwain''s solution (UNIQUE NONCLUSTERED INDEX):';

    SET STATISTICS TIME ON;

    SELECT @ID=id

    ,@Value=STUFF(

    (

    SELECT '-' + b.value

    FROM #Test b

    WHERE a.id = b.ID

    ORDER BY b.value

    FOR XML PATH('')

    ), 1, 1, '')

    FROM #Test a

    GROUP BY id

    HAVING COUNT(CASE WHEN value = 'E' THEN 1 END) > 0;

    SET STATISTICS TIME OFF;

    DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS

    PRINT '+++ Luis''s solution (UNIQUE NONCLUSTERED INDEX):';

    SET STATISTICS TIME ON;

    SELECT @ID=id,

    @Value=STUFF((SELECT '-' + value FROM #Test it WHERE t.id = it.id

    ORDER BY value

    FOR XML PATH('')), 1, 1, '')

    FROM #Test t

    WHERE id IN (SELECT id FROM #Test WHERE value = 'E')

    GROUP BY id;

    SET STATISTICS TIME OFF;

    DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS

    PRINT '+++ Lynn''s solution (UNIQUE NONCLUSTERED INDEX):';

    SET STATISTICS TIME ON;

    with BaseIds as (

    select distinct tt.Id from #Test tt where not exists(select 1 from #Test tt1 where tt.Id = tt1.Id and tt1.value in ('E'))

    )

    select @ID=bi.Id, @Value=stuff((select '-' + tt.value from #Test tt where bi.Id = tt.Id order by tt.value for xml path('')),1,1,'')

    from BaseIds bi;

    SET STATISTICS TIME OFF;

    GO

    DROP TABLE #Test;

    +++ Dwain's solution (heap):

    SQL Server parse and compile time:

    CPU time = 955 ms, elapsed time = 955 ms.

    SQL Server Execution Times:

    CPU time = 1235 ms, elapsed time = 1230 ms.

    Warning: Null value is eliminated by an aggregate or other SET operation.

    +++ Luis's solution (heap):

    SQL Server parse and compile time:

    CPU time = 15 ms, elapsed time = 33 ms.

    SQL Server Execution Times:

    CPU time = 1549 ms, elapsed time = 1160 ms.

    +++ Lynn's solution (heap):

    SQL Server parse and compile time:

    CPU time = 16 ms, elapsed time = 22 ms.

    SQL Server Execution Times:

    CPU time = 1561 ms, elapsed time = 1162 ms.

    +++ Dwain's solution (PRIMARY KEY):

    SQL Server parse and compile time:

    CPU time = 3 ms, elapsed time = 3 ms.

    SQL Server Execution Times:

    CPU time = 375 ms, elapsed time = 515 ms.

    Warning: Null value is eliminated by an aggregate or other SET operation.

    +++ Luis's solution (PRIMARY KEY):

    SQL Server parse and compile time:

    CPU time = 16 ms, elapsed time = 70 ms.

    SQL Server Execution Times:

    CPU time = 312 ms, elapsed time = 399 ms.

    +++ Lynn's solution (PRIMARY KEY):

    SQL Server parse and compile time:

    CPU time = 16 ms, elapsed time = 27 ms.

    SQL Server Execution Times:

    CPU time = 297 ms, elapsed time = 383 ms.

    +++ Dwain's solution (UNIQUE NONCLUSTERED INDEX):

    SQL Server parse and compile time:

    CPU time = 15 ms, elapsed time = 45 ms.

    SQL Server Execution Times:

    CPU time = 375 ms, elapsed time = 839 ms.

    Warning: Null value is eliminated by an aggregate or other SET operation.

    +++ Luis's solution (UNIQUE NONCLUSTERED INDEX):

    SQL Server parse and compile time:

    CPU time = 0 ms, elapsed time = 39 ms.

    SQL Server Execution Times:

    CPU time = 297 ms, elapsed time = 753 ms.

    +++ Lynn's solution (UNIQUE NONCLUSTERED INDEX):

    SQL Server parse and compile time:

    CPU time = 0 ms, elapsed time = 41 ms.

    SQL Server Execution Times:

    CPU time = 328 ms, elapsed time = 629 ms.

    Luis C.
    General Disclaimer:
    Are you seriously taking the advice and code from someone from the internet without testing it? Do you at least understand it? Or can it easily kill your server?

    How to post data/code on a forum to get the best help: Option 1 / Option 2

Viewing post 16 (of 16 total)

You must be logged in to reply to this topic. Login to reply