Faster way to populate number table variation with inline tally?

  • Hi,

    I have to create a table like this across a bunch of servers. Does anyone see a faster way to do it? I'm thinking that I'm overlooking something with needing two additional CTEs, but maybe not. I have it at 17 seconds, which isn't much faster than a while loop solution that's currently in place.

    Thanks

    DECLARE @START DATETIME,

    @msg NVARCHAR(MAX) = N''

    USE tempdb

    SELECT @START = GETDATE()

    CREATE TABLE dbo.EulerSource ( [SID] INT, Euler BIGINT )

    ;WITH E1(n) AS

    (

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1

    ),-- 10

    E2(n) AS (SELECT 1 FROM E1 CROSS JOIN E1 AS b), -- 10*10

    E3(n) AS (SELECT 1 FROM E1 CROSS JOIN E2),-- 10*100

    E4(n) AS (SELECT 1 FROM E1 CROSS JOIN E3) ,-- 10*1000

    E5(n) AS (SELECT 1 FROM E1 CROSS JOIN E4) ,-- 10*10000

    E6(n) AS (SELECT 1 FROM E1 CROSS JOIN E5) ,-- 10*100000

    E7(n) AS (SELECT 1 FROM E1 CROSS JOIN E6) ,-- 10*1000000

    E8(n) AS (SELECT 1 FROM E1 CROSS JOIN E7) ,-- 10*10000000

    EulerOrder AS (

    SELECT TOP 12963600

    [EulerOrder] = (ROW_NUMBER() OVER (ORDER BY (SELECT NULL) ) -1) / 3600

    FROM e8

    ),

    EulerRuler AS (

    SELECT eo.EulerOrder,

    [EulerRuler] = ROW_NUMBER() OVER(PARTITION BY eo.EulerOrder ORDER BY (SELECT NULL) ) - 1

    FROM EulerOrder eo

    )

    INSERT dbo.EulerSource

    ( [SID] ,

    Euler

    )

    SELECT el.EulerOrder ,

    el.EulerRuler

    FROM EulerRuler el

    WHERE el.EulerRuler >= el.EulerOrder

    ORDER BY el.EulerOrder

    SELECT @msg = 'CTE table created in '

    + CAST(DATEDIFF(MILLISECOND, @START, GETDATE()) AS VARCHAR)

    + ' milliseconds.'

    PRINT (@msg)

    SELECT @START = GETDATE()

    CREATE CLUSTERED INDEX [CQoS_SID] ON [dbo].[EulerSource]

    (

    [SID] ASC

    )WITH (ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]

    SELECT @msg = 'CTE clustered index created at '

    + CAST(DATEDIFF(MILLISECOND, @START, GETDATE()) AS VARCHAR)

    + ' milliseconds.'

    PRINT @msg

  • Quick(er) suggestion, simplifying the query and loosing the sort required by the parallel execution plan

    😎

    USE tempdb;

    GO

    SET NOCOUNT ON;

    DECLARE @INT_BUCKET INT = 0;

    DECLARE @BIGINT_BUCKET BIGINT = 0;

    DECLARE @TIMING_RESULTS TABLE

    (

    TR_ID INT IDENTITY(1,1) NOT NULL PRIMARY KEY CLUSTERED

    ,TR_TS DATETIME2(7) NOT NULL DEFAULT (SYSDATETIME())

    ,TR_TX VARCHAR(100) NOT NULL

    );

    DECLARE @SAMPLE_SIZE INT = 12963600;

    -- Test one starts

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('sqldriver''s version');

    ;WITH E1(n) AS

    (

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1

    ),-- 10

    E2(n) AS (SELECT 1 FROM E1 CROSS JOIN E1 AS b), -- 10*10

    E3(n) AS (SELECT 1 FROM E1 CROSS JOIN E2),-- 10*100

    E4(n) AS (SELECT 1 FROM E1 CROSS JOIN E3) ,-- 10*1000

    E5(n) AS (SELECT 1 FROM E1 CROSS JOIN E4) ,-- 10*10000

    E6(n) AS (SELECT 1 FROM E1 CROSS JOIN E5) ,-- 10*100000

    E7(n) AS (SELECT 1 FROM E1 CROSS JOIN E6) ,-- 10*1000000

    E8(n) AS (SELECT 1 FROM E1 CROSS JOIN E7) ,-- 10*10000000

    EulerOrder AS (

    SELECT TOP 12963600 -- using a variable here results in a non-parallel plan (@SAMPLE_SIZE)

    [EulerOrder] = (ROW_NUMBER() OVER (ORDER BY (SELECT NULL) ) -1) / 3600

    FROM e8

    ),

    EulerRuler AS (

    SELECT eo.EulerOrder,

    [EulerRuler] = ROW_NUMBER() OVER(PARTITION BY eo.EulerOrder ORDER BY (SELECT NULL) ) - 1

    FROM EulerOrder eo

    )

    SELECT

    @INT_BUCKET = el.EulerOrder

    ,@BIGINT_BUCKET = el.EulerRuler

    FROM EulerRuler el

    WHERE el.EulerRuler >= el.EulerOrder

    ORDER BY el.EulerOrder

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('sqldriver''s version');

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version');

    ;WITH T(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))

    ,NUMS(N) AS (SELECT TOP(@SAMPLE_SIZE) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS N FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7,T T8,T T9)

    ,EulerOrder AS

    (

    SELECT TOP (@SAMPLE_SIZE)

    [EulerOrder] = NM.N / 3600

    ,[EulerRuler] = NM.N % 3600

    FROM NUMS NM

    )

    SELECT

    @INT_BUCKET = EU.EulerOrder

    ,@BIGINT_BUCKET = EU.EulerRuler

    FROM EulerOrder EU

    WHERE EU.EulerRuler >= EU.EulerOrder;

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version');

    SELECT

    TR.TR_TX AS OPERATION

    ,DATEDIFF(MICROSECOND, MIN(TR.TR_TS), MAX(TR.TR_TS)) AS DURATION

    FROM @TIMING_RESULTS TR

    GROUP BY TR.TR_TX

    ORDER BY 2;

    /* uncomment this section to get the execution plan */

    ;WITH XMLNAMESPACES ( DEFAULT 'http://schemas.microsoft.com/sqlserver/2004/07/showplan')

    ,QUERY_PLAN AS

    (

    SELECT

    XP.query_plan

    FROM sys.dm_exec_requests ER

    CROSS APPLY sys.dm_exec_query_plan(ER.plan_handle) XP

    WHERE ER.session_id = @@SPID

    )

    SELECT

    QP.query_plan

    FROM QUERY_PLAN QP;

    Results

    OPERATIONDURATION

    Eirikur's version3778410

    sqldriver's version10323425

  • As always, the documentation is in the code. First, the run results.

    OPERATION DURATION

    ---------------------------------------------------------------------------------------------------- -----------

    Jeff's version, which also creates and populates the table... 3014173

    Eirikur's version 4624264

    sqldriver's version 16524945

    USE tempdb;

    GO

    SET NOCOUNT ON;

    DECLARE @INT_BUCKET INT = 0;

    DECLARE @BIGINT_BUCKET BIGINT = 0;

    DECLARE @TIMING_RESULTS TABLE

    (

    TR_ID INT IDENTITY(1,1) NOT NULL PRIMARY KEY CLUSTERED

    ,TR_TS DATETIME2(7) NOT NULL DEFAULT (SYSDATETIME())

    ,TR_TX VARCHAR(100) NOT NULL

    );

    DECLARE @SAMPLE_SIZE INT = 12963600;

    -- Test one starts

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('sqldriver''s version');

    ;WITH E1(n) AS

    (

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1

    ),-- 10

    E2(n) AS (SELECT 1 FROM E1 CROSS JOIN E1 AS b), -- 10*10

    E3(n) AS (SELECT 1 FROM E1 CROSS JOIN E2),-- 10*100

    E4(n) AS (SELECT 1 FROM E1 CROSS JOIN E3) ,-- 10*1000

    E5(n) AS (SELECT 1 FROM E1 CROSS JOIN E4) ,-- 10*10000

    E6(n) AS (SELECT 1 FROM E1 CROSS JOIN E5) ,-- 10*100000

    E7(n) AS (SELECT 1 FROM E1 CROSS JOIN E6) ,-- 10*1000000

    E8(n) AS (SELECT 1 FROM E1 CROSS JOIN E7) ,-- 10*10000000

    EulerOrder AS (

    SELECT TOP 12963600 -- using a variable here results in a non-parallel plan (@SAMPLE_SIZE)

    [EulerOrder] = (ROW_NUMBER() OVER (ORDER BY (SELECT NULL) ) -1) / 3600

    FROM e8

    ),

    EulerRuler AS (

    SELECT eo.EulerOrder,

    [EulerRuler] = ROW_NUMBER() OVER(PARTITION BY eo.EulerOrder ORDER BY (SELECT NULL) ) - 1

    FROM EulerOrder eo

    )

    SELECT

    @INT_BUCKET = el.EulerOrder

    ,@BIGINT_BUCKET = el.EulerRuler

    FROM EulerRuler el

    WHERE el.EulerRuler >= el.EulerOrder

    ORDER BY el.EulerOrder

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('sqldriver''s version');

    -----------------------------------------------------------------------------------------------------------------------

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version');

    ;WITH T(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))

    ,NUMS(N) AS (SELECT TOP(@SAMPLE_SIZE) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS N FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7,T T8,T T9)

    ,EulerOrder AS

    (

    SELECT TOP (@SAMPLE_SIZE)

    [EulerOrder] = NM.N / 3600

    ,[EulerRuler] = NM.N % 3600

    FROM NUMS NM

    )

    SELECT

    @INT_BUCKET = EU.EulerOrder

    ,@BIGINT_BUCKET = EU.EulerRuler

    FROM EulerOrder EU

    WHERE EU.EulerRuler >= EU.EulerOrder;

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version');

    -----------------------------------------------------------------------------------------------------------------------

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Jeff''s version, which also creates and populates the table...')

    ;

    --=====================================================================================================================

    -- Create and populate the test table with the product of a simple "TRIANGULAR JOIN" such that for every value of

    -- [SID] (0 thru 3599), the Euler column will have a value greater than or equal to the [SID] thru 3599. For

    -- reference purposes, a "Triangular Join" is half of a "Square Join", which is also known as a "Cartesian

    -- Product" or "Cross Join".

    --=====================================================================================================================

    --===== If the test table alreeady exists, drop it to make reruns in SSMS easier

    IF OBJECT_ID('dbo.EulerSource','U') IS NOT NULL

    DROP TABLE dbo.EulerSource

    ;

    --===== Create AND populate the table on-the-fly.

    WITH

    E1(N) AS (

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1

    ) -- 10^ 1 = 10

    ,E4(N) AS (SELECT 1 FROM E1 a, E1 b, E1 c, E1 d) -- 10^ 4 = 10,000

    ,E16(N) AS (SELECT 1 FROM E1 a, E1 b, E1 c, E1 d) -- 10^16 = 1 followed by more zero's than you can shake a stick at.

    ,Tally(N) AS (

    SELECT 0 UNION ALL --This is faster than subtracting a 1 for every ROW_NUMBER().

    SELECT TOP 3599 N = ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E16

    )

    SELECT [SID] = ISNULL(CAST(t1.N AS INT),0) -- ISNULL makes the column NOT NULL

    ,Euler = ISNULL(CAST(t2.N AS INT),0) -- ISNULL makes the column NOT NULL

    INTO dbo.EulerSource -- This creates the table

    FROM Tally t1, Tally t2 -- Yep... 'nuther happy little Cross Join here...

    WHERE t2.N >= t1.N -- ... and this limits it to a Triangular Join.

    ;

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Jeff''s version, which also creates and populates the table...');

    SELECT

    TR.TR_TX AS OPERATION

    ,DATEDIFF(MICROSECOND, MIN(TR.TR_TS), MAX(TR.TR_TS)) AS DURATION

    FROM @TIMING_RESULTS TR

    GROUP BY TR.TR_TX

    ORDER BY DURATION;

    --ORDER BY 2; --ORDER BY column ordinal has been deprecated and should not be used.

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • @sqldriver,

    Apologies. Forgot to post the code to create the clustered index. SQL Server REALLY likes unique indexes and REALLY likes NOT NULL columns. With that in mind, that's why I used the ISNULL() thing in the code I posted. It forces the result table in the SELECT/INTO (which, as an added bonus, is also minimally logged if the DB is in the BULK LOGGED or SIMPLE recovery mode as is the creation of the clustered index) to produce NOT NULL columns. Since the composite of both columns form a unique set of rows, is seems natural to make unique clustered index that doesn't allow NULLs by making the composite of the two columns the PK.

    You might also want to reconsider the method you're currently using for your inline Tally Table. That's a whole lot of code to just get to 10^8 and the extra cross joins do tend to slow the larger number generations down a bit.

    Finally, since this table probably won't be written to or updated (although it might be dropped and recreated), I force the FILLFACTOR to 100 for space conservation (if someone has changed the default to something lower) and for a bit of extra speed during SELECTs.

    --===== Create the unique index as the PRIMARY KEY.

    ALTER TABLE dbo.EulerSource

    ADD CONSTRAINT PK_EulerSource PRIMARY KEY CLUSTERED ([SID] ASC, Euler ASC)

    WITH (ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 100) ON [PRIMARY]

    ;

    As a final note, this generates fast enough. Do you really need for it to be a table?

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • Just brilliant as always Jeff, Chapeau!

    Quick modification, mainly dropping the ISNULL as the set will not contain any nulls

    😎

    Run results

    OPERATION DURATION

    --------------------------------------------------------------- -----------

    Eirikur's version with Jeff's tricks 2416806

    Jeff's version, which also creates and populates the table... 2628008

    Eirikur's version 3656208

    sqldriver's version 10546826

    The code (test harness)

    USE tempdb;

    GO

    SET NOCOUNT ON;

    DECLARE @INT_BUCKET INT = 0;

    DECLARE @BIGINT_BUCKET BIGINT = 0;

    DECLARE @TIMING_RESULTS TABLE

    (

    TR_ID INT IDENTITY(1,1) NOT NULL PRIMARY KEY CLUSTERED

    ,TR_TS DATETIME2(7) NOT NULL DEFAULT (SYSDATETIME())

    ,TR_TX VARCHAR(100) NOT NULL

    );

    DECLARE @SAMPLE_SIZE INT = 12963600;

    -- Test one starts

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('sqldriver''s version');

    ;WITH E1(n) AS

    (

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1

    ),-- 10

    E2(n) AS (SELECT 1 FROM E1 CROSS JOIN E1 AS b), -- 10*10

    E3(n) AS (SELECT 1 FROM E1 CROSS JOIN E2),-- 10*100

    E4(n) AS (SELECT 1 FROM E1 CROSS JOIN E3) ,-- 10*1000

    E5(n) AS (SELECT 1 FROM E1 CROSS JOIN E4) ,-- 10*10000

    E6(n) AS (SELECT 1 FROM E1 CROSS JOIN E5) ,-- 10*100000

    E7(n) AS (SELECT 1 FROM E1 CROSS JOIN E6) ,-- 10*1000000

    E8(n) AS (SELECT 1 FROM E1 CROSS JOIN E7) ,-- 10*10000000

    EulerOrder AS (

    SELECT TOP 12963600 -- using a variable here results in a non-parallel plan (@SAMPLE_SIZE)

    [EulerOrder] = (ROW_NUMBER() OVER (ORDER BY (SELECT NULL) ) -1) / 3600

    FROM e8

    ),

    EulerRuler AS (

    SELECT eo.EulerOrder,

    [EulerRuler] = ROW_NUMBER() OVER(PARTITION BY eo.EulerOrder ORDER BY (SELECT NULL) ) - 1

    FROM EulerOrder eo

    )

    SELECT

    @INT_BUCKET = el.EulerOrder

    ,@BIGINT_BUCKET = el.EulerRuler

    FROM EulerRuler el

    WHERE el.EulerRuler >= el.EulerOrder

    ORDER BY el.EulerOrder

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('sqldriver''s version');

    -----------------------------------------------------------------------------------------------------------------------

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version');

    ;WITH T(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))

    ,NUMS(N) AS (SELECT TOP(@SAMPLE_SIZE) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS N FROM T T1,T T2,T T3,T T4,T T5,T T6,T T7,T T8,T T9)

    ,EulerOrder AS

    (

    SELECT --TOP (@SAMPLE_SIZE)

    [EulerOrder] = NM.N / 3600

    ,[EulerRuler] = NM.N % 3600

    FROM NUMS NM

    )

    SELECT

    @INT_BUCKET = EU.EulerOrder

    ,@BIGINT_BUCKET = EU.EulerRuler

    FROM EulerOrder EU

    WHERE EU.EulerRuler >= EU.EulerOrder;

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version');

    -----------------------------------------------------------------------------------------------------------------------

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Jeff''s version, which also creates and populates the table...')

    ;

    --=====================================================================================================================

    -- Create and populate the test table with the product of a simple "TRIANGULAR JOIN" such that for every value of

    -- [SID] (0 thru 3599), the Euler column will have a value greater than or equal to the [SID] thru 3599. For

    -- reference purposes, a "Triangular Join" is half of a "Square Join", which is also known as a "Cartesian

    -- Product" or "Cross Join".

    --=====================================================================================================================

    --===== If the test table alreeady exists, drop it to make reruns in SSMS easier

    IF OBJECT_ID('dbo.EulerSource','U') IS NOT NULL

    DROP TABLE dbo.EulerSource

    ;

    --===== Create AND populate the table on-the-fly.

    WITH

    E1(N) AS (

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL

    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1

    ) -- 10^ 1 = 10

    ,E4(N) AS (SELECT 1 FROM E1 a, E1 b, E1 c, E1 d) -- 10^ 4 = 10,000

    ,E16(N) AS (SELECT 1 FROM E1 a, E1 b, E1 c, E1 d) -- 10^16 = 1 followed by more zero's than you can shake a stick at.

    ,Tally(N) AS (

    SELECT 0 UNION ALL --This is faster than subtracting a 1 for every ROW_NUMBER().

    SELECT TOP 3599 N = ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E16

    )

    SELECT [SID] = ISNULL(CAST(t1.N AS INT),0) -- ISNULL makes the column NOT NULL

    ,Euler = ISNULL(CAST(t2.N AS INT),0) -- ISNULL makes the column NOT NULL

    INTO dbo.EulerSource -- This creates the table

    FROM Tally t1, Tally t2 -- Yep... 'nuther happy little Cross Join here...

    WHERE t2.N >= t1.N -- ... and this limits it to a Triangular Join.

    ;

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Jeff''s version, which also creates and populates the table...');

    -----------------------------------------------------------------------------------------------------------------------

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version with Jeff''s tricks');

    --===== If the test table alreeady exists, drop it to make reruns in SSMS easier

    IF OBJECT_ID('dbo.EEulerSource','U') IS NOT NULL

    DROP TABLE dbo.EEulerSource

    ;

    ;WITH T(N) AS (SELECT N FROM (VALUES (NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL)) AS X(N))

    ,NUMS(N) AS (SELECT 0 AS N UNION ALL SELECT TOP(3599) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS N FROM T T1,T T2,T T3,T T4)

    SELECT

    [SID] = EUO.N

    ,Euler = EUR.N

    INTO dbo.EEulerSource

    FROM NUMS EUO, NUMS EUR

    WHERE EUR.N >= EUO.N;

    INSERT INTO @TIMING_RESULTS(TR_TX) VALUES('Eirikur''s version with Jeff''s tricks');

    -----------------------------------------------------------------------------------------------------------------------

    SELECT

    TR.TR_TX AS OPERATION

    ,DATEDIFF(MICROSECOND, MIN(TR.TR_TS), MAX(TR.TR_TS)) AS DURATION

    FROM @TIMING_RESULTS TR

    GROUP BY TR.TR_TX

    ORDER BY DURATION;

    --ORDER BY 2; --ORDER BY column ordinal has been deprecated and should not be used.

    /* uncomment this section to get the execution plan */

    ;WITH XMLNAMESPACES ( DEFAULT 'http://schemas.microsoft.com/sqlserver/2004/07/showplan')

    ,QUERY_PLAN AS

    (

    SELECT

    XP.query_plan

    FROM sys.dm_exec_requests ER

    CROSS APPLY sys.dm_exec_query_plan(ER.plan_handle) XP

    WHERE ER.session_id = @@SPID

    )

    SELECT

    QP.query_plan

    FROM QUERY_PLAN QP;

  • Holy cow. Those are both way prettier solutions. Thanks to both of you.

    I ran them on my test box and got close enough results.

    Jeff, first, you need to apologize about as much as an Italian mackerel needs to set an alarm for Saturday morning 😀

    Second, this table is part of a process that runs hourly. So while it is much faster, I don't think we want to waste the compute time generating it that often. I know it looks a little strange.

    Eirikur - mid-post edit - taking out the ISNULL is a bit faster on generation, but I get an error when I try to create the index Jeff defined, which will be a nice boost to the calculations done using this table, I think (also owe you a PM).

    Msg 8111, Level 16, State 1, Line 137

    Cannot define PRIMARY KEY constraint on nullable column in table 'EulerSource'.

    Msg 1750, Level 16, State 0, Line 137

    Could not create constraint. See previous errors.

    Thanks

  • Eirikur Eiriksson (12/27/2014)


    Just brilliant as always Jeff, Chapeau!

    Quick modification, mainly dropping the ISNULL as the set will not contain any nulls

    😎

    Thanks, Eirikur. You're the consummate Gentleman and I'm frequently humbled by your gracious comments. :blush:

    I absolutely agree. The ISNULL does slow things down but it's essential in this case. Like I said in the embedded documentation, it's there to force the columns that are created during the SELECT/INTO (which builds the table on-the-fly) to be NOT NULL so that we can actually deploy a composite PK over the two columns. That, notwithstanding, I agree... tables don't actually need PKs. They just need a UNIQUE CLUSTERED INDEX and that doesn't require NOT NULL columns. I wanted to make sure that the columns showed up as NOT NULL because the optimizer loves NOT NULL columns, not to mention avoiding the page level NULLable column bits. By itself, such an optimization won't amount to much on this single table. Over several hundred tables, it does add up.

    Like Granny used to say, "Mind the pennies and the dollars will take care of themselves." 😀

    Still, it's a great thing to point out as a performance point for the creation of such tables and I sure do appreciate you taking the time to make the change and run the test.

    As a bit of a sidebar, it's a shame that CTEs are truly treated as inline views and that a single self-join of a CTE causes two executions of the CTE.

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • sqldriver (12/27/2014)


    Holy cow. Those are both way prettier solutions. Thanks to both of you.

    I ran them on my test box and got close enough results.

    Jeff, first, you need to apologize about as much as an Italian mackerel needs to set an alarm for Saturday morning 😀

    Second, this table is part of a process that runs hourly. So while it is much faster, I don't think we want to waste the compute time generating it that often. I know it looks a little strange.

    Eirikur - mid-post edit - taking out the ISNULL is a bit faster on generation, but I get an error when I try to create the index Jeff defined, which will be a nice boost to the calculations done using this table, I think (also owe you a PM).

    Msg 8111, Level 16, State 1, Line 137

    Cannot define PRIMARY KEY constraint on nullable column in table 'EulerSource'.

    Msg 1750, Level 16, State 0, Line 137

    Could not create constraint. See previous errors.

    Thanks

    You bet. Thank you very much for the feedback.

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • Eirikur Eiriksson (12/27/2014)


    Run results

    OPERATION DURATION

    --------------------------------------------------------------- -----------

    Eirikur's version with Jeff's tricks 2416806

    Jeff's version, which also creates and populates the table... 2628008

    Eirikur's version 3656208

    sqldriver's version 10546826

    p.s. I continue to be amazed at the performance of the hardware you're running on, Eirikur. My little 4 processor i5 laptop doesn't stand a chance.

    Shifting gears a bit and just to share a conviction of mine, I've recently been involved in a discussion about throwing heavy iron at problem code rather than fixing the code itself. As impressed as I am with the hardware, this is an outstanding example of how much more ROI one can get by knowing the ropes of T-SQL. The original problem on my laptop took 16524945 uS and took 10546826 uS on that wonderful box of yours. That's an improvement of 36%, which is awesome for "just" a hardware change. Using the time from my lesser powered laptop even for the version that uses ISNULL(), the time dropped to 3014173, which is nearly an 82% improvement. In simpler terms, the hardware improvement allowed for a 1/2X improvement. The change in code that we both made exceeds a 4.4X improvement in performance.

    I wonder where those folks on that other discussion I've been having will find a computer that actually runs 4.4X faster and how much it will cost? 😀 And it doesn't appear that MPP will actually help for a query like this one because 1) parallelism is being used even in the original query and 2) you actually might have to rewrite code anyway to take advantage of MPP because it's a different paradigm.

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • Jeff Moden (12/28/2014)


    Eirikur Eiriksson (12/27/2014)


    Just brilliant as always Jeff, Chapeau!

    Quick modification, mainly dropping the ISNULL as the set will not contain any nulls

    😎

    Thanks, Eirikur. You're the consummate Gentleman and I'm frequently humbled by your gracious comments. :blush:

    I absolutely agree. The ISNULL does slow things down but it's essential in this case. Like I said in the embedded documentation, it's there to force the columns that are created during the SELECT/INTO (which builds the table on-the-fly) to be NOT NULL so that we can actually deploy a composite PK over the two columns. That, notwithstanding, I agree... tables don't actually need PKs. They just need a UNIQUE CLUSTERED INDEX and that doesn't require NOT NULL columns. I wanted to make sure that the columns showed up as NOT NULL because the optimizer loves NOT NULL columns, not to mention avoiding the page level NULLable column bits. By itself, such an optimization won't amount to much on this single table. Over several hundred tables, it does add up.

    Like Granny used to say, "Mind the pennies and the dollars will take care of themselves." 😀

    Still, it's a great thing to point out as a performance point for the creation of such tables and I sure do appreciate you taking the time to make the change and run the test.

    As a bit of a sidebar, it's a shame that CTEs are truly treated as inline views and that a single self-join of a CTE causes two executions of the CTE.

    Thanks Jeff and point taken;-), which by the way I missed in the first pass, looking too much at the sequence generation but not the bigger picture of building a "usable" table containing the sequence. The ISNULL cost is close to 10% which is hardly anything when it comes to the benefits of having non-nullable columns as you mentioned. Comparing the method to other bulk-insert type methods it outperforms all I've tested (Merge, Traceflag 610 etc.) by at least 20-25%.

    😎

  • Jeff Moden (12/28/2014)


    As a bit of a sidebar, it's a shame that CTEs are truly treated as inline views and that a single self-join of a CTE causes two executions of the CTE.

    Jeff,

    What you're saying there really freaked me out.

    And you confirm this has never changed in later versions of SQL Server?

    So many times I used a CTE where the first excraction has the purpose of drastically lowering the row count of the table I will be dealing with later on. Instead every reference to the first extraction re-executes it.

    I guess it's time to get back to real, live tables.

  • I'm not exactly sure how to test that, but I do have a 2014 Dev edition installed, and ran the below code.

    If there's something in particular you'd like me to try to confirm this behavior, let me know.

    SET NOCOUNT ON

    SET STATISTICS IO, TIME ON

    /*

    SELECT @@VERSION

    Microsoft SQL Server 2014 - 12.0.2456.0 (X64)

    Dec 11 2014 17:32:03

    Copyright (c) Microsoft Corporation

    Developer Edition (64-bit) on Windows NT 6.1 <X64> (Build 7601: Service Pack 1)

    */

    USE AdventureWorks2014

    ;

    WITH pp

    AS ( SELECT p.BusinessEntityID ,

    p.Title ,

    p.FirstName ,

    p.LastName

    FROM Person.Person AS p

    WHERE p.BusinessEntityID % 2 = 0

    )

    SELECT *

    FROM pp p1

    JOIN pp p2 ON p2.BusinessEntityID = p1.BusinessEntityID

    /*

    SQL Server Execution Times:

    CPU time = 0 ms, elapsed time = 0 ms.

    Table 'Person'. Scan count 2, logical reads 7638, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    */

    ;

    WITH pp

    AS ( SELECT p.BusinessEntityID ,

    p.Title ,

    p.FirstName ,

    p.LastName

    FROM Person.Person AS p

    WHERE p.BusinessEntityID % 2 = 0

    ),

    pp2 AS( SELECT p1.BusinessEntityID ,

    p1.Title ,

    p1.FirstName ,

    p1.LastName

    FROM pp p1

    )

    SELECT *

    FROM pp2 p1

    JOIN pp2 p2 ON p2.BusinessEntityID = p1.BusinessEntityID

    /*

    SQL Server Execution Times:

    CPU time = 32 ms, elapsed time = 137 ms.

    Table 'Person'. Scan count 2, logical reads 7638, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    */

    EDIT: had top 10 in one query but not the other.

  • Michael Meierruth (12/29/2014)


    Jeff Moden (12/28/2014)


    As a bit of a sidebar, it's a shame that CTEs are truly treated as inline views and that a single self-join of a CTE causes two executions of the CTE.

    Jeff,

    What you're saying there really freaked me out.

    And you confirm this has never changed in later versions of SQL Server?

    So many times I used a CTE where the first excraction has the purpose of drastically lowering the row count of the table I will be dealing with later on. Instead every reference to the first extraction re-executes it.

    I guess it's time to get back to real, live tables.

    I haven't checked that on 2014 but 2012 created a separate execution plan for each call to the "Tally" cte in the code I posted.

    For me, the query I posted isn't a real problem to me because, even though executed twice, runs so fast that it doesn't matter. On other queries where I might need a self join to a CTE, I've been known to do a SELECT/INTO a Temp Table, maybe add an index, and the join to the table twice. "It Depends" on how much work the CTE has to do and these types of self joins dot come up very often.

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • sqldriver (12/29/2014)


    I'm not exactly sure how to test that, but I do have a 2014 Dev edition installed, and ran the below code.

    If there's something in particular you'd like me to try to confirm this behavior, let me know.

    SET NOCOUNT ON

    SET STATISTICS IO, TIME ON

    /*

    SELECT @@VERSION

    Microsoft SQL Server 2014 - 12.0.2456.0 (X64)

    Dec 11 2014 17:32:03

    Copyright (c) Microsoft Corporation

    Developer Edition (64-bit) on Windows NT 6.1 <X64> (Build 7601: Service Pack 1)

    */

    USE AdventureWorks2014

    ;

    WITH pp

    AS ( SELECT p.BusinessEntityID ,

    p.Title ,

    p.FirstName ,

    p.LastName

    FROM Person.Person AS p

    WHERE p.BusinessEntityID % 2 = 0

    )

    SELECT *

    FROM pp p1

    JOIN pp p2 ON p2.BusinessEntityID = p1.BusinessEntityID

    /*

    SQL Server Execution Times:

    CPU time = 0 ms, elapsed time = 0 ms.

    Table 'Person'. Scan count 2, logical reads 7638, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    */

    ;

    WITH pp

    AS ( SELECT p.BusinessEntityID ,

    p.Title ,

    p.FirstName ,

    p.LastName

    FROM Person.Person AS p

    WHERE p.BusinessEntityID % 2 = 0

    ),

    pp2 AS( SELECT p1.BusinessEntityID ,

    p1.Title ,

    p1.FirstName ,

    p1.LastName

    FROM pp p1

    )

    SELECT *

    FROM pp2 p1

    JOIN pp2 p2 ON p2.BusinessEntityID = p1.BusinessEntityID

    /*

    SQL Server Execution Times:

    CPU time = 32 ms, elapsed time = 137 ms.

    Table 'Person'. Scan count 2, logical reads 7638, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    */

    EDIT: had top 10 in one query but not the other.

    Just look at the actual execution plan for two sets of blocks that look similar.

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • Jeff Moden (12/29/2014)


    sqldriver (12/29/2014)


    I'm not exactly sure how to test that, but I do have a 2014 Dev edition installed, and ran the below code.

    If there's something in particular you'd like me to try to confirm this behavior, let me know.

    SET NOCOUNT ON

    SET STATISTICS IO, TIME ON

    /*

    SELECT @@VERSION

    Microsoft SQL Server 2014 - 12.0.2456.0 (X64)

    Dec 11 2014 17:32:03

    Copyright (c) Microsoft Corporation

    Developer Edition (64-bit) on Windows NT 6.1 <X64> (Build 7601: Service Pack 1)

    */

    USE AdventureWorks2014

    ;

    WITH pp

    AS ( SELECT p.BusinessEntityID ,

    p.Title ,

    p.FirstName ,

    p.LastName

    FROM Person.Person AS p

    WHERE p.BusinessEntityID % 2 = 0

    )

    SELECT *

    FROM pp p1

    JOIN pp p2 ON p2.BusinessEntityID = p1.BusinessEntityID

    /*

    SQL Server Execution Times:

    CPU time = 0 ms, elapsed time = 0 ms.

    Table 'Person'. Scan count 2, logical reads 7638, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    */

    ;

    WITH pp

    AS ( SELECT p.BusinessEntityID ,

    p.Title ,

    p.FirstName ,

    p.LastName

    FROM Person.Person AS p

    WHERE p.BusinessEntityID % 2 = 0

    ),

    pp2 AS( SELECT p1.BusinessEntityID ,

    p1.Title ,

    p1.FirstName ,

    p1.LastName

    FROM pp p1

    )

    SELECT *

    FROM pp2 p1

    JOIN pp2 p2 ON p2.BusinessEntityID = p1.BusinessEntityID

    /*

    SQL Server Execution Times:

    CPU time = 32 ms, elapsed time = 137 ms.

    Table 'Person'. Scan count 2, logical reads 7638, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    */

    EDIT: had top 10 in one query but not the other.

    Just look at the actual execution plan for two sets of blocks that look similar.

    See attached; doesn't seem to.

    I can test on 2012 later tonight. The laptop I had it installed on is sleeping with the BlackBerrys, so need a few to dig up the CD and install it somewhere else.

Viewing 15 posts - 1 through 15 (of 29 total)

You must be logged in to reply to this topic. Login to reply