RE: Which one is better select * into # temp from tableA Vs create #temp table insert into...

SSC Guru

Points: 100851

June 23, 2011 at 5:31 pm

This conversation resparked a curiousity on my part.

The following was tested in a 2k8 R1 environment off my local drives. End result, and if you read the notes you'll see why: No indexing, SELECT INTO. If indexing, fully prebuild it. Test yourself of course, but that's now my rule of thumb.

-- DROP TABLE CoreTable

-- DROP TABLE AggregateMe

CREATE TABLE CoreTable (ctID INT IDENTITY( 1, 1) NOT NULL,

ctNCID INT NULL,

ctAttr1 INT NOT NULL,

ctAttr2 INT NOT NULL,

CONSTRAINT PK_CoreTable PRIMARY KEY CLUSTERED (ctID)

)

GO

CREATE INDEX idx_CoreTable_Attr ON CoreTable (ctAttr1, ctAttr2)

GO

CREATE TABLE AggregateMe (ctID INT NOT NULL,

ctNCID INT NOT NULL,

amSumUpVal DECIMAL(20,4) NULL,

amAvgUpVal DECIMAL(20,4) NULL)

GO

CREATE CLUSTERED INDEX cidx_AggregateMe ON AggregateMe (ctID)

GO

-- Generate some test data:

INSERT INTO CoreTable (ctAttr1, ctAttr2)

SELECT

rn%10 AS Attr1,

rn%20 AS Attr2

FROM

(SELECT TOP 100000

Row_number() OVER (ORDER BY RAND()) AS rn

FROM

master..syscolumns AS s1

CROSS JOIN

master..syscolumns AS s2

CROSS JOIN

master..syscolumns AS s3

) AS drv

UPDATE CoreTable SET ctNCID = 100005 - ctID --(we want it in a different order)

-- TRUNCATE TABLE AggregateMe

INSERT INTO AggregateMe

SELECT

ct.ctID,

ct.ctNCID,

CONVERT( DECIMAL ( 20, 4), CHECKSUM( NEWID())) / CONVERT( DECIMAL ( 20, 4), CHECKSUM( NEWID())) AS amSumUpVal,

CONVERT( DECIMAL ( 20, 4), CHECKSUM( NEWID())) / CONVERT( DECIMAL ( 20, 4), CHECKSUM( NEWID())) AS amAvgUpVal

FROM

CoreTable AS ct

CROSS JOIN

(SELECT 1 AS n UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5

UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9 UNION ALL SELECT 0)

AS drvMultiplier

GO

select top 10 * from CoreTable

SELECT TOP 50 * FROM AggregateMe

GO

-- So, we have our baseline data in place, now, for testing. Run twice.

SET STATISTICS IO, TIME ON

PRINT 'Create Table'

CREATE TABLE #PreBuild (ctID INT, SumVal Decimal(20,4), AvgVal Decimal(20,4))

PRINT 'Insert Into'

INSERT INTO #PreBuild

SELECT

ctID,

SUM( amSumUpVal) AS SumVal,

AVG( amAvgUpVal) AS AvgVal

FROM

AggregateMe AS am

GROUP BY

ctID

HAVING

SUM( AmSumUpVal) > 0

PRINT 'SELECT INTO'

SELECT

ctID,

SUM( amSumUpVal) AS SumVal,

AVG( amAvgUpVal) AS AvgVal

INTO

#SelectInto

FROM

AggregateMe AS am

GROUP BY

ctID

HAVING

SUM( AmSumUpVal) > 0

SET STATISTICS TIME, IO OFF

IF OBJECT_ID( 'tempdb..#PreBuild') IS NOT NULL

DROP TABLE #PreBuild

IF OBJECT_ID( 'tempdb..#SelectInto') IS NOT NULL

DROP TABLE #SelectInto

GO

-- So, in general, no indexes and whatnot on the tables, the SELECT INTO runs at about 10-15ms faster.

-- This is insignificant in general, especially in context of this build.

-- This is only important in hyper-optimization.

-- NEXT!

-- Index discussions.

-- Note the ctNCID is at the exact same selectivity level as ctID on purpose.

-- This is a functionality optimization test, not a real-world scenario. The idea is to prove

-- the underlying theory, without having to include a heuristics discussion.

-- First, building off the clustered index in AggregateMe

SET NOCOUNT ON

PRINT 'CREATE TABLE - CLUSTERED INDEX BEFORE INSERT, Using ctID'

SET STATISTICS TIME ON

CREATE TABLE #PreBuild (ctID INT, SumVal Decimal(20,4), AvgVal Decimal(20,4))

SET STATISTICS TIME OFF

PRINT 'CREATE INDEX - CLUSTERED INDEX BEFORE INSERT, Using ctID'

SET STATISTICS TIME ON

CREATE CLUSTERED INDEX cidx_Prebuild ON #PreBuild (ctID)

SET STATISTICS TIME OFF

PRINT 'INSERT INTO - CLUSTERED INDEX BEFORE INSERT, Using ctID'

SET STATISTICS TIME ON

INSERT INTO #PreBuild

SELECT

ctID,

SUM( amSumUpVal) AS SumVal,

AVG( amAvgUpVal) AS AvgVal

FROM

AggregateMe AS am

GROUP BY

ctID

HAVING

SUM( AmSumUpVal) > 0

SET STATISTICS TIME OFF

PRINT 'CREATE TABLE - CLUSTERED INDEX AFTER INSERT, Using ctID'

SET STATISTICS TIME ON

CREATE TABLE #PreBuild2 (ctID INT, SumVal Decimal(20,4), AvgVal Decimal(20,4))

SET STATISTICS TIME OFF

PRINT 'INSERT INTO - CLUSTERED INDEX AFTER INSERT, Using ctID'

SET STATISTICS TIME ON

INSERT INTO #PreBuild2

SELECT

ctID,

SUM( amSumUpVal) AS SumVal,

AVG( amAvgUpVal) AS AvgVal

FROM

AggregateMe AS am

GROUP BY

ctID

HAVING

SUM( AmSumUpVal) > 0

SET STATISTICS TIME OFF

PRINT 'CREATE INDEX - CLUSTERED INDEX AFTER INSERT, Using ctID'

SET STATISTICS TIME ON

CREATE CLUSTERED INDEX cidx_Prebuild ON #PreBuild2 (ctID)

SET STATISTICS TIME OFF

PRINT 'SELECT INTO'

SET STATISTICS TIME ON

SELECT

ctID,

SUM( amSumUpVal) AS SumVal,

AVG( amAvgUpVal) AS AvgVal

INTO

#SelectInto

FROM

AggregateMe AS am

GROUP BY

ctID

HAVING

SUM( AmSumUpVal) > 0

SET STATISTICS TIME OFF

PRINT 'CREATE INDEX - CLUSTERED INDEX AFTER INSERT, Using ctID'

SET STATISTICS TIME ON

CREATE CLUSTERED INDEX cidx_SelectInto ON #SelectInto (ctID)

SET STATISTICS TIME OFF

IF OBJECT_ID( 'tempdb..#PreBuild') IS NOT NULL

DROP TABLE #PreBuild

IF OBJECT_ID( 'tempdb..#PreBuild2') IS NOT NULL

DROP TABLE #PreBuild2

IF OBJECT_ID( 'tempdb..#SelectInto') IS NOT NULL

DROP TABLE #SelectInto

GO

-- The reason I wrapped the ON/OFF this way is to emphasize the CREATE INDEX having

-- three unique time components involved that must be summed to have the actual full

-- time it took to build the index. In this case ~150 milliseconds, much higher than

-- the extra ~20 milliseconds that it takes to insert into the table if it's pre-built.

-- Since SELECT INTO can't have the clustered pre-built, it takes less time then the

-- prebuilt table with post-built clustered, but is still more time consuming then the

-- full prebuilt.

-- With this test, we've proven that building the clustered index on the temp table

-- before inserting data that is grouped on the clustered index from the source table

-- is faster. Let's test the ctNCID, which we'll index equivalently.

--This code is the exact same as above, just using the ctNCID column instead.

SET NOCOUNT ON

PRINT 'CREATE TABLE - CLUSTERED INDEX BEFORE INSERT, Using ctID'

SET STATISTICS TIME ON

CREATE TABLE #PreBuild (ctNCID INT, SumVal Decimal(20,4), AvgVal Decimal(20,4))

SET STATISTICS TIME OFF

PRINT 'CREATE INDEX - CLUSTERED INDEX BEFORE INSERT, Using ctID'

SET STATISTICS TIME ON

CREATE CLUSTERED INDEX cidx_Prebuild ON #PreBuild (ctNCID)

SET STATISTICS TIME OFF

PRINT 'INSERT INTO - CLUSTERED INDEX BEFORE INSERT, Using ctID'

SET STATISTICS TIME ON

INSERT INTO #PreBuild

SELECT

ctNCID,

SUM( amSumUpVal) AS SumVal,

AVG( amAvgUpVal) AS AvgVal

FROM

AggregateMe AS am

GROUP BY

ctNCID

HAVING

SUM( AmSumUpVal) > 0

SET STATISTICS TIME OFF

PRINT 'CREATE TABLE - CLUSTERED INDEX AFTER INSERT, Using ctID'

SET STATISTICS TIME ON

CREATE TABLE #PreBuild2 (ctNCID INT, SumVal Decimal(20,4), AvgVal Decimal(20,4))

SET STATISTICS TIME OFF

PRINT 'INSERT INTO - CLUSTERED INDEX AFTER INSERT, Using ctID'

SET STATISTICS TIME ON

INSERT INTO #PreBuild2

SELECT

ctNCID,

SUM( amSumUpVal) AS SumVal,

AVG( amAvgUpVal) AS AvgVal

FROM

AggregateMe AS am

GROUP BY

ctNCID

HAVING

SUM( AmSumUpVal) > 0

SET STATISTICS TIME OFF

PRINT 'CREATE INDEX - CLUSTERED INDEX AFTER INSERT, Using ctID'

SET STATISTICS TIME ON

CREATE CLUSTERED INDEX cidx_Prebuild ON #PreBuild2 (ctNCID)

SET STATISTICS TIME OFF

PRINT 'SELECT INTO'

SET STATISTICS TIME ON

SELECT

ctNCID,

SUM( amSumUpVal) AS SumVal,

AVG( amAvgUpVal) AS AvgVal

INTO

#SelectInto

FROM

AggregateMe AS am

GROUP BY

ctNCID

HAVING

SUM( AmSumUpVal) > 0

SET STATISTICS TIME OFF

PRINT 'CREATE INDEX - CLUSTERED INDEX AFTER INSERT, Using ctID'

SET STATISTICS TIME ON

CREATE CLUSTERED INDEX cidx_SelectInto ON #SelectInto (ctNCID)

SET STATISTICS TIME OFF

IF OBJECT_ID( 'tempdb..#PreBuild') IS NOT NULL

DROP TABLE #PreBuild

IF OBJECT_ID( 'tempdb..#PreBuild2') IS NOT NULL

DROP TABLE #PreBuild2

IF OBJECT_ID( 'tempdb..#SelectInto') IS NOT NULL

DROP TABLE #SelectInto

GO

-- Skipping the gory details, the fully pre-built STILL wins.

-- Even though it takes on average about 40ms longer to insert all the data,

-- the subsequent clustered index build takes about 180ms.

-- So, no indexing, go with SELECT INTO. If you need indexing, fully pre-build it.

- Craig Farrell

Never stop learning, even if it hurts. Ego bruises are practically mandatory as you learn unless you've never risked enough to make a mistake.

For better assistance in answering your questions[/url] | Forum Netiquette
For index/tuning help, follow these directions.[/url] |Tally Tables[/url]
Twitter: @AnyWayDBA