-- This script is closely based on that in the article.
-- I would strongly encourage the author of this article to try it out 🙂
-- Nice idea for an article, but size and speed are not XML strengths! 😉
-- Paul
-- *****************************************************************************************
-- WARNING: This script contains commands to clear the server-wide data and procedure caches
-- Do not run on anything other than a TEST instance!
--
-- Please ensure Actual Execution Plan is OFF for representative results
-- Total test time is 10 to 60 seconds (including test data creation), depending on hardware
-- *****************************************************************************************
USE tempdb;
GO
--
-- Drop the test tables if they exist
-- (schema-qualified, matching the OBJECT_ID checks, so the correct
--  objects are dropped regardless of the caller's default schema)
--
IF OBJECT_ID(N'dbo.Rainfall_Con', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_Con;
IF OBJECT_ID(N'dbo.Rainfall_XML', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_XML;
IF OBJECT_ID(N'tempdb..#BitBucket', N'U') IS NOT NULL DROP TABLE #BitBucket;
GO
--
-- Create the conventional table
-- One row per location, per date, per hour
--
CREATE TABLE dbo.Rainfall_Con
(
    [LocID]    INTEGER       NOT NULL,  -- Location identifier (1 to 100)
    [Date]     DATETIME      NOT NULL,  -- Calendar day (time portion is always midnight)
    [Hour]     TINYINT       NOT NULL,  -- Hour of day (0 to 23)
    [Rainfall] DECIMAL(10,8) NOT NULL   -- Rainfall reading for that hour
);
GO
--
-- Populate the conventional table with one year's data for 100 locations
-- This should complete in less than 30 seconds
--
DECLARE @Rows BIGINT;
SET @Rows = 100 * 365 * 24; -- 24 hours' data for 365 days for 100 locations
RAISERROR('Creating test data...', 0, 1) WITH NOWAIT;
;WITH Numbers (N)
AS (
-- Dynamically create rows numbered 0 to 875999
-- The three-way cross join of master.sys.all_columns yields far more rows
-- than TOP (@Rows) needs; ORDER BY (SELECT NULL) lets ROW_NUMBER number
-- the rows without imposing a real (and costly) sort order
SELECT TOP (@Rows)
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1
FROM master.sys.all_columns C1,
master.sys.all_columns C2,
master.sys.all_columns C3
)
INSERT dbo.Rainfall_Con WITH (TABLOCKX)
(
[LocID],
[Date],
[Hour],
[Rainfall]
)
-- Decompose N into (location, day, hour):
--   N / 365 / 24   -> location index 0-99 (+1 gives LocID 1-100)
--   (N / 24) % 366 -> day offset into 2008 (a 366-day leap year); the
--                     modulo wraps, so each location still gets 365
--                     distinct days, just starting at a different point
--   N % 24         -> hour of day 0-23
SELECT LocID = (N / 365 / 24) + 1,
[Date] = DATEADD(DAY, (N / 24) % 366 , '2008-01-01 00:00:00.000'),
[Hour] = N % 24,
-- RAND(CHECKSUM(NEWID())) reseeds per row; a bare RAND() would be
-- evaluated once and repeat the same value for every row
[Rainfall] = RAND(CHECKSUM(NEWID())) * 10 -- A new random value for each row
FROM Numbers;
GO
--
-- Now create the primary key
-- Doing this *after* inserting the data results in a compact and contiguous index
-- If the index were to exist during the insert, index pages may split resulting in
-- a lower data density, and significant fragmentation
--
RAISERROR('Creating index...', 0, 1) WITH NOWAIT;
ALTER TABLE dbo.Rainfall_Con
ADD CONSTRAINT PK_Rainfall_Con
PRIMARY KEY CLUSTERED([Date], [Hour], [LocID])
WITH (FILLFACTOR = 100); -- pack pages fully: this test data is read-only from here on
GO
--
-- Create the XML table
-- One row per location per date; the 24 hourly readings are folded into
-- a single XML document per row (built later with FOR XML RAW/ROOT)
--
CREATE TABLE dbo.Rainfall_XML
(
    [LocID]    INTEGER  NOT NULL,  -- Location identifier (1 to 100)
    [Date]     DATETIME NOT NULL,  -- Calendar day
    [Rainfall] XML      NOT NULL   -- Hourly readings: <RT><RN H="..." V="..."/>...</RT>
);
GO
--
-- Populate the XML table from the conventional table
-- One XML document per (LocID, Date) holding that day's 24 hourly readings
--
RAISERROR('Creating XML data...', 0, 1) WITH NOWAIT;
INSERT dbo.Rainfall_XML WITH (TABLOCKX)
(
    [LocID],
    [Date],
    [Rainfall]
)
SELECT [LocID],
    [Date],
    (
        -- The hourly data as XML: <RT><RN H="hour" V="rainfall"/>...</RT>
        SELECT H = [Hour],
               V = [Rainfall]
        FROM dbo.Rainfall_Con A WITH (READUNCOMMITTED)
        WHERE A.[LocID] = B.[LocID]
        AND A.[Date] = B.[Date]
        FOR XML RAW('RN'), ROOT('RT')
    )
FROM dbo.Rainfall_Con B WITH (READUNCOMMITTED)
GROUP BY
    [LocID],
    [Date];
GO
--
-- Add the primary key now, for the same reasons as before
-- ([Hour] is no longer part of the key - it lives inside the XML document)
--
RAISERROR('Creating index...', 0, 1) WITH NOWAIT;
ALTER TABLE dbo.Rainfall_XML
ADD CONSTRAINT PK_Rainfall_XML
PRIMARY KEY CLUSTERED([Date], [LocID])
WITH (FILLFACTOR = 100); -- pack pages fully: the table is read-only from here on
GO
--
-- Show the space used by each table
--
-- Results on my system:
--
-- Conventional : 876,000 rows, 27016KB reserved, 26856KB data, 96KB index, 64KB unused
-- XML : 36,500 rows, 36504KB reserved, 36504KB data, 112KB index, 64KB unused
--
-- ### XML representation is 35% *larger* ###
--
-- Object names are passed schema-qualified so the intended tables are
-- reported regardless of the caller's default schema
EXECUTE sys.sp_spaceused N'dbo.Rainfall_Con';
EXECUTE sys.sp_spaceused N'dbo.Rainfall_XML';
GO
--
-- This table is used to capture the results from the test
-- SELECT statements. This avoids any delays in sending the
-- rows to the client from skewing the results
--
CREATE TABLE #BitBucket
(
    [LocID]    INTEGER       NOT NULL,
    [Date]     DATETIME      NOT NULL,
    [Hour]     TINYINT       NOT NULL,
    [Rainfall] DECIMAL(10,8) NOT NULL
);
GO
--
-- TEST START
--
RAISERROR('Starting test', 0, 1) WITH NOWAIT;
--
-- Remove all ad-hoc SQL query plans from cache
-- (server-wide - see the warning at the top of the script)
--
DBCC FREESYSTEMCACHE (N'SQL Plans');
GO
--
-- Start with a cold data cache
-- Each implementation is run 10 times
-- One time with a cold cache, then nine times with a warm cache
-- This seems more representative to me, feel free to change it
-- (DROPCLEANBUFFERS is also server-wide - test instances only!)
--
DBCC DROPCLEANBUFFERS;
GO
/* SQL */
-- NB: the comment above labels this batch; the summary query at the end
-- extracts it with LEFT([text], 9), so it must stay exactly 9 characters
-- and remain the first text in the batch
INSERT #BitBucket WITH (TABLOCKX)
(
    [LocID],
    [Date],
    [Hour],
    [Rainfall]
)
SELECT TP.[LocID],
    TP.[Date],
    TP.[Hour],
    TP.[Rainfall]
FROM dbo.Rainfall_Con TP WITH (READUNCOMMITTED)
WHERE TP.[Date] = '20080106'
AND TP.[Hour] = 15
ORDER BY
    TP.[LocID] ASC;
GO 10
--
-- For fairness, recreate the dump table for the second run
-- TRUNCATE TABLE might give the second run an unfair advantage
-- since mixed and uniform extent allocations would not need
-- to be re-done
--
DROP TABLE #BitBucket;
GO
CREATE TABLE #BitBucket
(
    [LocID]    INTEGER       NOT NULL,
    [Date]     DATETIME      NOT NULL,
    [Hour]     TINYINT       NOT NULL,
    [Rainfall] DECIMAL(10,8) NOT NULL
);
GO
--
-- Second run - XML
--
DBCC DROPCLEANBUFFERS;
GO
/* XML */
-- NB: the comment above labels this batch; the summary query extracts it
-- with LEFT([text], 9), so it must stay exactly 9 characters and remain
-- the first text in the batch
INSERT #BitBucket WITH (TABLOCKX)
(
    [LocID],
    [Date],
    [Hour],
    [Rainfall]
)
SELECT TP.[LocID],
    TP.[Date],
    -- Shred the hour-15 reading back out of each day's XML document
    [Hour] = Rainfall.value('(/RT/RN[@H=15]/@H)[1]','tinyint'),
    [Rainfall] = Rainfall.value('(/RT/RN[@H=15]/@V)[1]','numeric(10,8)')
FROM dbo.Rainfall_XML TP WITH (READUNCOMMITTED)
WHERE TP.[Date] = '20080106'
ORDER BY
    TP.[LocID];
GO 10
--
-- Show the total execution resource usage for all ten runs
-- The first nine characters of each cached batch identify which test
-- the row belongs to; this query excludes itself via the NOT LIKE filter
--
SELECT query = LEFT(ST.[text], 9),
       execution_plan = EP.query_plan,
       run_count = S.execution_count,
       total_cpu_time_µs = S.total_worker_time,
       total_logical_reads = S.total_logical_reads,
       total_elapsed_time_µs = S.total_elapsed_time,
       avg_cpu_time_µs = S.total_worker_time / S.execution_count,
       avg_logical_reads = S.total_logical_reads / S.execution_count,
       avg_elapsed_time_µs = S.total_elapsed_time / S.execution_count
FROM sys.dm_exec_query_stats S
CROSS APPLY sys.dm_exec_sql_text (S.[sql_handle]) ST
CROSS APPLY sys.dm_exec_query_plan (S.[plan_handle]) EP
WHERE ST.[text] LIKE '%Rainfall%'
AND ST.[text] NOT LIKE '%dm_exec_query_stats%'
ORDER BY S.last_execution_time ASC;
GO
--
-- Sample results:
--
-- SQL Averages: CPU 683µs; Elapsed time: 683µs; Logical reads: 114
-- XML Averages: CPU 80078µs; Elapsed time: 83691µs; Logical reads: 126
--
-- Tidy up
-- (schema-qualified, matching the OBJECT_ID checks)
--
GO
IF OBJECT_ID(N'dbo.Rainfall_Con', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_Con;
IF OBJECT_ID(N'dbo.Rainfall_XML', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_XML;
IF OBJECT_ID(N'tempdb..#BitBucket', N'U') IS NOT NULL DROP TABLE #BitBucket;
GO
RAISERROR('Test run complete', 0, 1) WITH NOWAIT;
--
-- END SCRIPT
--
-- Paul White
-- SQLPerformance.com
-- SQLkiwi blog
-- @SQL_Kiwi