-- This script is closely based on that in the article.
-- I would strongly encourage the author of this article to try it out 🙂
-- Nice idea for an article, but size and speed are not XML strengths! 😉
-- Paul
-- *****************************************************************************************
-- WARNING: This script contains commands to clear the server-wide data and procedure caches
-- Do not run on anything other than a TEST instance!
--
-- Please ensure Actual Execution Plan is OFF for representative results
-- Total test time is 10 to 60 seconds (including test data creation), depending on hardware
-- *****************************************************************************************
USE tempdb;
GO
--
-- Drop the test tables if they exist
-- (schema-qualified, matching the OBJECT_ID checks, so the correct
--  objects are dropped regardless of the caller's default schema)
--
IF OBJECT_ID(N'dbo.Rainfall_Con', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_Con;
IF OBJECT_ID(N'dbo.Rainfall_XML', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_XML;
IF OBJECT_ID(N'tempdb..#BitBucket', N'U') IS NOT NULL DROP TABLE #BitBucket;
GO
--
-- Create the conventional table
-- One row per location, per date, per hour
--
CREATE TABLE dbo.Rainfall_Con
(
    [LocID]    INTEGER       NOT NULL,  -- Location identifier (1 to 100)
    [Date]     DATETIME      NOT NULL,  -- Calendar day (time portion is always midnight)
    [Hour]     TINYINT       NOT NULL,  -- Hour of day (0 to 23)
    [Rainfall] DECIMAL(10,8) NOT NULL   -- Rainfall reading for that hour
);
GO
--
-- Populate the conventional table with one year's data for 100 locations
-- This should complete in less than 30 seconds
--
DECLARE @Rows BIGINT;
SET @Rows = 100 * 365 * 24; -- 24 hours' data for 365 days for 100 locations
RAISERROR('Creating test data...', 0, 1) WITH NOWAIT;
;WITH Numbers (N)
AS (
-- Dynamically create rows numbered 0 to 875999
-- The three-way cross join of master.sys.all_columns yields far more rows
-- than TOP (@Rows) needs; ORDER BY (SELECT NULL) lets ROW_NUMBER number
-- the rows without imposing a real (and costly) sort order
SELECT TOP (@Rows)
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1
FROM master.sys.all_columns C1,
master.sys.all_columns C2,
master.sys.all_columns C3
)
INSERT dbo.Rainfall_Con WITH (TABLOCKX)
(
[LocID],
[Date],
[Hour],
[Rainfall]
)
-- Decompose N into (location, day, hour):
--   N / 365 / 24   -> location index 0-99 (+1 gives LocID 1-100)
--   (N / 24) % 366 -> day offset into 2008 (a 366-day leap year); the
--                     modulo wraps, so each location still gets 365
--                     distinct days, just starting at a different point
--   N % 24         -> hour of day 0-23
SELECT LocID = (N / 365 / 24) + 1,
[Date] = DATEADD(DAY, (N / 24) % 366 , '2008-01-01 00:00:00.000'),
[Hour] = N % 24,
-- RAND(CHECKSUM(NEWID())) reseeds per row; a bare RAND() would be
-- evaluated once and repeat the same value for every row
[Rainfall] = RAND(CHECKSUM(NEWID())) * 10 -- A new random value for each row
FROM Numbers;
GO
--
-- Now create the primary key
-- Doing this *after* inserting the data results in a compact and contiguous index
-- If the index were to exist during the insert, index pages may split resulting in
-- a lower data density, and significant fragmentation
--
RAISERROR('Creating index...', 0, 1) WITH NOWAIT;
ALTER TABLE dbo.Rainfall_Con
ADD CONSTRAINT PK_Rainfall_Con
PRIMARY KEY CLUSTERED([Date], [Hour], [LocID])
WITH (FILLFACTOR = 100); -- pack pages fully: this test data is read-only from here on
GO
--
-- Create the XML table
-- One row per location per date; the 24 hourly readings are folded into
-- a single XML document per row (built later with FOR XML RAW/ROOT)
--
CREATE TABLE dbo.Rainfall_XML
(
    [LocID]    INTEGER  NOT NULL,  -- Location identifier (1 to 100)
    [Date]     DATETIME NOT NULL,  -- Calendar day
    [Rainfall] XML      NOT NULL   -- Hourly readings: <RT><RN H="..." V="..."/>...</RT>
);
GO
--
-- Populate the XML table from the conventional table
-- One XML document per (LocID, Date) holding that day's 24 hourly readings
--
RAISERROR('Creating XML data...', 0, 1) WITH NOWAIT;
INSERT dbo.Rainfall_XML WITH (TABLOCKX)
(
    [LocID],
    [Date],
    [Rainfall]
)
SELECT [LocID],
    [Date],
    (
        -- The hourly data as XML: <RT><RN H="hour" V="rainfall"/>...</RT>
        SELECT H = [Hour],
               V = [Rainfall]
        FROM dbo.Rainfall_Con A WITH (READUNCOMMITTED)
        WHERE A.[LocID] = B.[LocID]
        AND A.[Date] = B.[Date]
        FOR XML RAW('RN'), ROOT('RT')
    )
FROM dbo.Rainfall_Con B WITH (READUNCOMMITTED)
GROUP BY
    [LocID],
    [Date];
GO
--
-- Add the primary key now, for the same reasons as before
-- ([Hour] is no longer part of the key - it lives inside the XML document)
--
RAISERROR('Creating index...', 0, 1) WITH NOWAIT;
ALTER TABLE dbo.Rainfall_XML
ADD CONSTRAINT PK_Rainfall_XML
PRIMARY KEY CLUSTERED([Date], [LocID])
WITH (FILLFACTOR = 100); -- pack pages fully: the table is read-only from here on
GO
--
-- Show the space used by each table
--
-- Results on my system:
--
-- Conventional : 876,000 rows, 27016KB reserved, 26856KB data, 96KB index, 64KB unused
-- XML : 36,500 rows, 36504KB reserved, 36504KB data, 112KB index, 64KB unused
--
-- ### XML representation is 35% *larger* ###
--
-- Object names are passed schema-qualified so the intended tables are
-- reported regardless of the caller's default schema
EXECUTE sys.sp_spaceused N'dbo.Rainfall_Con';
EXECUTE sys.sp_spaceused N'dbo.Rainfall_XML';
GO
--
-- This table is used to capture the results from the test
-- SELECT statements. This avoids any delays in sending the
-- rows to the client from skewing the results
--
CREATE TABLE #BitBucket
(
    [LocID]    INTEGER       NOT NULL,
    [Date]     DATETIME      NOT NULL,
    [Hour]     TINYINT       NOT NULL,
    [Rainfall] DECIMAL(10,8) NOT NULL
);
GO
--
-- TEST START
--
RAISERROR('Starting test', 0, 1) WITH NOWAIT;
--
-- Remove all ad-hoc SQL query plans from cache
-- (server-wide - see the warning at the top of the script)
--
DBCC FREESYSTEMCACHE (N'SQL Plans');
GO
--
-- Start with a cold data cache
-- Each implementation is run 10 times
-- One time with a cold cache, then nine times with a warm cache
-- This seems more representative to me, feel free to change it
-- (DROPCLEANBUFFERS is also server-wide - test instances only!)
--
DBCC DROPCLEANBUFFERS;
GO
/* SQL */
-- NB: the comment above labels this batch; the summary query at the end
-- extracts it with LEFT([text], 9), so it must stay exactly 9 characters
-- and remain the first text in the batch
INSERT #BitBucket WITH (TABLOCKX)
(
    [LocID],
    [Date],
    [Hour],
    [Rainfall]
)
SELECT TP.[LocID],
    TP.[Date],
    TP.[Hour],
    TP.[Rainfall]
FROM dbo.Rainfall_Con TP WITH (READUNCOMMITTED)
WHERE TP.[Date] = '20080106'
AND TP.[Hour] = 15
ORDER BY
    TP.[LocID] ASC;
GO 10
--
-- For fairness, recreate the dump table for the second run
-- TRUNCATE TABLE might give the second run an unfair advantage
-- since mixed and uniform extent allocations would not need
-- to be re-done
--
DROP TABLE #BitBucket;
GO
CREATE TABLE #BitBucket
(
    [LocID]    INTEGER       NOT NULL,
    [Date]     DATETIME      NOT NULL,
    [Hour]     TINYINT       NOT NULL,
    [Rainfall] DECIMAL(10,8) NOT NULL
);
GO
--
-- Second run - XML
--
DBCC DROPCLEANBUFFERS;
GO
/* XML */
-- NB: the comment above labels this batch; the summary query extracts it
-- with LEFT([text], 9), so it must stay exactly 9 characters and remain
-- the first text in the batch
INSERT #BitBucket WITH (TABLOCKX)
(
    [LocID],
    [Date],
    [Hour],
    [Rainfall]
)
SELECT TP.[LocID],
    TP.[Date],
    -- Shred the hour-15 reading back out of each day's XML document
    [Hour] = Rainfall.value('(/RT/RN[@H=15]/@H)[1]','tinyint'),
    [Rainfall] = Rainfall.value('(/RT/RN[@H=15]/@V)[1]','numeric(10,8)')
FROM dbo.Rainfall_XML TP WITH (READUNCOMMITTED)
WHERE TP.[Date] = '20080106'
ORDER BY
    TP.[LocID];
GO 10
--
-- Show the total execution resource usage for all ten runs
-- The first nine characters of each cached batch identify which test
-- the row belongs to; this query excludes itself via the NOT LIKE filter
--
SELECT query = LEFT(ST.[text], 9),
       execution_plan = EP.query_plan,
       run_count = S.execution_count,
       total_cpu_time_µs = S.total_worker_time,
       total_logical_reads = S.total_logical_reads,
       total_elapsed_time_µs = S.total_elapsed_time,
       avg_cpu_time_µs = S.total_worker_time / S.execution_count,
       avg_logical_reads = S.total_logical_reads / S.execution_count,
       avg_elapsed_time_µs = S.total_elapsed_time / S.execution_count
FROM sys.dm_exec_query_stats S
CROSS APPLY sys.dm_exec_sql_text (S.[sql_handle]) ST
CROSS APPLY sys.dm_exec_query_plan (S.[plan_handle]) EP
WHERE ST.[text] LIKE '%Rainfall%'
AND ST.[text] NOT LIKE '%dm_exec_query_stats%'
ORDER BY S.last_execution_time ASC;
GO
--
-- Sample results:
--
-- SQL Averages: CPU 683µs; Elapsed time: 683µs; Logical reads: 114
-- XML Averages: CPU 80078µs; Elapsed time: 83691µs; Logical reads: 126
--
-- Tidy up
-- (schema-qualified, matching the OBJECT_ID checks)
--
GO
IF OBJECT_ID(N'dbo.Rainfall_Con', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_Con;
IF OBJECT_ID(N'dbo.Rainfall_XML', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_XML;
IF OBJECT_ID(N'tempdb..#BitBucket', N'U') IS NOT NULL DROP TABLE #BitBucket;
GO
RAISERROR('Test run complete', 0, 1) WITH NOWAIT;
--
-- END SCRIPT
--
-- Paul White
-- SQLPerformance.com
-- SQLkiwi blog
-- @SQL_Kiwi