• This script is closely based on that in the article.

    I would strongly encourage the author of this article to try it out 🙂

    Nice idea for an article, but size and speed are not XML strengths! 😉

    Paul

    -- *****************************************************************************************

    -- WARNING: This script contains commands to clear the server-wide data and procedure caches

    -- Do not run on anything other than a TEST instance!

    --

    -- Please ensure Actual Execution Plan is OFF for representative results

    -- Total test time is 10 to 60 seconds (including test data creation), depending on hardware

    -- *****************************************************************************************

    USE tempdb;

    GO

    --

    -- Drop the test tables if they exist

    --

    IF OBJECT_ID(N'dbo.Rainfall_Con', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_Con;
    IF OBJECT_ID(N'dbo.Rainfall_XML', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_XML;
    IF OBJECT_ID(N'tempdb..#BitBucket', N'U') IS NOT NULL DROP TABLE #BitBucket;

    GO

    --

    -- Create the conventional table

    --

    CREATE TABLE dbo.Rainfall_Con
    (
        [LocID]    INTEGER       NOT NULL,
        [Date]     DATETIME      NOT NULL,
        [Hour]     TINYINT       NOT NULL,
        [Rainfall] DECIMAL(10,8) NOT NULL
    );

    GO

    --

    -- Populate the conventional table with one year's data for 100 locations

    -- This should complete in less than 30 seconds

    --

    DECLARE @Rows BIGINT;

    SET @Rows = 100 * 365 * 24; -- 24 hours' data for 365 days for 100 locations

    RAISERROR('Creating test data...', 0, 1) WITH NOWAIT;

    ;WITH Numbers (N)
    AS (
        -- Dynamically create rows numbered 0 to 875999
        SELECT TOP (@Rows)
               ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1
        FROM master.sys.all_columns C1,
             master.sys.all_columns C2,
             master.sys.all_columns C3
    )
    INSERT dbo.Rainfall_Con WITH (TABLOCKX)
    (
        [LocID],
        [Date],
        [Hour],
        [Rainfall]
    )
    SELECT LocID      = (N / 365 / 24) + 1,
           [Date]     = DATEADD(DAY, (N / 24) % 366, '2008-01-01 00:00:00.000'),
           [Hour]     = N % 24,
           [Rainfall] = RAND(CHECKSUM(NEWID())) * 10 -- A new random value for each row
    FROM Numbers;

    GO
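
    --
    -- Optional sanity check (not in the original script): a quick look at the row count
    -- and the spread of locations, dates, and hours produced by the generator above
    --
    SELECT locations  = COUNT(DISTINCT [LocID]),
           dates      = COUNT(DISTINCT [Date]),
           hours      = COUNT(DISTINCT [Hour]),
           total_rows = COUNT_BIG(*)
    FROM dbo.Rainfall_Con WITH (READUNCOMMITTED);
    GO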

    --

    -- Now create the primary key

    -- Doing this *after* inserting the data results in a compact and contiguous index

    -- If the index existed during the insert, index pages might split, resulting in
    -- lower data density and significant fragmentation

    --

    RAISERROR('Creating index...', 0, 1) WITH NOWAIT;

    ALTER TABLE dbo.Rainfall_Con
        ADD CONSTRAINT PK_Rainfall_Con
        PRIMARY KEY CLUSTERED ([Date], [Hour], [LocID])
        WITH (FILLFACTOR = 100);

    GO
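
    --
    -- Optional check (not part of the original script): sys.dm_db_index_physical_stats
    -- can be used to confirm that building the index after the load really does give
    -- a dense, unfragmented structure
    --
    SELECT index_level, index_depth, page_count,
           avg_fragmentation_in_percent, avg_page_space_used_percent
    FROM sys.dm_db_index_physical_stats
         (DB_ID(N'tempdb'), OBJECT_ID(N'dbo.Rainfall_Con'), 1, NULL, 'DETAILED');
    GO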

    --

    -- Create the XML table

    --

    CREATE TABLE dbo.Rainfall_XML
    (
        [LocID]    INTEGER  NOT NULL,
        [Date]     DATETIME NOT NULL,
        [Rainfall] XML      NOT NULL
    );

    GO

    --

    -- Populate the XML table from the conventional table

    --

    RAISERROR('Creating XML data...', 0, 1) WITH NOWAIT;

    INSERT dbo.Rainfall_XML WITH (TABLOCKX)
    SELECT [LocID],
           [Date],
           (
               -- The hourly data as XML
               SELECT H = [Hour],
                      V = [Rainfall]
               FROM dbo.Rainfall_Con A WITH (READUNCOMMITTED)
               WHERE A.[LocID] = B.[LocID]
               AND A.[Date] = B.[Date]
               FOR XML RAW('RN'), ROOT('RT')
           )
    FROM dbo.Rainfall_Con B WITH (READUNCOMMITTED)
    GROUP BY [LocID], [Date];

    GO
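
    --
    -- For reference (not in the original script): each stored [Rainfall] value holds one
    -- day's readings for one location, shaped roughly like
    -- <RT><RN H="0" V="1.23456789"/><RN H="1" V="4.56789012"/> ... <RN H="23" V="0.98765432"/></RT>
    -- (values are random). Peek at one row if you want to see it:
    --
    SELECT TOP (1) [LocID], [Date], [Rainfall]
    FROM dbo.Rainfall_XML WITH (READUNCOMMITTED)
    ORDER BY [Date], [LocID];
    GO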

    --

    -- Add the primary key now, for the same reasons as before

    --

    RAISERROR('Creating index...', 0, 1) WITH NOWAIT;

    ALTER TABLE dbo.Rainfall_XML
        ADD CONSTRAINT PK_Rainfall_XML
        PRIMARY KEY CLUSTERED ([Date], [LocID])
        WITH (FILLFACTOR = 100);

    GO

    --

    -- Show the space used by each table

    --

    -- Results on my system:

    --

    -- Conventional : 876,000 rows, 27016KB reserved, 26856KB data, 96KB index, 64KB unused

    -- XML : 36,500 rows, 36504KB reserved, 36504KB data, 112KB index, 64KB unused

    --

    -- ### XML representation is 35% *larger* ###

    --

    EXECUTE sp_spaceused Rainfall_Con;

    EXECUTE sp_spaceused Rainfall_XML;

    GO
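
    --
    -- Alternative measurement (not in the original script): the same storage picture from
    -- sys.dm_db_partition_stats, expressed in 8KB pages rather than KB
    --
    SELECT [table]         = OBJECT_NAME([object_id]),
           [rows]          = SUM(CASE WHEN index_id IN (0, 1) THEN row_count ELSE 0 END),
           reserved_pages  = SUM(reserved_page_count),
           used_pages      = SUM(used_page_count)
    FROM sys.dm_db_partition_stats
    WHERE [object_id] IN (OBJECT_ID(N'dbo.Rainfall_Con'), OBJECT_ID(N'dbo.Rainfall_XML'))
    GROUP BY [object_id];
    GO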

    --

    -- This table is used to capture the results from the test
    -- SELECT statements, so that delays in sending rows to the
    -- client do not skew the results

    --

    CREATE TABLE #BitBucket
    (
        [LocID]    INTEGER       NOT NULL,
        [Date]     DATETIME      NOT NULL,
        [Hour]     TINYINT       NOT NULL,
        [Rainfall] DECIMAL(10,8) NOT NULL
    );

    GO

    --

    -- TEST START

    --

    RAISERROR('Starting test', 0, 1) WITH NOWAIT;

    --

    -- Remove all ad-hoc SQL query plans from cache

    --

    DBCC FREESYSTEMCACHE (N'SQL Plans');

    GO

    --

    -- Start with a cold data cache

    -- Each implementation is run 10 times

    -- Once with a cold cache, then nine times with a warm cache

    -- This seems more representative to me, feel free to change it

    --

    DBCC DROPCLEANBUFFERS;

    GO

    /* SQL */

    INSERT #BitBucket WITH (TABLOCKX)
    SELECT TP.[LocID],
           TP.[Date],
           TP.[Hour],
           TP.[Rainfall]
    FROM dbo.Rainfall_Con TP WITH (READUNCOMMITTED)
    WHERE TP.[Date] = '20080106'
    AND TP.[Hour] = 15
    ORDER BY LocID ASC;

    GO 10
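
    --
    -- Quick check (not in the original script): each execution above captures one row per
    -- location, so after the ten runs the dump table should hold 100 * 10 = 1000 rows
    --
    SELECT rows_captured = COUNT(*) FROM #BitBucket;
    GO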

    --

    -- For fairness, recreate the dump table for the second run

    -- TRUNCATE TABLE might give the second run an unfair advantage

    -- since mixed and uniform extent allocations would not need

    -- to be re-done

    --

    DROP TABLE #BitBucket;

    GO

    CREATE TABLE #BitBucket
    (
        [LocID]    INTEGER       NOT NULL,
        [Date]     DATETIME      NOT NULL,
        [Hour]     TINYINT       NOT NULL,
        [Rainfall] DECIMAL(10,8) NOT NULL
    );

    GO

    --

    -- Second run - XML

    --

    DBCC DROPCLEANBUFFERS;

    GO

    /* XML */

    INSERT #BitBucket WITH (TABLOCKX)
    SELECT TP.[LocID],
           TP.[Date],
           [Hour]     = Rainfall.value('(/RT/RN[@H=15]/@H)[1]', 'tinyint'),
           [Rainfall] = Rainfall.value('(/RT/RN[@H=15]/@V)[1]', 'numeric(10,8)')
    FROM dbo.Rainfall_XML TP WITH (READUNCOMMITTED)
    WHERE TP.[Date] = '20080106'
    ORDER BY LocID;

    GO 10
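
    --
    -- Optional experiment (not part of the original test): a primary XML index can
    -- sometimes help value() queries like the one above, at a considerable extra storage
    -- cost. To try it, create the index, rerun the XML batch, and compare the results
    -- and sp_spaceused output:
    --
    -- CREATE PRIMARY XML INDEX PXML_Rainfall ON dbo.Rainfall_XML ([Rainfall]);
    --
    GO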

    --

    -- Show the total execution resource usage for all ten runs

    --

    SELECT query = LEFT(QT.[text], 9),
           execution_plan = QP.query_plan,
           run_count = QS.execution_count,
           total_cpu_time_µs = QS.total_worker_time,
           total_logical_reads = QS.total_logical_reads,
           total_elapsed_time_µs = QS.total_elapsed_time,
           avg_cpu_time_µs = QS.total_worker_time / QS.execution_count,
           avg_logical_reads = QS.total_logical_reads / QS.execution_count,
           avg_elapsed_time_µs = QS.total_elapsed_time / QS.execution_count
    FROM sys.dm_exec_query_stats QS
    CROSS APPLY sys.dm_exec_sql_text (QS.[sql_handle]) QT
    CROSS APPLY sys.dm_exec_query_plan (QS.[plan_handle]) QP
    WHERE QT.[text] LIKE '%Rainfall%'
    AND QT.[text] NOT LIKE '%dm_exec_query_stats%'
    ORDER BY QS.last_execution_time ASC;

    GO

    --

    -- Sample results:

    --

    -- SQL Averages: CPU 683µs; Elapsed time: 683µs; Logical reads: 114

    -- XML Averages: CPU 80078µs; Elapsed time: 83691µs; Logical reads: 126

    --

    -- Tidy up

    --

    GO

    IF OBJECT_ID(N'dbo.Rainfall_Con', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_Con;
    IF OBJECT_ID(N'dbo.Rainfall_XML', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_XML;
    IF OBJECT_ID(N'tempdb..#BitBucket', N'U') IS NOT NULL DROP TABLE #BitBucket;

    GO

    RAISERROR('Test run complete', 0, 1) WITH NOWAIT;

    --

    -- END SCRIPT

    --