• Here's an interesting variation:

    This script is the same as the last, except it also includes a test run with a denormalized version.

    Sample results included in the script comments as before.

    (The script also includes commented-out sections to try using vardecimal 'compression' too)

    Paul

    -- *****************************************************************************************

    -- WARNING: This script contains commands to clear the server-wide data and procedure caches

    -- Do not run on anything other than a TEST instance!

    --

    -- Please ensure Actual Execution Plan is OFF for representative results

    -- Total test time is 30s to 1 minute (including test data creation), depending on hardware

    -- *****************************************************************************************

    --

    -- Create a test database

    --

    USE master;

    GO

    RAISERROR('Creating test database...', 0, 1) WITH NOWAIT;

    CREATE DATABASE [E059791C-4F24-40CF-A54A-B21C34F6610D];

    GO

    --

    -- Switch to it

    --

    USE [E059791C-4F24-40CF-A54A-B21C34F6610D];

    GO

    --

    -- Enable vardecimal format

    --

    --EXECUTE sp_db_vardecimal_storage_format

    -- @dbname = [E059791C-4F24-40CF-A54A-B21C34F6610D],

    -- @vardecimal_storage_format = 'ON';

    GO

    --

    -- Drop the test tables if they exist

    --

    IF OBJECT_ID(N'dbo.Rainfall_Con', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_Con

    IF OBJECT_ID(N'dbo.Rainfall_XML', N'U') IS NOT NULL DROP TABLE Rainfall_XML;

    IF OBJECT_ID(N'dbo.Rainfall_DN', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_Con_DN;

    IF OBJECT_ID(N'tempdb..#BitBucket', N'U') IS NOT NULL DROP TABLE #BitBucket;

    GO

    --

    -- Create the conventional table

    --

    CREATE TABLE dbo.Rainfall_Con

    (

    [LocID] INTEGER NOT NULL,

    [Date] DATETIME NOT NULL,

    [Hour] TINYINT NOT NULL,

    [Rainfall] DECIMAL(10,8) NOT NULL,

    );

    GO

    --

    -- Populate the conventional table with one year's data for 100 locations

    -- This should complete in less than 30 seconds

    --

    DECLARE @Rows BIGINT;

    SET @Rows = 100 * 365 * 24; -- 24 hours' data for 365 days for 100 locations

    RAISERROR('Creating test data...', 0, 1) WITH NOWAIT;

    ;WITH Numbers (N)

    AS (

    -- Dynamically create rows numbered 0 to 875999

    SELECT TOP (@Rows)

    ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1

    FROM master.sys.all_columns C1,

    master.sys.all_columns C2,

    master.sys.all_columns C3

    )

    INSERT dbo.Rainfall_Con WITH (TABLOCKX)

    (

    [LocID],

    [Date],

    [Hour],

    [Rainfall]

    )

    SELECT LocID = (N / 365 / 24) + 1,

    [Date] = DATEADD(DAY, (N / 24) % 366 , '2008-01-01 00:00:00.000'),

    [Hour] = N % 24,

    [Rainfall] = RAND(CHECKSUM(NEWID())) * 10 -- A new random value for each row

    FROM Numbers;

    GO

    --

    -- Now create the primary key

    -- Doing this *after* inserting the data results in a compact and contiguous index

    -- If the index were to exist during the insert, index pages may split resulting in

    -- a lower data density, and significant fragmentation

    --

    RAISERROR('Creating index...', 0, 1) WITH NOWAIT;

    ALTER TABLE dbo.Rainfall_Con

    ADD CONSTRAINT PK_Rainfall_Con

    PRIMARY KEY CLUSTERED([Date], [Hour], [LocID])

    WITH (FILLFACTOR = 100);

    GO

    --

    -- Create the XML table

    --

    CREATE TABLE dbo.Rainfall_XML

    (

    [LocID] INTEGER NOT NULL,

    [Date] DATETIME NOT NULL,

    [Rainfall] XML NOT NULL,

    );

    GO

    --

    -- Populate the XML table from the conventional table

    --

    RAISERROR('Creating XML data...', 0, 1) WITH NOWAIT;

    INSERT Rainfall_XML WITH (TABLOCKX)

    SELECT [LocID],

    [Date],

    (

    -- The hourly data as XML

    SELECT H = [Hour],

    V = [Rainfall]

    FROM dbo.Rainfall_Con A WITH (READUNCOMMITTED)

    WHERE A.[LocID] = B.[LocID]

    AND A.[Date] = B.[Date]

    FOR XML RAW('RN'), ROOT('RT')

    )

    FROM Rainfall_Con B WITH (READUNCOMMITTED)

    GROUP BY

    [LocID],

    [Date];

    GO

    --

    -- Add the primary key now, for the same reasons as before

    --

    RAISERROR('Creating index...', 0, 1) WITH NOWAIT;

    ALTER TABLE dbo.Rainfall_XML

    ADD CONSTRAINT PK_Rainfall_XML

    PRIMARY KEY CLUSTERED([Date], [LocID])

    WITH (FILLFACTOR = 100);

    GO

    --

    -- Create the denormalized table

    --

    CREATE TABLE dbo.Rainfall_Con_DN

    (

    [LocID] INTEGER NOT NULL,

    [Date] DATETIME NOT NULL,

    [0] DECIMAL(10,8) NOT NULL,

    [1] DECIMAL(10,8) NOT NULL,

    [2] DECIMAL(10,8) NOT NULL,

    [3] DECIMAL(10,8) NOT NULL,

    [4] DECIMAL(10,8) NOT NULL,

    [5] DECIMAL(10,8) NOT NULL,

    [6] DECIMAL(10,8) NOT NULL,

    [7] DECIMAL(10,8) NOT NULL,

    [8] DECIMAL(10,8) NOT NULL,

    [9] DECIMAL(10,8) NOT NULL,

    [10] DECIMAL(10,8) NOT NULL,

    [11] DECIMAL(10,8) NOT NULL,

    [12] DECIMAL(10,8) NOT NULL,

    [13] DECIMAL(10,8) NOT NULL,

    [14] DECIMAL(10,8) NOT NULL,

    [15] DECIMAL(10,8) NOT NULL,

    [16] DECIMAL(10,8) NOT NULL,

    [17] DECIMAL(10,8) NOT NULL,

    [18] DECIMAL(10,8) NOT NULL,

    [19] DECIMAL(10,8) NOT NULL,

    [20] DECIMAL(10,8) NOT NULL,

    [21] DECIMAL(10,8) NOT NULL,

    [22] DECIMAL(10,8) NOT NULL,

    [23] DECIMAL(10,8) NOT NULL,

    );

    GO

    --EXECUTE sp_tableoption

    -- @TableNamePattern = [dbo.Rainfall_Con_DN],

    -- @OptionName = 'vardecimal storage format' ,

    -- @OptionValue = 'ON';

    GO

    --

    -- Populate the denormalized table from the conventional table

    --

    RAISERROR('Creating denormalized data...', 0, 1) WITH NOWAIT;

    INSERT dbo.Rainfall_Con_DN WITH (TABLOCKX)

    (

    [LocID],

    [Date],

    [0],[1],[2],[3],[4],[5],[6],[7],[8],[9],[10],[11],[12],

    [13],[14],[15],[16],[17],[18],[19],[20],[21],[22],[23]

    )

    SELECT P.[LocID],

    P.[Date],

    P.[0],P.[1],P.[2],P.[3],P.[4],P.[5],P.[6],

    P.[7],P.[8],P.[9],P.[10],P.[11],P.[12],

    P.[13],P.[14],P.[15],P.[16],P.[17],P.[18],

    P.[19],P.[20],P.[21],P.[22],P.[23]

    FROM (

    SELECT DISTINCT

    [LocID],

    [Date]

    FROM dbo.Rainfall_Con WITH (READUNCOMMITTED)

    ) B

    CROSS

    APPLY (

    -- The hourly data

    SELECT H = [Hour],

    V = [Rainfall]

    FROM dbo.Rainfall_Con A WITH (READUNCOMMITTED)

    WHERE A.[LocID] = B.[LocID]

    AND A.[Date] = B.[Date]

    ) A

    PIVOT (

    MAX(V)

    FOR H

    IN (

    [0],[1],[2],[3],[4],[5],[6],[7],[8],[9],[10],[11],[12],

    [13],[14],[15],[16],[17],[18],[19],[20],[21],[22],[23]

    )

    ) P

    GO

    --

    -- Add the primary key now, for the same reasons as before

    --

    RAISERROR('Creating index...', 0, 1) WITH NOWAIT;

    ALTER TABLE dbo.Rainfall_Con_DN

    ADD CONSTRAINT PK_Rainfall_Con_DN

    PRIMARY KEY CLUSTERED([Date], [LocID])

    WITH (FILLFACTOR = 100);

    GO

    --

    -- Show the space used by each table

    --

    -- Results on my system:

    --

    -- Conventional : 876,000 rows, 27016KB reserved, 26856KB data, 96KB index, 64KB unused

    -- XML : 36,500 rows, 36504KB reserved, 36504KB data, 112KB index, 64KB unused

    -- Denormalized : 36,500 rows, 8968KB reserved, 8856KB data, 40KB index, 72KB unused

    -- Denormalized : 36,500 rows, 7240KB reserved, 7128KB data, 40KB index, 72KB unused (with vardecimal storage)

    --

    -- ### XML representation is 35% *larger* ###

    --

    EXECUTE sp_spaceused Rainfall_Con;

    EXECUTE sp_spaceused Rainfall_XML;

    EXECUTE sp_spaceused Rainfall_Con_DN;

    GO

    --

    -- This table is used to capture the results from the test

    -- SELECT statements. This avoids any delays in sending the

    -- rows to the client from skewing the results

    --

    CREATE TABLE #BitBucket

    (

    [LocID] INTEGER NOT NULL,

    [Date] DATETIME NOT NULL,

    [Hour] TINYINT NOT NULL,

    [Rainfall] DECIMAL(10,8) NOT NULL,

    );

    GO

    --

    -- TEST START

    --

    RAISERROR('Starting test', 0, 1) WITH NOWAIT;

    --

    -- Remove all ad-hoc SQL query plans from cache

    --

    DBCC FREESYSTEMCACHE (N'SQL Plans');

    GO

    --

    -- Start with a cold data cache

    -- Each implementation is run 10 times

    -- One time with a cold cache, then nine times with a warm cache

    -- This seems more representative to me, feel free to change it

    --

    DBCC DROPCLEANBUFFERS;

    GO

    /* SQL */

    INSERT #BitBucket WITH (TABLOCKX)

    SELECT TP.[LocID],

    TP.[Date],

    TP.[Hour],

    TP.[Rainfall]

    FROM dbo.Rainfall_Con TP WITH (READUNCOMMITTED)

    WHERE TP.[Date] = '20080106'

    AND TP.[Hour] = 15

    ORDER BY

    LocID ASC;

    GO 10

    --

    -- For fairness, recreate the dump table for the second run

    -- TRUNCATE TABLE might give the second run an unfair advantage

    -- since mixed and uniform extent allocations would not need

    -- to be re-done

    --

    DROP TABLE #BitBucket;

    GO

    CREATE TABLE #BitBucket

    (

    [LocID] INTEGER NOT NULL,

    [Date] DATETIME NOT NULL,

    [Hour] TINYINT NOT NULL,

    [Rainfall] DECIMAL(10,8) NOT NULL,

    );

    GO

    --

    -- Second run - XML

    --

    DBCC DROPCLEANBUFFERS;

    GO

    /* XML */

    INSERT #BitBucket WITH (TABLOCKX)

    SELECT TP.LocID,

    TP.[Date],

    [Hour] = Rainfall.value('(/RT/RN[@H=15]/@H)[1]','tinyint'),

    Rainfall_Con = Rainfall.value('(/RT/RN[@H=15]/@V)[1]','numeric(10,8)')

    FROM dbo.Rainfall_XML TP WITH (READUNCOMMITTED)

    WHERE TP.[Date] = '20080106'

    ORDER BY

    LocID;

    GO 10

    --

    -- Recreate the dump table for the third run

    --

    DROP TABLE #BitBucket;

    GO

    CREATE TABLE #BitBucket

    (

    [LocID] INTEGER NOT NULL,

    [Date] DATETIME NOT NULL,

    [Hour] TINYINT NOT NULL,

    [Rainfall] DECIMAL(10,8) NOT NULL,

    );

    GO

    --

    -- Third run - denormalized

    --

    DBCC DROPCLEANBUFFERS;

    GO

    /* DN */

    INSERT #BitBucket WITH (TABLOCKX)

    SELECT TP.LocID,

    TP.[Date],

    [Hour] = CONVERT(TINYINT, 15),

    Rainfall = [15]

    FROM dbo.Rainfall_Con_DN TP WITH (READUNCOMMITTED)

    WHERE TP.[Date] = '20080106'

    ORDER BY

    LocID;

    GO 10

    --

    -- Show the total execution resource usage for all ten runs

    --

    SELECT query = LEFT(QT.[text], 9),

    execution_plan = QP.query_plan,

    run_count = QS.execution_count,

    total_cpu_time_µs = QS.total_worker_time,

    total_logical_reads = QS.total_logical_reads,

    total_elapsed_time_µs = QS.total_elapsed_time,

    avg_cpu_time_µs = QS.total_worker_time / QS.execution_count,

    avg_logical_reads = QS.total_logical_reads / QS.execution_count,

    avg_elapsed_time_µs = QS.total_elapsed_time / QS.execution_count

    FROM sys.dm_exec_query_stats QS

    CROSS

    APPLY sys.dm_exec_sql_text (QS.[sql_handle]) QT

    CROSS

    APPLY sys.dm_exec_query_plan (QS.[plan_handle]) QP

    WHERE QT.[text] LIKE '%Rainfall%'

    AND QT.[text] NOT LIKE '%dm_exec_query_stats%'

    ORDER BY

    QS.last_execution_time ASC;

    GO

    --

    -- Sample results:

    --

    -- SQL Averages: CPU 390µs; Elapsed time: 4785µs; Logical reads: 113

    -- DN Averages: CPU 488µs; Elapsed time: 1660µs; Logical reads: 117 (without vardecimal)

    -- XML Averages: CPU 76171µs; Elapsed time: 79492µs; Logical reads: 126

    --

    -- Tidy up

    --

    GO

    IF OBJECT_ID(N'dbo.Rainfall_Con', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_Con

    IF OBJECT_ID(N'dbo.Rainfall_XML', N'U') IS NOT NULL DROP TABLE Rainfall_XML;

    IF OBJECT_ID(N'dbo.Rainfall_DN', N'U') IS NOT NULL DROP TABLE dbo.Rainfall_Con_DN;

    IF OBJECT_ID(N'tempdb..#BitBucket', N'U') IS NOT NULL DROP TABLE #BitBucket;

    GO

    USE master;

    GO

    DROP DATABASE [E059791C-4F24-40CF-A54A-B21C34F6610D];

    GO

    RAISERROR('Test run complete', 0, 1) WITH NOWAIT;

    --

    -- END SCRIPT

    --