Blog Post

Why UniqueIdentifier is a Bad Choice for a Clustered Index in SQL Server

,

I had a question today about why it was “bad” to use a UniqueIdentifier as the data type for a clustered index in SQL Server. After all, on the surface that sounds like a good choice, since a UniqueIdentifier is guaranteed to be unique, right?  I have run into many developers over the years that are seduced by this idea…

Unfortunately, using a UniqueIdentifier is not a good choice from SQL Server’s perspective. UniqueIdentifier is a relatively large data type, so your table will be larger than if you can use a numeric data type such as Integer. Far worse is how quickly your clustered index will become fragmented as you INSERT even a relatively small number of rows into the table. This can hurt query performance for certain types of queries, and it makes your clustered index much larger.

In the example below, simply INSERTING 2000 rows of data into an empty table gives you over 99% fragmentation and nearly doubles the size of the table (compared to using an Integer for the clustered index).  Using a default value of NEWSEQUENTIALID() for the myID column alleviates the fragmentation issue, but you still have a clustered index that is about 30% larger than if you use an integer data type for the clustered index.

Note: The script below only works for SQL Server 2008 and above. There are only a couple of minor changes needed to make it work on SQL Server 2005.

-- Why UNIQUEIDENTIFIER is a bad choice for a clustered index data type
-- Glenn Berry 
-- March 2010
-- http://glennberrysqlperformance.spaces.live.com/
-- Twitter: GlennAlanBerry
-- Switch to Master
USE [master];
GO
-- Drop database if it exists
IF  EXISTS (SELECT name FROM sys.databases WHERE name = N'TestDB')
    BEGIN
        ALTER DATABASE [TestDB] SET  SINGLE_USER WITH ROLLBACK IMMEDIATE;
        DROP DATABASE [TestDB];
    END
GO
-- Create new database (change file paths if needed
CREATE DATABASE [TestDB] ON  PRIMARY 
( NAME = N'TestDB', FILENAME = N'C:\SQLData\TestDB.mdf' , 
  SIZE = 5MB , FILEGROWTH = 50MB )
 LOG ON 
( NAME = N'TestDB_log', FILENAME = N'C:\SQLLogs\TestDB.ldf' , 
  SIZE = 2MB , FILEGROWTH = 50MB)
GO
-- Switch to new database
USE TestDB;
GO
-- Do some things that will grow the database and fragment indexes
-- Create a "bad" table 
-- (never use a UNIQUEIDENTIFIER for your PK, clustered index!)
IF  EXISTS (SELECT * FROM sys.objects 
            WHERE object_id = OBJECT_ID(N'[dbo].[TestBadTable]') 
            AND type in (N'U'))
DROP TABLE [dbo].[TestBadTable];
GO
CREATE TABLE [dbo].[TestBadTable](
    [myID] [uniqueidentifier] NOT NULL,
    [myColumn] [varchar](2000) NULL,
 CONSTRAINT [PK_TestBadTable] PRIMARY KEY CLUSTERED 
(
    [myID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, 
 ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY];
GO
-- Insert some data to fragment the clustered table
SET NOCOUNT ON;
DECLARE @iteration INT = 0;
WHILE @iteration < 2000
BEGIN
    INSERT INTO dbo.TestBadTable(myID, myColumn)
    SELECT NEWID(), REPLICATE('A', 2000);
    
    SET @iteration = @iteration + 1;
END
GO
-- End of insert loop
-- Check Table size (7.9MB)
EXEC sp_spaceused TestBadTable, True;
-- Check how badly the clustered index is fragmented (99.3%)
SELECT avg_fragmentation_in_percent, page_count
FROM sys.dm_db_index_physical_stats 
(DB_ID(), OBJECT_ID(N'TestBadTable'), NULL , NULL, N'Limited') 
OPTION (MAXDOP 1);
-- Reorganize the clustered index (this is always an online operation)
ALTER INDEX [PK_TestBadTable] 
ON [dbo].[TestBadTable] REORGANIZE WITH ( LOB_COMPACTION = ON )
GO
-- Check how badly the clustered index is fragmented (2.4%)
SELECT avg_fragmentation_in_percent, page_count
FROM sys.dm_db_index_physical_stats 
(DB_ID(), OBJECT_ID(N'TestBadTable'), NULL , NULL, N'Limited') 
OPTION (MAXDOP 1);
-- Rebuild the clustered index (this can be done online with Enterprise Edition)
ALTER INDEX [PK_TestBadTable] ON [dbo].[TestBadTable] REBUILD PARTITION = ALL 
WITH ( PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, ALLOW_ROW_LOCKS  = ON, 
ALLOW_PAGE_LOCKS  = ON, ONLINE = ON, SORT_IN_TEMPDB = OFF )
GO
-- Create another table using NEWSEQUENTIALID as a default
IF  EXISTS (SELECT * FROM sys.objects 
            WHERE object_id = OBJECT_ID(N'[dbo].[TestNotAsBadTable]') 
            AND type in (N'U'))
DROP TABLE [dbo].[TestNotAsBadTable];
GO
CREATE TABLE [dbo].[TestNotAsBadTable](
    [myID] [uniqueidentifier]  NOT NULL DEFAULT NEWSEQUENTIALID(),
    [myColumn] [varchar](2000) NULL,
 CONSTRAINT [PK_TestNotAsBadTable] PRIMARY KEY CLUSTERED 
(
    [myID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, 
 ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY];
GO
-- Insert some data to fragment the clustered table
SET NOCOUNT ON;
DECLARE @iteration INT = 0;
WHILE @iteration < 2000
BEGIN
    INSERT INTO dbo.TestNotAsBadTable(myColumn)
    SELECT REPLICATE('A', 2000);
    
    SET @iteration = @iteration + 1;
END
GO
-- End of insert loop
-- Check Table size (5.3MB)
EXEC sp_spaceused TestNotAsBadTable, True;
-- Check how badly the clustered index is fragmented (1%)
SELECT avg_fragmentation_in_percent, page_count
FROM sys.dm_db_index_physical_stats 
(DB_ID(), OBJECT_ID(N'TestNotAsBadTable'), NULL , NULL, N'Limited') 
OPTION (MAXDOP 1);
-- Reorganize the clustered index (this is always an online operation)
ALTER INDEX [PK_TestNotAsBadTable] 
ON [dbo].[TestNotAsBadTable] REORGANIZE WITH ( LOB_COMPACTION = ON )
GO
-- Check how badly the clustered index is fragmented (2.4%)
SELECT avg_fragmentation_in_percent, page_count
FROM sys.dm_db_index_physical_stats 
(DB_ID(), OBJECT_ID(N'TestNotAsBadTable'), NULL , NULL, N'Limited') 
OPTION (MAXDOP 1);
-- Rebuild the clustered index (this can be done online with Enterprise Edition)
ALTER INDEX [PK_TestNotAsBadTable] ON [dbo].[TestNotAsBadTable] REBUILD PARTITION = ALL 
WITH ( PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, ALLOW_ROW_LOCKS  = ON, 
ALLOW_PAGE_LOCKS  = ON, ONLINE = ON, SORT_IN_TEMPDB = OFF )
GO
-- Create a much better table
IF  EXISTS (SELECT * FROM sys.objects 
            WHERE object_id = OBJECT_ID(N'[dbo].[TestGoodTable]') 
            AND type in (N'U'))
DROP TABLE [dbo].[TestGoodTable];
GO
CREATE TABLE dbo.TestGoodTable
(MyID INT IDENTITY(1,1) NOT NULL,
 MyColumn VARCHAR(2000) NULL,
 CONSTRAINT PK_TestGoodTable PRIMARY KEY CLUSTERED 
(MyID ASC)
WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, 
ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY];
GO
-- Insert some data to fragment the clustered table
SET NOCOUNT ON;
DECLARE @iteration INT = 0;
WHILE @iteration < 2000
BEGIN
    INSERT INTO dbo.TestGoodTable(MyColumn)
    SELECT REPLICATE('A', 2000);
    
    SET @iteration = @iteration + 1;
END
GO
-- End of insert loop
-- Check Table size (4MB)
EXEC sp_spaceused TestGoodTable, True;
-- Check how badly the clustered index is fragmented (less than 1%)
SELECT avg_fragmentation_in_percent, page_count
FROM sys.dm_db_index_physical_stats (DB_ID(), OBJECT_ID(N'TestGoodTable'), NULL , NULL, N'Limited') 
OPTION (MAXDOP 1);
-- Reorganize the clustered index (this is always an online operation)
ALTER INDEX [PK_TestGoodTable] 
ON [dbo].[TestGoodTable] REORGANIZE WITH ( LOB_COMPACTION = ON )
GO
-- Rebuild the clustered index (this can be done online with Enterprise Edition)
ALTER INDEX [PK_TestGoodTable] ON [dbo].[TestGoodTable] REBUILD PARTITION = ALL 
WITH ( PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, 
ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON, ONLINE = ON, SORT_IN_TEMPDB = OFF )
GO

Rate

You rated this post out of 5. Change rating

Share

Share

Rate

You rated this post out of 5. Change rating