Good idea Mark. I also added an ugly XML split to count the number of "items", then modified the test script to clear the cache between each piece of code to attempt to keep it fairer.
SET NOCOUNT ON;
IF object_id('tempdb..#testEnvironment') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment;
END;
--1,000,000 Random rows of data
SELECT IDENTITY(INT,1,1) AS ID, CAST(LEFT(yourData, LEN(yourData) - 1) AS NTEXT) AS yourData
INTO #testEnvironment
FROM (SELECT TOP 1000000
REPLICATE(
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
REPLACE(NEWID(),'-','')
,'0',''),'9',''),'8',''),'7',''),'6',''),'5',''),'4',''),'3',''),'2',''),'1','') + '.',
(ABS(CHECKSUM(NEWID())) % 2) + 2)
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3
)a(yourData);
DECLARE @HOLDER INT, @Duration CHAR(12), @StartTime DATETIME;
DBCC FREEPROCCACHE;
DBCC DROPCLEANBUFFERS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM (SELECT ID, yourData, MAX(rn)
FROM (SELECT ID, yourData, split.Part.value('text()[1]', 'VARCHAR(MAX)'), ROW_NUMBER() OVER(PARTITION BY yourData ORDER BY (SELECT NULL))
FROM (SELECT CAST('<p>' + REPLACE(CAST(yourData AS VARCHAR(MAX)) COLLATE Latin1_General_BIN2,'.','</p><p>') + '</p>' AS XML),
CAST(yourData AS VARCHAR(MAX)), ID
FROM #testEnvironment) innerQ(xmlField, yourData, ID)
CROSS APPLY innerQ.xmlField.nodes('p') split(Part)
) a(ID, yourData,splitData,rn)
GROUP BY ID, yourData
) a(ID, yourData, rn)
WHERE rn = 2;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('Duration for ugly xml split = %s',0,1,@Duration) WITH NOWAIT;
DBCC FREEPROCCACHE;
DBCC DROPCLEANBUFFERS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM #testEnvironment
WHERE CHARINDEX('.', yourData, CHARINDEX('.', yourData) + 1) = 0;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('Duration for CHARINDEX = %s',0,1,@Duration) WITH NOWAIT;
DBCC FREEPROCCACHE;
DBCC DROPCLEANBUFFERS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM #testEnvironment
WHERE LEN(CAST(yourData AS NVARCHAR(MAX)))-1 = LEN(REPLACE(CAST(yourData AS NVARCHAR(MAX)) COLLATE Latin1_General_BIN2,'.',''));
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('Duration for LEN = %s',0,1,@Duration) WITH NOWAIT;
DBCC FREEPROCCACHE;
DBCC DROPCLEANBUFFERS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM #testEnvironment
WHERE CHARINDEX('.', yourData COLLATE Latin1_General_BIN2 , CHARINDEX('.', yourData COLLATE Latin1_General_BIN2) + 1) = 0;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('Duration for CHARINDEX with COLLATE = %s',0,1,@Duration) WITH NOWAIT;
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
Duration for ugly xml split = 00:00:48:980
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
Duration for CHARINDEX = 00:00:08:640
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
Duration for LEN = 00:00:09:087
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
Duration for CHARINDEX with COLLATE = 00:00:05:323
So the CHARINDEX with COLLATE that Mark suggested is the fastest so far - whilst my ugly XML splitter proves its worth as considerably slower 😀