Luis Cazares (10/25/2012)
I was so close, thanks to the QotD that made me remember the PARSENAME function.My code
SELECT @HOLDER = ID
FROM #testEnvironment
WHERE PARSENAME( CAST(yourData AS NVARCHAR(MAX)),3) IS NULL
AND PARSENAME(CAST(yourData AS NVARCHAR(MAX)), 2) IS NOT NULL
Results using Cadavre's last test environment (without the XML). I deleted the "DBCC execution completed" comments.
Duration for CHARINDEX = 00:00:18:363
Duration for LEN = 00:00:09:347
Duration for PARSENAME = 00:00:06:870
Duration for COLLATE = 00:00:06:667
Very good, that was sort of the idea I was trying to implement with the XML splitter. Didn't think of PARSENAME (foolishly!).
Also, we can get rid of the "DBCC execution completed" comments by adding "WITH NO_INFOMSGS" to the script, like so: -
SET NOCOUNT ON;
IF object_id('tempdb..#testEnvironment') IS NOT NULL
BEGIN
DROP TABLE #testEnvironment;
END;
--1,000,000 Random rows of data
SELECT IDENTITY(INT,1,1) AS ID, CAST(LEFT(yourData, LEN(yourData) - 1) AS NTEXT) AS yourData
INTO #testEnvironment
FROM (SELECT TOP 1000000
REPLICATE(
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
REPLACE(NEWID(),'-','')
,'0',''),'9',''),'8',''),'7',''),'6',''),'5',''),'4',''),'3',''),'2',''),'1','') + '.',
(ABS(CHECKSUM(NEWID())) % 2) + 2)
FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3
)a(yourData);
DECLARE @HOLDER INT, @Duration CHAR(12), @StartTime DATETIME;
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM (SELECT ID, yourData, MAX(rn)
FROM (SELECT ID, yourData, split.Part.value('text()[1]', 'VARCHAR(MAX)'), ROW_NUMBER() OVER(PARTITION BY yourData ORDER BY (SELECT NULL))
FROM (SELECT CAST('<p>' + REPLACE(CAST(yourData AS VARCHAR(MAX)) COLLATE Latin1_General_BIN2,'.','</p><p>') + '</p>' AS XML),
CAST(yourData AS VARCHAR(MAX)), ID
FROM #testEnvironment) innerQ(xmlField, yourData, ID)
CROSS APPLY innerQ.xmlField.nodes('p') split(Part)
) a(ID, yourData,splitData,rn)
GROUP BY ID, yourData
) a(ID, yourData, rn)
WHERE rn = 2;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('Duration for ugly xml split = %s',0,1,@Duration) WITH NOWAIT;
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM #testEnvironment
WHERE CHARINDEX('.', yourData, CHARINDEX('.', yourData) + 1) = 0;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('Duration for CHARINDEX = %s',0,1,@Duration) WITH NOWAIT;
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM #testEnvironment
WHERE LEN(CAST(yourData AS NVARCHAR(MAX)))-1 = LEN(REPLACE(CAST(yourData AS NVARCHAR(MAX)) COLLATE Latin1_General_BIN2,'.',''));
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('Duration for LEN = %s',0,1,@Duration) WITH NOWAIT;
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM #testEnvironment
WHERE CHARINDEX('.', yourData COLLATE Latin1_General_BIN2, CHARINDEX('.', yourData COLLATE Latin1_General_BIN2) + 1) = 0;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('Duration for CHARINDEX with COLLATE = %s',0,1,@Duration) WITH NOWAIT;
DBCC FREEPROCCACHE WITH NO_INFOMSGS;
DBCC DROPCLEANBUFFERS WITH NO_INFOMSGS;
SELECT @StartTime = GETDATE();
SELECT @HOLDER = ID
FROM #testEnvironment
WHERE PARSENAME(CAST(yourData AS NVARCHAR(MAX)),3) IS NULL
AND PARSENAME(CAST(yourData AS NVARCHAR(MAX)), 2) IS NOT NULL;
SELECT @Duration = CONVERT(CHAR(12),GETDATE()-@StartTime,114);
RAISERROR('Duration for PARSENAME = %s',0,1,@Duration) WITH NOWAIT;
Duration for ugly xml split = 00:00:47:197
Duration for CHARINDEX = 00:00:07:593
Duration for LEN = 00:00:08:163
Duration for CHARINDEX with COLLATE = 00:00:04:787
Duration for PARSENAME = 00:00:05:193
Luis Cazares (10/25/2012)
Davin21 (10/25/2012)
How about this?
SELECT *
FROM #testenvironment
WHERE PARSENAME(CAST(yourdata AS NVARCHAR(MAX)),3) IS NULL
I had the same idea, but I added an extra condition because your query will return results with no period (or dot) and with more than 3.
I've got the agree with Luis, it's a good idea but it could return incorrect results.