Shall we take a look at performance?
-- Benchmark: two ways of counting occurrences of a character in a string,
-- run against 1,000,000 random 36-character strings, plus a table-scan baseline.
SET NOCOUNT ON;

-- Rebuild the test table from scratch so repeated runs are comparable.
IF OBJECT_ID('tempdb..#testEnvironment') IS NOT NULL
BEGIN
    DROP TABLE #testEnvironment;
END;

-- 1M rows of NEWID() text with the dashes swapped for spaces, so every row
-- contains occurrences of the search character at known density.
SELECT TOP 1000000
       IDENTITY(INT, 1, 1) AS ID,
       REPLACE(CAST(NEWID() AS VARCHAR(36)), '-', ' ') AS string
INTO #testEnvironment
FROM master.dbo.syscolumns sc1          -- ANSI CROSS JOINs instead of comma joins
CROSS JOIN master.dbo.syscolumns sc2
CROSS JOIN master.dbo.syscolumns sc3;

CREATE CLUSTERED INDEX idx_clu_testEnvironment_ID ON #testEnvironment (ID);

DECLARE @findMe CHAR(1) = ' ', @HOLDER INT;

-- Baseline: the cost of simply scanning the table.
PRINT '========== BASELINE ==========';
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
SELECT @HOLDER = COUNT(*)
FROM #testEnvironment;
SET STATISTICS TIME OFF;
SET STATISTICS IO OFF;
PRINT REPLICATE('=',80);

-- Method 1: length difference after stripping the character out.
PRINT '========== DATALENGTH - DATALENGTH REPLACE ==========';
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
SELECT @HOLDER = DATALENGTH(string) - DATALENGTH(REPLACE(string, @findMe, ''))
FROM #testEnvironment;
SET STATISTICS TIME OFF;
SET STATISTICS IO OFF;
PRINT REPLICATE('=',80);

-- Method 2: tally-table split — test each character position individually.
PRINT '========== TALLY ==========';
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
WITH e1(n) AS (SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
               UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1), -- 10 rows
     e2(n) AS (SELECT a.n FROM e1 a CROSS JOIN e1 b),   -- 100 rows
     e4(n) AS (SELECT a.n FROM e2 a CROSS JOIN e2 b),   -- 10,000 rows
     tally(n) AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM e4)
-- COUNT(*) is equivalent to the original COUNT(SUBSTRING(...)): the WHERE
-- clause already guarantees the counted expression is non-NULL.
SELECT @HOLDER = COUNT(*)
FROM tally
CROSS JOIN #testEnvironment             -- was CROSS APPLY with no correlation; CROSS JOIN states the intent
WHERE SUBSTRING(string, n, 1) = @findMe
  AND n <= DATALENGTH(string);          -- only positions that exist in the row's string
SET STATISTICS TIME OFF;
SET STATISTICS IO OFF;
PRINT REPLICATE('=',80);
Results:
========== BASELINE ==========
Table '#testEnvironment'. Scan count 5, logical reads 6818, physical reads 0, read-ahead reads 12, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 78 ms, elapsed time = 36 ms.
================================================================================
========== DATALENGTH - DATALENGTH REPLACE ==========
Table '#testEnvironment'. Scan count 1, logical reads 6818, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 5641 ms, elapsed time = 5647 ms.
================================================================================
========== TALLY ==========
Table 'Worktable'. Scan count 10000, logical reads 3283222, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table '#testEnvironment'. Scan count 1, logical reads 6818, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 22890 ms, elapsed time = 8734 ms.
================================================================================
Of course, if we're going to look at performance, perhaps we should also remember the known collation-related bug with REPLACE.
With that in mind, I propose changing ColdCoffee's solution to this: -
-- Count occurrences of @CharToFind within @String.
-- Forcing a binary collation (Latin1_General_BIN2) on the REPLACE input both
-- sidesteps REPLACE's collation-dependent matching surprises and is
-- substantially faster than a case-insensitive Windows collation.
DECLARE @String VARCHAR(100), @CharToFind VARCHAR(1);
SET @String = 'AAAA BBBCB NNNNN NEEEEE ERERERERERE ';
SET @CharToFind = ' ';  -- terminator added: statement was missing its semicolon
SELECT CountOfCharsInTheString =
       DATALENGTH(@String)
       - DATALENGTH(REPLACE(@String COLLATE Latin1_General_BIN2, @CharToFind, ''));
Let's check out the performance gain.
-- Benchmark: default-collation REPLACE vs binary-collation REPLACE for
-- counting a character's occurrences, over 1,000,000 random strings.
SET NOCOUNT ON;

-- Rebuild the test table from scratch so repeated runs are comparable.
IF OBJECT_ID('tempdb..#testEnvironment') IS NOT NULL
BEGIN
    DROP TABLE #testEnvironment;
END;

-- 1M rows of NEWID() text with the dashes swapped for spaces.
SELECT TOP 1000000
       IDENTITY(INT, 1, 1) AS ID,
       REPLACE(CAST(NEWID() AS VARCHAR(36)), '-', ' ') AS string
INTO #testEnvironment
FROM master.dbo.syscolumns sc1          -- ANSI CROSS JOINs instead of comma joins
CROSS JOIN master.dbo.syscolumns sc2
CROSS JOIN master.dbo.syscolumns sc3;

CREATE CLUSTERED INDEX idx_clu_testEnvironment_ID ON #testEnvironment (ID);

DECLARE @findMe CHAR(1) = ' ', @HOLDER INT;

-- Baseline: the cost of simply scanning the table.
PRINT '========== BASELINE ==========';
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
SELECT @HOLDER = COUNT(*)
FROM #testEnvironment;
SET STATISTICS TIME OFF;
SET STATISTICS IO OFF;
PRINT REPLICATE('=',80);

-- Method 1: REPLACE under the column's default collation.
PRINT '========== DATALENGTH - DATALENGTH REPLACE ==========';
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
SELECT @HOLDER = DATALENGTH(string) - DATALENGTH(REPLACE(string, @findMe, ''))
FROM #testEnvironment;
SET STATISTICS TIME OFF;
SET STATISTICS IO OFF;
PRINT REPLICATE('=',80);

-- Method 2: identical logic, but REPLACE runs under a binary collation.
PRINT '========== DATALENGTH - DATALENGTH REPLACE COLLATE Latin1_General_BIN2 ==========';
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
SELECT @HOLDER = DATALENGTH(string) - DATALENGTH(REPLACE(string COLLATE Latin1_General_BIN2, @findMe, ''))
FROM #testEnvironment;
SET STATISTICS TIME OFF;
SET STATISTICS IO OFF;
PRINT REPLICATE('=',80);
========== BASELINE ==========
Table '#testEnvironment'. Scan count 5, logical reads 6818, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 109 ms, elapsed time = 35 ms.
================================================================================
========== DATALENGTH - DATALENGTH REPLACE ==========
Table '#testEnvironment'. Scan count 1, logical reads 6818, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 5594 ms, elapsed time = 5643 ms.
================================================================================
========== DATALENGTH - DATALENGTH REPLACE COLLATE Latin1_General_BIN2 ==========
Table '#testEnvironment'. Scan count 1, logical reads 6818, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 922 ms, elapsed time = 922 ms.
================================================================================
The huge difference remains no matter in which order you execute the tests or how many times you run them.