Home Forums SQL Server 2008 T-SQL (SS2K8) tsql query - Count the number of spaces in a string RE: tsql query - Count the number of spaces in a string

  • Shall we take a look at performance?

    SET NOCOUNT ON;

    IF object_id('tempdb..#testEnvironment') IS NOT NULL

    BEGIN

    DROP TABLE #testEnvironment;

    END;

    SELECT TOP 1000000 IDENTITY(INT,1,1) AS ID,

    REPLACE(CAST(NEWID() AS VARCHAR(36)),'-',' ') AS string

    INTO #testEnvironment

    FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3;

    CREATE CLUSTERED INDEX idx_clu_testEnvironment_ID ON #testEnvironment (ID);

    DECLARE @findMe CHAR(1) = ' ', @HOLDER INT;

    PRINT '========== BASELINE ==========';

    SET STATISTICS IO ON;

    SET STATISTICS TIME ON;

    SELECT @HOLDER = COUNT(*)

    FROM #testEnvironment;

    SET STATISTICS TIME OFF;

    SET STATISTICS IO OFF;

    PRINT REPLICATE('=',80);

    PRINT '========== DATALENGTH - DATALENGTH REPLACE ==========';

    SET STATISTICS IO ON;

    SET STATISTICS TIME ON;

    SELECT @HOLDER = DATALENGTH (string) - DATALENGTH(REPLACE(string,@findMe,''))

    FROM #testEnvironment;

    SET STATISTICS TIME OFF;

    SET STATISTICS IO OFF;

    PRINT REPLICATE('=',80);

    PRINT '========== TALLY ==========';

    SET STATISTICS IO ON;

    SET STATISTICS TIME ON;

    WITH e1(n) AS (SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1), -- 10 rows

    e2(n) AS (SELECT a.n FROM e1 a CROSS JOIN e1 b), -- 100 rows

    e4(n) AS (SELECT a.n FROM e2 a CROSS JOIN e2 b), -- 10,000 rows

    tally(n) AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM e4)

    SELECT @HOLDER = COUNT(SUBSTRING(string, n, 1))

    FROM tally

    CROSS APPLY #testEnvironment

    WHERE SUBSTRING(string, n, 1) = @findMe

    AND n <= DATALENGTH(string);

    SET STATISTICS TIME OFF;

    SET STATISTICS IO OFF;

    PRINT REPLICATE('=',80);

    Results:

    ========== BASELINE ==========

    Table '#testEnvironment'. Scan count 5, logical reads 6818, physical reads 0, read-ahead reads 12, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    SQL Server Execution Times:

    CPU time = 78 ms, elapsed time = 36 ms.

    ================================================================================

    ========== DATALENGTH - DATALENGTH REPLACE ==========

    Table '#testEnvironment'. Scan count 1, logical reads 6818, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    SQL Server Execution Times:

    CPU time = 5641 ms, elapsed time = 5647 ms.

    ================================================================================

    ========== TALLY ==========

    Table 'Worktable'. Scan count 10000, logical reads 3283222, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table '#testEnvironment'. Scan count 1, logical reads 6818, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    SQL Server Execution Times:

    CPU time = 22890 ms, elapsed time = 8734 ms.

    ================================================================================

    Of course, if we're going to look at performance, perhaps we should remember the bug with replace.

    With that in mind, I propose changing ColdCoffee's solution to this: -

    DECLARE @String VARCHAR(100), @CharToFind VARCHAR(1);

    SET @String = 'AAAA BBBCB NNNNN NEEEEE ERERERERERE ';

    SET @CharToFind = ' '

    SELECT CountOfCharsInTheString = DATALENGTH(@String) - DATALENGTH(REPLACE(@String COLLATE Latin1_General_BIN2, @CharToFind, ''));

    Let's check out the performance gain.

    SET NOCOUNT ON;

    IF object_id('tempdb..#testEnvironment') IS NOT NULL

    BEGIN

    DROP TABLE #testEnvironment;

    END;

    SELECT TOP 1000000 IDENTITY(INT,1,1) AS ID,

    REPLACE(CAST(NEWID() AS VARCHAR(36)),'-',' ') AS string

    INTO #testEnvironment

    FROM master.dbo.syscolumns sc1, master.dbo.syscolumns sc2, master.dbo.syscolumns sc3;

    CREATE CLUSTERED INDEX idx_clu_testEnvironment_ID ON #testEnvironment (ID);

    DECLARE @findMe CHAR(1) = ' ', @HOLDER INT;

    PRINT '========== BASELINE ==========';

    SET STATISTICS IO ON;

    SET STATISTICS TIME ON;

    SELECT @HOLDER = COUNT(*)

    FROM #testEnvironment;

    SET STATISTICS TIME OFF;

    SET STATISTICS IO OFF;

    PRINT REPLICATE('=',80);

    PRINT '========== DATALENGTH - DATALENGTH REPLACE ==========';

    SET STATISTICS IO ON;

    SET STATISTICS TIME ON;

    SELECT @HOLDER = DATALENGTH (string) - DATALENGTH(REPLACE(string,@findMe,''))

    FROM #testEnvironment;

    SET STATISTICS TIME OFF;

    SET STATISTICS IO OFF;

    PRINT REPLICATE('=',80);

    PRINT '========== DATALENGTH - DATALENGTH REPLACE COLLATE Latin1_General_BIN2 ==========';

    SET STATISTICS IO ON;

    SET STATISTICS TIME ON;

    SELECT @HOLDER = DATALENGTH (string) - DATALENGTH(REPLACE(string COLLATE Latin1_General_BIN2,@findMe,''))

    FROM #testEnvironment;

    SET STATISTICS TIME OFF;

    SET STATISTICS IO OFF;

    PRINT REPLICATE('=',80);

    ========== BASELINE ==========

    Table '#testEnvironment'. Scan count 5, logical reads 6818, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    SQL Server Execution Times:

    CPU time = 109 ms, elapsed time = 35 ms.

    ================================================================================

    ========== DATALENGTH - DATALENGTH REPLACE ==========

    Table '#testEnvironment'. Scan count 1, logical reads 6818, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    SQL Server Execution Times:

    CPU time = 5594 ms, elapsed time = 5643 ms.

    ================================================================================

    ========== DATALENGTH - DATALENGTH REPLACE COLLATE Latin1_General_BIN2 ==========

    Table '#testEnvironment'. Scan count 1, logical reads 6818, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    SQL Server Execution Times:

    CPU time = 922 ms, elapsed time = 922 ms.

    ================================================================================

    The huge difference remains no matter what order you execute or how many times you run it.


    Forever trying to learn
    My blog - http://www.cadavre.co.uk/
    For better, quicker answers on T-SQL questions, click on the following...http://www.sqlservercentral.com/articles/Best+Practices/61537/
    For better, quicker answers on SQL Server performance related questions, click on the following...http://www.sqlservercentral.com/articles/SQLServerCentral/66909/