Fuzzy-String Search: Find misspelled information with T-SQL
An optimized Damerau-Levenshtein Distance (DLD) algorithm for "fuzzy" string matching in Transact-SQL 2000-2008
2014-01-10 (first published: 2012-09-18)
30,985 reads
/* This will name Ignite SQL hashes that have not been named yet, based on the object (or batch) they are within.
** Runs on MS-SQL2005+ (uses VARCHAR(MAX) to concatenate SQL sections).
** Run this script in ignite_repository or dpa_repository database (but if you do not, it will just error and exit without doing anything).
** @DOIT = 0 by default so no changes will be made; change @DOIT = 1 to actually add the names.
** It works best if you have a shorthand code for the object (or batch), with no spaces in it.
** Place a comment like this near the beginning of the batch (at the end of the CREATE line of the object):
CREATE PROCEDURE [schema].[procname] -- Ignite=ShortObjCode VersionCode
** Place a comment like this within or directly after each statement: -- Ignite=ShortObjCode ShortStmtCode
** The hash for the statement will then be named "ShortObjCode ShortStmtCode VersionCode" as long as
** Ignite stored the entire object script (or batch), which it might not if it was very long.
** But it will work without such comments, by using the first 18 chars of the Object name.
** YYMMDD and the last 4 digits of the hash are used, if necessary, to enforce uniqueness.
** The last result set will be the SQL Text that still could not be named (not in an object, and no such comment).
** If you are unable to inject such comments into the SQL, you may be able to see a pattern in the SQL
** which would enable you to synthesize an 18-char "object name" yourself. Search for "synthesize" below to see where.
** The last column of both result sets is a link to the Historical Chart for that SQL, so you can check its usage.
** (That link is built assuming Ignite is running on localhost, but you can just replace localhost below.)
** Free to use, developed by tomkellerconsulting.com 14 Sep 2012.
** Submitted to Confio Support 13 March 2013. Submitted to sqlservercentral.com 25 Jan 2014.
** Updated 28 Feb 2014 to use LEFT(NEWID(), 4) -- random
** instead of RIGHT(CONVERT(VARCHAR(20), TS.H), 4) -- last 4 digits of the hash
** to avoid duplicates in large sets of un-named hashes. Also added "double-check not duplicate" to final insert.
** Also set @DOIT = 0 by default so no changes will be made unless requested, and select Note if no names inserted.
** Updated 17 Feb 2015 to handle 0 SS and -1 SE (specified statement start and end, replace with 1 and sql text length).
** Also mention dpa_repository as altenative to ignite_repository since Confio Ignite is now SolarWinds DPA.
** Also specify all column names in selects, so that STMT_Specified_Statement and ST_SQL_Text can be XML links.
*/DECLARE @dbid VARCHAR(3), @dbname NVARCHAR(100), @tbl_name SYSNAME
, @sch SYSNAME, @SQL NVARCHAR(4000), @IgniteHost VARCHAR(100)
, @P INT, @C VARCHAR(30), @L INT, @DOIT INT -- in case you are nervous about executing this script,
SELECT @C = '-- Ignite=', @L = LEN( @C), @DOIT = 0 -- set @DOIT to 0 to just see what names it would assign
, @IgniteHost = 'http://localhost:8123/iwc/sqlDisplay.iwc?' -- change localhost to where you access Ignite
/*create temp tables*/
IF OBJECT_ID('tempdb..#tbl_csr', 'U') IS NOT NULL
DROP TABLE #tbl_csr
CREATE TABLE #tbl_csr (id SMALLINT NOT NULL PRIMARY KEY, name NVARCHAR(100) NOT NULL)
IF OBJECT_ID('tempdb..#tmpSNsql', 'U') IS NOT NULL
DROP TABLE #tmpSNsql
CREATE TABLE #tmpSNsql (I INT IDENTITY(1,1) NOT NULL PRIMARY KEY
, id SMALLINT NOT NULL, instance NVARCHAR(100) NOT NULL
, H BIGINT NOT NULL UNIQUE
, D DATETIME NOT NULL
, P INT NOT NULL -- section number
, SS INT NOT NULL -- start of specified statement
, SE INT NOT NULL -- end of specified statement (-1 for rest of SQL Text)
, STMTL INT NOT NULL -- length of specified statement
, STL INT NOT NULL -- SQL Text Length
, STMTP INT NULL -- position of @C comment in specified statement
, STP INT NULL -- position of @C comment in SQL Text, should be on object CREATE statement
, PNAME VARCHAR(30) NULL -- object that SQL Text should include CREATE statement of
, STMTC VARCHAR(30) NULL -- @C comment in specified statement
, STC VARCHAR(30) NULL -- @C comment in SQL Text
, NM VARCHAR(30) NULL -- limited in Confio Ignite GUI, although ignite.CON_SQL_NAME.NAME allows 100
, STMT VARCHAR(3500) NULL -- specified statement
, ST VARCHAR(MAX) NOT NULL -- SQL Text
)
/*I am going to verify the schema that owns the Ignite objects,
so that we make certain to qualify our object names*/
SET NOCOUNT ON
SELECT @sch = SCHEMA_NAME(schema_id)
FROM sys.objects
WHERE name = 'COND'
IF ( @@ROWCOUNT <> 1 ) BEGIN
SET NOCOUNT OFF
PRINT 'Please run this script in ignite_repository or dpa_repository database, where COND table is.'
END ELSE BEGIN
/*OK, insert the contents of COND into a table variable, and use that for outer cursor*/
SET @SQL = 'INSERT INTO #tbl_csr SELECT id, name FROM ' + @sch + '.COND'
EXEC sp_executesql @SQL
-- SELECT * FROM #tbl_csr
/*build cursor to loop through registered dbs*/
DECLARE out_cursor CURSOR FOR
SELECT id, name
FROM #tbl_csr
ORDER BY id
OPEN out_cursor
FETCH NEXT FROM out_cursor INTO @dbid, @dbname
WHILE @@FETCH_STATUS = 0
BEGIN
/*ok, we want to name the SQL for each registered db*/
SET @P = 0 -- get just first section of multi-segment SQL that was split
PRINT 'SQL executed to gather hashes without names per instance:'
SET @SQL = 'INSERT INTO #tmpSNsql (id, instance, H, D, P, SS, SE, STMTL, STL, PNAME, ST)
SELECT ' + CONVERT(VARCHAR(5), @dbid) + ' AS id, ' + QUOTENAME(@dbname, '''') + ' AS instance
, ST.H, ST.D, ST.P
, ISNULL(NULLIF( ST.SS, 0), 1) AS SS
, ISNULL(NULLIF( ST.SE, -1), LEN(ST.ST)) AS SE
, ISNULL(NULLIF( ST.SE, -1), LEN(ST.ST)) -ISNULL(NULLIF( ST.SS, 0), 1) +1 AS STMTL -- SE less SS
, LEN(ST.ST) AS STL
, LEFT(RIGHT( ST.PNAME, CHARINDEX(''.'', REVERSE( ST.PNAME) + ''.'') -1), 30) AS PNAME -- just name, no schema
, CAST(ST.ST AS VARCHAR(MAX)) AS ST
FROM [' + @sch + '].[CONST_' + @dbid + '] ST
LEFT OUTER JOIN [' + @sch + '].[CON_SQL_NAME] SN ON SN.[HASH] = ST.H
LEFT OUTER JOIN #tmpSNsql TS ON TS.H = ST.H
WHERE ST.P = ' + CONVERT(VARCHAR(10), @P) + ' -- just first section
AND (ST.PNAME IS NOT NULL OR ST.ST IS NOT NULL) -- have object name or SQL Text
AND SN.[HASH] IS NULL -- not yet named
AND TS.H IS NULL -- not already collected (same hash can be on diff instance if db was restored across)
ORDER BY D, H
'
PRINT (@SQL)
EXEC sp_executesql @SQL
WHILE ( @@ROWCOUNT > 0 )
BEGIN -- append following sections of SQL that was split
SET @P = @P +1 -- next section
PRINT 'SQL executed to append following sections of SQL that was split, per instance, per section:'
SET @SQL = 'UPDATE TS
SET TS.STL = TS.STL + LEN(ST.ST)
, TS.ST = TS.ST + ST.ST
, TS.P = ST.P
FROM #tmpSNsql TS JOIN [' + @sch + '].[CONST_' + @dbid + '] ST ON ST.H = TS.H
AND ST.P = ' + CONVERT(VARCHAR(10), @P) + ' -- section number
'
PRINT (@SQL)
EXEC sp_executesql @SQL
END
FETCH NEXT FROM out_cursor INTO @dbid, @dbname
END
CLOSE out_cursor;
DEALLOCATE out_cursor;
/* get specified statement */
UPDATE #tmpSNsql
SET STMT = LEFT(SUBSTRING( ST, SS, STMTL), 3500)
WHERE STMTL > 0 AND SE <= STL;
/* get position of @C comment in SQL Text, should be on object CREATE statement */
WITH A AS
(
SELECT H, PATINDEX('%' + @C + '%', ST) AS STP
FROM #tmpSNsql
)
UPDATE TS
SET TS.STP = A.STP
FROM A JOIN #tmpSNsql TS ON TS.H = A.H
WHERE A.STP > 0;
/* get position of @C comment in specified statement */ WITH A AS
(
SELECT H, PATINDEX('%' + @C + '%', STMT) AS STMTP
FROM #tmpSNsql
)
UPDATE TS
SET TS.STMTP = A.STMTP
FROM A JOIN #tmpSNsql TS ON TS.H = A.H
WHERE A.STMTP > 0
/*get comment in SQL*/
UPDATE #tmpSNsql
SET STC = LEFT(SUBSTRING( ST, STP +@L, CHARINDEX(CHAR(13), ST +CHAR(13), STP + @L) -STP -@L), 30)
WHERE STP IS NOT NULL
/*get comment in STMT*/
UPDATE #tmpSNsql
SET STMTC = LEFT(SUBSTRING( STMT, STMTP +@L, CHARINDEX(CHAR(13), STMT +CHAR(13), STMTP + @L) -STMTP -@L), 30)
WHERE STMTP IS NOT NULL;
/*combine comments*/
WITH A AS
(
SELECT H, STC, STMTC, CHARINDEX(' ', STMTC) AS STMTSP, CHARINDEX(' ', STC) AS STSP -- space positions
FROM #tmpSNsql
WHERE STMTC IS NOT NULL AND STC IS NOT NULL
),
B AS
(
SELECT H, STC, STMTC, STSP, LEFT( STC, STSP -1) AS STP, LEFT( STMTC, STMTSP -1) AS STMTP -- prefixes
FROM A
WHERE A.STMTSP = A.STSP
),
C AS
(
SELECT H, STP, RTRIM(RIGHT( STC, LEN( STC) -STSP)) AS STS, RTRIM(RIGHT( STMTC, LEN( STMTC) -STSP)) AS STMTS -- suffixes
FROM B
WHERE STMTP = STP -- compare prefixes, which should both be shorthand identifier for same object
)
UPDATE TS
SET TS.NM = C.STP + ' ' + LEFT( C.STMTS, 30 - LEN(C.STP) - LEN(C.STS) -2) + ' ' + C.STS
FROM C JOIN #tmpSNsql TS ON TS.H = C.H
WHERE C.STS LIKE 'v[0-9]%'
/* synthesize an 18-char "object name" yourself if necessary */ UPDATE TS
SET PNAME = LEFT('IgniteRestart', 18)
FROM #tmpSNsql TS
WHERE TS.NM IS NULL AND TS.PNAME IS NULL
AND TS.ST LIKE '-- Isolate top waits for server instance since last restart or statistics clear%'
/* add more like this as necessary:
UPDATE TS
SET PNAME = LEFT((constant or function of TS.ST), 18)
FROM #tmpSNsql TS
WHERE TS.NM IS NULL AND TS.PNAME IS NULL
AND (some pattern is detected in TS.ST)
*/
PRINT 'SQL executed once, to add suffix where necessary for uniqueness:'
SET @SQL = 'UPDATE TS SET -- SELECT TS.H, TS.NM, TS.STMTC, TS.STC, TS.PNAME,
NM = LEFT(COALESCE( TS.NM, TS.STMTC, TS.STC, TS.PNAME), 30 -6-1 -4-1)
+ '' '' + RIGHT(CONVERT(CHAR(8), D, 112), 6) -- YYMMDD
+ '' '' + LEFT(NEWID(), 4) -- random
FROM #tmpSNsql TS LEFT OUTER JOIN [' + @sch + '].[CON_SQL_NAME] SN ON SN.NAME = TS.NM
LEFT OUTER JOIN
(SELECT NM, COUNT(*) AS Dups
FROM #tmpSNsql
GROUP BY NM
HAVING COUNT(*) > 1
) D ON D.NM = TS.NM
WHERE (TS.NM IS NULL -- no name calculated yet
OR SN.NAME IS NOT NULL -- name already used
OR D.NM IS NOT NULL -- duplicate in current set
) AND (TS.NM IS NOT NULL OR TS.PNAME IS NOT NULL) -- name calculated, or have object name
'
PRINT (@SQL)
EXEC sp_executesql @SQL
SELECT 'Un-named SQLs with NM to be assigned' AS [First Set IS]
SET NOCOUNT OFF -- show the count of names about to set
SELECT TS.I, TS.id, TS.instance, TS.H, TS.D, TS.P, TS.SS, TS.SE, TS.STMTL, TS.STL, TS.STMTP, TS.STP, TS.PNAME, TS.STMTC, TS.STC, TS.NM
, CONVERT(XML, N'<?query --' + NCHAR(13) + NCHAR(10) + TS.STMT + NCHAR(13) + NCHAR(10) + N'--?>') AS STMT_Specified_Statement
, CONVERT(XML, N'<?query --' + NCHAR(13) + NCHAR(10) + TS.ST + NCHAR(13) + NCHAR(10) + N'--?>') AS ST_SQL_Text
, @IgniteHost + 'db_id=' + CONVERT(VARCHAR(5), TS.id) + '&sql=' + CONVERT(VARCHAR(20), TS.H) AS Historical_Chart
FROM #tmpSNsql TS
WHERE TS.NM IS NOT NULL -- name calculated (maybe just object name)
ORDER BY TS.I
/* actually do the insert of new names */ SET @SQL = 'INSERT INTO [' + @sch + '].[CON_SQL_NAME] ([HASH], NAME)
SELECT TS.H, TS.NM
FROM #tmpSNsql TS
LEFT OUTER JOIN [' + @sch + '].[CON_SQL_NAME] SN1 ON SN1.NAME = TS.NM
LEFT OUTER JOIN [' + @sch + '].[CON_SQL_NAME] SN2 ON SN2.[HASH] = TS.H
LEFT OUTER JOIN
(SELECT NM, COUNT(*) AS Dups
FROM #tmpSNsql
GROUP BY NM
HAVING COUNT(*) > 1
) D ON D.NM = TS.NM
WHERE SN1.NAME IS NULL -- double-check name not already used
AND SN2.[HASH] IS NULL -- double-check not already named
AND D.NM IS NULL -- double-check not duplicate
AND TS.NM IS NOT NULL -- name calculated (maybe just object name)
'
PRINT (@SQL)
IF ( @DOIT = 1 ) BEGIN
EXEC sp_executesql @SQL
IF ( @@ROWCOUNT = 0 )
SELECT 'No hash names actually inserted' AS [Note]
END ELSE
SELECT 'Did not actually execute the SQL to insert hash names (set @DOIT=1 to do so).' AS [Note]
SET NOCOUNT ON
SELECT 'Un-named SQLs which could not be named (maybe you can synthesize an 18-char "object name" yourself)' AS [Last Set IS]
SET NOCOUNT OFF -- show the count of names could not set
SELECT TS.I, TS.id, TS.instance, TS.H, TS.D, TS.P, TS.SS, TS.SE, TS.STMTL, TS.STL, TS.STMTP, TS.STP, TS.PNAME, TS.STMTC, TS.STC, TS.NM
, CONVERT(XML, N'<?query --' + NCHAR(13) + NCHAR(10) + TS.STMT + NCHAR(13) + NCHAR(10) + N'--?>') AS STMT_Specified_Statement
, CONVERT(XML, N'<?query --' + NCHAR(13) + NCHAR(10) + TS.ST + NCHAR(13) + NCHAR(10) + N'--?>') AS ST_SQL_Text
, @IgniteHost + 'db_id=' + CONVERT(VARCHAR(5), TS.id) + '&sql=' + CONVERT(VARCHAR(20), TS.H) AS Historical_Chart
FROM #tmpSNsql TS
WHERE TS.NM IS NULL -- name NOT calculated (no object name)
ORDER BY TS.ST, TS.I
END -- in ignite_repository or dpa_repository