Technical Article

Comparing Stored Procedures, Part 2

,

This picks up from an earlier article on trying to quantify the comparison of two stored procedures.

 

For a more complete explanation, please go to my blog entry here.

 

Here's a sample of that blog entry that summarizes what this script does:

"I devised an algorithm that would compare blocks of lines between them, starting with the largest possible sequence to compare (the size of this would be the number of lines of the smaller spd), and then compare smaller and smaller sequences until the comparison size was just one line (which is what was compared in Comparing Stored Procedures, Part 1). The trick to this approach is that if a match was found between to large sequences of lines, the subsequences in them would never be compared. Although this certainly helps with performance, the real reason behind this is that the fact that large blocks of code matched is very meaningful. Take this example: the string 'ABCD' matches 'ABDC' more than it matches 'DCBA'. How much "more" it matches is the subject of a later posting, where we establish our scoring algorithm."

 

Written by Jesse McLain

jesse@jessemclain.com

www.jessemclain.com

www.jessesql.blogspot.com

 

/*************************************************************************************************
**        File: Compare Sequences.sql
**        Desc: Second in a series of scripts demonstrating a quantitative comparison between the text
**                of two stored procedures
** 
**        Return values: report & results
** 
**        Called by: 
** 
**        Parameters:
**        Input
**        ----------
**        none
**
**        Output
**        -----------
**        none
**
**        Auth: Jesse mcLain
**        Email: jesse@jessemclain.com
**        Web: www.jessemclain.com
**        Blog: www.jessesql.blogspot.com
**        Date: 01/31/2008
**
***************************************************************************************************
**        Change History
***************************************************************************************************
**        Date:        Author:                Description:
**        --------    --------            -------------------------------------------
**        20080213    Jesse McLain        Created script
**************************************************************************************************/DECLARE @spd1 varchar(max)
SET @spd1 = 'Test Sequence "AZXYZBC"'
DECLARE @spd2 varchar(max)
SET @spd2 = 'Test Sequence "BCDAEAXYZ"'

SET NOCOUNT ON

CREATE TABLE #spd1 (
  CodeLineTxt varchar(max), /* stores the original line of code */  CodeLineNum int not null identity(1,1), /* stores the line number */  MatchLineNum int /* stores the matching line of code from spd #2 */)

INSERT INTO #spd1 (CodeLineTxt) VALUES ('A')
INSERT INTO #spd1 (CodeLineTxt) VALUES ('Z')
INSERT INTO #spd1 (CodeLineTxt) VALUES ('X')
INSERT INTO #spd1 (CodeLineTxt) VALUES ('Y')
INSERT INTO #spd1 (CodeLineTxt) VALUES ('Z')
INSERT INTO #spd1 (CodeLineTxt) VALUES ('B')
INSERT INTO #spd1 (CodeLineTxt) VALUES ('C')
UPDATE #spd1 SET MatchLineNum = 0


CREATE TABLE #spd2 (
  CodeLineTxt varchar(max),
  CodeLineNum int not null identity(1,1),
  MatchLineNum int
)

INSERT INTO #spd2 (CodeLineTxt) VALUES ('B')
INSERT INTO #spd2 (CodeLineTxt) VALUES ('C')
INSERT INTO #spd2 (CodeLineTxt) VALUES ('D')
INSERT INTO #spd2 (CodeLineTxt) VALUES ('A')
INSERT INTO #spd2 (CodeLineTxt) VALUES ('E')
INSERT INTO #spd2 (CodeLineTxt) VALUES ('A')
INSERT INTO #spd2 (CodeLineTxt) VALUES ('X')
INSERT INTO #spd2 (CodeLineTxt) VALUES ('Y')
INSERT INTO #spd2 (CodeLineTxt) VALUES ('Z')

UPDATE #spd2 SET MatchLineNum = 0


DECLARE @cnt1 int
SELECT @cnt1 = COUNT(*) FROM #spd1
DECLARE @cnt2 int
SELECT @cnt2 = COUNT(*) FROM #spd2
DECLARE @CmprSz int
SELECT @CmprSz = CASE WHEN @cnt1 < @cnt2 THEN @cnt1 ELSE @cnt2 END
DECLARE @idx1 int
DECLARE @idx2 int
DECLARE @idx2_last int
DECLARE @idx3 int
DECLARE @NumCmprs int


/* the idea here is to compare blocks of lines from one table to blocks from the other table. we start with the blocks of largest size
and continue decreasing size until we get to blocks of one line each. Whenever a block matches that of a block in the other table, all 
matching lines are flagged to indicate the match, so as to exclude those matches from subsequent comparisons. 
*/
WHILE @CmprSz > 0
BEGIN
  SET @NumCmprs = @cnt1 - @CmprSz + 1
  SET @idx1 = 1

  WHILE @idx1 <= @NumCmprs
  BEGIN
    /* ensure that subset is not yet matched: */    IF (SELECT COUNT(*) FROM #spd1 WHERE CodeLineNum BETWEEN @idx1 AND (@idx1 + @CmprSz - 1) AND MatchLineNum = 0) = @CmprSz
    BEGIN
      DECLARE @cdtxt1 varchar(max); SELECT @cdtxt1 = CodeLineTxt FROM #spd1 WHERE CodeLineNum = @idx1

      /* we need to find the first match in table 2. since there can be multiple rows in table 2 that match
      the head of the subset from table 1, we need to check for multiples: */      SET @idx2_last = 0
      SET @idx2 = 0

      DECLARE @AllMatch tinyint; SET @AllMatch = 0
  
      WHILE @AllMatch = 0 AND @idx2 IS NOT NULL
      BEGIN
        SELECT @idx2 = MIN(CodeLineNum) 
          FROM #spd2 
          WHERE CodeLineTxt = @cdtxt1
          AND CodeLineNum > @idx2_last
          AND CodeLineNum <= (@cnt2 - @CmprSz + 1) /* ensures that table 2 subset is big enough */          AND MatchLineNum = 0

        IF @idx2 IS NOT NULL
        BEGIN
          SET @idx3 = 1
 
          /* now check all matches. the easiest way to implement this checking is to first 
          assume that all lines match, and then record that fact that at least one does not. */          SET @AllMatch = 1
          WHILE @idx3 <= @CmprSz AND @AllMatch = 1
          BEGIN
            IF NOT EXISTS (SELECT 1 
              FROM #spd1 T1 
              JOIN #spd2 T2 
              ON T1.CodeLineTxt = T2.CodeLineTxt 
              AND T1.CodeLineNum = (@idx1 + @idx3 - 1)
              AND T2.CodeLineNum = (@idx2 + @idx3 - 1))

              SET @AllMatch = 0
 
            SET @idx3 = @idx3 + 1
          END

          IF @AllMatch = 1
          BEGIN
            UPDATE #spd1 SET MatchLineNum = @idx2
              WHERE CodeLineNum BETWEEN @idx1 AND (@idx1 + @CmprSz - 1)

            UPDATE #spd2 SET MatchLineNum = @idx1
              WHERE CodeLineNum BETWEEN @idx2 AND (@idx2 + @CmprSz - 1)
          END
        END

        SET @idx2_last = @idx2
      END /* WHILE @AllMatch = 1 AND @idx2 IS NOT NULL */    END /* IF (SELECT COUNT(*) FROM #spd1 WHERE CodeLineNum BETWEEN ... */
    SET @idx1 = @idx1 + 1
  END /* WHILE @idx1 <= @NumCmprs */
  SET @CmprSz = @CmprSz - 1
END /* WHILE @CmprSz > 0 */

PRINT 'spd1: ' + @spd1
PRINT 'spd2: ' + @spd2
PRINT ''

SELECT CodeLineTxt = CONVERT(varchar(10), CodeLineTxt), CodeLineNum, MatchLineNum FROM #spd1
SELECT CodeLineTxt = CONVERT(varchar(10), CodeLineTxt), CodeLineNum, MatchLineNum FROM #spd2


DECLARE @cnt1a int; SELECT @cnt1a = COUNT(*) FROM #spd1 T1 WHERE MatchLineNum <> 0
DECLARE @cnt2a int; SELECT @cnt2a = COUNT(*) FROM #spd2 T1 WHERE MatchLineNum <> 0

PRINT ''
PRINT 'Percentage match between the spds: ' + LTRIM(STR(100.0 * (@cnt1a / (1.0 * @cnt1) + @cnt2a / (1.0 * @cnt2)) / 2, 10, 2)) + '%'

PRINT 'Percentage of spd1 found in spd2: ' + LTRIM(STR(100.0 * @cnt1a / (1.0 * @cnt1), 10, 2)) + '%'

PRINT 'Percentage of spd2 found in spd1: ' + LTRIM(STR(100.0 * @cnt2a / (1.0 * @cnt2), 10, 2)) + '%'
PRINT ''
PRINT ''


DROP TABLE #spd1
DROP TABLE #spd2

SET NOCOUNT OFF

Rate

You rated this post out of 5. Change rating

Share

Share

Rate

You rated this post out of 5. Change rating