Comparing Stored Procedures, Part 3

,

This picks up from an earlier article on trying to quantify the comparison of two stored procedures. This script creates a stored procedure which will make performace testing easier to evaluate.

For a more complete explanation, please go to my blog entry here, which provides a mathematical analysis of the complexity of the matching algorithm.

 

Written by Jesse McLain

jesse@jessemclain.com

www.jessemclain.com

www.jessesql.blogspot.com

 

/*************************************************************************************************
** Proc: "spd_SequenceCompare"
** Desc: This stored procedure will compare the items in two sequences. The sequences should
** be stored in two tables, Seq1 and Seq2, which should have the following structure:
** 
** CREATE TABLE Seq1 (
** CodeLineTxt varchar(max), /* stores the original line of code */** CodeLineNum int not null identity(1,1), /* stores the line number */** MatchLineNum int /* stores the matching line of code from spd #2 */** )
** 
** This spd will compare smaller and smaller subsequences in the tables, and when it
** finds a match, it will note it in the column MatchLineNum. 
** 
** 
** 
** Return values: none
** 
** Called by: 
** 
** Parameters:
** Input
** ----------
** none
**
** Output
** -----------
** none
**
** Auth: Jesse McLain
** Email: jesse@jessemclain.com
** Web: www.jessemclain.com
** Blog: http://jessesql.blogspot.com/2009/02/comparing-spds-part-3-performance.html
**
** Date: 02/16/2008
**
***************************************************************************************************
** Change History
***************************************************************************************************
** Date: Author: Description:
** -------- -------- -------------------------------------------
** 20080216 Jesse McLain Created script
**************************************************************************************************/
CREATE PROCEDURE spd_SequenceCompare AS

SET NOCOUNT ON

DECLARE @CmprSz int
DECLARE @idx1 int
DECLARE @idx2 int
DECLARE @idx2_last int
DECLARE @idx3 int
DECLARE @NumCmprs int
DECLARE @cdtxt1 varchar(max); 
DECLARE @AllMatch tinyint; 
DECLARE @Seq1Size int; SELECT @Seq1Size = COUNT(*) FROM Seq1
DECLARE @Seq2Size int; SELECT @Seq2Size = COUNT(*) FROM Seq1

SELECT @CmprSz = CASE WHEN @Seq1Size < @Seq2Size THEN @Seq1Size ELSE @Seq2Size END

/* the idea here is to compare blocks of lines from one table to blocks 
 from the other table. we start with the blocks of largest size
 and continue decreasing size until we get to blocks of one line each. 
 Whenever a block matches that of a block in the other table, all 
 matching lines are flagged to indicate the match, so as to exclude 
 those matches from subsequent comparisons. 
*/
WHILE @CmprSz > 0
BEGIN
 SET @NumCmprs = @Seq1Size - @CmprSz + 1
 SET @idx1 = 1

 WHILE @idx1 <= @NumCmprs
 BEGIN
 /* ensure that subset is not yet matched: */ IF (SELECT COUNT(*) FROM Seq1 WHERE CodeLineNum BETWEEN @idx1 AND (@idx1 + @CmprSz - 1) AND MatchLineNum = 0) = @CmprSz
 BEGIN
 SELECT @cdtxt1 = CodeLineTxt FROM Seq1 WHERE CodeLineNum = @idx1

 /* we need to find the first match in table 2. since there can be multiple rows in table 2 that match
 the head of the subset from table 1, we need to check for multiples: */ SET @idx2_last = 0
 SET @idx2 = 0

 SET @AllMatch = 0
 
 WHILE @AllMatch = 0 AND @idx2 IS NOT NULL
 BEGIN
 SELECT @idx2 = MIN(CodeLineNum) 
 FROM Seq2 
 WHERE CodeLineTxt = @cdtxt1
 AND CodeLineNum > @idx2_last
 AND CodeLineNum <= (@Seq2Size - @CmprSz + 1) /* ensures that table 2 subset is big enough */ AND MatchLineNum = 0

 IF @idx2 IS NOT NULL
 BEGIN
 SET @idx3 = 1

 /* now check all matches. the easiest way to implement this checking is to first 
 assume that all lines match, and then record that fact that at least one does not. */ SET @AllMatch = 1
 WHILE @idx3 <= @CmprSz AND @AllMatch = 1
 BEGIN
 IF NOT EXISTS (SELECT 1 
 FROM Seq1 T1 
 JOIN Seq2 T2 
 ON T1.CodeLineTxt = T2.CodeLineTxt 
 AND T1.CodeLineNum = (@idx1 + @idx3 - 1)
 AND T2.CodeLineNum = (@idx2 + @idx3 - 1))

 SET @AllMatch = 0

 SET @idx3 = @idx3 + 1
 END

 IF @AllMatch = 1
 BEGIN
 UPDATE Seq1 SET MatchLineNum = @idx2
 WHERE CodeLineNum BETWEEN @idx1 AND (@idx1 + @CmprSz - 1)

 UPDATE Seq2 SET MatchLineNum = @idx1
 WHERE CodeLineNum BETWEEN @idx2 AND (@idx2 + @CmprSz - 1)
 END
 END

 SET @idx2_last = @idx2
 END /* WHILE @AllMatch = 1 AND @idx2 IS NOT NULL */ END /* IF (SELECT COUNT(*) FROM Seq1 WHERE CodeLineNum BETWEEN ... */
 SET @idx1 = @idx1 + 1
 END /* WHILE @idx1 <= @NumCmprs */
 SET @CmprSz = @CmprSz - 1
END /* WHILE @CmprSz > 0 */
RETURN

Rate

Share

Share

Rate