Technical Article

Compare Parallel Data Streams

,

This a script to compare two parallel data streams, in my case production and QA.  Two extracts were created and a mapping table was created as a look-up between old and new recordids.  A final table TestResults was created to hold the results of the comparison.  The comparison table names and join syntax are controlled by variables. 
The script uses syscolumns to find the common column names and compares each row in the two tables, column by column and inserts any differences in to the results table.
The comparisons support nulls and casts any column with the word 'date' in the name as a datetime before conmparing.

declare @icID int,
@iOldIDint,
@vcTable1varchar(255),
@vcTable2varchar(255),
@vcColumn varchar(255),
@vcAuditColumnvarchar(255),
@vcJoin1varchar(255),
@vcJoin2varchar(255),
@vcQuery  varchar(2000)


-- table variable to hold the list of common column names
declare @tblCommonColumns  table (
cID int identity, 
ColumnNamevarchar(255) )

Select @vcTable1 = 'ProdRaw',--> OriginalData
@vcTable2= 'QARaw',--> New Data
@vcJoin1 = ' Join dbo.TestMapping tm on t1.RecordID = tm.ProdRecordID and tm.IsProcessed = 0 ', --> added check for processed
@vcJoin2 = ' t2.RecordID = tm.QARecordID',
@iOldID= 0

Insert  @tblCommonColumns
(ColumnName)
-- get the list of all common column names
Select t.name
From-- First derived table returns all the column names for table 1
(Select sc.name
From sysobjects so
joinsyscolumns sc (nolock) on so.id = sc.id
whereso.name = @vcTable1 ) t
Join -- Second derived table returns all the column names for table 2
(Select sc.name
From sysobjects so
joinsyscolumns sc (nolock) on so.id = sc.id
whereso.name = @vcTable2) t2
on t.name = t2.name  --> By joining them we get a list of common columns

-- Remove the list of Audit Columns that will never be the same, and other columns that we dont really care about comparing
Delete @tblCommonColumns
Where ColumnName in ('RecordID', 'FileID', 'SourceFileName')


While  1 = 1-- Do

Begin

-- reset the column Name
Set @vcColumn = null

-- Get the first Column to work with
Select top 1   @icID    = cID,
@vcColumn   = ColumnName
From @tblCommonColumns
Where cID > @iOldID
order by cID

-- check the column name
If @vcColumn is null
Break

Set @vcQuery = 

'Insert into tempdb.dbo.TestResults ' + 
'(OldRecordID, NewRecordID, OldValue, NewValue, ColumnName) ' +
'Select  t1.RecordID, t2.RecordID, t1.' + @vcColumn   + ', t2.' + @vcColumn + ', ''' + @vcColumn  + ''''+
 ' From '  + @vcTable1    + ' t1  ' +
 @vcJoin1 +
 ' Join '  + @vcTable2    + ' t2 on ' + @vcJoin2 +
' and Isnull(' + 
Case
When CharIndex('Date', @vcColumn) > 0 Then 'Cast(t1.' + @vcColumn + ' as Datetime)'
Else 't1.' + @vcColumn
End + ', '''') <> IsNull(' + 
Case
When CharIndex('Date', @vcColumn) > 0 Then 'Cast(t2.' + @vcColumn + ' as Datetime)'
Else 't2.' + @vcColumn
End + ', '''')'
 
--Print @vcQuery
Exec (@vcQuery)


-- set the next seed
Set @iOldID = @icID

END

Rate

You rated this post out of 5. Change rating

Share

Share

Rate

You rated this post out of 5. Change rating