djphatic (6/25/2012)
For testing purposes I created a spreadsheet with 3 columns, Column A, B and C each with the values A B C respectively in ~60,000 rows and used this as my data source.Using the script below:
' Microsoft SQL Server Integration Services Script Component
' Write scripts using Microsoft Visual Basic 2008.
' ScriptMain is the entry point class of the script.
Imports System
Imports System.Data
Imports System.Math
Imports Microsoft.SqlServer.Dts.Pipeline.Wrapper
Imports Microsoft.SqlServer.Dts.Runtime.Wrapper
Imports Microsoft.SqlServer.Dts.Pipeline
Imports System.Text
Imports System.Security.Cryptography
<Microsoft.SqlServer.Dts.Pipeline.SSISScriptComponentEntryPointAttribute()> _
<CLSCompliant(False)> _
Public Class ScriptMain
Inherits UserComponent
Private InputBuffer As PipelineBuffer
Public Overrides Sub ProcessInput(ByVal InputID As Integer, ByVal Buffer As PipelineBuffer)
InputBuffer = Buffer
MyBase.ProcessInput(InputID, Buffer)
End Sub
Public Overrides Sub PreExecute()
MyBase.PreExecute()
'
' Add your code here for preprocessing or remove if not needed
''
End Sub
Public Overrides Sub PostExecute()
MyBase.PostExecute()
'
' Add your code here for postprocessing or remove if not needed
' You can set read/write variables here, for example:
' Me.Variables.MyIntVar = 100
''
End Sub
Public Overrides Sub InputSource_ProcessInputRow(ByVal Row As InputSourceBuffer)
Dim counter As Integer = 0
Dim values As New StringBuilder
For counter = 0 To inputBuffer.ColumnCount - 1
Dim value As Object
value = inputBuffer.Item(counter)
values.Append(value)
Next
Row.Concat = values.ToString
Row.HashValue = CreateHash(values.ToString())
End Sub
Public Shared Function CreateHash(ByVal data As String) As String
Dim dataToHash As Byte() = (New UnicodeEncoding()).GetBytes(data)
Dim md5 As MD5 = New MD5CryptoServiceProvider()
Dim hashedData As Byte() = md5.ComputeHash(dataToHash)
RNGCryptoServiceProvider.Create().GetBytes(dataToHash)
Dim s As String = Convert.ToBase64String(hashedData, Base64FormattingOptions.None)
Return s
End Function
End Class
I get the following results:
RowRow 1 onwards
Column AA
Column BB
Column CC
HashValuecWd0tTqSuYUlu6F5oVnMcQ==
ConcatABC
RowRow 11167 onwards
Column AA
Column BB
Column CC
HashValueEP2rhBgzqDEc0qmEjWlQBA==
ConcatABCcWd0tTqSuYUlu6F5oVnMcQ==ABC
RowRow 24194 onwards
Column AA
Column BB
Column CC
HashValuedLHcMhE2RJZ6ew7Jd8oezQ==
ConcatABCEP2rhBgzqDEc0qmEjWlQBA==ABCcWd0tTqSuYUlu6F5oVnMcQ==ABC
RowRow 37221 onwards
Column AA
Column BB
Column CC
HashValue93k+mMDI1x5OoZ0cjtz7Hw==
ConcatABCdLHcMhE2RJZ6ew7Jd8oezQ==ABCEP2rhBgzqDEc0qmEjWlQBA==ABCcWd0tTqSuYUlu6F5oVnMcQ==ABC
RowRow 50248 onwards
Column AA
Column BB
Column CC
HashValueRHGVCjMX9usJb160IvP6RQ==
ConcatABC93k+mMDI1x5OoZ0cjtz7Hw==ABCdLHcMhE2RJZ6ew7Jd8oezQ==ABCEP2rhBgzqDEc0qmEjWlQBA==ABCcWd0tTqSuYUlu6F5oVnMcQ==ABC
RowRow 58881 onwards
Column AA
Column BB
Column CC
HashValuecWd0tTqSuYUlu6F5oVnMcQ==
ConcatABC
Looking at the concat results the previous buffers HashValue is getting include in the InputSource for some reason. When I add the line Row.ColumnCount = InputBuffer.ColumnCount the value is 5 for each row even though I've only selected 3 columns as part of InputSource.
Hello, I tried to replicate your example. I created dbo.testMD5_Source and testMD5_Target tables, loaded 60000 records in source with same values A,B,C using below code:
CREATE TABLE dbo.testMD5_Source(ID INT IDENTITY(1,1), A VARCHAR(1),B VARCHAR(1),C VARCHAR(1))
CREATE TABLE dbo.testMD5_Target(ID INT, A VARCHAR(1),B VARCHAR(1),C VARCHAR(1),CheckSum NVARCHAR(35));
DECLARE @Counter INT=1
WHILE @Counter<=6000
BEGIN
INSERT INTO dbo.testMD5_Source(A,B,C) VALUES('A','B','C');
INSERT INTO dbo.testMD5_Source(A,B,C) VALUES('A','B','C');
INSERT INTO dbo.testMD5_Source(A,B,C) VALUES('A','B','C');
INSERT INTO dbo.testMD5_Source(A,B,C) VALUES('A','B','C');
INSERT INTO dbo.testMD5_Source(A,B,C) VALUES('A','B','C');
INSERT INTO dbo.testMD5_Source(A,B,C) VALUES('A','B','C');
INSERT INTO dbo.testMD5_Source(A,B,C) VALUES('A','B','C');
INSERT INTO dbo.testMD5_Source(A,B,C) VALUES('A','B','C');
INSERT INTO dbo.testMD5_Source(A,B,C) VALUES('A','B','C');
INSERT INTO dbo.testMD5_Source(A,B,C) VALUES('A','B','C');
SELECT @Counter=@Counter+1;
END
SELECT COUNT(1) FROM dbo.testMD5_Source --60,000
--after 1st execution of package
SELECT COUNT(1) FROM dbo.testMD5_Target --60,000
SELECT DISTINCT A,B,C,CHECKSUM FROM dbo.testMD5_Target --A B C pLWCYR7/9KIyBsc7004Ftg==
--after 2nd execution of package
SELECT COUNT(1) FROM dbo.testMD5_Target --1,20,000
SELECT DISTINCT A,B,C,CHECKSUM FROM dbo.testMD5_Target --A B C pLWCYR7/9KIyBsc7004Ftg==
--after 3rd execution of package
SELECT COUNT(1) FROM dbo.testMD5_Target --1,80,000
SELECT DISTINCT A,B,C,CHECKSUM FROM dbo.testMD5_Target --A B C pLWCYR7/9KIyBsc7004Ftg==
I didn't observe any issues. Same checksum was calculated for all records. I tested this by executing the package thrice.
I have made below changes to the code to compute md5 checksum:
1) Appended delimiter after each column. This is avoid computing same hash for (A NULL NULL), (NULL A NULL) and (NULL NULL A).
2) Converted string to upper case before computing checksum. This is to avoid computing different hash for (A B C) and (a b c)
Below is the complete code to compute md5 checksum:
#Region "Help: Introduction to the Script Component"
' The Script Component allows you to perform virtually any operation that can be accomplished in
' a .Net application within the context of an Integration Services data flow.
' Expand the other regions which have "Help" prefixes for examples of specific ways to use
' Integration Services features within this script component.
#End Region
#Region "Imports"
Imports System
Imports System.Data
Imports System.Math
Imports Microsoft.SqlServer.Dts.Pipeline.Wrapper
Imports Microsoft.SqlServer.Dts.Runtime.Wrapper
Imports Microsoft.SqlServer.Dts.Pipeline
Imports System.Text
Imports System.Security.Cryptography
#End Region
' This is the class to which to add your code. Do not change the name, attributes, or parent
' of this class.
<Microsoft.SqlServer.Dts.Pipeline.SSISScriptComponentEntryPointAttribute> _
<CLSCompliant(False)> _
Public Class ScriptMain
Inherits UserComponent
#Region "Help: Using Integration Services variables and parameters"
'To use a variable in this script, first ensure that the variable has been added to
'either the list contained in the ReadOnlyVariables property or the list contained in
'the ReadWriteVariables property of this script component, according to whether or not your
'code needs to write into the variable. To do so, save this script, close this instance of
'Visual Studio, and update the ReadOnlyVariables and ReadWriteVariables properties.
'To use a parameter in this script, follow the same steps. Parameters are always read-only.
'Example of reading from a variable or parameter:
' startTime = Variables.MyStartTime;
'Example of writing to a variable:
' Variables.myStringVariable = "new value";
#End Region
#Region "Help: Using Integration Services Connnection Managers"
'Some types of connection managers can be used in this script component. See the help topic
'"Working with Connection Managers Programatically" for details.
'To use a connection manager in this script, first ensure that the connection manager has
'been added to either the list of connection managers on the Connection Managers page of the
'script component editor. To add the connection manager, save this script, close this instance of
' Visual Studio, and add the Connection Manager to the list.
'If the component needs to hold a connection open while processing rows, override the
'AcquireConnections and ReleaseConnections methods.
'Example of using an ADO.Net connection manager to acquire a SqlConnection:
' Dim rawConnection As Object = Connections.SalesDB.AcquireConnection(Transaction)
' Dim salesDBConn As SqlConnection = CType(rawConnection, SqlConnection)
'Example of using a File connection manager to acquire a file path:
' Dim rawConnection As Object = Connections.Prices_zip.AcquireConnection(Transaction)
' Dim filePath As String = CType(rawConnection, String)
'Example of releasing a connection manager:
' Connections.SalesDB.ReleaseConnection(rawConnection)
#End Region
#Region "Help: Firing Integration Services Events"
'This script component can fire events.
'Example of firing an error event:
' ComponentMetaData.FireError(10, "Process Values", "Bad value", "", 0, cancel)
'Example of firing an information event:
' ComponentMetaData.FireInformation(10, "Process Values", "Processing has started", "", 0, fireAgain)
'Example of firing a warning event:
' ComponentMetaData.FireWarning(10, "Process Values", "No rows were received", "", 0)
#End Region
Private inputBuffer As PipelineBuffer
Public Overrides Sub ProcessInput(ByVal InputID As Integer, ByVal Buffer As Microsoft.SqlServer.Dts.Pipeline.PipelineBuffer)
inputBuffer = Buffer
MyBase.ProcessInput(InputID, Buffer)
End Sub
Public Shared Function CreateHash(ByVal data As String) As String
Dim dataToHash As Byte() = (New UnicodeEncoding()).GetBytes(data)
Dim md5 As MD5 = New MD5CryptoServiceProvider()
Dim hashedData As Byte() = md5.ComputeHash(dataToHash)
RNGCryptoServiceProvider.Create().GetBytes(dataToHash)
Dim s As String = Convert.ToBase64String(hashedData, Base64FormattingOptions.None)
Return s
End Function
'This method is called once, before rows begin to be processed in the data flow.
'
'You can remove this method if you don't need to do anything here.
Public Overrides Sub PreExecute()
MyBase.PreExecute()
'
' Add your code here
'
End Sub
' This method is called after all the rows have passed through this component.
'
' You can delete this method if you don't need to do anything here.
Public Overrides Sub PostExecute()
MyBase.PostExecute()
'
' Add your code here
'
End Sub
'This method is called once for every row that passes through the component from InputSource.
'
'Example of reading a value from a column in the the row:
' zipCode = Row.ZipCode
'
'Example of writing a value to a column in the row:
' Row.ZipCode = zipCode
Public Overrides Sub InputSource_ProcessInputRow(ByVal Row As InputSourceBuffer)
Dim counter As Integer = 0
Dim values As New StringBuilder
For counter = 0 To inputBuffer.ColumnCount - 1
Dim value As Object
value = inputBuffer.Item(counter)
values.Append(value)
values.Append("Þ")
Next
Row.HashValue = CreateHash(values.ToString().ToUpper())
End Sub
End Class
Thanks,
Swapnil