Blog Post

Data Warehousing Tip: Using T-SQL vs SSIS for Slowly Changing Dimension (SCD) Types 1 and 2

,

Here’s an example of using T-SQL to process a Slowly Changing Dimension (SCD) that has both Type 1 and Type 2 attributes. Note: SSIS has an SCD transform; however, it does not process large dimensional datasets very quickly.
…eh, I’m putting it too nicely: the SSIS SCD transform is painfully slow! This T-SQL runs 20 times faster than the SSIS SCD transform.
-- Deployment preamble for StoredProcedure [dbo].[sp_UpsertDimGuest]
-- (originally scripted 9/11/2013 6:09:04 PM).
-- Select the database in which the procedure will be created.
USE [SomeDB];
GO
-- Both options are switched ON in their own batch, ahead of the
-- CREATE PROCEDURE batch that follows.
SET ANSI_NULLS, QUOTED_IDENTIFIER ON;
GO
CREATE PROCEDURE [dbo].[sp_UpsertDimGuest]
AS
BEGIN
    /*
        Loads dbo.DimGuest from GTDW_Staging.dbo.DimGuestStagingFinal.

        Step 1 (SCD Type 2): GuestFirstName / GuestLastName are history-tracked.
            A MERGE expires the current row when either name differs, inserts
            guests that have no current row, and expires current rows that have
            no staging row. The OUTPUT of that MERGE feeds an outer INSERT that
            adds the new current version for every guest whose row was expired
            because of a name change.
        Step 2 (SCD Type 1): GuestAcct is overwritten in place on the current
            row; no history is kept.

        Takes no parameters and returns no result set.

        NOTE(review): the outer INSERT consumes the OUTPUT of a nested MERGE.
        SQL Server restricts the target of such an INSERT (for example, no
        triggers and no primary key/foreign key relationships), which is
        presumably why constraints are dropped before this load - confirm.

        NOTE(review): every reference to the dimension is written as
        [GTDW].[dbo].[DimGuest] so that the INSERT, the MERGE and the UPDATE
        are guaranteed to touch the same table regardless of the current
        database - confirm GTDW is the intended warehouse database.
    */

    -- Any run-time error rolls back the whole load instead of leaving the
    -- dimension half-updated between step 1 and step 2.
    SET XACT_ABORT ON;

    -- One timestamp for the whole run, so an expired row's EffEndDt and its
    -- replacement's EffStartDt are the same instant.
    DECLARE @LoadDt DATETIME = GETDATE();
    -- Open-ended "still current" end date (1 Jan 3000), written in the
    -- language-independent yyyymmdd form.
    DECLARE @OpenEndDt DATETIME = '30000101';

    BEGIN TRANSACTION;

    -- Step 1: SCD 2 historical attribute tracking via upsert
    INSERT INTO [GTDW].[dbo].[DimGuest]
    (
        [GuestContactSrgKey]
        ,[SourceSystem]
        ,[GuestIDAlternateKey]
        ,[GuestAcct]
        ,[GuestFirstName]
        ,[GuestLastName]
        ,[ETLLoadID]
        ,[EffStartDt]
        ,[EffEndDt]
        ,[IsCurrent]
    )
    SELECT
        MergeOutput.[GuestContactSrgKey]
        ,MergeOutput.[SourceSystem]
        ,MergeOutput.[GuestIDAlternateKey]
        ,MergeOutput.[GuestAcct]
        ,MergeOutput.[GuestFirstName]
        ,MergeOutput.[GuestLastName]
        ,MergeOutput.[ETLLoadID]
        ,@LoadDt
        ,@OpenEndDt
        ,1
    FROM (
        MERGE [GTDW].[dbo].[DimGuest] AS [Target]
        USING [GTDW_Staging].[dbo].[DimGuestStagingFinal] AS [Source]
            ON [Target].[GuestIDAlternateKey] = [Source].[GuestIDAlternateKey]
            AND [Target].[IsCurrent] = 1
        -- Name changed: expire the current version. The IS NULL branches make
        -- the comparison NULL-safe; a bare <> evaluates to UNKNOWN when either
        -- side is NULL and would silently skip the change.
        WHEN MATCHED AND
            (
                [Target].[GuestFirstName] <> [Source].[GuestFirstName]
                OR ([Target].[GuestFirstName] IS NULL AND [Source].[GuestFirstName] IS NOT NULL)
                OR ([Target].[GuestFirstName] IS NOT NULL AND [Source].[GuestFirstName] IS NULL)
                OR [Target].[GuestLastName] <> [Source].[GuestLastName]
                OR ([Target].[GuestLastName] IS NULL AND [Source].[GuestLastName] IS NOT NULL)
                OR ([Target].[GuestLastName] IS NOT NULL AND [Source].[GuestLastName] IS NULL)
            )
            THEN UPDATE SET
                [IsCurrent] = 0
                ,[EffEndDt] = @LoadDt
        -- No current row for this guest: insert the first current version.
        WHEN NOT MATCHED BY TARGET
            THEN INSERT
            (
                [GuestContactSrgKey]
                ,[SourceSystem]
                ,[GuestIDAlternateKey]
                ,[GuestAcct]
                ,[GuestFirstName]
                ,[GuestLastName]
                ,[ETLLoadID]
                ,[EffStartDt]
                ,[EffEndDt]
                ,[IsCurrent]
            )
            VALUES
            (
                [Source].[GuestContactSrgKey]
                ,[Source].[SourceSystem]
                ,[Source].[GuestIDAlternateKey]
                ,[Source].[GuestAcct]
                ,[Source].[GuestFirstName]
                ,[Source].[GuestLastName]
                ,[Source].[ETLLoadID]
                ,@LoadDt
                ,@OpenEndDt
                ,1
            )
        -- Current row with no staging row: expire it.
        -- NOTE(review): this presumes staging holds the full guest list on
        -- every run; with an incremental extract it would expire untouched
        -- guests - confirm.
        WHEN NOT MATCHED BY SOURCE AND [Target].[IsCurrent] = 1
            THEN UPDATE SET
                [IsCurrent] = 0
                ,[EffEndDt] = @LoadDt
        -- Explicit column list (instead of [Source].*) so a column added to
        -- staging cannot change the shape of the derived table.
        OUTPUT
            $action AS [Action]
            ,[Source].[GuestContactSrgKey]
            ,[Source].[SourceSystem]
            ,[Source].[GuestIDAlternateKey]
            ,[Source].[GuestAcct]
            ,[Source].[GuestFirstName]
            ,[Source].[GuestLastName]
            ,[Source].[ETLLoadID]
    ) AS MergeOutput
    -- Keep only rows expired because of a name change. Rows expired by the
    -- NOT MATCHED BY SOURCE branch carry NULL source columns and are dropped
    -- by the IS NOT NULL test.
    WHERE MergeOutput.[Action] = 'UPDATE'
        AND MergeOutput.[GuestIDAlternateKey] IS NOT NULL;

    -- Step 2: changing attributes - history is not recorded
    -- NOTE(review): only a GuestAcct difference triggers this update, so a
    -- changed GuestContactSrgKey alone is not propagated - confirm intended.
    UPDATE DimG
    SET
        [GuestContactSrgKey] = DSF.[GuestContactSrgKey]
        ,[GuestAcct] = DSF.[GuestAcct]
        ,[ETLLoadID] = DSF.[ETLLoadID]
    FROM [GTDW].[dbo].[DimGuest] AS DimG
    INNER JOIN [GTDW_Staging].[dbo].[DimGuestStagingFinal] AS DSF
        ON DimG.[GuestIDAlternateKey] = DSF.[GuestIDAlternateKey]
    WHERE DimG.[IsCurrent] = 1 -- optional: remove to overwrite historical rows too
        AND (
            DimG.[GuestAcct] <> DSF.[GuestAcct]
            OR (DimG.[GuestAcct] IS NULL AND DSF.[GuestAcct] IS NOT NULL)
            OR (DimG.[GuestAcct] IS NOT NULL AND DSF.[GuestAcct] IS NULL)
        );

    COMMIT TRANSACTION;
END

Rate

You rated this post out of 5. Change rating

Share

Share

Rate

You rated this post out of 5. Change rating