April 26, 2015 at 8:03 pm
With merge/insert statements ...
Is DISTINCT best way to handle problem of source table containing duplicate rows, along with WHERE NOT IN statement?
the source dataset is large and having to do DISTINCT and further filtering is taxing on the ETL. Better way?
DDL
source table
CREATE TABLE [dbo].[source](
[Product_ID] [INT] NOT NULL,
[ProductCode] [VARCHAR](20) NULL,
[ProductName] [VARCHAR](100) NULL,
[ProductColor] [VARCHAR](20) NULL,
[QuantityAvailable] [INT] NULL,
[SupplierName] [VARCHAR](50) NULL,
[SupplierRegion] [VARCHAR](30) NULL
)
data for source
insert INTO dbo.source (Product_ID, ProductCode, ProductName, ProductColor, QuantityAvailable, SupplierName, SupplierRegion)
VALUES
(1, 'A001', 'Apple', 'Red', 500, 'Great Apple Supply', 'NorthEast'),
(1, 'A001', 'Apple', 'Red', 500, 'Great Apple Supply', 'NorthEast')
target table
source2
CREATE TABLE [dbo].[source2](
[Product_ID] [INT] NOT NULL,
[ProductCode] [VARCHAR](20) NULL,
[ProductName] [VARCHAR](100) NULL,
[ProductColor] [VARCHAR](20) NULL,
[QuantityAvailable] [INT] NULL,
[SupplierName] [VARCHAR](50) NULL,
[SupplierRegion] [VARCHAR](30) NULL
)
merge statement
SET IDENTITY_INSERT dbo.source2 on
MERGE INTO dbo.source2 T
USING (
SELECT
distinct
Product_ID,
ProductCode,
ProductName,
ProductColor,
QuantityAvailable,
SupplierName,
SupplierRegion
FROM dbo.source s1
WHERE s1.product_id NOT in (SELECT product_id FROM source2 s2 WHERE s2.product_id = s1.product_id)
) Source
ON Source.Product_ID = T.Product_ID
WHEN NOT MATCHED BY TARGET THEN
INSERT (Product_ID, ProductCode, ProductName, ProductColor, QuantityAvailable, SupplierName, SupplierRegion)
VALUES(
Source.Product_ID,
Source.ProductCode,
Source.ProductName,
Source.ProductColor,
Source.QuantityAvailable,
Source.SupplierName,
Source.SupplierRegion
)
WHEN MATCHED THEN UPDATE
SET
T.ProductCode = SOURCE.ProductCode,
T.ProductName = SOURCE.ProductName,
T.ProductColor = SOURCE.ProductColor,
T.QuantityAvailable = Source.QuantityAvailable,
T.SupplierName = SOURCE.SupplierName,
T.SupplierRegion = SOURCE.SupplierRegion;
Viewing 0 posts
You must be logged in to reply to this topic. Login to reply