merge statement when source table supplies duplicates

  • With merge/insert statements ...

    Is DISTINCT best way to handle problem of source table containing duplicate rows, along with WHERE NOT IN statement?

    the source dataset is large and having to do DISTINCT and further filtering is taxing on the ETL. Better way?

    DDL

    source table

    CREATE TABLE [dbo].[source](

    [Product_ID] [INT] NOT NULL,

    [ProductCode] [VARCHAR](20) NULL,

    [ProductName] [VARCHAR](100) NULL,

    [ProductColor] [VARCHAR](20) NULL,

    [QuantityAvailable] [INT] NULL,

    [SupplierName] [VARCHAR](50) NULL,

    [SupplierRegion] [VARCHAR](30) NULL

    )

    data for source

    insert INTO dbo.source (Product_ID, ProductCode, ProductName, ProductColor, QuantityAvailable, SupplierName, SupplierRegion)

    VALUES

    (1, 'A001', 'Apple', 'Red', 500, 'Great Apple Supply', 'NorthEast'),

    (1, 'A001', 'Apple', 'Red', 500, 'Great Apple Supply', 'NorthEast')

    target table

    source2

    CREATE TABLE [dbo].[source2](

    [Product_ID] [INT] NOT NULL,

    [ProductCode] [VARCHAR](20) NULL,

    [ProductName] [VARCHAR](100) NULL,

    [ProductColor] [VARCHAR](20) NULL,

    [QuantityAvailable] [INT] NULL,

    [SupplierName] [VARCHAR](50) NULL,

    [SupplierRegion] [VARCHAR](30) NULL

    )

    merge statement

    SET IDENTITY_INSERT dbo.source2 on

    MERGE INTO dbo.source2 T

    USING (

    SELECT

    distinct

    Product_ID,

    ProductCode,

    ProductName,

    ProductColor,

    QuantityAvailable,

    SupplierName,

    SupplierRegion

    FROM dbo.source s1

    WHERE s1.product_id NOT in (SELECT product_id FROM source2 s2 WHERE s2.product_id = s1.product_id)

    ) Source

    ON Source.Product_ID = T.Product_ID

    WHEN NOT MATCHED BY TARGET THEN

    INSERT (Product_ID, ProductCode, ProductName, ProductColor, QuantityAvailable, SupplierName, SupplierRegion)

    VALUES(

    Source.Product_ID,

    Source.ProductCode,

    Source.ProductName,

    Source.ProductColor,

    Source.QuantityAvailable,

    Source.SupplierName,

    Source.SupplierRegion

    )

    WHEN MATCHED THEN UPDATE

    SET

    T.ProductCode = SOURCE.ProductCode,

    T.ProductName = SOURCE.ProductName,

    T.ProductColor = SOURCE.ProductColor,

    T.QuantityAvailable = Source.QuantityAvailable,

    T.SupplierName = SOURCE.SupplierName,

    T.SupplierRegion = SOURCE.SupplierRegion;

Viewing 0 posts

You must be logged in to reply to this topic. Login to reply