• Abu Dina (6/24/2013)


    This is still giving me problems! :unsure:

    DECLARE @pairs TABLE (dropped_id INT, retained_id INT)

    INSERT INTO @pairs

    SELECT 16492, 23011 UNION ALL

    SELECT 23011,24014 UNION ALL

    SELECT 23011,24951 UNION ALL

    SELECT 16492,24951 UNION ALL

    SELECT 16492,29381 UNION ALL

    SELECT 24951,29381 UNION ALL

    SELECT 23011,29381 UNION ALL

    SELECT 24014,66665 UNION ALL

    SELECT 3344,66665

    gives back: (using my method, Dwain's and chriM's)

    The last solution I posted uses a series of updates. Choose one of them and run it four times, something like this:

    DECLARE @pairs TABLE (Duplicate_ID INT, Master_ID INT)

    INSERT INTO @pairs

    SELECT 16492, 23011 UNION ALL

    SELECT 23011,24014 UNION ALL

    SELECT 23011,24951 UNION ALL

    SELECT 16492,24951 UNION ALL

    SELECT 16492,29381 UNION ALL

    SELECT 24951,29381 UNION ALL

    SELECT 23011,29381 UNION ALL

    SELECT 24014,66665 UNION ALL

    SELECT 3344,66665;

    -- preprocess the sample data: there isn't a dupe and a master,

    -- they're simply members of a set of two.

    IF object_id('TempDB..#Temp') IS NOT NULL DROP TABLE #Temp;

    WITH SequencedData AS (

    SELECT *, SetNo = ROW_NUMBER() OVER (ORDER BY Duplicate_ID, Master_ID)

    FROM @pairs

    )

    SELECT SetNo, Duplicate_ID

    INTO #Temp

    FROM SequencedData

    UNION ALL

    SELECT SetNo, Master_ID

    FROM SequencedData;

    -- (102 row(s) affected)

    ---------------------------------------------------------------------------

    -- FOUR updates have to be executed with the sample data to complete.

    ---------------------------------------------------------------------------

    DECLARE @RowsUpdated INT

    SET @RowsUpdated = 1

    WHILE @RowsUpdated > 0

    BEGIN

    UPDATE t SET SetNo = c.newset

    FROM #Temp t

    INNER JOIN (

    SELECT d.SetNo, newset = MIN(newset)

    FROM (

    SELECT Duplicate_ID, SetNo, newset = MIN(SetNo) OVER (PARTITION BY Duplicate_ID)

    FROM #Temp

    ) d

    GROUP BY d.SetNo

    HAVING MIN(newset) <> d.SetNo

    ) c ON c.SetNo = t.SetNo;

    SET @RowsUpdated = @@rowcount

    END

    ---------------------------------------------------------------------------

    -- Final result set

    ---------------------------------------------------------------------------

    SELECT t.Duplicate, x.Duplicate_ID

    FROM (

    SELECT SetNo, Duplicate = MIN(Duplicate_ID)

    FROM #Temp

    GROUP BY SetNo

    ) t

    CROSS APPLY (

    SELECT DISTINCT *

    FROM #Temp ti

    WHERE ti.SetNo = t.SetNo

    AND ti.Duplicate_ID <> t.Duplicate

    ) x;

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden