Abu Dina (6/24/2013)
This is still giving me problems! :unsure:
DECLARE @pairs TABLE (dropped_id INT, retained_id INT)
INSERT INTO @pairs
SELECT 16492, 23011 UNION ALL
SELECT 23011,24014 UNION ALL
SELECT 23011,24951 UNION ALL
SELECT 16492,24951 UNION ALL
SELECT 16492,29381 UNION ALL
SELECT 24951,29381 UNION ALL
SELECT 23011,29381 UNION ALL
SELECT 24014,66665 UNION ALL
SELECT 3344,66665
gives back: (using my method, Dwain's and chriM's)
The last solution I posted uses a series of updates. Choose one of them and run it four times, something like this:
DECLARE @pairs TABLE (Duplicate_ID INT, Master_ID INT)
INSERT INTO @pairs
SELECT 16492, 23011 UNION ALL
SELECT 23011,24014 UNION ALL
SELECT 23011,24951 UNION ALL
SELECT 16492,24951 UNION ALL
SELECT 16492,29381 UNION ALL
SELECT 24951,29381 UNION ALL
SELECT 23011,29381 UNION ALL
SELECT 24014,66665 UNION ALL
SELECT 3344,66665;
-- preprocess the sample data: there isn't a dupe and a master,
-- they're simply members of a set of two.
IF object_id('TempDB..#Temp') IS NOT NULL DROP TABLE #Temp;
WITH SequencedData AS (
SELECT *, SetNo = ROW_NUMBER() OVER (ORDER BY Duplicate_ID, Master_ID)
FROM @pairs
)
SELECT SetNo, Duplicate_ID
INTO #Temp
FROM SequencedData
UNION ALL
SELECT SetNo, Master_ID
FROM SequencedData;
-- (102 row(s) affected)
---------------------------------------------------------------------------
-- FOUR updates have to be executed with the sample data to complete.
---------------------------------------------------------------------------
DECLARE @RowsUpdated INT
SET @RowsUpdated = 1
WHILE @RowsUpdated > 0
BEGIN
UPDATE t SET SetNo = c.newset
FROM #Temp t
INNER JOIN (
SELECT d.SetNo, newset = MIN(newset)
FROM (
SELECT Duplicate_ID, SetNo, newset = MIN(SetNo) OVER (PARTITION BY Duplicate_ID)
FROM #Temp
) d
GROUP BY d.SetNo
HAVING MIN(newset) <> d.SetNo
) c ON c.SetNo = t.SetNo;
SET @RowsUpdated = @@rowcount
END
---------------------------------------------------------------------------
-- Final result set
---------------------------------------------------------------------------
SELECT t.Duplicate, x.Duplicate_ID
FROM (
SELECT SetNo, Duplicate = MIN(Duplicate_ID)
FROM #Temp
GROUP BY SetNo
) t
CROSS APPLY (
SELECT DISTINCT *
FROM #Temp ti
WHERE ti.SetNo = t.SetNo
AND ti.Duplicate_ID <> t.Duplicate
) x;
For fast, accurate and documented assistance in answering your questions, please read this article.
Understanding and using APPLY, (I) and (II) Paul White
Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden