Hi Bickie
Here's a solution which seems to work with an extended sample data set.
Sample data:
DROP TABLE #test2
CREATE TABLE #test2 (indkey NVARCHAR(2), datakey NVARCHAR(4), landline NVARCHAR(11), mobile NVARCHAR(11), email NVARCHAR(20))
INSERT INTO #test2 VALUES
('1','0001','01234567890','0712345679','1@test.co.uk') -- YES
,('2','0001','01234567890','','1@test.co.uk') -- NO, dupe on indkey = 1 (datakey & landline)
,('3','0002','01234567890','','2@test.co.uk') -- NO, dupe on indkey = 1 (landline)
,('4','0002','01234567891','','2@test.co.uk') -- YES
,('5','0002','','07123456789','')-- NO, dupe on indkey = 4 (datakey)
,('6','0003','01234567892','07123456791','') -- YES
,('7','0004','01234567893','07123456792','') -- YES
,('8','0005','01234567894','07123456793','2@test.co.uk') -- YES
,('9','0005','01234567898','07123456793','2@test.co.uk') -- NO, dupe on indkey = 8 (datakey)
,('10','0008','01234567895','07123456793','9@test.co.uk') -- YES
,('11','0008','01234567895','','5@test.co.uk') -- NO, dupe on indkey = 10 (datakey)
,('12','0008','01234567896','','6@test.co.uk') -- NO, dupe on indkey = 10 (datakey)
,('13','0009','01234567896','','6@test.co.uk') -- NO, dupe on indkey = 10 (landline)
,('14','0009','01234567889','','6@test.co.uk') -- YES
,('15','0010','01189567889','','6@test.co.uk') -- YES
,('16','0010','01189567890','','6@test.co.uk') -- NO, dupe on indkey = 15 (datakey)
,('17','0001','03189567889','','6@test.co.uk') -- NO, dupe on indkey = 1 (datakey)
,('18','0002','03189567890','','6@test.co.uk') -- NO, dupe on indkey = 4 (datakey)
,('19','0003','03189567891','','6@test.co.uk') -- NO, dupe on indkey = 6 (datakey)
Code:
;WITH Level1 AS (
SELECT indkey, datakey, landline
FROM (
SELECT indkey, datakey, landline,
rnll = ROW_NUMBER() OVER(PARTITION BY landline ORDER BY datakey),
rndk = ROW_NUMBER() OVER(PARTITION BY datakey ORDER BY landline)
FROM #test2
WHERE landline <> ''
) d
WHERE rnll = 1 AND rndk = 1
),
Level2 AS (
SELECT indkey, datakey, landline
FROM (
SELECT
indkey, datakey, landline,
rnll = ROW_NUMBER() OVER(PARTITION BY landline ORDER BY datakey),
rndk = ROW_NUMBER() OVER(PARTITION BY datakey ORDER BY landline)
FROM #test2 t
WHERE landline <> ''
AND NOT EXISTS (SELECT 1 FROM Level1 l WHERE l.landline = t.landline OR l.datakey = t.datakey)
) d
WHERE rnll = 1 AND rndk = 1
)
SELECT * FROM Level1
UNION ALL
SELECT * FROM Level2
ORDER BY datakey, landline
What I suggest you do is this: run the code against a larger sample or your actual data and spool the results into a temp table. Check the temp table for dupes on either datakey or landline. If there are dupes, extend the sample data set such that when the code is run against it, the dupes show - then post the extended sample data set here.
SELECT *
INTO #ResultsForChecking
FROM Level1
UNION ALL
SELECT * FROM Level2
ORDER BY datakey, landline
Cheers
ChrisM
For better assistance in answering your questions, please read this[/url].
Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]