RE: Help with Counts – SQLServerCentral

ChrisM@home

SSC-Insane

Points: 24260

January 17, 2012 at 2:57 am

#1434472

Hi Bickie

Here's a solution which seems to work with an extended sample data set.

Sample data:

DROP TABLE #test2

CREATE TABLE #test2 (indkey NVARCHAR(2), datakey NVARCHAR(4), landline NVARCHAR(11), mobile NVARCHAR(11), email NVARCHAR(20))

INSERT INTO #test2 VALUES

('1','0001','01234567890','0712345679','1@test.co.uk') -- YES

,('2','0001','01234567890','','1@test.co.uk') -- NO, dupe on indkey = 1 (datakey & landline)

,('3','0002','01234567890','','2@test.co.uk') -- NO, dupe on indkey = 1 (landline)

,('4','0002','01234567891','','2@test.co.uk') -- YES

,('5','0002','','07123456789','')-- NO, dupe on indkey = 4 (datakey)

,('6','0003','01234567892','07123456791','') -- YES

,('7','0004','01234567893','07123456792','') -- YES

,('8','0005','01234567894','07123456793','2@test.co.uk') -- YES

,('9','0005','01234567898','07123456793','2@test.co.uk') -- NO, dupe on indkey = 8 (datakey)

,('10','0008','01234567895','07123456793','9@test.co.uk') -- YES

,('11','0008','01234567895','','5@test.co.uk') -- NO, dupe on indkey = 10 (datakey)

,('12','0008','01234567896','','6@test.co.uk') -- NO, dupe on indkey = 10 (datakey)

,('13','0009','01234567896','','6@test.co.uk') -- NO, dupe on indkey = 10 (landline)

,('14','0009','01234567889','','6@test.co.uk') -- YES

,('15','0010','01189567889','','6@test.co.uk') -- YES

,('16','0010','01189567890','','6@test.co.uk') -- NO, dupe on indkey = 15 (datakey)

,('17','0001','03189567889','','6@test.co.uk') -- NO, dupe on indkey = 1 (datakey)

,('18','0002','03189567890','','6@test.co.uk') -- NO, dupe on indkey = 4 (datakey)

,('19','0003','03189567891','','6@test.co.uk') -- NO, dupe on indkey = 6 (datakey)

Code:

;WITH Level1 AS (

SELECT indkey, datakey, landline

FROM (

SELECT indkey, datakey, landline,

rnll = ROW_NUMBER() OVER(PARTITION BY landline ORDER BY datakey),

rndk = ROW_NUMBER() OVER(PARTITION BY datakey ORDER BY landline)

FROM #test2

WHERE landline <> ''

) d

WHERE rnll = 1 AND rndk = 1

),

Level2 AS (

SELECT indkey, datakey, landline

FROM (

SELECT

indkey, datakey, landline,

rnll = ROW_NUMBER() OVER(PARTITION BY landline ORDER BY datakey),

rndk = ROW_NUMBER() OVER(PARTITION BY datakey ORDER BY landline)

FROM #test2 t

WHERE landline <> ''

AND NOT EXISTS (SELECT 1 FROM Level1 l WHERE l.landline = t.landline OR l.datakey = t.datakey)

) d

WHERE rnll = 1 AND rndk = 1

)

SELECT * FROM Level1

UNION ALL

SELECT * FROM Level2

ORDER BY datakey, landline

What I suggest you do is this: run the code against a larger sample or your actual data and spool the results into a temp table. Check the temp table for dupes on either datakey or landline. If there are dupes, extend the sample data set such that when the code is run against it, the dupes show - then post the extended sample data set here.

SELECT *

INTO #ResultsForChecking

FROM Level1

UNION ALL

SELECT * FROM Level2

ORDER BY datakey, landline

Cheers

ChrisM

[font="Arial"]^{Low-hanging fruit picker and defender of the moggies}[/font]

For better assistance in answering your questions, please read this[/url].

Understanding and using APPLY, (I)[/url] and (II)[/url] Paul White[/url]

Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]