RE: eliminating duplicates – SQLServerCentral

SSC Eights!

Points: 809

July 1, 2008 at 8:21 am

Well going to the first post, its clear the the post is most unclear in its requirements. Shouting at posters wont help under any circumstances or imposing random conditions , e.g. its now SQL 2000, not SQL 2005 etc..

There is something that makes posts clear, test data , source input / output - makes life easier to understand than a bunch of words.

A test-framework helps any post, users know what the goals are..

Anyways.. here is A LOOP ! - But it does whats required..

INPUT:

acctproc_idnameacct_id

0Record 0 - Non-DuplicateNULL

0Record 1 - DuplicateNULL

0Record 3 - DuplicateNULL

OUTPUT:

acctproc_idnameacct_id

0Record 1 - DuplicateNULL

0Record 3 - DuplicateNULL

TASK: Remove first Duplicate row, Remove non-duplicate rows.

CODE:

USE [tempdb]

--Create Table

drop table [my_tab]

CREATE TABLE [dbo].[my_tab](

[acctproc_id] [int] NOT NULL,

[name] [varchar](255) NOT NULL,

[acct_id] [int] null)

-- Add clustered unique index, with IGNORE_DUP_KEY "ON"

--create UNIQUE CLUSTERED index [my_tab_unique_row_removal] on [dbo].[my_tab]

--( [acctproc_id],[name],[acct_id] ASC

--)

--WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = ON, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)

--ON [PRIMARY]

--Display table output (will be blank atm)

select * from my_tab

-- Insert some data, 3 rows, 1 dup row = 2 end rows in table

insert into dbo.my_tab (

acctproc_id,

[name],

acct_id

) values ( 0, 'Record 0 - Non-Duplicate', NULL )

insert into dbo.my_tab (

acctproc_id,

[name],

acct_id

) values ( 0, 'Record 1 - Duplicate', NULL )

insert into dbo.my_tab (

acctproc_id,

[name],

acct_id

) values ( 0, 'Record 1 - Duplicate', NULL )

insert into dbo.my_tab (

acctproc_id,

[name],

acct_id

) values ( 0, 'Record 1 - Duplicate', NULL )

insert into dbo.my_tab (

acctproc_id,

[name],

acct_id

) values ( 0, 'Record 3 - Duplicate', NULL )

insert into dbo.my_tab (

acctproc_id,

[name],

acct_id

) values ( 0, 'Record 3 - Duplicate', NULL )

--Display table output : Will have 4 rows in. dup rows present

select * from my_tab

-- DELETE ALL Rows which are NOT Duplicates !

delete from my_tab

from my_tab a

-- Get first duplicate record for delete

join (select top 1 acctproc_id, [name], acct_id

from dbo.my_tab

group by acctproc_id, name, acct_id

having count(*) =1 ) b on

-- Cope with a NULL Join condition for row where null possible on acct_id

(a.acct_id = b.acct_id or a.acct_id is null and b.acct_id is null)

and a.name = b.name

and a.acctproc_id = b.acctproc_id

declare @myvarloop int

set @myvarloop =0

-- RUN THIS LOOP ! No TEMP TABLES, Table vars etc.. !

-- GRAB a quick count check to see if any DUP rows exist

while (select top 1 a.count_me

from (

select count(*) count_me, row_number() over (order by acctproc_id, name, acct_id) row

from dbo.my_tab

group by acctproc_id, name, acct_id

having count(*) >1 a

where

a.row > @myvarloop) > 0

begin

-- GRAB the First Duplicate ROW Number !

select @myvarloop = b.row

from

(select top 1 a.count_me, a.row

from (

select count(*) count_me, row_number() over (order by acctproc_id, name, acct_id) row

from dbo.my_tab

group by acctproc_id, name, acct_id

having count(*) >1) a

where

a.row > @myvarloop) b

select @myvarloop

-- Start our loop deleting the dup rows

-- set rowcount 1 -- ensure we only delete 1 row, cannot delete the two dup records

-- We'll use the TOP Clause here, for a SQL 2005 mechanism, SQL 2000, use set rowcount 1

delete top (1) from my_tab

from my_tab a

-- Get first duplicate record for delete

join (select top 1 acctproc_id, [name], acct_id,

row_number() over (order by acctproc_id, name, acct_id) row

from dbo.my_tab

group by acctproc_id, name, acct_id

having count(*) > 1) b on

-- Cope with a NULL Join condition for row where null possible on acct_id

(a.acct_id = b.acct_id or a.acct_id is null and b.acct_id is null)

and a.name = b.name

and a.acctproc_id = b.acctproc_id

and row = @myvarloop

end

select * from my_tab