RE: Query for duplicates – SQLServerCentral

SSC Guru

Points: 182882

October 4, 2015 at 5:30 am

sqllearner1234 (10/3/2015)
I'm using this to create a view for duplicates , but no of records coming out of this view are more than I'm passing in:(this query works fine in SQL server but when I try in postgresql it doesn't) this is little off-topic on this forum but if I get any help that would be wonderful.
1.CREATE VIEW test AS
2. (
3. SELECT field1,
4. field2,
5. field3,field4,
6. field5 ,
7. field6,
8. id,
9. field7,
10. COUNT(*) AS REC_COUNT
11. FROM tablename GROUP BY
12. field1,
13. field2,
14. field3 ,
15. field4,
16. field5,
17. field6,
18. id,
19. field7
20. HAVING COUNT(*) > 1

Hi and welcome to the forums. Your code has two flaws, firstly the redundant opening parenthesis in line two which is a syntax error, secondly a logical error where the id column is included in the group by, if this is a unique row identity it will make all rows unique and the query will not return any rows.

😎

Here is a quick example you can experiment with

USE tempdb;

/**********************************************************

Safe place

**********************************************************/

GO

SET NOCOUNT ON;

/**********************************************************

Global parameters */

/**********************************************************

Set @SAMPLE_SIZE to the desired number of rows

in the set

*/

DECLARE @SAMPLE_SIZE INT = 1000;

/**********************************************************

@VARIANCE controls the number of distinct values for

each column in the set, actual values produced range

from 0 to (@VARIANCE - 1). Higher numbers produces

fewer duplications, lower number produces higer number

of duplications.

**********************************************************/

DECLARE @VARIANCE INT = 2;

/**********************************************************

Sample Set DDL

Drops the table if it exists and (re)creates it.

**********************************************************/

IF OBJECT_ID(N'dbo.TBL_DUPE') IS NOT NULL DROP TABLE dbo.TBL_DUPE;

CREATE TABLE dbo.TBL_DUPE

(

[ID] INT IDENTITY(1,1) NOT NULL CONSTRAINT PK_DBL_DUPE_DUPE_ID PRIMARY KEY

,[FIELD01] INT NOT NULL

,[FIELD02] INT NOT NULL

,[FIELD03] INT NOT NULL

,[FIELD04] INT NOT NULL

,[FIELD05] INT NOT NULL

,[FIELD06] INT NOT NULL

,[FIELD07] INT NOT NULL

);

/**********************************************************

Generate sample data

**********************************************************/

INSERT INTO dbo.TBL_DUPE(FIELD01,FIELD02,FIELD03,FIELD04,FIELD05,FIELD06,FIELD07)

SELECT TOP(@SAMPLE_SIZE)

ABS(CHECKSUM(NEWID())) % @VARIANCE

,ABS(CHECKSUM(NEWID())) % @VARIANCE

FROM sys.all_columns SAC1

CROSS JOIN sys.all_columns SAC2

CROSS JOIN sys.all_columns SAC3

;

/**********************************************************

Return all duplicated rows with count and min/max id

of each distinct value duplicated.

**********************************************************/

SELECT

MIN(D.ID) AS MIN_ID

,MAX(D.ID) AS CAX_ID

,COUNT(*) AS R_CNT

,D.FIELD01

,D.FIELD02

,D.FIELD03

,D.FIELD04

,D.FIELD05

,D.FIELD06

,D.FIELD07

FROM dbo.TBL_DUPE D

GROUP BY D.FIELD01

,D.FIELD02

,D.FIELD03

,D.FIELD04

,D.FIELD05

,D.FIELD06

,D.FIELD07

HAVING COUNT(*) > 1;

/**********************************************************

Return all duplicated values, retaining the lowest ID

value.

**********************************************************/

SELECT

XD.ID

,XD.ID_RID

,XD.FIELD01

,XD.FIELD02

,XD.FIELD03

,XD.FIELD04

,XD.FIELD05

,XD.FIELD06

,XD.FIELD07

FROM

(

SELECT

D.ID

,ROW_NUMBER() OVER

(

PARTITION BY D.FIELD01

,D.FIELD02

,D.FIELD03

,D.FIELD04

,D.FIELD05

,D.FIELD06

,D.FIELD07

ORDER BY D.ID ASC

) AS ID_RID

,D.FIELD01

,D.FIELD02

,D.FIELD03

,D.FIELD04

,D.FIELD05

,D.FIELD06

,D.FIELD07

FROM dbo.TBL_DUPE D

) AS XD

WHERE XD.ID_RID > 1;

/**********************************************************

List the distinct rows from the set with the lowest

ID value

**********************************************************/

SELECT

XD.ID

,XD.ID_RID

,XD.FIELD01

,XD.FIELD02

,XD.FIELD03

,XD.FIELD04

,XD.FIELD05

,XD.FIELD06

,XD.FIELD07

FROM

(

SELECT

D.ID

,ROW_NUMBER() OVER

(

PARTITION BY D.FIELD01

,D.FIELD02

,D.FIELD03

,D.FIELD04

,D.FIELD05

,D.FIELD06

,D.FIELD07

ORDER BY D.ID ASC

) AS ID_RID

,D.FIELD01

,D.FIELD02

,D.FIELD03

,D.FIELD04

,D.FIELD05

,D.FIELD06

,D.FIELD07

FROM dbo.TBL_DUPE D

) AS XD

WHERE XD.ID_RID = 1;

/**********************************************************

List the distinct rows from the set with the highest

ID value

**********************************************************/

SELECT

XD.ID

,XD.ID_RID

,XD.FIELD01

,XD.FIELD02

,XD.FIELD03

,XD.FIELD04

,XD.FIELD05

,XD.FIELD06

,XD.FIELD07

FROM

(

SELECT

D.ID

,ROW_NUMBER() OVER

(

PARTITION BY D.FIELD01

,D.FIELD02

,D.FIELD03

,D.FIELD04

,D.FIELD05

,D.FIELD06

,D.FIELD07

ORDER BY D.ID DESC

) AS ID_RID

,D.FIELD01

,D.FIELD02

,D.FIELD03

,D.FIELD04

,D.FIELD05

,D.FIELD06

,D.FIELD07

FROM dbo.TBL_DUPE D

) AS XD

WHERE XD.ID_RID = 1;