Home Forums SQL Server 2008 T-SQL (SS2K8) Rank duplicates, but only rows involved in duplicates RE: Rank duplicates, but only rows involved in duplicates

  • An alternative solution which doesn't require any joining. It scales well for large sets but has a weakness of returning false positives although fairly unlikely with this type of data. As a bonus, the B_CHS (binary_checksum) column is a "distinct group name".

    😎

    ;WITH BASE_DATA AS

    (

    SELECT

    BINARY_CHECKSUM(B.Phone,CHAR(124), B.Address) AS B_CHS

    ,B.Business_pk

    FROM dbo.TestBusiness B

    )

    SELECT

    BD.B_CHS

    ,BD.Business_pk

    INTO #BD_SORT

    FROM BASE_DATA BD;

    CREATE UNIQUE NONCLUSTERED INDEX TMP_BD_SORT_DD ON #BD_SORT ( B_CHS ASC, Business_pk ASC);

    ;WITH BS_SORT AS

    (

    SELECT

    BS.Business_pk

    ,ROW_NUMBER() OVER (PARTITION BY BS.B_CHS ORDER BY BS.Business_pk ) AS BS_RID

    FROM #BD_SORT BS

    )

    SELECT * FROM BS_SORT WHERE BS_RID > 1;

    DROP TABLE #BD_SORT;