Remove duplicates from query

  • I am working with a bunch of records that have duplicates on the Persid and the intPercentID where there are duplicates I want to remove when I stick them in the temp table, I tried join on tempo table and doing not exists but still inserts, so now I am trying a merge but same thing. how can I keep duplicates from being inserted in the temp table. I made a cursor as well but its slow as heck, but it does work. trying better ways.

    Create table #TempStr (STRId int not null Identity(1,1) primary key, Persid int, percentId int, dtCreated datetime, CreatedBy int)

    Create table #NewStr (STRId int, Persid int, percentId int, dtCreated datetime, CreatedBy int)

    INSERT #TempStr (Persid, percentId, dtCreated, CreatedBy)

    select intPersonnelID, intPercentID, dtSubmitted, intSubmittedBy from tblSTR

    whereintpercentId in (61,62) group by intPercentID, intPersonnelID, dtSubmitted, intSubmittedBy

    UNION ALL

    select intPersonnelID, intPercentID, dtSubmitted, intSubmittedBy from tblSTRHist

    where intpercentId in (61,62) group by intPercentID, intPersonnelID, dtSubmitted, intSubmittedBy

    order by dtSubmitted asc

    Merge into #NewStr as ns

    USING(SelectSTRId, Persid, percentId, dtCreated, CreatedBy

    From#TempStr) s on s.Persid = ns.Persid and s.percentId = ns.percentId

    WHEN MATCHED THEN

    UPDATE SET

    ns.dtCreated = s.dtCreated,

    ns.CreatedBy = s.CreatedBy

    WHEN NOT MATCHED THEN

    INSERT (STRId, Persid, percentId, dtCreated, CreatedBy)

    VALUES (s.STRId, s.Persid, s.percentId, s.dtCreated, s.CreatedBy);

    select * from #NewStr order by Persid

    data set

    StrId PersId percentId dtCCreated CreatedBy

    3 74 61 2013-04-22 00:00:00.00010594

    2157 121 61 2015-01-09 00:00:00.0002092

    2158 121 62 2015-01-09 00:00:00.0002092

    4505 121 62 2015-09-20 00:00:00.00010562 Duplicate should have updated

    4040 125 62 2015-08-26 00:00:00.0003436

    4139 126 62 2015-08-31 00:00:00.0006732

    3859 128 62 2015-08-20 00:00:00.00012506

    2755 128 61 2015-02-25 00:00:00.00012506

    2756 128 61 2015-02-25 00:00:00.00012506 Duplicates should have updated

    4774 143 62 2015-10-05 00:00:00.00010374

    4243 149 62 2015-09-02 00:00:00.0002637

    3100 149 62 2015-05-20 00:00:00.00014491 Duplicate should have updated List goes on.

  • Stubby Bunny (10/6/2015)


    I am working with a bunch of records that have duplicates on the Persid and the intPercentID where there are duplicates I want to remove when I stick them in the temp table, I tried join on tempo table and doing not exists but still inserts, so now I am trying a merge but same thing. how can I keep duplicates from being inserted in the temp table. I made a cursor as well but its slow as heck, but it does work. trying better ways.

    Create table #TempStr (STRId int not null Identity(1,1) primary key, Persid int, percentId int, dtCreated datetime, CreatedBy int)

    Create table #NewStr (STRId int, Persid int, percentId int, dtCreated datetime, CreatedBy int)

    INSERT #TempStr (Persid, percentId, dtCreated, CreatedBy)

    select intPersonnelID, intPercentID, dtSubmitted, intSubmittedBy from tblSTR

    whereintpercentId in (61,62) group by intPercentID, intPersonnelID, dtSubmitted, intSubmittedBy

    UNION ALL

    select intPersonnelID, intPercentID, dtSubmitted, intSubmittedBy from tblSTRHist

    where intpercentId in (61,62) group by intPercentID, intPersonnelID, dtSubmitted, intSubmittedBy

    order by dtSubmitted asc

    Merge into #NewStr as ns

    USING(SelectSTRId, Persid, percentId, dtCreated, CreatedBy

    From#TempStr) s on s.Persid = ns.Persid and s.percentId = ns.percentId

    WHEN MATCHED THEN

    UPDATE SET

    ns.dtCreated = s.dtCreated,

    ns.CreatedBy = s.CreatedBy

    WHEN NOT MATCHED THEN

    INSERT (STRId, Persid, percentId, dtCreated, CreatedBy)

    VALUES (s.STRId, s.Persid, s.percentId, s.dtCreated, s.CreatedBy);

    select * from #NewStr order by Persid

    data set

    StrId PersId percentId dtCCreated CreatedBy

    3 74 61 2013-04-22 00:00:00.00010594

    2157 121 61 2015-01-09 00:00:00.0002092

    2158 121 62 2015-01-09 00:00:00.0002092

    4505 121 62 2015-09-20 00:00:00.00010562 Duplicate should have updated

    4040 125 62 2015-08-26 00:00:00.0003436

    4139 126 62 2015-08-31 00:00:00.0006732

    3859 128 62 2015-08-20 00:00:00.00012506

    2755 128 61 2015-02-25 00:00:00.00012506

    2756 128 61 2015-02-25 00:00:00.00012506 Duplicates should have updated

    4774 143 62 2015-10-05 00:00:00.00010374

    4243 149 62 2015-09-02 00:00:00.0002637

    3100 149 62 2015-05-20 00:00:00.00014491 Duplicate should have updated List goes on.

    What constitutes a duplicate? Just if PresId and percnetId are duplicated?

  • yes, I cannot have the persId or the percentID duplicated for any record.

  • maybe....

    CREATE TABLE #sampledata(

    StrId INTEGER NOT NULL PRIMARY KEY

    ,PersId INTEGER NOT NULL

    ,percentId INTEGER NOT NULL

    ,dtCCreated DATETIME NOT NULL

    ,CreatedBy INTEGER NOT NULL

    );

    INSERT INTO #sampledata(StrId,PersId,percentId,dtCCreated,CreatedBy) VALUES (3,74,61,'2013/04/22',10594);

    INSERT INTO #sampledata(StrId,PersId,percentId,dtCCreated,CreatedBy) VALUES (2157,121,61,'2015/01/09',2092);

    INSERT INTO #sampledata(StrId,PersId,percentId,dtCCreated,CreatedBy) VALUES (2158,121,62,'2015/01/09',2092);

    INSERT INTO #sampledata(StrId,PersId,percentId,dtCCreated,CreatedBy) VALUES (4505,121,62,'2015/09/20',10562);

    INSERT INTO #sampledata(StrId,PersId,percentId,dtCCreated,CreatedBy) VALUES (4040,125,62,'2015/08/26',3436);

    INSERT INTO #sampledata(StrId,PersId,percentId,dtCCreated,CreatedBy) VALUES (4139,126,62,'2015/08/31',6732);

    INSERT INTO #sampledata(StrId,PersId,percentId,dtCCreated,CreatedBy) VALUES (3859,128,62,'2015/08/20',12506);

    INSERT INTO #sampledata(StrId,PersId,percentId,dtCCreated,CreatedBy) VALUES (2755,128,61,'2015/02/25',12506);

    INSERT INTO #sampledata(StrId,PersId,percentId,dtCCreated,CreatedBy) VALUES (2756,128,61,'2015/02/25',12506);

    INSERT INTO #sampledata(StrId,PersId,percentId,dtCCreated,CreatedBy) VALUES (4774,143,62,'2015/10/05',10374);

    INSERT INTO #sampledata(StrId,PersId,percentId,dtCCreated,CreatedBy) VALUES (4243,149,62,'2015/09/02',2637);

    INSERT INTO #sampledata(StrId,PersId,percentId,dtCCreated,CreatedBy) VALUES (3100,149,62,'2015/05/20',14491);

    SELECT

    *

    FROM #sampledata

    ORDER BY PersId , percentId;

    WITH cte as (

    SELECT

    *

    , ROW_NUMBER() OVER (PARTITION BY persid,percentId ORDER BY StrId) AS RN

    FROM #sampledata

    )

    SELECT

    StrId

    , PersId

    , percentId

    , dtCCreated

    , CreatedBy

    INTO #temp

    FROM cte

    WHERE RN = 1;

    SELECT

    *

    FROM #temp

    ORDER BY PersId , percentId;

    DROP TABLE #sampledata;

    DROP TABLE #temp

    ________________________________________________________________
    you can lead a user to data....but you cannot make them think
    and remember....every day is a school day

Viewing 4 posts - 1 through 3 (of 3 total)

You must be logged in to reply to this topic. Login to reply