How to optimization of a stored proc

  • Hi All,

    Need some help on optimizing a stored proc. I see some of the update statements written multiple times. Instead of writing multiple updates , can we re-write in one single statement  which might avoid reading same table multiple times. This stored proc is doing a lot of logical reads almost like 76TB of data. Do u see any anti-patterns. I am looking at indexing but I am more afraid the way this stored proc code been written for handling duplicate data.

    This below SQL statement is taking a lot of time

    INSERT INTO CONTACT_PERSON_ID
    (
    ROWID_OBJECT,
    ROWID_OBJECT_MATCHED,
    STATUS,
    PERSON_ID,
    EMAIL,
    EMAIL_MATCHED,
    CREATE_DATE,
    LAST_UPDATE_DATE
    )
    SELECT d.ROWID_OBJECT,
    d.min_object,
    'NEW',
    l.all_rowid,
    c.COMM_VAL,
    CC.COMM_VAL,
    GETDATE(),
    GETDATE()
    FROM #potential_dups d
    inner join #person_list l
    on d.min_object = l.min_object
    left outer join c_b_party_comm (nolock) c
    on d.ROWID_OBJECT = c.PARTY_ID
    and c.COMM_TYP_CD = 'EMAIL'
    left outer join c_b_party_comm (nolock) cC
    on d.min_object = cC.PARTY_ID
    and cC.COMM_TYP_CD = 'EMAIL'
    WHERE d.rowid_object <> d.min_object
    union
    SELECT d.rowid_object_matched,
    d.min_object,
    'NEW',
    l.all_rowid,
    c.COMM_VAL,
    cc.COMM_VAL,
    GETDATE(),
    GETDATE()
    FROM #potential_dups d
    inner join #person_list l
    on d.min_object = l.min_object
    left outer join c_b_party_comm (nolock) C
    on d.ROWID_OBJECT_MATCHED = C.PARTY_ID
    and C.COMM_TYP_CD = 'EMAIL'
    left outer join c_b_party_comm (nolock) CC
    on d.min_object = CC.PARTY_ID
    and CC.COMM_TYP_CD = 'EMAIL'
    WHERE d.rowid_object_matched <> d.min_object

    Complete stored proc code below. Please provide recommendations.

    Looking for ways to simply this code and reduce the execution time.

    ALTER PROCEDURE [dbo].[usp_check_potential_dups_person_id]
    (
    @STATUS_CODE VARCHAR(20) OUTPUT,
    @STATUS_DESC VARCHAR(3000) OUTPUT
    )
    AS
    declare @cnt int,
    @inc int,
    @rowid_object varchar(20),
    @rowid_object_matched varchar(20),
    @min_object varchar(20)
    declare @min_obj varchar(20),
    @last_update int,
    @run char(1)
    BEGIN
    SET NOCOUNT ON

    BEGIN TRY
    IF OBJECT_ID('tempdb..#potential_dups') IS NOT NULL
    DROP TABLE #potential_dups
    IF OBJECT_ID('tempdb..#person_list') IS NOT NULL
    DROP TABLE #person_list

    -- delete record having same people id
    delete d
    from contact_potential_dups d,
    c_b_party p,
    c_b_party c
    where d.ROWID_OBJECT = p.rowid_object
    and d.ROWID_OBJECT_MATCHED = c.rowid_object
    and p.people_id = c.people_id

    -- Create Temp table which needs to be processed
    select rowid_object,
    rowid_object_matched,
    rowid_object_matched as min_object,
    orig_rowid_object,
    orig_rowid_object_matched,
    cast(null as varchar(8000)) person_id
    into #potential_dups
    from
    (
    select case
    when cast(d.rowid_object as int) > cast(rowid_object_matched as int) then
    ROWID_OBJECT_MATCHED
    else
    d.ROWID_OBJECT
    end rowid_object_matched,
    case
    when cast(d.rowid_object as int) < cast(rowid_object_matched as int) then
    ROWID_OBJECT_MATCHED
    else
    d.ROWID_OBJECT
    end rowid_object,
    d.ROWID_OBJECT orig_rowid_object,
    ROWID_OBJECT_MATCHED orig_rowid_object_matched
    from CONTACT_POTENTIAL_DUPS (nolock) d
    ) aa
    set @cnt = @@ROWCOUNT;

    select @inc = 0,
    @last_update = @cnt,
    @run = 'Y'

    CREATE NONCLUSTERED INDEX inx_potential_dups_temp1
    ON [dbo].[#potential_dups] ([rowid_object_matched])
    INCLUDE ([min_object])

    CREATE NONCLUSTERED INDEX inx_potential_dups_temp2
    ON [dbo].[#potential_dups] ([rowid_object])
    INCLUDE ([min_object])

    -- Look will continue till either run='N' or increment is more than total recods
    while @cnt > @inc and @run = 'Y'
    begin
    set @run = 'N'

    -- Update min_object if matching (match_rowid=rowid) having higher min_object
    update pd
    set pd.min_object = sp.min_object
    from #potential_dups pd
    cross apply
    (
    select min(cast(sp.min_object as int)) min_object
    from #potential_dups sp
    where pd.rowid_object_matched = sp.rowid_object
    ) sp
    where cast(pd.min_object as int) > cast(sp.min_object as int)

    set @last_update = @@ROWCOUNT;
    if @run = 'N'
    and @last_update > 0
    set @run = 'Y'

    -- Update min_object if matching (rowid=match_rowid) having higher min_object
    update pd
    set pd.min_object = sp.min_object
    from #potential_dups pd
    cross apply
    (
    select min(cast(sp.min_object as int)) min_object
    from #potential_dups sp
    where pd.rowid_object = sp.rowid_object_matched
    ) sp
    where cast(pd.min_object as int) > cast(sp.min_object as int)

    set @last_update = @@ROWCOUNT;
    if @run = 'N'
    and @last_update > 0
    set @run = 'Y'

    -- Update min_object if matching (rowid=rowid) having higher min_object
    update pd
    set pd.min_object = sp.min_object
    from #potential_dups pd
    cross apply
    (
    select min(cast(sp.min_object as int)) min_object
    from #potential_dups sp
    where pd.rowid_object = sp.rowid_object
    ) sp
    where cast(pd.min_object as int) > cast(sp.min_object as int)

    set @last_update = @@ROWCOUNT;
    if @run = 'N'
    and @last_update > 0
    set @run = 'Y'

    -- Update min_object if matching (rowid=match_rowid) having higher min_object
    update pd
    set pd.min_object = sp.min_object
    from #potential_dups pd
    cross apply
    (
    select min(cast(sp.min_object as int)) min_object
    from #potential_dups sp
    where pd.rowid_object = sp.rowid_object_matched
    ) sp
    where cast(pd.min_object as int) > cast(sp.min_object as int)

    set @last_update = @@ROWCOUNT;
    if @run = 'N'
    and @last_update > 0
    set @run = 'Y'

    -- Update min_object if matching (match_rowid=match_rowid) having higher min_object
    update pd
    set pd.min_object = sp.min_object
    from #potential_dups pd
    cross apply
    (
    select min(cast(sp.min_object as int)) min_object
    from #potential_dups sp
    where pd.rowid_object_matched = sp.rowid_object_matched
    ) sp
    where cast(pd.min_object as int) > cast(sp.min_object as int)

    set @last_update = @@ROWCOUNT;
    if @run = 'N'
    and @last_update > 0
    set @run = 'Y'

    set @inc = @inc + 1;
    end

    -- Get list of connected rowid for each min object
    SELECT min_object,
    all_rowid = substring(
    ltrim(STUFF(
    (
    SELECT distinct
    ', ' + cast(ltrim(rtrim(rowid_object)) as varchar)
    from
    (
    select min_object,
    rowid_object
    from #potential_dups
    union
    select min_object,
    rowid_object_matched
    from #potential_dups
    ) b
    WHERE b.min_object = a.min_object --ORDER BY CAST(cast(rowid_object as varchar) AS INT)
    FOR XML path('')
    ),
    1,
    1,
    ''
    )
    ),
    1,
    8000
    )
    into #person_list
    FROM #potential_dups (NOLOCK) a
    GROUP BY min_object
    -- ORDER BY 1

    create nonclustered index idx_person_1 on #person_list (min_object)

    -- remove all previous data
    truncate table CONTACT_PERSON_ID

    INSERT INTO CONTACT_PERSON_ID
    (
    ROWID_OBJECT,
    ROWID_OBJECT_MATCHED,
    STATUS,
    PERSON_ID,
    EMAIL,
    EMAIL_MATCHED,
    CREATE_DATE,
    LAST_UPDATE_DATE
    )
    SELECT d.ROWID_OBJECT,
    d.min_object,
    'NEW',
    l.all_rowid,
    c.COMM_VAL,
    CC.COMM_VAL,
    GETDATE(),
    GETDATE()
    FROM #potential_dups d
    inner join #person_list l
    on d.min_object = l.min_object
    left outer join c_b_party_comm (nolock) c
    on d.ROWID_OBJECT = c.PARTY_ID
    and c.COMM_TYP_CD = 'EMAIL'
    left outer join c_b_party_comm (nolock) cC
    on d.min_object = cC.PARTY_ID
    and cC.COMM_TYP_CD = 'EMAIL'
    WHERE d.rowid_object <> d.min_object
    union
    SELECT d.rowid_object_matched,
    d.min_object,
    'NEW',
    l.all_rowid,
    c.COMM_VAL,
    cc.COMM_VAL,
    GETDATE(),
    GETDATE()
    FROM #potential_dups d
    inner join #person_list l
    on d.min_object = l.min_object
    left outer join c_b_party_comm (nolock) C
    on d.ROWID_OBJECT_MATCHED = C.PARTY_ID
    and C.COMM_TYP_CD = 'EMAIL'
    left outer join c_b_party_comm (nolock) CC
    on d.min_object = CC.PARTY_ID
    and CC.COMM_TYP_CD = 'EMAIL'
    WHERE d.rowid_object_matched <> d.min_object



    -------------------------- my comments - do a set based operations instead of updating column by column
    -- Update person id back to NULL so latest person id value can be updated
    update contact_potential_dups
    set PERSON_ID = NULL

    -- Update person id back to potential dups table
    update d
    set PERSON_ID = p.person_id
    from contact_potential_dups d,
    [CONTACT_PERSON_ID] p
    where d.ROWID_OBJECT = p.ROWID_OBJECT
    and d.person_id is null

    update d
    set PERSON_ID = p.person_id
    from contact_potential_dups d,
    [CONTACT_PERSON_ID] p
    where d.ROWID_OBJECT_MATCHED = p.ROWID_OBJECT
    and d.person_id is null

    update d
    set PERSON_ID = p.person_id
    from contact_potential_dups d,
    [CONTACT_PERSON_ID] p
    where d.ROWID_OBJECT = p.ROWID_OBJECT_MATCHED
    and d.person_id is null

    update d
    set PERSON_ID = p.person_id
    from contact_potential_dups d,
    [CONTACT_PERSON_ID] p
    where d.ROWID_OBJECT_MATCHED = p.ROWID_OBJECT_MATCHED
    and d.person_id is null
    --------------------------

    drop table #potential_dups
    drop table #person_list

    SET @status_code = 'SUCCESS';
    SET @status_desc = 'Process Complete'
    RETURN
    END TRY
    BEGIN CATCH
    SET @status_code = 'ERROR' --'SQLERR-'+CAST(ERROR_NUMBER() AS VARCHAR)
    SET @status_desc
    = CONCAT(
    'ERROR_SEVERITY : ',
    CAST(ERROR_SEVERITY() AS VARCHAR),
    ', ERROR_STATE : ',
    CAST(ERROR_STATE() AS VARCHAR),
    ', ERROR_PROCEDURE : ',
    ERROR_PROCEDURE(),
    ', ERROR_LINE : ',
    CAST(ERROR_LINE() AS VARCHAR),
    ', ERROR_MESSAGE : ',
    ERROR_MESSAGE()
    )

    END CATCH
    RETURN
    END
    go

     

    Regards,

    Sam

     

  • I would initially try creating the temporary tables (#potential_dups and #person_list) before you insert into it and create the CLUSTERED indexes inx_potential_dups_temp1 and index idx_person_1 instead of NONCLUSTERED, and create that index before you insert into the table:

    CREATE CLUSTERED INDEX inx_potential_dups_temp1
    ON [dbo].[#potential_dups] ([rowid_object_matched], min_object);
    create unique clustered index idx_person_1 on #person_list(min_object);

    The second index you should include min_object in the key not in the include:

    CREATE NONCLUSTERED INDEX inx_potential_dups_temp2
    ON [dbo].[#potential_dups] ([rowid_object], [min_object]);

    Also, don't cast the columns as int in the select this will make the query non-sargable just insert them as int when you initially insert them into the temporary table.

  • duplicate

Viewing 3 posts - 1 through 2 (of 2 total)

You must be logged in to reply to this topic. Login to reply