Table scan paranoia

  • I have a stored procedure hitting a 761969 row table and doing several table scans on it. It is indexed, as I'll show below. I can't get an actual query plan at the moment, but I pulled what I could out of the cache, and it's attached. Table and index definitions and relevant code are below.

    Table:

    CREATE TABLE [dbo].[tbl_extr_items](

    [item_idx] [int] NOT NULL,

    [Computer_Id] [smallint] NULL,

    [Collection_Surr_ID] [int] NULL,

    [group_id] [int] NULL,

    [parent_id] [int] NULL,

    [clan_id] [int] NULL,

    [clan_flag] [char](1) NULL,

    [clan_count_expected] [int] NULL,

    [clan_count_processed] [int] NULL,

    [org_path] [nvarchar](max) NULL,

    [path_within_container_offset] [smallint] NULL,

    [index_within_path] [int] NULL,

    [org_name] [nvarchar](520) NULL,

    [Extension_id] [smallint] NULL,

    [Orig_Extension_id] [smallint] NULL,

    [item_type_id] [smallint] NULL,

    [item_file_size] [bigint] NULL,

    [datecreated] [datetime] NULL,

    [datemodified] [datetime] NULL,

    [dateaccessed] [datetime] NULL,

    [hash_value] [char](32) NULL,

    [top_level_item] [bit] NULL,

    [attachment_type] [int] NULL,

    [metadata_extraction_status] [bit] NULL,

    [mso_metadata_extraction_status] [bit] NULL,

    [active] [int] NULL,

    [date_filtered] [bit] NULL,

    [keyword_searched] [bit] NULL,

    [deduped] [bit] NULL,

    [TextExtractionStatus] [int] NULL,

    [process_status] [int] NULL,

    [process_tech_error] [int] NULL,

    [header_recognition_status] [int] NULL,

    [process_start] [datetime] NULL,

    [duplicate_of] [bigint] NULL,

    [password_used] [nvarchar](260) NULL,

    [Printed_Computer_id] [smallint] NULL,

    [QCed_Computer_id] [smallint] NULL,

    [Comments] [varchar](255) NULL,

    [Error_id] [int] NULL,

    [pr_img_count] [int] NULL,

    [reprint_count] [int] NULL,

    [qc_tiff_count] [int] NULL,

    [embedded_extraction_status] [int] NULL,

    [print_with] [smallint] NULL,

    [file_type_id] [smallint] NULL,

    [RollingExportingSetID] [int] NULL,

    [ExceptionTypeID] [int] NULL,

    [qc_review] [bit] NULL,

    [utf8_processed] [bit] NULL,

    [PDFScanStatusSessionID] [uniqueidentifier] NULL,

    [MSOItemsSessionID] [uniqueidentifier] NULL,

    CONSTRAINT [PK_tbl_extr_items_Item_idx_3E6F809B-F53A-48FC-81A1-E6F7504DF641] PRIMARY KEY CLUSTERED

    (

    [item_idx] ASC

    )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]

    ) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]

    GO

    SET ANSI_PADDING OFF

    GO

    Vendor created indexes:

    CREATE INDEX [IX_tbl_extr_items_Collection_Surr_Id_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [Collection_Surr_ID] ) INCLUDE ( [active]) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    CREATE INDEX [Ind_collsurr_prostatus_item_computer_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [Collection_Surr_ID], [process_status] ) INCLUDE ( [Computer_Id], [item_idx]) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    CREATE INDEX [Ind_Comp_colsur_item_prostatus_ext_itemtype_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [Computer_Id], [Collection_Surr_ID], [item_idx], [process_status], [Extension_id], [item_type_id] ) INCLUDE ( [active], [attachment_type], [clan_count_expected], [clan_count_processed], [clan_flag], [clan_id], [Comments], [date_filtered], [dateaccessed], [datecreated], [datemodified], [deduped], [duplicate_of], [embedded_extraction_status], [Error_id], [ExceptionTypeID], [file_type_id], [group_id], [hash_value], [header_recognition_status], [index_within_path], [item_file_size], [keyword_searched], [metadata_extraction_status], [mso_metadata_extraction_status], [org_name], [org_path], [Orig_Extension_id], [parent_id], [password_used], [path_within_container_offset], [pr_img_count], [print_with], [Printed_Computer_id], [process_start], [process_tech_error], [qc_review], [qc_tiff_count], [QCed_Computer_id], [reprint_count], [RollingExportingSetID], [TextExtractionStatus], [top_level_item], [utf8_processed]) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    CREATE INDEX [IX_tbl_extr_items_deduped_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [deduped] ) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    CREATE INDEX [IX_tbl_extr_items_Extension_id_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [Extension_id] ) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    ALTER TABLE [dbo].[tbl_extr_items] ADD CONSTRAINT [PK_tbl_extr_items_Item_idx_3E6F809B-F53A-48FC-81A1-E6F7504DF641] PRIMARY KEY CLUSTERED ( [item_idx] ) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    CREATE INDEX [IX_tbl_extr_items_item_type_id_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [item_type_id] ) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    CREATE INDEX [Ind_item_colSurr_proStatus_clan_item_idx_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [item_type_id], [Collection_Surr_ID], [process_status], [clan_id], [item_idx] ) INCLUDE ( [hash_value]) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    CREATE INDEX [IX_tbl_extr_items_parent_id_clan_id_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [parent_id], [clan_id] ) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    CREATE INDEX [Ind_Parent_Process_status_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [parent_id], [process_status] ) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    CREATE INDEX [Ind_proStatus_colsur_item_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [process_status], [Collection_Surr_ID], [item_idx] ) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    CREATE INDEX [Ind_proStatus_parent_item_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [process_status], [parent_id], [item_idx] ) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    Query:

    BEGIN

    Update dbo.tbl_extr_items

    set TextExtractionStatus = (@PRM_Computer_id + 10000), Computer_id = @PRM_Computer_id

    where item_idx in

    (

    Select top (@PRM_BufferSize) B.item_idx

    from dbo.tbl_extr_items As B with (nolock)

    JOIN tbl_extensions d with (nolock) on b.extension_id = d.extension_id

    join item_types it with (nolock) on b.item_type_id = it.item_type_id

    where TextExtractionStatus is null

    and collection_surr_id = @PRM_Collection_Surr_id

    and active = 1 and process_status >= 200

    order by item_idx

    )

    END

    Thanks

  • Firstly, you have multiple redundant indexes on that table.

    Let's see...

    CREATE INDEX [IX_tbl_extr_items_Collection_Surr_Id_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [Collection_Surr_ID] ) INCLUDE ( [active]) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    CREATE INDEX [Ind_collsurr_prostatus_item_computer_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [Collection_Surr_ID], [process_status] ) INCLUDE ( [Computer_Id], [item_idx]) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    These two can be merged. Add [active] as an additional include column on the second and drop the first.

    CREATE INDEX [IX_tbl_extr_items_item_type_id_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [item_type_id] ) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    This one is entirely redundant. Drop it.

    CREATE INDEX [Ind_Parent_Process_status_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [parent_id], [process_status] ) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    Given that you have another index with parent_id as the leading column and an index on process_status, parent_id, this can be dropped. Any query that used it can use one of the other two.

    If I get time later, I'll poke at the execution plan.

    Gail Shaw
    Microsoft Certified Master: SQL Server, MVP, M.Sc (Comp Sci)
    SQL In The Wild: Discussions on DB performance with occasional diversions into recoverability

    We walk in the dark places no others will enter
    We stand on the bridge and no one may pass
  • sqldriver (5/9/2014)


    I have a stored procedure hitting a 761969 row table and doing several table scans on it. It is indexed, as I'll show below. I can't get an actual query plan at the moment, but I pulled what I could out of the cache, and it's attached. Table and index definitions and relevant code are below.

    Is there a question here? You posted a lot of information but there is nothing to indicate what the issue is.

    BTW, you really should stop littering your queries with table hints. You have xlock, rowlock and nolock splattered everywhere in here. Do you know what all those hints do? Can you explain them in detail without reference materials? Do you have a valid reason for using them? Are you cool with missing and/or duplicate data? Unless you really know what you are doing with locking you should let the sql engine handle it. When you use table hints like this you are saying that you know better how to handle this than the sql engine.

    _______________________________________________________________

    Need help? Help us help you.

    Read the article at http://www.sqlservercentral.com/articles/Best+Practices/61537/ for best practices on asking questions.

    Need to split a string? Try Jeff Modens splitter http://www.sqlservercentral.com/articles/Tally+Table/72993/.

    Cross Tabs and Pivots, Part 1 – Converting Rows to Columns - http://www.sqlservercentral.com/articles/T-SQL/63681/
    Cross Tabs and Pivots, Part 2 - Dynamic Cross Tabs - http://www.sqlservercentral.com/articles/Crosstab/65048/
    Understanding and Using APPLY (Part 1) - http://www.sqlservercentral.com/articles/APPLY/69953/
    Understanding and Using APPLY (Part 2) - http://www.sqlservercentral.com/articles/APPLY/69954/

  • GilaMonster (5/9/2014)


    Firstly, you have multiple redundant indexes on that table.

    Let's see...

    CREATE INDEX [IX_tbl_extr_items_Collection_Surr_Id_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [Collection_Surr_ID] ) INCLUDE ( [active]) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    CREATE INDEX [Ind_collsurr_prostatus_item_computer_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [Collection_Surr_ID], [process_status] ) INCLUDE ( [Computer_Id], [item_idx]) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    These two can be merged. Add [active] as an additional include column on the second and drop the first.

    CREATE INDEX [IX_tbl_extr_items_item_type_id_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [item_type_id] ) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    This one is entirely redundant. Drop it.

    CREATE INDEX [Ind_Parent_Process_status_3E6F809B-F53A-48FC-81A1-E6F7504DF641] ON [dbo].[tbl_extr_items] ( [parent_id], [process_status] ) WITH (FILLFACTOR=100, ONLINE=?, SORT_IN_TEMPDB=?);

    Given that you have another index with parent_id as the leading column and an index on process_status, parent_id, this can be dropped. Any query that used it can use one of the other two.

    If I get time later, I'll poke at the execution plan.

    Hi Gail,

    Thanks, but I can't really modify these. They're vendor indexes and they stink, but I'm stuck with them.

  • Tell the vendor to fix their garbage (politely). They probably don't have a clue about good indexes.

    If you can't drop or change indexes, how do you expect to tune the query?

    Gail Shaw
    Microsoft Certified Master: SQL Server, MVP, M.Sc (Comp Sci)
    SQL In The Wild: Discussions on DB performance with occasional diversions into recoverability

    We walk in the dark places no others will enter
    We stand on the bridge and no one may pass
  • Sean Lange (5/9/2014)


    sqldriver (5/9/2014)


    I have a stored procedure hitting a 761969 row table and doing several table scans on it. It is indexed, as I'll show below. I can't get an actual query plan at the moment, but I pulled what I could out of the cache, and it's attached. Table and index definitions and relevant code are below.

    Is there a question here? You posted a lot of information but there is nothing to indicate what the issue is.

    BTW, you really should stop littering your queries with table hints. You have xlock, rowlock and nolock splattered everywhere in here. Do you know what all those hints do? Can you explain them in detail without reference materials? Do you have a valid reason for using them? Are you cool with missing and/or duplicate data? Unless you really know what you are doing with locking you should let the sql engine handle it. When you use table hints like this you are saying that you know better how to handle this than the sql engine.

    Hi Sean,

    This is not my code. I've seen the rants around here and elsewhere for long enough to know not to use these things, but when I try to tell other people they just stare blankly at me. They're apparently in heavy rotation out there in the world, these hints. The one dev who actually responds to my emails about hints said that WITH NOLOCK was the norm at his last shop. Sorry man.

    I'm looking into why there's a table scan on a sort of large, heavily indexed table. The reasons I've seen for it, namely inequality predicates, %like% searches, no CI are not at play.

    Thanks

  • How many rows in the table, how many rows does the query return? If it's most of the table, then the table scan could be the most efficient way.

    btw, lack of a clustered index doesn't cause table scans. Inequality predicates don't cause table scans.

    Gail Shaw
    Microsoft Certified Master: SQL Server, MVP, M.Sc (Comp Sci)
    SQL In The Wild: Discussions on DB performance with occasional diversions into recoverability

    We walk in the dark places no others will enter
    We stand on the bridge and no one may pass
  • sqldriver (5/9/2014)


    Sean Lange (5/9/2014)


    sqldriver (5/9/2014)


    I have a stored procedure hitting a 761969 row table and doing several table scans on it. It is indexed, as I'll show below. I can't get an actual query plan at the moment, but I pulled what I could out of the cache, and it's attached. Table and index definitions and relevant code are below.

    Is there a question here? You posted a lot of information but there is nothing to indicate what the issue is.

    BTW, you really should stop littering your queries with table hints. You have xlock, rowlock and nolock splattered everywhere in here. Do you know what all those hints do? Can you explain them in detail without reference materials? Do you have a valid reason for using them? Are you cool with missing and/or duplicate data? Unless you really know what you are doing with locking you should let the sql engine handle it. When you use table hints like this you are saying that you know better how to handle this than the sql engine.

    Hi Sean,

    This is not my code. I've seen the rants around here and elsewhere for long enough to know not to use these things, but when I try to tell other people they just stare blankly at me. They're apparently in heavy rotation out there in the world, these hints. The one dev who actually responds to my emails about hints said that WITH NOLOCK was the norm at his last shop. Sorry man.

    I'm looking into why there's a table scan on a sort of large, heavily indexed table. The reasons I've seen for it, namely inequality predicates, %like% searches, no CI are not at play.

    Thanks

    I feel your pain. I didn't realize this was vendor code or I probably wouldn't have gone a rant. 😉 Although maybe you should go on a rant to the vendor.

    I have met with resistance in the past about the NOLOCK hint. When showing some proof isn't enough I relay a real world scenario that I was involved with where a health insurance company required that hint on every single query. Then when the debit cards started approving transactions when there was not enough funds or denying them when there was enough money they finally listened to my appeals. At the end of this they spend literally thousands of hours first adding that hint, then doing lots of analysis on the code why the transactions were not working correctly and finally removing all those hints. :w00t:

    Here are a few articles I refer to frequently that do a good explaining and demonstrating how nasty nolock can be.

    http://sqlblogcasts.com/blogs/tonyrogerson/archive/2006/11/10/1280.aspx

    http://blogs.msdn.com/b/davidlean/archive/2009/04/06/sql-server-nolock-hint-other-poor-ideas.aspx

    http://www.jasonstrate.com/2012/06/the-side-effect-of-nolock/[/url]

    _______________________________________________________________

    Need help? Help us help you.

    Read the article at http://www.sqlservercentral.com/articles/Best+Practices/61537/ for best practices on asking questions.

    Need to split a string? Try Jeff Modens splitter http://www.sqlservercentral.com/articles/Tally+Table/72993/.

    Cross Tabs and Pivots, Part 1 – Converting Rows to Columns - http://www.sqlservercentral.com/articles/T-SQL/63681/
    Cross Tabs and Pivots, Part 2 - Dynamic Cross Tabs - http://www.sqlservercentral.com/articles/Crosstab/65048/
    Understanding and Using APPLY (Part 1) - http://www.sqlservercentral.com/articles/APPLY/69953/
    Understanding and Using APPLY (Part 2) - http://www.sqlservercentral.com/articles/APPLY/69954/

  • GilaMonster (5/9/2014)


    Tell the vendor to fix their garbage (politely). They probably don't have a clue about good indexes.

    If you can't drop or change indexes, how do you expect to tune the query?

    I was just going to rub some dirt on it.

    Here's the thing: I know a table scan isn't the end of the world. I'm just looking at all this and trying to figure out why it's happening. It's not meeting any of the conditions I usually see being met when a table scan occurs. It's a bit of a curiosity to me.

    I'll add a helpful index and test that, but I don't want to start dropping and modifying the vendor indexes and then have to roll back changes before an offshore support call, you know?

    Thanks

  • sqldriver (5/9/2014)


    GilaMonster (5/9/2014)


    Tell the vendor to fix their garbage (politely). They probably don't have a clue about good indexes.

    If you can't drop or change indexes, how do you expect to tune the query?

    I was just going to rub some dirt on it.

    Here's the thing: I know a table scan isn't the end of the world. I'm just looking at all this and trying to figure out why it's happening. It's not meeting any of the conditions I usually see being met when a table scan occurs. It's a bit of a curiosity to me.

    I'll add a helpful index and test that, but I don't want to start dropping and modifying the vendor indexes and then have to roll back changes before an offshore support call, you know?

    Thanks

    There's about 8 years of code liberally peppered with no lock hints. The only code I can hope to influence is in-house, which there's plenty of. But for that I get answers on it like "we only load at the beginning of a job" and "we only archive at the end of end of a job" so all the inserts, updates, and deletes are confined to a certain portion of the life cycle or confined to windows when users won't be running searches, so there theoretically won't be data skews like what would occur in a busy OLTP environment. So then why waste time adding the hint? Apparently everyone is so polite they don't want to block anyone else doing anything.

    Thanks

  • I can't see any useful covering indexes for that query, so it's probably just a case of the indexes being insufficiently selective and not covering. Cheaper to scan the table than do all the necessary key lookups.

    Gail Shaw
    Microsoft Certified Master: SQL Server, MVP, M.Sc (Comp Sci)
    SQL In The Wild: Discussions on DB performance with occasional diversions into recoverability

    We walk in the dark places no others will enter
    We stand on the bridge and no one may pass
  • Thanks Gail. That's the much smarter way of saying what I was thinking.

  • sqldriver (5/9/2014)


    I have a stored procedure hitting a 761969 row table and doing several table scans on it...

    Properly written queries will cut down on those scans. Consider the query you posted. It can be rewritten like this:

    ;WITH updater AS (

    SELECT TOP(@PRM_BufferSize)

    TextExtractionStatus,

    NEWTextExtractionStatus = (@PRM_Computer_id + 10000),

    Computer_id,

    NEWComputer_id = @PRM_Computer_id

    FROM dbo.tbl_extr_items b

    JOIN tbl_extensions d

    ON b.extension_id = d.extension_id

    JOIN item_types it

    ON b.item_type_id = it.item_type_id

    WHERE b.TextExtractionStatus IS NULL

    AND collection_surr_id = @PRM_Collection_Surr_id

    AND active = 1

    AND process_status >= 200

    ORDER BY item_idx

    )

    UPDATE updater SET

    TextExtractionStatus = NEWTextExtractionStatus,

    Computer_id = NEWComputer_id;

    - because item_idx is unique. Here's a little test to show how it works:

    DROP TABLE #test; DROP TABLE #Second;

    CREATE TABLE #test (item_idx INT IDENTITY(1,1) PRIMARY KEY, TextExtractionStatus INT, Computer_id INT);

    INSERT INTO #test (TextExtractionStatus, Computer_id)

    VALUES (NULL, 10),(NULL, 10),(NULL, 10),(NULL, 10),(NULL, 10),(NULL, 10),(NULL, 10),(NULL, 10),(NULL, 10),(NULL, 10);

    CREATE TABLE #Second (item_idx INT, Something INT)

    INSERT INTO #Second (item_idx, Something)

    VALUES (1,1),(2,1),(4,1),(5,1),(6,1),(7,1),(8,1),(9,1),(10,1),(11,1);

    SELECT * FROM #test;

    DECLARE @PRM_BufferSize INT = 3, @PRM_Computer_id INT = 5;

    WITH updater AS (

    SELECT TOP(@PRM_BufferSize)

    TextExtractionStatus,

    NEWTextExtractionStatus = (@PRM_Computer_id + 10000),

    Computer_id,

    NEWComputer_id = @PRM_Computer_id

    FROM #test t

    INNER JOIN #Second s ON s.item_idx = t.item_idx

    )

    UPDATE updater SET

    TextExtractionStatus = NEWTextExtractionStatus,

    Computer_id = NEWComputer_id;

    SELECT * FROM #test;


    [font="Arial"]Low-hanging fruit picker and defender of the moggies[/font]

    For better assistance in answering your questions, please read this[/url].


    Understanding and using APPLY, (I)[/url] and (II)[/url] Paul White[/url]

    Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]

  • Thanks Chris. I'll do some testing this week with that concept.

Viewing 14 posts - 1 through 13 (of 13 total)

You must be logged in to reply to this topic. Login to reply