Query of 100 mil rows with multiple parameters

  • Hello,

    I have a challange with a table with around 100 million rows which look like:

    CREATE TABLE [dbo].[BRFCLSeaWeekHis](

    [Year] [smallint] NOT NULL,

    [Week] [smallint] NOT NULL,

    [PortFrom] [char](5) NOT NULL,

    [PortTo] [char](5) NOT NULL,

    [CarrierID] [char](4) NOT NULL,

    [ConType] [varchar](4) NOT NULL,

    [ConSize] [smallint] NOT NULL,

    [Commodity] [nvarchar](30) NOT NULL,

    [IncotermCode] [varchar](4) NOT NULL,

    [QuoteType] [varchar](2) NOT NULL,

    [CurrencyCode] [char](3) NOT NULL,

    [RateTotal] [decimal](18, 2) NOT NULL,

    [SeaTotal] [decimal](18, 2) NOT NULL,

    [Total] [decimal](18, 2) NOT NULL,

    CONSTRAINT [PK_BRFCLSeaWeekHis] PRIMARY KEY CLUSTERED

    (

    [Year] ASC,

    [Week] ASC,

    [PortFrom] ASC,

    [PortTo] ASC,

    [CarrierID] ASC,

    [ConType] ASC,

    [ConSize] ASC,

    [Commodity] ASC,

    [IncotermCode] ASC,

    [QuoteType] ASC

    )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]

    ) ON [PRIMARY]

    CREATE NONCLUSTERED INDEX [IX_BRFCLSeaWeekHis_QuoteType] ON [dbo].[BRFCLSeaWeekHis]

    (

    [QuoteType] ASC

    )

    INCLUDE ( [Year],

    [Week],

    [PortFrom],

    [PortTo],

    [CarrierID],

    [ConType],

    [ConSize],

    [Commodity],

    [IncotermCode],

    [CurrencyCode],

    [RateTotal],

    [SeaTotal],

    [Total]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]

    The SP I run looks more or less like this, and takes aprox. 50 secs

    CREATE PROCEDURE [dbo].[spSchBuyingRatesHistoryWeek]

    (

    @TradeLanes varchar(6000),

    @Containers varchar(4000),

    @QuoteType nvarchar(2),

    @Incoterms nvarchar(500),

    @CurrencyCode char(3),

    @DateFrom smalldatetime,

    @DateTo smalldatetime,

    @Commodities nvarchar(4000),

    @Carriers varchar(4000),

    @IncludeCheapest bit = 1,

    @IncludeMostExpensive bit = 1,

    @OrderBy varchar(10) = 'SeaTotal',

    @IsIndicatorsCommodityDependant bit = 0

    )

    AS

    BEGIN

    SET NOCOUNT ON

    --SET @TradeLanes = '<ROOT><TradeLane PortFrom="CNSHA" PortTo="DKCPH" /></ROOT>'

    ----SET @Containers = '<ROOT><Container ConType="DV" ConSize="20" /><Container ConType="DV" ConSize="40" /><Container ConType="HC" ConSize="40" /></ROOT>'

    --SET @Containers = '<ROOT><Container ConType="DV" ConSize="20" /></ROOT>'

    --SET @QuoteType = 'I'

    --SET @Incoterms = '<ROOT><Incoterm Code="FOB" /></ROOT>'

    --SET @CurrencyCode = 'USD'

    --SET @DateFrom = '2010-01-01'

    --SET @DateTo = '2012-12-31'

    --SET @Commodities = '<ROOT><Commodity Name="FAK" /></ROOT>'

    --SET @Carriers = '<ROOT><Carrier Code="KKLU" /></ROOT>'

    --SET@IncludeCheapest = 1

    --SET@IncludeMostExpensive = 1

    --SET@OrderBy = 'SeaTotal'

    DECLARE @tradeLanesHandle int

    DECLARE @containerHandle int

    DECLARE @carrierHandle int

    DECLARE @commodityHandle int

    DECLARE @incotermsHandle int

    --Iniciate the xml handlers

    EXEC sp_xml_preparedocument @tradeLanesHandle OUTPUT, @TradeLanes

    EXEC sp_xml_preparedocument @containerHandle OUTPUT, @Containers

    EXEC sp_xml_preparedocument @carrierHandle OUTPUT, @Carriers

    EXEC sp_xml_preparedocument @commodityHandle OUTPUT, @Commodities

    EXEC sp_xml_preparedocument @incotermsHandle OUTPUT, @Incoterms

    SELECT PortFrom, PortTo INTO #TradeLanes FROM OPENXML(@tradeLanesHandle, '/ROOT/TradeLane') WITH (PortFrom nchar(5),PortTo nchar(5)) AS tradelanes;

    CREATE UNIQUE CLUSTERED INDEX IX_2 on #TradeLanes (PortFrom, PortTo);

    SELECT coms.Name INTO #Commodities FROM OPENXML(@commodityHandle, '/ROOT/Commodity') WITH ([Name] nvarchar(30)) AS coms;

    CREATE UNIQUE CLUSTERED INDEX IX_4 on #Commodities ([Name]);

    SELECT cars.Code INTO #Carriers FROM OPENXML(@carrierHandle, '/ROOT/Carrier') WITH (Code nchar(4)) AS cars;

    CREATE UNIQUE CLUSTERED INDEX IX_5 on #Carriers (Code);

    SELECT ConType, consize INTO #Cons FROM OPENXML (@containerHandle, '/ROOT/Container') WITH (ConType nvarchar(4), ConSize smallint) AS cons;

    CREATE UNIQUE CLUSTERED INDEX IX_6 on #Cons (ConType, consize);

    SELECT Code INTO #Incoterms FROM OPENXML (@incotermsHandle, '/ROOT/Incoterm') WITH (Code nvarchar(4)) AS cons;

    CREATE UNIQUE CLUSTERED INDEX IX_7 on #Incoterms (Code);

    DECLARE @WeekFrom int,@WeekTo int, @YearFrom int, @YearTo int;

    SET @WeekFrom = dbo.sf_IsoWeekOfYear(@DateFrom);

    SET @WeekTo = dbo.sf_IsoWeekOfYear(@DateTo);

    SET @YearFrom = DATEPART(yy, @DateFrom);

    SET @YearTo = DATEPART(yy, @DateTo);

    SELECT dbo.sf_WeekdayFromIsoWeekNumber([Year], [Week],7) AS [Day],

    [Year], [Week], FCL.PortFrom, FCL.PortTo, FCL.CarrierID, FCL.ConType, FCL.ConSize, FCL.Commodity, FCL.IncotermCode, FCL.QuoteType, FCL.CurrencyCode, RateTotal, SeaTotal, Total

    FROM

    BRFCLSeaWeekHis AS FCL

    INNER JOIN #cons c ON fcl.ConType = c.ConType AND fcl.ConSize = c.ConSize

    INNER JOIN #TradeLanes as tl on tl.PortFrom = FCL.portFrom AND tl.PortTo = FCL.portTo

    INNER JOIN #Incoterms as i ON i.Code = FCL.IncotermCode

    WHERE QuoteType = @QuoteType AND

    (CASE WHEN FCL.Year > @YearFrom THEN 1 WHEN (FCL.Week >= @WeekFrom AND FCL.Year = @YearFrom) THEN 1 ELSE 0 END) = 1

    AND (CASE WHEN FCL.Year < @YearTo THEN 1 WHEN (FCL.Week <= @WeekTo AND FCL.Year = @YearTo) THEN 1 ELSE 0 END) = 1

    AND (CASE WHEN @Commodities IS NULL THEN 1 WHEN Commodity IN (SELECT Name FROM #Commodities) THEN 1 ELSE 0 END) = 1

    AND (CASE WHEN @Carriers IS NULL THEN 1 WHEN CarrierID IN (SELECT Code FROM #Carriers) THEN 1 ELSE 0 END) = 1

    --Remove tmp tables

    IF OBJECT_ID('tempdb..#Tradelanes') IS NOT NULL

    DROP TABLE #Tradelanes

    IF OBJECT_ID('tempdb..#Commodities') IS NOT NULL

    DROP TABLE #Commodities;

    IF OBJECT_ID('tempdb..#Carriers') IS NOT NULL

    DROP TABLE #Carriers;

    IF OBJECT_ID('tempdb..#cons') IS NOT NULL

    DROP TABLE #cons;

    IF OBJECT_ID('tempdb..#Incoterms') IS NOT NULL

    DROP TABLE #Incoterms

    --Remove handles

    EXEC sp_xml_removedocument @tradeLanesHandle

    EXEC sp_xml_removedocument @commodityHandle

    EXEC sp_xml_removedocument @carrierHandle

    EXEC sp_xml_removedocument @containerHandle

    EXEC sp_xml_removedocument @incotermsHandle

    END

    I attached the xml of the execution plan. Any ideas on how I could optimize this?

  • Hi Mark

    Two or three points to consider.

    Firstly this is a "Catch-all query". In a nutshell, the number of rows returned could vary dramatically depending upon the parameters passed in. You'll have a minimum number of rows of n and a maximum of about 25 million. No single execution plan could be optimal for both. Gail Shaw has an excellent article here [/url]describing how you can deal with this type of query.

    Secondly, your ordinary index has most of the columns of the clustered index as INCLUDE columns. This is unnecessary because nonclustered indexes contain the cluster keys - they're already included behind the scenes.

    Thirdly, and this point follows on from 2: since every nonclustered index contains the cluster keys, it might be advantageous for you to use a single surrogate key instead, such as an identity column.

    SSC has an excellent indexing article by David Durant here[/url].

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

  • For a quick win, change your non-clustered index like so:

    CREATE NONCLUSTERED INDEX [IX_BRFCLSeaWeekHis_QuoteType] ON [dbo].[BRFCLSeaWeekHis]

    (

    [QuoteType] ASC, Year

    )

    INCLUDE (

    [CurrencyCode],

    [RateTotal],

    [SeaTotal],

    [Total])

    and your query like so:

    WHERE QuoteType = @QuoteType

    AND FCL.Year >= @YearFrom

    AND FCL.Year <= @YearTo

    AND (CASE WHEN FCL.Year > @YearFrom THEN 1

    WHEN (FCL.Week >= @WeekFrom AND FCL.Year = @YearFrom) THEN 1

    ELSE 0 END) = 1

    AND (CASE WHEN FCL.Year < @YearTo THEN 1 WHEN (FCL.Week <= @WeekTo AND FCL.Year = @YearTo) THEN 1 ELSE 0 END) = 1

    AND (CASE WHEN @Commodities IS NULL THEN 1 WHEN FCL.Commodity IN (SELECT Name FROM #Commodities) THEN 1 ELSE 0 END) = 1

    AND (CASE WHEN @Carriers IS NULL THEN 1 WHEN FCL.CarrierID IN (SELECT Code FROM #Carriers) THEN 1 ELSE 0 END) = 1

    with OPTION RECOMPILE.

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

  • Hi Chris,

    Thanks for the leads, but even with recompile and changed index, the time is the same.

    In relation to your first post, then I will not likely get anymore than a few 1000 rows as result (maybe 10t-20t), as it will not include more than 5 trade lanes and there are around 250.000 different tradelanes in the table. Does that make it easier to optimize for that specific purpose?

    Furthermore, where can I see if the CPU or the disk (or something third) is the bottleneck?

  • mark.kristensen (2/28/2014)


    Hi Chris,

    Thanks for the leads, but even with recompile and changed index, the time is the same.

    In relation to your first post, then I will not likely get anymore than a few 1000 rows as result (maybe 10t-20t), as it will not include more than 5 trade lanes and there are around 250.000 different tradelanes in the table. Does that make it easier to optimize for that specific purpose?

    Furthermore, where can I see if the CPU or the disk (or something third) is the bottleneck?

    Thanks for the feedback, Mark. Can you post the actual execution plan please?

    Efficient filtering for those tradelanes is the first thing I'd pursue. You currently have way too many rows coming off the big table. That stacks up as logical reads if the data is in cache, physical reads if not.

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

  • I have attached a file to the first post - is that what you need or if not, then please tell me how to get it 🙂

  • I have this from the stats

    Table '#Result_000000050139'. Scan count 0, logical reads 51, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table '#Carriers_00000005013C'. Scan count 0, logical reads 170, physical reads 1, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table '#Commodities_00000005013B'. Scan count 0, logical reads 1066, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table '#Cons_00000005013D'. Scan count 0, logical reads 3366, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table '#Incoterms_00000005013E'. Scan count 0, logical reads 13464, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table 'BRFCLSeaWeekHis'. Scan count 1, logical reads 806044, physical reads 0, read-ahead reads 800135, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table '#TradeLanes_00000005013A'. Scan count 1, logical reads 2, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    SQL Server Execution Times:

    CPU time = 36020 ms, elapsed time = 77212 ms.

  • mark.kristensen (2/28/2014)


    I have attached a file to the first post - is that what you need or if not, then please tell me how to get it 🙂

    Execution plan with the mods I suggested?

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

  • Ah - here it is

  • Thanks.

    Try placing the nonclustered index keys the other way around:

    CREATE NONCLUSTERED INDEX [IX_BRFCLSeaWeekHis_QuoteType] ON [dbo].[BRFCLSeaWeekHis]

    (

    Year, [QuoteType] ASC

    )

    INCLUDE (

    [CurrencyCode],

    [RateTotal],

    [SeaTotal],

    [Total])

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

  • Thanks for the suggestion, but pretty much the same result

    Are there any other tools I can use to dig deeper?

    Table '#Result____00000005022E'. Scan count 0, logical reads 51, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table '#Carriers______00000005022B'. Scan count 0, logical reads 170, physical reads 1, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table '#Commodities____00000005022A'. Scan count 0, logical reads 1066, physical reads 1, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table '#Cons____00000005022C'. Scan count 0, logical reads 3366, physical reads 1, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table '#Incoterms____00000005022D'. Scan count 0, logical reads 13464, physical reads 1, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table 'BRFCLSeaWeekHis'. Scan count 1, logical reads 778057, physical reads 1, read-ahead reads 775951, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    Table '#TradeLanes____000000050229'. Scan count 1, logical reads 2, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.

    SQL Server Execution Times:

    CPU time = 36410 ms, elapsed time = 73626 ms.

  • I guess my first recommendation would be to make the PK a nonclustered index. It's currently way too wide to be of any practical use as a clustered index and will cause BTree of the index on quotetype column to bloat quite badly (remember that all the columns of a clustered index are added to all nonclustered indexes) making it read many more pages per lookup than it should need to. I would put a new clustered index on year and week because that seems to be predominate in all of your queries.

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

  • A significant number of implicit conversions are interfering with optimisation of this query. Here's a few recommendations to start with:

    Cast @YearFrom and @YearTo to same datatype as BRFCLSeaWeekHis.Year

    Cast @QuoteType to same datatype as BRFCLSeaWeekHis.QuoteType

    Temp table #TradeLanes: cast PortFrom and PortTo to same datatype as BRFCLSeaWeekHis.PortFrom and BRFCLSeaWeekHis.PortTo

    Temp table #Incoterms: cast Code to same datatype as BRFCLSeaWeekHis.IncotermCode

    Get these done, run the query again and post the execution plan.

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

  • Ok, now I think we are getting close. Down to 47 secs, but now it also suggests a new index:

    CREATE NONCLUSTERED INDEX [<Name of Missing Index, sysname,>]

    ON [dbo].[BRFCLSeaWeekHis] ([PortFrom],[PortTo],[QuoteType],[Year])

    INCLUDE ([Week],[CarrierID],[ConType],[ConSize],[Commodity],[IncotermCode],[CurrencyCode],[RateTotal],[SeaTotal],[Total])

    @jeff, thanks for the suggestion. I was not aware that all columns got copied down.

  • Once more with details:

    [Expr1031] = Scalar Operator(CONVERT_IMPLICIT(int,[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[Year] as [FCL].[Year],0)),

    [Expr1032] = Scalar Operator(CONVERT_IMPLICIT(int,[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[Week] as [FCL].[Week],0)),

    [Expr1033] = Scalar Operator(CONVERT_IMPLICIT(nchar(3),[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[CurrencyCode] as [FCL].[CurrencyCode],0)),

    [Expr1034] = Scalar Operator(CONVERT_IMPLICIT(decimal(18,5),[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[RateTotal] as [FCL].[RateTotal],0)),

    [Expr1035] = Scalar Operator(CONVERT_IMPLICIT(decimal(18,5),[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[SeaTotal] as [FCL].[SeaTotal],0)),

    [Expr1036] = Scalar Operator(CONVERT_IMPLICIT(decimal(18,5),[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[Total] as [FCL].[Total],0)),

    [Expr1037] = Scalar Operator(CONVERT_IMPLICIT(nvarchar(4),[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[IncotermCode] as [FCL].[IncotermCode],0)),

    [Expr1038] = Scalar Operator(CONVERT_IMPLICIT(nvarchar(4),[FlexQuotes].[dbo].[BRFCLSeaWeekHis].[ConType] as [FCL].[ConType],0))

    Warnings: Type conversion in expression (CONVERT_IMPLICIT(nvarchar(4),[FCL].[ConType],0)) may affect "CardinalityEstimate" in query plan choice, Type conversion in expression (CONVERT_IMPLICIT(nvarchar(4),[FCL].[IncotermCode],0)) may affect "CardinalityEstimate" in query plan choice

    Set the datatype of your variables and temp table columns to be the same as the columns they are matching in BRFCLSeaWeekHis.

    Then post the actual execution plan.

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

Viewing 15 posts - 1 through 15 (of 39 total)

You must be logged in to reply to this topic. Login to reply