Rewriting an OUTER APPLY as a LEFT JOIN for performance?

  • Hi,

    I've been tasked with revising a query that is running too slowly.  The slowest part of the query appears to be an outer apply.  Having heard some discussion about APPLY causing performance issues in some SQL, I figured I'd see if I could rewrite this as a LEFT JOIN, but I'm realizing that I'm not sure how to go about doing that given the SELECT TOP 1, ORDER BY and date comparisons that are occurring in the outer apply portion.

    I would greatly appreciate any suggestions on the ideal way to go about writing such a query.  A highly simplified bit of example code is below.

    Ideally, the #ItemCost table would not have duplicates, but this is the scenario I'm working with.

    --Order table

    CREATE

    TABLE #Order (

    OrderNumber

    VARCHAR(10),

    OrderType

    VARCHAR(10),

    OrderDate

    DATE

    )

    INSERT

    INTO #Order VALUES ('100','Type A','02/15/2017')

    INSERT

    INTO #Order VALUES ('315','Type H','01/11/2011')

    --Item table

    CREATE

    TABLE #OrderItem (

    Item

    VARCHAR(10),

    SerialNumber

    VARCHAR(10),

    OrderNumber

    VARCHAR(10)

    )

    INSERT

    INTO #OrderItem VALUES ('Bobsled','2873847','100')

    INSERT

    INTO #OrderItem VALUES ('Watch','0009593','315')

    --Table with duplicates. We only care about the most recent Available Date for each Item/OrderNumber combo.

    CREATE

    TABLE #ItemCost (

    Item

    VARCHAR(10),

    AvailableDate

    DATE,

    OrderNumber

    VARCHAR(10),

    Cost

    MONEY

    )

    INSERT

    INTO #ItemCost VALUES ('Bobsled','05/01/2013','100',145.18)

    INSERT

    INTO #ItemCost VALUES ('Bobsled','03/24/2015','100',131.05)

    INSERT

    INTO #ItemCost VALUES ('Bobsled','06/01/2016','100',138.96)

    INSERT

    INTO #ItemCost VALUES ('Watch','01/03/2011','315',11.06)

    INSERT

    INTO #ItemCost VALUES ('Watch','04/04/2013','315',11.51)

    --Current query to retrieve most recent item's cost

    SELECT

    o

    .OrderNumber,

    oi

    .Item,

    oi

    .SerialNumber,

    ic

    .Cost

    FROM

    #Order o

    JOIN

    #OrderItem oi ON oi.OrderNumber = o.OrderNumber

    OUTER

    APPLY

    (

    SELECT TOP 1 Cost

    FROM #ItemCost ic

    WHERE ic.Item = oi.Item

    AND ic.OrderNumber = o.OrderNumber

    AND ic.AvailableDate <= o.OrderDate

    ORDER BY ic.AvailableDate DESC

    ) ic

    --Dropping the tables

    DROP

    TABLE #OrderItem

    DROP

    TABLE #ItemCost

    DROP

    TABLE #Order

  • Do you have the query plan from this query that you can post, as a .sqlplan file?  Also, what indexes are on these 3 tables?  The OUTER APPLY should work fine if there is an index similar to the following:

    CREATE NONCLUSTERED INDEX [IX_ItemCost_Recentness] ON #ItemCost (Item, OrderNumber, AvailableDate DESC) INCLUDE (Cost);

  • Perhaps something like this, but I agree it would help to see the execution plan of the current query along with the DDL for the tables and all indexes currently defined.


    --Order table
    CREATE TABLE #Order (
      OrderNumber VARCHAR(10),
      OrderType VARCHAR(10),
      OrderDate DATE
    );

    INSERT INTO #Order
    VALUES ('100','Type A','02/15/2017'),
           ('315','Type H','01/11/2011');

    --Item table
    CREATE TABLE #OrderItem (
      Item VARCHAR(10),
      SerialNumber VARCHAR(10),
      OrderNumber VARCHAR(10)
    );

    INSERT INTO #OrderItem
    VALUES ('Bobsled','2873847','100'),
           ('Watch','0009593','315');

    --Table with duplicates. We only care about the most recent Available Date for each Item/OrderNumber combo.
    CREATE TABLE #ItemCost (
      Item VARCHAR(10),
      AvailableDate DATE,
      OrderNumber VARCHAR(10),
      Cost MONEY
    )

    INSERT INTO #ItemCost
    VALUES ('Bobsled','05/01/2013','100',145.18),
           ('Bobsled','03/24/2015','100',131.05),
           ('Bobsled','06/01/2016','100',138.96),
           ('Watch','01/03/2011','315',11.06),
           ('Watch','04/04/2013','315',11.51);

    --Current query to retrieve most recent item's cost
    SELECT
      o.OrderNumber,
      oi.Item,
      oi.SerialNumber,
      ic.Cost
    FROM
      #Order o
      INNER JOIN
      #OrderItem oi
        ON oi.OrderNumber = o.OrderNumber
      OUTER APPLY(SELECT TOP 1
                    Cost
                  FROM
                    #ItemCost ic
                  WHERE
                    ic.Item = oi.Item
                    AND ic.OrderNumber = o.OrderNumber
                    AND ic.AvailableDate <= o.OrderDate
                  ORDER BY
                    ic.AvailableDate DESC) ic

    -- using a left outer join
    SELECT
      o.OrderNumber,
      oi.Item,
      oi.SerialNumber,
      ic.Cost
    FROM
      #Order o
      INNER JOIN
      #OrderItem oi
        ON oi.OrderNumber = o.OrderNumber
      LEFT OUTER JOIN (
        select
          ic.OrderNumber
          , ic.Item
          , ic.Cost
          , rn = row_number() over (partition by ic.OrderNumber, ic.Item order by ic.AvailableDate desc)
        from
          #Order o
          left outer join #ItemCost ic
            on ic.OrderNumber = o.OrderNumber
        where
          ic.AvailableDate <= o.OrderDate
      ) ic
        on o.OrderNumber = ic.OrderNumber
    where
      ic.rn = 1;

    GO
    --Dropping the tables
    DROP TABLE #OrderItem;
    DROP TABLE #ItemCost;
    DROP TABLE #Order;
    GO

       

Viewing 3 posts - 1 through 2 (of 2 total)

You must be logged in to reply to this topic. Login to reply