Join two tables using several criterias

  • I'm trying to combine two tables using below criterias
    1. if table 1 referance & amount matched with table 2 referaance & amount join those records
    2. for the items that were not matched above criteria & for rows does not have a value in referance field, join based on the similar amount in table 1 with table 2.
    3. for the items that were not matched in above 2 criterias & with out considering whether a value is contain in referance field or not, join based on the similar amount in table 1 with table 2.

    My tables are as follows.
    CREATE TABLE #tb1 (date DATE , ref INT, amount INT )
    INSERT INTO #tb1 VALUES
    ('2017-01-01',1000,500),
    ('2017-01-15',null,500),
    ('2017-02-01',2000,1000),
    ('2017-02-15',null,1000),
    ('2017-03-01',3000,500)

    CREATE TABLE #tb2 (date2 DATE , ref2 INT, amount2 INT )
    INSERT INTO #tb2 VALUES
    ('2017-01-04',1000,500),
    ('2017-01-20',null,500),
    ('2017-02-05',2000,1000),
    ('2017-02-25',null,1000),
    ('2017-03-05',null,500)

    -- I'm trying to get below result

    -- My code so far, i could consider only first two criterias (not third)

    select * from (
    select *
    from #tb1 a
    left join #tb2 b on a.amount = b.amount2 and a.ref = b.ref2
    where a.ref is not null and b.ref2 is not null
    union all
    select *
    from #tb1 a
    left join #tb2 b on a.amount = b.amount2
    where a.ref is null and b.ref2 is null
    )main
    order by [date]
    --Result i'm getting

    I need to avoid matching each item in table 1 with only one item in table 2 (See the line 3). To do this i'm taking the date fifference for each matched item pair  & keep the pair with lowest date difference.
    --  SQL statement to do this
    select * from (
    select *, DATEDIFF(day, b.date2, a.date) as 'DateDiff', ROW_NUMBER()over(partition by (a.date) order by ABS(DATEDIFF(day, b.date2, a.date))) as 'DiffRank'
    from #tb1 a
    left join #tb2 b on a.amount = b.amount2 and a.ref = b.ref2
    where a.ref is not null and b.ref2 is not null
    union all
    select * , DATEDIFF(day, b.date2, a.date) as 'DateDiff', ROW_NUMBER()over(partition by (a.date) order by ABS(DATEDIFF(day, b.date2, a.date))) as 'DiffRank'
    from #tb1 a
    left join #tb2 b on a.amount = b.amount2
    where a.ref is null and b.ref2 is null
    )main
    WHERE DiffRank = 1
    order by [date]

    Still i have to find a way to match records based  on 3rd criteria.

    There may be better ways to do this. Highly apreciate if some one can help on this.

  • This should get you started:
    IF 0 = 1 BEGIN
     DROP TABLE #tb1
     CREATE TABLE #tb1 (ID INT IDENTITY(1,1), [date] DATE , ref INT, amount INT, T2ID INT)
     INSERT INTO #tb1 ([date], ref, amount) VALUES
     ('2017-01-01',1000,500),
     ('2017-01-15',null,500),
     ('2017-02-01',2000,1000),
     ('2017-02-15',null,1000),
     ('2017-03-01',3000,500)

     DROP TABLE #tb2
     CREATE TABLE #tb2 (ID INT IDENTITY(1,1), [date2] DATE, ref2 INT, amount2 INT)
     INSERT INTO #tb2 ([date2], ref2, amount2) VALUES
     ('2017-01-04',1000,500),
     ('2017-01-20',null,500),
     ('2017-02-05',2000,1000),
     ('2017-02-25',null,1000),
     ('2017-03-05',null,500)
    END

    -- Do the obvious matches
    UPDATE tb1 SET T2ID = tb2.ID
    FROM #tb1 tb1
    INNER JOIN #tb2 tb2
     ON tb2.ref2 = tb1.ref
     AND tb2.Amount2 = tb1.Amount
     AND tb2.date2 > tb1.[date]

    -- find unused rows in tb2 which match unmatched rows in tb1 on amount
    UPDATE tb1 SET T2ID = x.ID
    FROM #tb1 tb1
    CROSS APPLY (
     SELECT TOP(1) tb2.ID
     FROM #tb2 tb2 WHERE NOT EXISTS (SELECT 1 FROM #tb1 t1 WHERE t1.T2ID = tb2.ID)
     AND tb2.Amount2 = tb1.Amount
     AND tb2.date2 > tb1.[date]
     AND tb1.T2ID IS NULL
    ) x

    SELECT * FROM #tb1

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

  • Here ya go.  This gives you the intended results.  I have commented the code but please let me know if you have any additional questions


    DROP TABLE IF EXISTS #tb1
    GO

    DROP TABLE IF EXISTS #tb2
    GO

    CREATE TABLE #tb1
    (
      [date] DATE
     ,[ref]  INT
     ,[amount] INT
    )

    INSERT INTO #tb1
    VALUES
       ('2017-01-01' ,1000 ,500 )
    ,('2017-01-15' ,NULL ,500 )
    ,('2017-02-01' ,2000 ,1000)
    ,('2017-02-15' ,NULL ,1000)
    ,('2017-03-01' ,3000 ,500 )

    CREATE TABLE #tb2
    (
      [date2] DATE
     ,[ref2]  INT
     ,[amount2] INT
    )
    INSERT INTO #tb2
    VALUES
       ('2017-01-04' ,1000 ,500 )
    ,('2017-01-20' ,NULL ,500 )
    ,('2017-02-05' ,2000 ,1000)
    ,('2017-02-25' ,NULL ,1000)
    ,('2017-03-05' ,NULL ,500 )

    ;with cte as -- I love CTEs. I feel it increases the readability of code when doing very quick transformation on the fly
     (
    /*** Ref and Amount columns match ***/
      SELECT [date]
        ,[ref]
        ,[amount]
        ,[date2]
        ,[ref2]
        ,[amount2]
        ,1    as [priority] -- we'll use this to help determine if any ties come from a previous set when it's time to filter  
      FROM   #tb1 t1
      INNER JOIN #tb2 t2
      on  t1.ref = t2.ref2  
        AND t1.amount = t2.amount2
      UNION 
    /*** Ref columns are null but Amounts match ***/
      SELECT [date]
        ,[ref]
        ,[amount]
        ,[date2]
        ,[ref2]
        ,[amount2]
        ,2   as [priority]
      FROM   #tb1 t1
      INNER JOIN #tb2 t2
      on  t1.amount = t2.amount2
      where t1.ref IS NULL AND t2.ref2 IS NULL

      UNION
    /***Ref columns can be null but the amounts have to match ***/
      SELECT [date]
        ,[ref]
        ,[amount]
        ,[date2]
        ,[ref2]
        ,[amount2]
        ,3   as [priority]
      FROM   #tb1 t1
      INNER JOIN #tb2 t2
      ON  t1.amount = t2.amount2
      WHERE t1.ref IS NOT NULL OR t2.ref2 IS NOT NULL
    ),
    /*** Create another CTE so we can rank the previous results by date and criteria weight ***/
    cte2 as
    (
      SELECT [date]
        ,[ref]
        ,[amount]
        ,[date2]
        ,[ref2]
        ,[amount2]            
        ,ROW_NUMBER() OVER(PARTITION BY [date] ORDER BY ABS(datediff(day, [date], [date2])), [priority] ASC) as [rank] -- rank any ties from col1 based on date and the criteria priority
      FROM cte
    )

    SELECT [date]
      ,[ref]
      ,[amount]
      ,[date2]
      ,[ref2]
      ,[amount2]
    FROM cte2
    WHERE rank = 1
    ORDER BY [date] ASC

Viewing 3 posts - 1 through 2 (of 2 total)

You must be logged in to reply to this topic. Login to reply