Insert missing records for each month/year with the previous month's value

  • I have a sample data, see below.

    Sample data

    The base rules are

    If the same CustomerID and Account# for the month that don't exist based on the previous month, then insert it;

    If the same CustomerID and Account# for the month that do exist based on the previous month, then keep it.

    For example, a record of the CustomerID 14 has not been updated since Dec. 2019. We need to add it into the missing months. The CustomerID 20 for Account# 1 also missed three records in Jan., Feb. and Apr. 2020. The output should look like this, see Expected results.

    Expected results

    Is there a Fastest way to do it since we have over 20 Million records and the date of the data starts from 2014?

    Thank you very much.

  • drop table if exists dbo.#TestDates;
    go
    create table dbo.#TestDates(
    yr int not null,
    mo int not null,
    cust_id int not null,
    acct_num int not null,
    amnt int not null);
    go

    drop table if exists dbo.#TestDates_fixed;
    go
    create table dbo.#TestDates_fixed(
    yr int not null,
    mo int not null,
    cust_id int not null,
    acct_num int not null,
    amnt int not null,
    row_src char(3));
    go

    insert #TestDates(yr, mo, cust_id, acct_num, amnt) values
    (2019, 11, 14, 1, 200),
    (2019, 11, 20, 1, 500),
    (2019, 11, 20, 2, 25),
    (2019, 12, 20, 1, 670),
    (2019, 12, 20, 2, 50),
    (2020, 1, 20, 2, 700),
    (2020, 2, 20, 2, 746),
    (2020, 3, 20, 1, 550),
    (2020, 3, 20, 2, 600),
    (2020, 4, 20, 2, 720);

    declare
    @end_dt date='2020-04-01';

    ;with dt_range_cte(cust_id, acct_num, mo_dt, lead_dt, lead_diff, amnt) as (
    select
    td.cust_id,
    td.acct_num,
    m.mo_dt,
    lead(m.mo_dt) over (partition by td.cust_id, td.acct_num order by m.mo_dt),
    datediff(month, m.mo_dt, isnull(lead(m.mo_dt) over (partition by td.cust_id, td.acct_num order by m.mo_dt), @end_dt)),
    td.amnt
    from
    #TestDates td
    cross apply
    (select datefromparts(td.yr, td.mo, 1) mo_dt) m)
    insert #TestDates_fixed(yr, mo, cust_id, acct_num, amnt, row_src)
    select *, 'old' from #TestDates
    union all
    select
    year(da.nxt_mo), month(da.nxt_mo), drc.cust_id, drc.acct_num, drc.amnt, 'new'
    from
    dt_range_cte drc
    cross apply
    dbo.fnTally(0, drc.lead_diff) f
    cross apply
    (select dateadd(month, f.n, drc.mo_dt) nxt_mo) da
    where
    f.n>0
    and (drc.lead_dt is null or (drc.lead_dt is not null and f.n<>drc.lead_diff));

    select * from #TestDates_fixed tdf order by 3, 4, 1, 2;

    Aus dem Paradies, das Cantor uns geschaffen, soll uns niemand vertreiben können

  • Thank you!

  • Hi, I have same use case to solve so I was trying your example to check if the code works, but I am getting error with below line

    dbo.fnTally(0, drc.lead_diff) f

    The error is invalid object name: fnTally

    Could you please tell me how to create this object and what should it contain?

    Thank you in advance!

     

     

  • Thank you it works perfect now.

Viewing 6 posts - 1 through 5 (of 5 total)

You must be logged in to reply to this topic. Login to reply