Insert missing records for each month/year with the previous month's value

  • sxzhu

    SSC Enthusiast

    Points: 114

    I have a sample data, see below.

    Sample data

    The base rules are

    If the same CustomerID and Account# for the month that don't exist based on the previous month, then insert it;

    If the same CustomerID and Account# for the month that do exist based on the previous month, then keep it.

    For example, a record of the CustomerID 14 has not been updated since Dec. 2019. We need to add it into the missing months. The CustomerID 20 for Account# 1 also missed three records in Jan., Feb. and Apr. 2020. The output should look like this, see Expected results.

    Expected results

    Is there a Fastest way to do it since we have over 20 Million records and the date of the data starts from 2014?

    Thank you very much.

  • Steve Collins

    Ten Centuries

    Points: 1075

    drop table if exists dbo.#TestDates;
    go
    create table dbo.#TestDates(
    yr int not null,
    mo int not null,
    cust_id int not null,
    acct_num int not null,
    amnt int not null);
    go

    drop table if exists dbo.#TestDates_fixed;
    go
    create table dbo.#TestDates_fixed(
    yr int not null,
    mo int not null,
    cust_id int not null,
    acct_num int not null,
    amnt int not null,
    row_src char(3));
    go

    insert #TestDates(yr, mo, cust_id, acct_num, amnt) values
    (2019, 11, 14, 1, 200),
    (2019, 11, 20, 1, 500),
    (2019, 11, 20, 2, 25),
    (2019, 12, 20, 1, 670),
    (2019, 12, 20, 2, 50),
    (2020, 1, 20, 2, 700),
    (2020, 2, 20, 2, 746),
    (2020, 3, 20, 1, 550),
    (2020, 3, 20, 2, 600),
    (2020, 4, 20, 2, 720);

    declare
    @end_dt date='2020-04-01';

    ;with dt_range_cte(cust_id, acct_num, mo_dt, lead_dt, lead_diff, amnt) as (
    select
    td.cust_id,
    td.acct_num,
    m.mo_dt,
    lead(m.mo_dt) over (partition by td.cust_id, td.acct_num order by m.mo_dt),
    datediff(month, m.mo_dt, isnull(lead(m.mo_dt) over (partition by td.cust_id, td.acct_num order by m.mo_dt), @end_dt)),
    td.amnt
    from
    #TestDates td
    cross apply
    (select datefromparts(td.yr, td.mo, 1) mo_dt) m)
    insert #TestDates_fixed(yr, mo, cust_id, acct_num, amnt, row_src)
    select *, 'old' from #TestDates
    union all
    select
    year(da.nxt_mo), month(da.nxt_mo), drc.cust_id, drc.acct_num, drc.amnt, 'new'
    from
    dt_range_cte drc
    cross apply
    dbo.fnTally(0, drc.lead_diff) f
    cross apply
    (select dateadd(month, f.n, drc.mo_dt) nxt_mo) da
    where
    f.n>0
    and (drc.lead_dt is null or (drc.lead_dt is not null and f.n<>drc.lead_diff));

    select * from #TestDates_fixed tdf order by 3, 4, 1, 2;

    Aus dem Paradies, das Cantor uns geschaffen, soll uns niemand vertreiben können

  • sxzhu

    SSC Enthusiast

    Points: 114

    Thank you!

Viewing 3 posts - 1 through 3 (of 3 total)

You must be logged in to reply to this topic. Login to reply