Using LAG to return prior non null value

  • Good Morning or Good Evening,

    Long thread here so I thought I would simplify the problem and my questions.

    When limiting the account to 1 (AccountID = 1) I get 30 records. When not applying a where to AccountID (2nd query) I get 50 records. Why? Can I rewrite the CROSS APPLY to get 60?

    In the 2nd result see an AccountID  of 2 in v.val but the LagReportAccountID is 1. Why are these not consistent?

    Is there a different CROSS APPLY join that I need to make this technique work?

    Thanks

    --Microsoft SQL Server 2019 

    --Build a temp calendar table
    DROP TABLE IF EXISTS #Calendar
    CREATE TABLE #Calendar (CalendarDate DATE)

    DECLARE @Start DATE = '2022-07-01'
    DECLARE @End DATE = '2022-07-31'

    WHILE ( @Start < @End )
    BEGIN
    INSERT INTO #Calendar (CalendarDate) VALUES( @Start )
    SELECT @Start = DATEADD(DAY, 1, @Start )
    END

    DROP TABLE IF EXISTS #MyData
    CREATE TABLE #MyData
    (
    ReportDate varchar(10) NOT NULL,
    AccountID int NOT NULL,
    ReportValue int NOT NULL
    )
    INSERT INTO #MyData (ReportDate, AccountID,ReportValue)
    VALUES
    ( '2022-07-01',1, 100 ),
    ( '2022-07-05',1, 101 ),
    ( '2022-07-06',1, 102 ),
    ( '2022-07-07',1, 103 ),
    ( '2022-07-08',1, 104 ),
    ( '2022-07-11',1, 105 ),
    ( '2022-07-12',1, 106 ),
    ( '2022-07-13',1, 107 ),
    ( '2022-07-14',1, 108 ),
    ( '2022-07-15',1, 109 ),
    ( '2022-07-18',1, 110 ),
    ( '2022-07-19',1, 111 ),
    ( '2022-07-20',1, 112 ),
    ( '2022-07-21',1, 113 ),
    ( '2022-07-22',1, 114 ),
    ( '2022-07-25',1, 115 ),
    ( '2022-07-26',1, 116 ),
    ( '2022-07-27',1, 117 ),
    ( '2022-07-28',1, 118 ),
    ( '2022-07-29',1, 119 ),
    ( '2022-07-01',2, 200 ),
    ( '2022-07-05',2, 201 ),
    ( '2022-07-06',2, 202 ),
    ( '2022-07-07',2, 203 ),
    ( '2022-07-08',2, 204 ),
    ( '2022-07-11',2, 205 ),
    ( '2022-07-12',2, 206 ),
    ( '2022-07-13',2, 207 ),
    ( '2022-07-14',2, 208 ),
    ( '2022-07-15',2, 209 ),
    ( '2022-07-18',2, 210 ),
    ( '2022-07-19',2, 211 ),
    ( '2022-07-20',2, 212 ),
    ( '2022-07-21',2, 213 ),
    ( '2022-07-22',2, 214 ),
    ( '2022-07-25',2, 215 ),
    ( '2022-07-26',2, 216 ),
    ( '2022-07-27',2, 217 ),
    ( '2022-07-28',2, 218 ),
    ( '2022-07-29',2, 219 )

    --SELECT * FROM #MyData ORDER BY ReportDate, AccountID

    --Quick reality check; Accounts are equivalent except account number and account 2 values are in the 200 range
    /*
    SELECT ReportDate,AccountID,ReportValue FROM #MyData WHERE AccountID =1
    EXCEPT
    SELECT ReportDate,AccountID - 1,ReportValue - 100 FROM #MyData WHERE AccountID = 2
    */

    SELECT
    v.val
    , C.CalendarDate
    , D.ReportDate
    , D.AccountID
    , D.ReportValue
    , CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 1, 10) AS DATE) AS LagReportDate
    , CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 11, 10) AS INT) AS LagReportValue
    , CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 21, 10) AS INT) AS LagReportAccountID
    FROM #Calendar C
    LEFT JOIN #MyData D
    ON C.CalendarDate = D.ReportDate
    AND AccountID = 1
    CROSS APPLY (VALUES(CAST(C.CalendarDate AS CHAR(10)) + CAST(D.ReportValue AS CHAR(10)) + CAST(D.AccountID AS CHAR(10)) )) v(val)
    ORDER BY C.CalendarDate,LagReportAccountID

    SELECT
    v.val
    , C.CalendarDate
    , D.ReportDate
    , D.AccountID
    , D.ReportValue
    , CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 1, 10) AS DATE) AS LagReportDate
    , CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 11, 10) AS INT) AS LagReportValue
    , CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 21, 10) AS INT) AS LagReportAccountID
    FROM #Calendar C
    LEFT JOIN #MyData D
    ON C.CalendarDate = D.ReportDate
    --AND AccountID = 1
    CROSS APPLY (VALUES(CAST(C.CalendarDate AS CHAR(10)) + CAST(D.ReportValue AS CHAR(10)) + CAST(D.AccountID AS CHAR(10)) )) v(val)
    ORDER BY C.CalendarDate,LagReportAccountID

  • I posted a follow-up an hour ago but I don't see it (yet)

    Here's my latest,  added CROSS JOIN to DISTINCT  AccountID which returns the correct number of records.

    My 'lag' columns still are returning data somewhat randomly

    --Microsoft SQL Server 2019 

    --Build a temp calendar table
    DROP TABLE IF EXISTS #Calendar
    CREATE TABLE #Calendar (CalendarDate DATE)

    DECLARE @Start DATE = '2022-07-01'
    DECLARE @End DATE = '2022-07-31'

    WHILE ( @Start < @End )
    BEGIN
    INSERT INTO #Calendar (CalendarDate) VALUES( @Start )
    SELECT @Start = DATEADD(DAY, 1, @Start )
    END

    DROP TABLE IF EXISTS #MyData
    CREATE TABLE #MyData
    (
    ReportDate varchar(10) NOT NULL,
    AccountID int NOT NULL,
    ReportValue int NOT NULL
    )
    INSERT INTO #MyData (ReportDate, AccountID,ReportValue)
    VALUES
    ( '2022-07-01',1, 100 ),
    ( '2022-07-05',1, 101 ),
    ( '2022-07-06',1, 102 ),
    ( '2022-07-07',1, 103 ),
    ( '2022-07-08',1, 104 ),
    ( '2022-07-11',1, 105 ),
    ( '2022-07-12',1, 106 ),
    ( '2022-07-13',1, 107 ),
    ( '2022-07-14',1, 108 ),
    ( '2022-07-15',1, 109 ),
    ( '2022-07-18',1, 110 ),
    ( '2022-07-19',1, 111 ),
    ( '2022-07-20',1, 112 ),
    ( '2022-07-21',1, 113 ),
    ( '2022-07-22',1, 114 ),
    ( '2022-07-25',1, 115 ),
    ( '2022-07-26',1, 116 ),
    ( '2022-07-27',1, 117 ),
    ( '2022-07-28',1, 118 ),
    ( '2022-07-29',1, 119 ),
    ( '2022-07-01',2, 200 ),
    ( '2022-07-05',2, 201 ),
    ( '2022-07-06',2, 202 ),
    ( '2022-07-07',2, 203 ),
    ( '2022-07-08',2, 204 ),
    ( '2022-07-11',2, 205 ),
    ( '2022-07-12',2, 206 ),
    ( '2022-07-13',2, 207 ),
    ( '2022-07-14',2, 208 ),
    ( '2022-07-15',2, 209 ),
    ( '2022-07-18',2, 210 ),
    ( '2022-07-19',2, 211 ),
    ( '2022-07-20',2, 212 ),
    ( '2022-07-21',2, 213 ),
    ( '2022-07-22',2, 214 ),
    ( '2022-07-25',2, 215 ),
    ( '2022-07-26',2, 216 ),
    ( '2022-07-27',2, 217 ),
    ( '2022-07-28',2, 218 ),
    ( '2022-07-29',2, 219 )

    --SELECT * FROM #MyData ORDER BY ReportDate, AccountID

    --Quick reality check; Accounts are equivalent except account number and account 2 values are in the 200 range
    /*
    SELECT ReportDate,AccountID,ReportValue FROM #MyData WHERE AccountID =1
    EXCEPT
    SELECT ReportDate,AccountID - 1,ReportValue - 100 FROM #MyData WHERE AccountID = 2
    */

    SELECT
    v.val
    , C.CalendarDate
    , D.ReportDate
    , D.AccountID
    , D.ReportValue
    , CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 1, 10) AS DATE) AS LagReportDate
    , CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 11, 10) AS INT) AS LagReportValue
    , CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 21, 10) AS INT) AS LagReportAccountID
    FROM #Calendar C
    CROSS JOIN
    (
    SELECT DISTINCT AccountID FROM #MyData
    --WHERE AccountID = 1
    ) A
    LEFT JOIN #MyData D
    ON
    C.CalendarDate = D.ReportDate AND
    A.AccountID = D.AccountID

    CROSS APPLY (VALUES(CAST(C.CalendarDate AS CHAR(10)) + CAST(D.ReportValue AS CHAR(10)) + CAST(D.AccountID AS CHAR(10)))) v(val)
    ORDER BY C.CalendarDate,COALESCE(A.AccountID,CAST(SUBSTRING(MAX(v.val) OVER(PARTITION BY A.AccountID ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 11, 10) AS INT))

Viewing 2 posts - 46 through 46 (of 46 total)

You must be logged in to reply to this topic. Login to reply