August 20, 2023 at 11:21 pm
Good Morning or Good Evening,
Long thread here so I thought I would simplify the problem and my questions.
When limiting the account to 1 (AccountID = 1) I get 30 records. When not applying a where to AccountID (2nd query) I get 50 records. Why? Can I rewrite the CROSS APPLY to get 60?
In the 2nd result see an AccountID of 2 in v.val but the LagReportAccountID is 1. Why are these not consistent?
Is there a different CROSS APPLY join that I need to make this technique work?
Thanks
--Microsoft SQL Server 2019
--Build a temp calendar table
DROP TABLE IF EXISTS #Calendar
CREATE TABLE #Calendar (CalendarDate DATE)
DECLARE @Start DATE = '2022-07-01'
DECLARE @End DATE = '2022-07-31'
WHILE ( @Start < @End )
BEGIN
INSERT INTO #Calendar (CalendarDate) VALUES( @Start )
SELECT @Start = DATEADD(DAY, 1, @Start )
END
DROP TABLE IF EXISTS #MyData
CREATE TABLE #MyData
(
ReportDate varchar(10) NOT NULL,
AccountID int NOT NULL,
ReportValue int NOT NULL
)
INSERT INTO #MyData (ReportDate, AccountID,ReportValue)
VALUES
( '2022-07-01',1, 100 ),
( '2022-07-05',1, 101 ),
( '2022-07-06',1, 102 ),
( '2022-07-07',1, 103 ),
( '2022-07-08',1, 104 ),
( '2022-07-11',1, 105 ),
( '2022-07-12',1, 106 ),
( '2022-07-13',1, 107 ),
( '2022-07-14',1, 108 ),
( '2022-07-15',1, 109 ),
( '2022-07-18',1, 110 ),
( '2022-07-19',1, 111 ),
( '2022-07-20',1, 112 ),
( '2022-07-21',1, 113 ),
( '2022-07-22',1, 114 ),
( '2022-07-25',1, 115 ),
( '2022-07-26',1, 116 ),
( '2022-07-27',1, 117 ),
( '2022-07-28',1, 118 ),
( '2022-07-29',1, 119 ),
( '2022-07-01',2, 200 ),
( '2022-07-05',2, 201 ),
( '2022-07-06',2, 202 ),
( '2022-07-07',2, 203 ),
( '2022-07-08',2, 204 ),
( '2022-07-11',2, 205 ),
( '2022-07-12',2, 206 ),
( '2022-07-13',2, 207 ),
( '2022-07-14',2, 208 ),
( '2022-07-15',2, 209 ),
( '2022-07-18',2, 210 ),
( '2022-07-19',2, 211 ),
( '2022-07-20',2, 212 ),
( '2022-07-21',2, 213 ),
( '2022-07-22',2, 214 ),
( '2022-07-25',2, 215 ),
( '2022-07-26',2, 216 ),
( '2022-07-27',2, 217 ),
( '2022-07-28',2, 218 ),
( '2022-07-29',2, 219 )
--SELECT * FROM #MyData ORDER BY ReportDate, AccountID
--Quick reality check; Accounts are equivalent except account number and account 2 values are in the 200 range
/*
SELECT ReportDate,AccountID,ReportValue FROM #MyData WHERE AccountID =1
EXCEPT
SELECT ReportDate,AccountID - 1,ReportValue - 100 FROM #MyData WHERE AccountID = 2
*/
SELECT
v.val
, C.CalendarDate
, D.ReportDate
, D.AccountID
, D.ReportValue
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 1, 10) AS DATE) AS LagReportDate
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 11, 10) AS INT) AS LagReportValue
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 21, 10) AS INT) AS LagReportAccountID
FROM #Calendar C
LEFT JOIN #MyData D
ON C.CalendarDate = D.ReportDate
AND AccountID = 1
CROSS APPLY (VALUES(CAST(C.CalendarDate AS CHAR(10)) + CAST(D.ReportValue AS CHAR(10)) + CAST(D.AccountID AS CHAR(10)) )) v(val)
ORDER BY C.CalendarDate,LagReportAccountID
SELECT
v.val
, C.CalendarDate
, D.ReportDate
, D.AccountID
, D.ReportValue
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 1, 10) AS DATE) AS LagReportDate
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 11, 10) AS INT) AS LagReportValue
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 21, 10) AS INT) AS LagReportAccountID
FROM #Calendar C
LEFT JOIN #MyData D
ON C.CalendarDate = D.ReportDate
--AND AccountID = 1
CROSS APPLY (VALUES(CAST(C.CalendarDate AS CHAR(10)) + CAST(D.ReportValue AS CHAR(10)) + CAST(D.AccountID AS CHAR(10)) )) v(val)
ORDER BY C.CalendarDate,LagReportAccountID
August 21, 2023 at 12:40 am
I posted a follow-up an hour ago but I don't see it (yet)
Here's my latest, added CROSS JOIN to DISTINCT AccountID which returns the correct number of records.
My 'lag' columns still are returning data somewhat randomly
--Microsoft SQL Server 2019
--Build a temp calendar table
DROP TABLE IF EXISTS #Calendar
CREATE TABLE #Calendar (CalendarDate DATE)
DECLARE @Start DATE = '2022-07-01'
DECLARE @End DATE = '2022-07-31'
WHILE ( @Start < @End )
BEGIN
INSERT INTO #Calendar (CalendarDate) VALUES( @Start )
SELECT @Start = DATEADD(DAY, 1, @Start )
END
DROP TABLE IF EXISTS #MyData
CREATE TABLE #MyData
(
ReportDate varchar(10) NOT NULL,
AccountID int NOT NULL,
ReportValue int NOT NULL
)
INSERT INTO #MyData (ReportDate, AccountID,ReportValue)
VALUES
( '2022-07-01',1, 100 ),
( '2022-07-05',1, 101 ),
( '2022-07-06',1, 102 ),
( '2022-07-07',1, 103 ),
( '2022-07-08',1, 104 ),
( '2022-07-11',1, 105 ),
( '2022-07-12',1, 106 ),
( '2022-07-13',1, 107 ),
( '2022-07-14',1, 108 ),
( '2022-07-15',1, 109 ),
( '2022-07-18',1, 110 ),
( '2022-07-19',1, 111 ),
( '2022-07-20',1, 112 ),
( '2022-07-21',1, 113 ),
( '2022-07-22',1, 114 ),
( '2022-07-25',1, 115 ),
( '2022-07-26',1, 116 ),
( '2022-07-27',1, 117 ),
( '2022-07-28',1, 118 ),
( '2022-07-29',1, 119 ),
( '2022-07-01',2, 200 ),
( '2022-07-05',2, 201 ),
( '2022-07-06',2, 202 ),
( '2022-07-07',2, 203 ),
( '2022-07-08',2, 204 ),
( '2022-07-11',2, 205 ),
( '2022-07-12',2, 206 ),
( '2022-07-13',2, 207 ),
( '2022-07-14',2, 208 ),
( '2022-07-15',2, 209 ),
( '2022-07-18',2, 210 ),
( '2022-07-19',2, 211 ),
( '2022-07-20',2, 212 ),
( '2022-07-21',2, 213 ),
( '2022-07-22',2, 214 ),
( '2022-07-25',2, 215 ),
( '2022-07-26',2, 216 ),
( '2022-07-27',2, 217 ),
( '2022-07-28',2, 218 ),
( '2022-07-29',2, 219 )
--SELECT * FROM #MyData ORDER BY ReportDate, AccountID
--Quick reality check; Accounts are equivalent except account number and account 2 values are in the 200 range
/*
SELECT ReportDate,AccountID,ReportValue FROM #MyData WHERE AccountID =1
EXCEPT
SELECT ReportDate,AccountID - 1,ReportValue - 100 FROM #MyData WHERE AccountID = 2
*/
SELECT
v.val
, C.CalendarDate
, D.ReportDate
, D.AccountID
, D.ReportValue
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 1, 10) AS DATE) AS LagReportDate
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 11, 10) AS INT) AS LagReportValue
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 21, 10) AS INT) AS LagReportAccountID
FROM #Calendar C
CROSS JOIN
(
SELECT DISTINCT AccountID FROM #MyData
--WHERE AccountID = 1
) A
LEFT JOIN #MyData D
ON
C.CalendarDate = D.ReportDate AND
A.AccountID = D.AccountID
CROSS APPLY (VALUES(CAST(C.CalendarDate AS CHAR(10)) + CAST(D.ReportValue AS CHAR(10)) + CAST(D.AccountID AS CHAR(10)))) v(val)
ORDER BY C.CalendarDate,COALESCE(A.AccountID,CAST(SUBSTRING(MAX(v.val) OVER(PARTITION BY A.AccountID ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 11, 10) AS INT))
Viewing 2 posts - 46 through 46 (of 46 total)
You must be logged in to reply to this topic. Login to reply