August 20, 2023 at 11:21 pm
Good Morning or Good Evening,
Long thread here so I thought I would simplify the problem and my questions.
When limiting the account to 1 (AccountID = 1) I get 30 records. When not applying a where to AccountID (2nd query) I get 50 records. Why? Can I rewrite the CROSS APPLY to get 60?
In the 2nd result see an AccountID of 2 in v.val but the LagReportAccountID is 1. Why are these not consistent?
Is there a different CROSS APPLY join that I need to make this technique work?
Thanks
--Microsoft SQL Server 2019
--Build a temp calendar table
DROP TABLE IF EXISTS #Calendar
CREATE TABLE #Calendar (CalendarDate DATE)
DECLARE @Start DATE = '2022-07-01'
DECLARE @End DATE = '2022-07-31'
WHILE ( @Start < @End )
BEGIN
INSERT INTO #Calendar (CalendarDate) VALUES( @Start )
SELECT @Start = DATEADD(DAY, 1, @Start )
END
DROP TABLE IF EXISTS #MyData
CREATE TABLE #MyData
(
ReportDate varchar(10) NOT NULL,
AccountID int NOT NULL,
ReportValue int NOT NULL
)
INSERT INTO #MyData (ReportDate, AccountID,ReportValue)
VALUES
( '2022-07-01',1, 100 ),
( '2022-07-05',1, 101 ),
( '2022-07-06',1, 102 ),
( '2022-07-07',1, 103 ),
( '2022-07-08',1, 104 ),
( '2022-07-11',1, 105 ),
( '2022-07-12',1, 106 ),
( '2022-07-13',1, 107 ),
( '2022-07-14',1, 108 ),
( '2022-07-15',1, 109 ),
( '2022-07-18',1, 110 ),
( '2022-07-19',1, 111 ),
( '2022-07-20',1, 112 ),
( '2022-07-21',1, 113 ),
( '2022-07-22',1, 114 ),
( '2022-07-25',1, 115 ),
( '2022-07-26',1, 116 ),
( '2022-07-27',1, 117 ),
( '2022-07-28',1, 118 ),
( '2022-07-29',1, 119 ),
( '2022-07-01',2, 200 ),
( '2022-07-05',2, 201 ),
( '2022-07-06',2, 202 ),
( '2022-07-07',2, 203 ),
( '2022-07-08',2, 204 ),
( '2022-07-11',2, 205 ),
( '2022-07-12',2, 206 ),
( '2022-07-13',2, 207 ),
( '2022-07-14',2, 208 ),
( '2022-07-15',2, 209 ),
( '2022-07-18',2, 210 ),
( '2022-07-19',2, 211 ),
( '2022-07-20',2, 212 ),
( '2022-07-21',2, 213 ),
( '2022-07-22',2, 214 ),
( '2022-07-25',2, 215 ),
( '2022-07-26',2, 216 ),
( '2022-07-27',2, 217 ),
( '2022-07-28',2, 218 ),
( '2022-07-29',2, 219 )
--SELECT * FROM #MyData ORDER BY ReportDate, AccountID
--Quick reality check; Accounts are equivalent except account number and account 2 values are in the 200 range
/*
SELECT ReportDate,AccountID,ReportValue FROM #MyData WHERE AccountID =1
EXCEPT
SELECT ReportDate,AccountID - 1,ReportValue - 100 FROM #MyData WHERE AccountID = 2
*/
SELECT
v.val
, C.CalendarDate
, D.ReportDate
, D.AccountID
, D.ReportValue
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 1, 10) AS DATE) AS LagReportDate
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 11, 10) AS INT) AS LagReportValue
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 21, 10) AS INT) AS LagReportAccountID
FROM #Calendar C
LEFT JOIN #MyData D
ON C.CalendarDate = D.ReportDate
AND AccountID = 1
CROSS APPLY (VALUES(CAST(C.CalendarDate AS CHAR(10)) + CAST(D.ReportValue AS CHAR(10)) + CAST(D.AccountID AS CHAR(10)) )) v(val)
ORDER BY C.CalendarDate,LagReportAccountID
SELECT
v.val
, C.CalendarDate
, D.ReportDate
, D.AccountID
, D.ReportValue
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 1, 10) AS DATE) AS LagReportDate
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 11, 10) AS INT) AS LagReportValue
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 21, 10) AS INT) AS LagReportAccountID
FROM #Calendar C
LEFT JOIN #MyData D
ON C.CalendarDate = D.ReportDate
--AND AccountID = 1
CROSS APPLY (VALUES(CAST(C.CalendarDate AS CHAR(10)) + CAST(D.ReportValue AS CHAR(10)) + CAST(D.AccountID AS CHAR(10)) )) v(val)
ORDER BY C.CalendarDate,LagReportAccountID
August 21, 2023 at 12:40 am
I posted a follow-up an hour ago but I don't see it (yet)
Here's my latest, added CROSS JOIN to DISTINCT AccountID which returns the correct number of records.
My 'lag' columns still are returning data somewhat randomly
--Microsoft SQL Server 2019
--Build a temp calendar table
DROP TABLE IF EXISTS #Calendar
CREATE TABLE #Calendar (CalendarDate DATE)
DECLARE @Start DATE = '2022-07-01'
DECLARE @End DATE = '2022-07-31'
WHILE ( @Start < @End )
BEGIN
INSERT INTO #Calendar (CalendarDate) VALUES( @Start )
SELECT @Start = DATEADD(DAY, 1, @Start )
END
DROP TABLE IF EXISTS #MyData
CREATE TABLE #MyData
(
ReportDate varchar(10) NOT NULL,
AccountID int NOT NULL,
ReportValue int NOT NULL
)
INSERT INTO #MyData (ReportDate, AccountID,ReportValue)
VALUES
( '2022-07-01',1, 100 ),
( '2022-07-05',1, 101 ),
( '2022-07-06',1, 102 ),
( '2022-07-07',1, 103 ),
( '2022-07-08',1, 104 ),
( '2022-07-11',1, 105 ),
( '2022-07-12',1, 106 ),
( '2022-07-13',1, 107 ),
( '2022-07-14',1, 108 ),
( '2022-07-15',1, 109 ),
( '2022-07-18',1, 110 ),
( '2022-07-19',1, 111 ),
( '2022-07-20',1, 112 ),
( '2022-07-21',1, 113 ),
( '2022-07-22',1, 114 ),
( '2022-07-25',1, 115 ),
( '2022-07-26',1, 116 ),
( '2022-07-27',1, 117 ),
( '2022-07-28',1, 118 ),
( '2022-07-29',1, 119 ),
( '2022-07-01',2, 200 ),
( '2022-07-05',2, 201 ),
( '2022-07-06',2, 202 ),
( '2022-07-07',2, 203 ),
( '2022-07-08',2, 204 ),
( '2022-07-11',2, 205 ),
( '2022-07-12',2, 206 ),
( '2022-07-13',2, 207 ),
( '2022-07-14',2, 208 ),
( '2022-07-15',2, 209 ),
( '2022-07-18',2, 210 ),
( '2022-07-19',2, 211 ),
( '2022-07-20',2, 212 ),
( '2022-07-21',2, 213 ),
( '2022-07-22',2, 214 ),
( '2022-07-25',2, 215 ),
( '2022-07-26',2, 216 ),
( '2022-07-27',2, 217 ),
( '2022-07-28',2, 218 ),
( '2022-07-29',2, 219 )
--SELECT * FROM #MyData ORDER BY ReportDate, AccountID
--Quick reality check; Accounts are equivalent except account number and account 2 values are in the 200 range
/*
SELECT ReportDate,AccountID,ReportValue FROM #MyData WHERE AccountID =1
EXCEPT
SELECT ReportDate,AccountID - 1,ReportValue - 100 FROM #MyData WHERE AccountID = 2
*/
SELECT
v.val
, C.CalendarDate
, D.ReportDate
, D.AccountID
, D.ReportValue
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 1, 10) AS DATE) AS LagReportDate
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 11, 10) AS INT) AS LagReportValue
, CAST(SUBSTRING(MAX(v.val) OVER(ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 21, 10) AS INT) AS LagReportAccountID
FROM #Calendar C
CROSS JOIN
(
SELECT DISTINCT AccountID FROM #MyData
--WHERE AccountID = 1
) A
LEFT JOIN #MyData D
ON
C.CalendarDate = D.ReportDate AND
A.AccountID = D.AccountID
CROSS APPLY (VALUES(CAST(C.CalendarDate AS CHAR(10)) + CAST(D.ReportValue AS CHAR(10)) + CAST(D.AccountID AS CHAR(10)))) v(val)
ORDER BY C.CalendarDate,COALESCE(A.AccountID,CAST(SUBSTRING(MAX(v.val) OVER(PARTITION BY A.AccountID ORDER BY C.CalendarDate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 11, 10) AS INT))
Viewing 2 posts - 46 through 46 (of 46 total)
You must be logged in to reply to this topic. Login to reply
This website stores cookies on your computer.
These cookies are used to improve your website experience and provide more personalized services to you, both on this website and through other media.
To find out more about the cookies we use, see our Privacy Policy