Hi!
I'm having struggle with selecting rows, that match "dynamic" rules as described:
I think it has to do with "gaps and islands" and probably something with lead/lag to build group ids. But I don't know how to build the "counter" so I know if it's 10 or more records that match the pattern.
Please see this simplified demo table (id is an INT, in real-world it's a timestamp of an event)
drop table if exists patternDemo;
create table patternDemo
(
id int,
value int,
include char(3) -- select record in final query?
)
-- ignore everything before START
insert into patternDemo values (1, 100, 'no');
insert into patternDemo values (2, 100, 'no');
insert into patternDemo values (3, 100, 'no');
insert into patternDemo values (4, 200, 'no');
insert into patternDemo values (5, 100, 'no');
insert into patternDemo values (6, 300, 'no');
insert into patternDemo values (7, 500, 'no');
-- START: block having >= 10 consecutive valus above 1000
insert into patternDemo values (8, 1500, 'yes');
insert into patternDemo values (9, 1500, 'yes');
insert into patternDemo values (10, 1300, 'yes');
insert into patternDemo values (11, 1600, 'yes');
insert into patternDemo values (12, 1500, 'yes');
insert into patternDemo values (13, 1700, 'yes');
insert into patternDemo values (14, 1800, 'yes');
insert into patternDemo values (15, 1800, 'yes');
insert into patternDemo values (16, 1800, 'yes');
insert into patternDemo values (17, 1800, 'yes');
insert into patternDemo values (18, 1600, 'yes');
insert into patternDemo values (19, 1500, 'yes');
-- after a START block having at least 10 rows with a value over 1000
-- there maybe lower values, which i want to include, since we're still
-- before a STOP pattern
insert into patternDemo values (20, 600, 'yes');
insert into patternDemo values (21, 400, 'yes');
insert into patternDemo values (22, 1500, 'yes');
insert into patternDemo values (23, 1800, 'yes');
insert into patternDemo values (24, 1800, 'yes');
insert into patternDemo values (25, 1800, 'yes');
-- STOP pattern: another block of at least 10 rows with a value lower than 1000
insert into patternDemo values (26, 300, 'no');
insert into patternDemo values (27, 300, 'no');
insert into patternDemo values (28, 100, 'no');
insert into patternDemo values (29, 100, 'no');
insert into patternDemo values (30, 100, 'no');
insert into patternDemo values (31, 100, 'no');
insert into patternDemo values (32, 100, 'no');
insert into patternDemo values (33, 100, 'no');
insert into patternDemo values (34, 100, 'no');
insert into patternDemo values (35, 100, 'no');
insert into patternDemo values (36, 100, 'no');
insert into patternDemo values (37, 100, 'no');
insert into patternDemo values (38, 100, 'no');
insert into patternDemo values (39, 100, 'no');
insert into patternDemo values (40, 100, 'no');
January 17, 2022 at 3:23 pm
Well this is definitely a head scratcher. The following code is not pretty, but it seems to get the expected results
WITH cteBase AS (
SELECT *
, rn = ROW_NUMBER() OVER ( ORDER BY pd.id )
, IsStartBlock = SUM( CASE WHEN pd.VALUE > 1000 THEN 1 ELSE 0 END )
OVER ( ORDER BY pd.id ROWS BETWEEN 9 PRECEDING AND CURRENT ROW )
, IsEndBlock = SUM( CASE WHEN pd.VALUE < 1000 THEN 1 ELSE 0 END )
OVER ( ORDER BY pd.id ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING )
FROM #patternDemo AS pd
)
, cteStart AS (
SELECT sb.*
, grp = sb.rn - ROW_NUMBER() OVER ( ORDER BY sb.rn )
FROM cteBase AS sb
WHERE sb.IsStartBlock IN ( 1, 10 )
)
, cteStartBlock AS (
SELECT sb.StartBlockStart
, StartBlockEnd = MAX( st.id )
, rn = ROW_NUMBER() OVER (ORDER BY sb.StartBlockStart)
FROM cteStart AS st
OUTER APPLY ( SELECT StartBlockStart = MAX( sbs.id )
FROM cteStart AS sbs
WHERE sbs.IsStartBlock = 1
AND sbs.id < st.id
) AS sb
WHERE st.IsStartBlock = 10
GROUP BY st.grp, sb.StartBlockStart
)
, cteEnd as (
SELECT eb.*
, grp = eb.rn - ROW_NUMBER() OVER ( ORDER BY eb.rn )
FROM cteBase AS eb
WHERE eb.IsEndBlock IN ( 1, 10 )
)
, cteEndBlock AS (
SELECT EndBlockStart = MIN( nd.id )
, eb.EndBlockEnd
, rn = ROW_NUMBER() OVER (ORDER BY eb.EndBlockEnd)
FROM cteEnd AS nd
OUTER APPLY ( SELECT EndBlockEnd = MIN( ebs.id )
FROM cteEnd AS ebs
WHERE ebs.IsEndBlock = 1
AND ebs.id > nd.id
) AS eb
WHERE nd.IsEndBlock = 10
GROUP BY nd.grp, eb.EndBlockEnd
)
, cteBlocks AS (
SELECT BlockNo = scte.rn
, scte.StartBlockStart
, scte.StartBlockEnd
, ecte.EndBlockStart
, ecte.EndBlockEnd
FROM cteStartBlock AS scte
LEFT JOIN cteEndBlock AS ecte
ON ecte.rn = scte.rn
)
SELECT bk.BlockNo
, src.*
FROM #patternDemo AS src
INNER JOIN cteBlocks AS bk
ON src.id > bk.StartBlockEnd
AND src.id < bk.EndBlockStart
ORDER BY src.id;
January 17, 2022 at 3:33 pm
WITH Rules
AS
(
SELECT id, [value]
,CASE
WHEN 10 = SUM(CASE WHEN [value] >= 1000 THEN 1 ELSE 0 END)
OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING)
THEN 1
ELSE 0
END AS StartSignal
,CASE
WHEN 10 = SUM(CASE WHEN [value] < 1000 THEN 1 ELSE 0 END)
OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING)
THEN 1
ELSE 0
END AS StopSignal
FROM patternDemo
)
,StartStop
AS
(
SELECT id, [value]
,CASE
WHEN 1 = LAG(StartSignal) OVER (ORDER BY id)
THEN 0
ELSE StartSignal
END AS StartSignal
,CASE
WHEN 1 = LAG(StopSignal) OVER (ORDER BY id)
THEN 0
ELSE StopSignal
END AS StopSignal
FROM Rules
)
,StartWithStart
AS
(
SELECT id, [value], StartSignal
,CASE
WHEN LAG(StopSignal) OVER (ORDER BY id) IS NULL
THEN 0
ELSE StopSignal
END AS StopSignal
FROM StartStop
)
,Includes
AS
(
SELECT id, [value]
,SUM(StartSignal) OVER (ORDER BY id)
- SUM(StopSignal) OVER (ORDER BY id) AS IncludeSignal
FROM StartWithStart
)
SELECT id, [value]
FROM Includes
WHERE IncludeSignal = 1;
January 17, 2022 at 4:09 pm
It seems safer to avoid LEAD and LAG because start/stop conditions might repeat within ranges of groups
with
gap_cte as (
select *, iif(v.sgn<>lag(v.sgn) over (order by id), 1, 0) gap
from #patternDemo pd
cross apply (values (sign([value]-1000))) v(sgn)),
grp_cte as (
select *, sum(gap) over (order by id) grp
from gap_cte),
rn_cte as (
select *,
row_number() over (partition by grp order by id desc) rn,
row_number() over (partition by grp order by id) rn_max
from grp_cte),
start_stop_cte as (
select top(1) with ties *
from rn_cte
where rn_max>=10
order by rn)
select g.*
from start_stop_cte s
outer apply (select top(1) grp
from start_stop_cte ss
where ss.id<s.id
and ss.sgn=1
order by id desc) starts(start_grp)
left join grp_cte g on g.grp<s.grp
and g.grp>=starts.start_grp
where s.sgn=-1
order by rn;
Aus dem Paradies, das Cantor uns geschaffen, soll uns niemand vertreiben können
This should cope with more edge conditions than my original query:
WITH Rules
AS
(
SELECT id, [value]
,CASE
WHEN 10 = SUM(CASE WHEN [value] >= 1000 THEN 1 ELSE 0 END)
OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING)
THEN 1
ELSE 0
END AS StartSignal
,CASE
WHEN 10 = SUM(CASE WHEN [value] < 1000 THEN 1 ELSE 0 END)
OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING)
THEN 1
ELSE 0
END AS StopSignal
FROM patternDemo
)
,StartStop
AS
(
SELECT id, [value]
,CASE
WHEN 1 = LAG(StartSignal) OVER (ORDER BY id)
THEN 0
ELSE StartSignal
END AS StartSignal
,CASE
WHEN 1 = LAG(StopSignal) OVER (ORDER BY id)
THEN 0
ELSE StopSignal
END AS StopSignal
,MAX(StartSignal) OVER (ORDER BY id)
-MAX(StopSignal) OVER (ORDER BY id) AS StartStopDiff
FROM Rules
)
,StartWithStart
AS
(
SELECT id, [value], StartSignal
,CASE
WHEN StartStopDiff = -1
THEN 0
ELSE StopSignal
END AS StopSignal
FROM StartStop
)
,Includes
AS
(
SELECT id, [value]
,SUM(StartSignal) OVER (ORDER BY id)
- SUM(StopSignal) OVER (ORDER BY id) AS IncludeSignal
FROM StartWithStart
)
SELECT id, [value], IncludeSignal
FROM Includes
WHERE IncludeSignal >= 1;
January 18, 2022 at 10:42 am
On looking at this again this may be a safer approach:
WITH Rules
AS
(
SELECT id, [value]
,CASE
WHEN 10 = SUM(CASE WHEN [value] >= 1000 THEN 1 ELSE 0 END)
OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING)
THEN 1
ELSE 0
END AS StartSignal
,CASE
WHEN 10 = SUM(CASE WHEN [value] < 1000 THEN 1 ELSE 0 END)
OVER (ORDER BY id ROWS BETWEEN CURRENT ROW AND 9 FOLLOWING)
THEN 1
ELSE 0
END AS StopSignal
FROM patternDemo
)
,PKOnOffs
AS
(
SELECT id, [value]
,CASE
WHEN StartSignal = 1
THEN RIGHT('000000000' + CAST(id AS varchar(20)), 10) + 'On'
WHEN StopSignal = 1
THEN RIGHT('000000000' + CAST(id AS varchar(20)), 10) + 'Off'
END AS PKOnOff
FROM Rules
)
,OnOffs
AS
(
SELECT id, [value]
-- MAX ignores NULLs
,ISNULL(SUBSTRING(MAX(PKOnOff) OVER (ORDER BY id), 11, 3), 'Off') AS OnOff
FROM PKOnOffs
)
SELECT id, [value]
FROM OnOffs
WHERE OnOff = 'On';
Viewing 6 posts - 1 through 6 (of 6 total)
You must be logged in to reply to this topic. Login to reply
This website stores cookies on your computer.
These cookies are used to improve your website experience and provide more personalized services to you, both on this website and through other media.
To find out more about the cookies we use, see our Privacy Policy