merging intervals with identical data

  • I'm having issues building a cte sql statement for merging intervals and I would greatly appreciate someone helping me out. I have a table with data as follows:

    declare @table table

    (

    startpoint int,

    stoppoint int,

    value int

    )

    insert @table ( value, startpoint, stoppoint )

    select 1, 0, 10 union

    select 1, 10, 15 union

    select 2, 15, 25 union

    select 2, 25, 30 union

    select 2, 30, 40 union

    select 3, 40, 55 union

    select 3, 55, 60 union

    select 2, 60, 80

    The resulting query returns the rows in the table, sorted by startpoint:

    [font="System"]startpoint stoppoint value

    ----------- ----------- -----------

    0 10 1

    10 15 1

    15 25 2

    25 30 2

    30 40 2

    40 55 3

    55 60 3

    60 80 2[/font]

    I'm looking for a merge cte that returns consecutive intervals with the same value, as follows:

    [font="System"]startpoint stoppoint value

    ----------- ----------- -----------

    0 15 1

    15 40 2

    40 60 3

    60 80 2[/font]

  • I ended up finding the result. Here's the query if others are interested:

    ;WITH

    S AS

    (

    SELECTDISTINCT value, startpoint

    FROM@table S1

    WHERENOT EXISTS(

    SELECT*

    FROM@table S2

    WHERES2.value = S1.value

    ANDS2.startpoint < S1.startpoint

    ANDS2.stoppoint >= S1.startpoint

    )

    ),

    E AS

    (

    SELECTDISTINCT value, stoppoint

    FROM@table S1

    WHERENOT EXISTS (

    SELECT*

    FROM@table S2

    WHERES2.value = S1.value

    ANDS2.stoppoint > S1.stoppoint

    ANDS2.startpoint <= S1.stoppoint)

    )

    SELECTvalue = S.value,

    startpoint = S.startpoint,

    stoppoint = (

    SELECTMIN(stoppoint)

    FROME

    WHEREE.value = S.value

    ANDstoppoint >= startpoint

    )

    FROMS

  • Here is another option to try:

    create table dbo.valuetab

    (

    startpoint int,

    stoppoint int,

    value int

    )

    insert dbo.valuetab ( value, startpoint, stoppoint )

    select 1, 0, 10 union

    select 1, 10, 15 union

    select 2, 15, 25 union

    select 2, 25, 30 union

    select 2, 30, 40 union

    select 3, 40, 55 union

    select 3, 55, 60 union

    select 2, 60, 80

    --select * from dbo.valuetab;

    CREATE UNIQUE INDEX idx_user_startpoint ON dbo.valuetab(startpoint, value);

    CREATE UNIQUE INDEX idx_user_stoppoint ON dbo.valuetab(stoppoint, value);

    /*

    The following code is modified from code written by Itzek Ben-Gan and found in the article located here:

    http://sqlmag.com/blog/solutions-packing-date-and-time-intervals-puzzle

    */

    -- indexes

    /*

    CREATE UNIQUE INDEX idx_user_start_id ON dbo.Sessions(username, starttime, id);

    CREATE UNIQUE INDEX idx_user_end_id ON dbo.Sessions(username, endtime, id);

    */

    WITH C1 AS

    -- let e = end ordinals, let s = start ordinals

    (

    SELECT

    t1.value,

    t1.startpoint AS ts,

    +1 AS type,

    NULL AS e,

    ROW_NUMBER() OVER(PARTITION BY t1.value ORDER BY t1.startpoint) AS s

    FROM

    dbo.valuetab t1

    UNION ALL

    SELECT

    t1.value,

    t1.stoppoint AS ts,

    -1 AS type,

    ROW_NUMBER() OVER(PARTITION BY t1.value ORDER BY t1.stoppoint) AS e,

    NULL AS s

    FROM

    dbo.valuetab t1

    ),

    C2 AS

    -- let se = start or end ordinal, namely, how many events (start or end) happened so far

    (

    SELECT C1.*, ROW_NUMBER() OVER(PARTITION BY value ORDER BY ts) AS se

    FROM C1

    ),

    C3 AS

    -- For start events, the expression s - (se - s) - 1 represents how many sessions were active

    -- just before the current (hence - 1)

    --

    -- For end events, the expression (se - e) - e represents how many sessions are active

    -- right after this one

    --

    -- The above two expressions are 0 exactly when a group of packed intervals

    -- either starts or ends, respectively

    --

    -- After filtering only events when a group of packed intervals either starts or ends,

    -- group each pair of adjacent start/end events

    (

    SELECT

    value,

    ts,

    FLOOR((ROW_NUMBER() OVER(PARTITION BY value ORDER BY ts) - 1) / 2 + 1) AS grpnum

    FROM

    C2

    WHERE

    COALESCE(s - (se - s) - 1, (se - e) - e) = 0

    )

    select

    value,

    min(ts) startpoint,

    max(ts) stoppoint

    from

    C3

    group by

    value, grpnum

    order by

    startpoint;

    drop table dbo.valuetab;

  • Thanks, Lynn. That's a nice solution.

Viewing 4 posts - 1 through 3 (of 3 total)

You must be logged in to reply to this topic. Login to reply