Groups and Islands problem

  • Hi There,

    I got output but it seems very costly, so looking for some optimal solution.

    create table #ME (memid int , EffectiveDate datetime , termdate datetime)

    Insert into #ME values ('123','3-Dec-16','10-Jan-17')

    Insert into #ME values ('123','11-Jan-17','6-Feb-17')

    Insert into #ME values ('123','7-Feb-17','5-Mar-17')

    Insert into #ME values ('123','8-Mar-17','15-Apr-17')

    Insert into #ME values ('123','16-Apr-17','24-May-17')

    declare @StartDate datetime , @CutoffDate datetime

    select @StartDate= min(effectivedate),@CutoffDate = max(termdate) From #me where termdate<>'9999-12-31 00:00:00.000'

    SELECT d

    into #dim

    FROM

    (

    SELECT d = DATEADD(DAY, rn - 1, @StartDate)

    FROM

    (

    SELECT TOP (DATEDIFF(DAY, @StartDate, @CutoffDate))

    rn = ROW_NUMBER() OVER (ORDER BY s1.[object_id])

    FROM sys.all_objects AS s1

    CROSS JOIN sys.all_objects AS s2

    -- on my system this would support > 5 million days

    ORDER BY s1.[object_id]

    ) AS x

    ) AS y;

    select MemID, D As DateSpread Into #MemEligibilityDateSpread From #Dim dim JOIN #me ME on dim.d between ME.effectivedate and me.termdate

    WITH CTE AS

    (

    SELECT MEmID,

    UniqueDate = DateSpread,

    DateGroup = DATEADD(dd, - ROW_NUMBER() OVER (PARTITION BY Memid ORDER BY Memid,DateSpread), DateSpread)

    FROM #MemEligibilityDateSpread

    GROUP BY Memid,DateSpread

    )

    --===== Now, if we find the MIN and MAX date for each DateGroup, we'll have the

    -- Start and End dates of each group of contiguous daes. While we're at it,

    -- we can also figure out how many days are in each range of days.

    SELECT Memid,

    StartDate = MIN(UniqueDate),

    EndDate = MAX(UniqueDate)

    INTO #DateClasified

    FROM cte -- where MemID= 'H5716216700'

    GROUP BY Memid,DateGroup

    ORDER BY Memid,StartDate

    select ME.MemID,ME.EffectiveDate,ME.TermDate,DC.StartDate,DC.EndDate from #DateClasified dc join #me ME ON Me.MemID = dc.MemID

    and (ME.EffectiveDate BETWEEN DC.StartDate AND DC.EndDate

    OR ME.TermDate BETWEEN DC.StartDate AND DC.EndDate)

    Thanks in advance

  • You have to test it yourself if it performs better, but here's simplified alternative:

    SELECT

    Q.memid,

    Q.EffectiveDate,

    Q.termdate,

    MIN(Q.EffEctiveDate) OVER (PARTITION BY Q.memid, Q.GroupingDate) sincefrom,

    MAX(Q.termdate) OVER (PARTITION BY Q.memid, Q.GroupingDate) tildate

    FROM

    (

    SELECT

    M.memid, M.EffectiveDate, M.termdate,

    DATEADD(dd, -SUM(DATEDIFF(dd, M.EffectiveDate, M.termdate) + 1) OVER

    (PARTITION BY M.memid

    ORDER BY M.EffectiveDate

    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), termdate) GroupingDate

    FROM

    #ME M

    ) Q

    Edit: added Q.memid to PARTITION BY

  • Peter Brinkhaus (12/8/2016)


    You have to test it yourself if it performs better, but here's simplified alternative:

    SELECT

    Q.memid,

    Q.EffectiveDate,

    Q.termdate,

    MIN(Q.EffEctiveDate) OVER (PARTITION BY Q.memid, Q.GroupingDate) sincefrom,

    MAX(Q.termdate) OVER (PARTITION BY Q.memid, Q.GroupingDate) tildate

    FROM

    (

    SELECT

    M.memid, M.EffectiveDate, M.termdate,

    DATEADD(dd, -SUM(DATEDIFF(dd, M.EffectiveDate, M.termdate) + 1) OVER

    (PARTITION BY M.memid

    ORDER BY M.EffectiveDate

    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), termdate) GroupingDate

    FROM

    #ME M

    ) Q

    Edit: added Q.memid to PARTITION BY

    This solution doesn't work if there are overlaps like the following data set.

    create table #ME (memid int , EffectiveDate datetime , termdate datetime)

    Insert into #ME values ('123','3-Dec-16','10-Jan-17')

    Insert into #ME values ('123','11-Jan-17','8-Feb-17')

    Insert into #ME values ('123','7-Feb-17','5-Mar-17')

    Insert into #ME values ('123','8-Mar-17','15-Apr-17')

    Insert into #ME values ('123','16-Apr-17','24-May-17')

    Here is a solution that will work with overlaps and performs the same on this small data set.

    ;

    WITH C1 AS (

    SELECT *, MAX(DATEADD(DAY, 1, m.termdate)) OVER(PARTITION BY m.memid ORDER BY m.EffectiveDate, m.termdate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS prevend

    FROM #ME m

    )

    , C2 AS (

    SELECT *, SUM(s.is_start) OVER(PARTITION BY C1.memid ORDER BY C1.EffectiveDate, C1.termdate ROWS UNBOUNDED PRECEDING) AS grp

    FROM C1

    CROSS APPLY ( VALUES(CASE WHEN C1.EffectiveDate <= C1.prevend THEN 0 ELSE 1 END ) ) AS s(is_start)

    )

    SELECT C2.memid, C2.EffectiveDate, C2.termdate, MIN(C2.EffectiveDate) OVER(PARTITION BY grp) AS start_dt, MAX(C2.termdate) OVER(PARTITION BY grp) AS end_dt

    FROM C2

    ;

    This is based on the article New Solution to the Packing Intervals Problem

    Drew

    J. Drew Allen
    Business Intelligence Analyst
    Philadelphia, PA

  • drew.allen (12/8/2016)


    Peter Brinkhaus (12/8/2016)


    You have to test it yourself if it performs better, but here's simplified alternative:

    SELECT

    Q.memid,

    Q.EffectiveDate,

    Q.termdate,

    MIN(Q.EffEctiveDate) OVER (PARTITION BY Q.memid, Q.GroupingDate) sincefrom,

    MAX(Q.termdate) OVER (PARTITION BY Q.memid, Q.GroupingDate) tildate

    FROM

    (

    SELECT

    M.memid, M.EffectiveDate, M.termdate,

    DATEADD(dd, -SUM(DATEDIFF(dd, M.EffectiveDate, M.termdate) + 1) OVER

    (PARTITION BY M.memid

    ORDER BY M.EffectiveDate

    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), termdate) GroupingDate

    FROM

    #ME M

    ) Q

    Edit: added Q.memid to PARTITION BY

    This solution doesn't work if there are overlaps like the following data set.

    create table #ME (memid int , EffectiveDate datetime , termdate datetime)

    Insert into #ME values ('123','3-Dec-16','10-Jan-17')

    Insert into #ME values ('123','11-Jan-17','8-Feb-17')

    Insert into #ME values ('123','7-Feb-17','5-Mar-17')

    Insert into #ME values ('123','8-Mar-17','15-Apr-17')

    Insert into #ME values ('123','16-Apr-17','24-May-17')

    Here is a solution that will work with overlaps and performs the same on this small data set.

    ;

    WITH C1 AS (

    SELECT *, MAX(DATEADD(DAY, 1, m.termdate)) OVER(PARTITION BY m.memid ORDER BY m.EffectiveDate, m.termdate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS prevend

    FROM #ME m

    )

    , C2 AS (

    SELECT *, SUM(s.is_start) OVER(PARTITION BY C1.memid ORDER BY C1.EffectiveDate, C1.termdate ROWS UNBOUNDED PRECEDING) AS grp

    FROM C1

    CROSS APPLY ( VALUES(CASE WHEN C1.EffectiveDate <= C1.prevend THEN 0 ELSE 1 END ) ) AS s(is_start)

    )

    SELECT C2.memid, C2.EffectiveDate, C2.termdate, MIN(C2.EffectiveDate) OVER(PARTITION BY grp) AS start_dt, MAX(C2.termdate) OVER(PARTITION BY grp) AS end_dt

    FROM C2

    ;

    This is based on the article New Solution to the Packing Intervals Problem

    Drew

    Thanks for sharing that link, Drew. I'd been messing with LAG() for a while when you posted it up, and the solutions are virtually identical:

    -- Original LAG query

    SELECT memid, EffectiveDate, termdate,

    SinceFrom = MIN(EffectiveDate) OVER (PARTITION BY memid, grp),

    TilDate = MAX(termdate) OVER (PARTITION BY memid, grp)

    FROM (

    SELECT *, grp = SUM(x.Flag) OVER (PARTITION BY memid ORDER BY EffectiveDate, termdate ROWS UNBOUNDED PRECEDING)

    FROM (

    SELECT *, LastEndDate = DATEADD(DAY,1,LAG(Termdate,1) OVER(PARTITION BY memid ORDER BY EffectiveDate, termdate))

    FROM #ME

    ) d

    CROSS APPLY (SELECT Flag = CASE WHEN EffectiveDate <= LastEndDate THEN 0 ELSE 1 END) x

    ) e

    -- Modified to match Drew's CTE format

    ;WITH c1 AS (

    SELECT *, LastEndDate = DATEADD(DAY,1,LAG(Termdate,1) OVER(PARTITION BY memid ORDER BY EffectiveDate, termdate))

    FROM #ME

    ),

    c2 AS (

    SELECT *, grp = SUM(x.Flag) OVER (PARTITION BY memid ORDER BY EffectiveDate, termdate ROWS UNBOUNDED PRECEDING)

    FROM c1

    CROSS APPLY (SELECT Flag = CASE WHEN EffectiveDate <= LastEndDate THEN 0 ELSE 1 END) x

    )

    SELECT memid, EffectiveDate, termdate, SinceFrom = MIN(EffectiveDate) OVER (PARTITION BY memid, grp), TilDate = MAX(termdate) OVER (PARTITION BY memid, grp)

    FROM c2

    “Write the query the simplest way. If through testing it becomes clear that the performance is inadequate, consider alternative query forms.” - Gail Shaw

    For fast, accurate and documented assistance in answering your questions, please read this article.
    Understanding and using APPLY, (I) and (II) Paul White
    Hidden RBAR: Triangular Joins / The "Numbers" or "Tally" Table: What it is and how it replaces a loop Jeff Moden

Viewing 4 posts - 1 through 3 (of 3 total)

You must be logged in to reply to this topic. Login to reply