Home Forums SQL Server 2008 T-SQL (SS2K8) Reduce records in a slowly changing dimension table RE: Reduce records in a slowly changing dimension table

  • I neglected to provide sample data that would illustrate a particular scenario and naturally the code Lynn provided does not address it. The scenario is some groups may have both existing ranges and the snapshot records (records where the start and end date is the same). When there are 2 date range records with no gaps between I would like to consolidate these multiple records

    --New sample data that illustrates what I neglected to show earlier; Some members may have existing ranges and snapshot records
    INSERT INTO dbo.GroupMembers VALUES ('E', 5, '20171201', '20171215');
    INSERT INTO dbo.GroupMembers VALUES ('E', 5, '20171216', '20171216');
    INSERT INTO dbo.GroupMembers VALUES ('E', 5, '20171217', '20171217');
    INSERT INTO dbo.GroupMembers VALUES ('E', 5, '20171218', '20171218');

    In the spirit of showing you the code I have worked on I have what may be a 'solution' but it is rather inelegant. I used a calendar table to un-consolidate the data before consolidating it with the code Lynn provided. 
    Below will have the new sample data; Lynn's code and then I jam the unconsolidation into Lynn's code as a subquery.
    Any suggestions on how to do this properly? It seems silly to -un-consolidate the data.
    IF OBJECT_ID('dbo.GroupMembers', 'U') IS NOT NULL DROP TABLE dbo.GroupMembers;
    CREATE TABLE dbo.GroupMembers
    (
      GroupCode VARCHAR(1) NOT NULL,
      GroupMember INT NOT NULL,
      StartDate DATE NOT NULL,
      EndDate DATE NULL
    );

    --Some of my groups have proper ranges for start and end dates
    INSERT INTO dbo.GroupMembers VALUES ('A', 1, '20170101', '20170201');

    --Some of my groups have were populated with a snapshot for every day
    INSERT INTO dbo.GroupMembers VALUES ('B', 2, '20000101', '20000101');
    INSERT INTO dbo.GroupMembers VALUES ('B', 2, '20000102', '20000102');
    INSERT INTO dbo.GroupMembers VALUES ('B', 2, '20000103', '20000103');
    INSERT INTO dbo.GroupMembers VALUES ('B', 2, '20000104', '20000104');

    --Other groups are open ended and that is fine
    INSERT INTO dbo.GroupMembers VALUES ('C', 3, '20000104', NULL);

    --Some members may be removed and then re-added
    INSERT INTO dbo.GroupMembers VALUES ('D', 4, '20170101', '20170101');
    INSERT INTO dbo.GroupMembers VALUES ('D', 4, '20170102', '20170102');
    INSERT INTO dbo.GroupMembers VALUES ('D', 4, '20170104', '20170104');
    INSERT INTO dbo.GroupMembers VALUES ('D', 4, '20170105', '20170105');

    --Some members may have existing ranges and snapshot records
    INSERT INTO dbo.GroupMembers VALUES ('E', 5, '20171201', '20171215');
    INSERT INTO dbo.GroupMembers VALUES ('E', 5, '20171216', '20171216');
    INSERT INTO dbo.GroupMembers VALUES ('E', 5, '20171217', '20171217');
    INSERT INTO dbo.GroupMembers VALUES ('E', 5, '20171218', '20171218');

    --SELECT * FROM dbo.GroupMembers;

    --This wil have multiple E records
    WITH
        base AS
        (
        SELECT
            GroupCode,
            GroupMember,
            StartDate,
            EndDate,
            rn = ROW_NUMBER() OVER (PARTITION BY GroupCode, GroupMember ORDER BY StartDate)
        FROM dbo.GroupMembers
        )
        ,
        Interim AS
        (
        SELECT
             b.GroupCode,
             b.GroupMember,
             b.StartDate,
             b.EndDate,
             b.rn,
             GroupDate = DATEADD(DAY,-b.rn,b.StartDate)
        FROM base b
    )

    SELECT
    i.GroupCode,
    i.GroupMember,
    StartDate = MIN(i.StartDate),
    EndDate = MAX(i.EndDate)
    FROM Interim i
    GROUP BY
    i.GroupCode,
    i.GroupMember,
    i.GroupDate
    ORDER BY
    i.GroupCode; ;

    --If I explode the date ranges into the separate records in a subquery then the E records will be consolidated
    WITH
        base AS
        (
        SELECT
            GroupCode,
            GroupMember,
            StartDate,
            EndDate,
            rn = ROW_NUMBER() OVER (PARTITION BY GroupCode, GroupMember ORDER BY StartDate)
        FROM
        (
            SELECT
                #T1.GroupCode,
                #T1.GroupMember,
                C.CalendarDate AS StartDate,
                C.CalendarDate AS EndDate
            FROM #T1
            INNER JOIN dbo.Calendar C
             ON C.CalendarDate BETWEEN #T1.StartDate AND #T1.EndDate

            UNION ALL

             SELECT
                GroupCode,
                GroupMember,
                StartDate,
                EndDate
            FROM #T1
            WHERE EndDate IS NULL
        )Unconsolidation
        )
        ,
        Interim AS
        (
        SELECT
             b.GroupCode,
             b.GroupMember,
             b.StartDate,
             b.EndDate,
             b.rn,
             GroupDate = DATEADD(DAY,-b.rn,b.StartDate)
        FROM base b
    )

    SELECT
    i.GroupCode,
    i.GroupMember,
    StartDate = MIN(i.StartDate),
    EndDate = MAX(i.EndDate)
    FROM Interim i
    GROUP BY
    i.GroupCode,
    i.GroupMember,
    i.GroupDate
    ORDER BY
    i.GroupCode;