De-duplicating column values

  • sql_learner29

    SSCrazy Eights

    Points: 9107

    Hi All,

    Can someone help me with this SQL code..
    I was trying to de-duplicate the columns in my select based on exact match values.

    I get few modifier codes from the source data which may contain duplicates, I have to de-duplicate the values.

    Here is the SQL code to show some example:


     

    CREATE TABLE #CODE
    (
    ID int ,
    MOD0    varchar    (20),
    MOD1    varchar    (20),
    MOD2    varchar    (20),
    MOD3    varchar    (20),
    MOD4    varchar    (20),
    MOD5    varchar    (20),
    MOD6    varchar    (20),
    MOD7    varchar    (20),
    MOD8    varchar    (20),
    MOD9    varchar    (20),
    MOD10    varchar    (20),
    MOD11    varchar    (20),
    MOD12    varchar    (20),
    MOD13    varchar    (20),
    MOD14    varchar    (20),
    MOD15    varchar    (20),
    MOD16    varchar    (20)

    )
    insert into #CODE
    VALUES (111,'Z10','Z10','D91.1','I50.9','E73.9','Z10',    'D91.1', '','E73.9','','','','','','','','')      
    insert into #CODE
    VALUES (222,'576.51','G58.61','403.90','','576.51','428','Z10',    '428', 'G58.61','E73.9','','','','','','','')      
    insert into #CODE
    VALUES (333,'788.20','788.20','600.01','250','252','276.8 ','427.31',    '428', '584.9 ','780.93','250.00','252.00','428.0 ','','','','')      
    insert into #CODE
    VALUES (444,'530.19','530.19','162.9 ','276.51','787.20','401.9','414.00',    '428.0 ', '272.0 ','288.00','285.9 ','287.5 ','783.21 ','','','','')      

    SELECT * FROM #CODE
    --drop table #code
     

    in this example,
    for ID =111
    result should be like this
    ID    MOD0    MOD1    MOD2    MOD3    MOD4    MOD5    MOD6    MOD7    MOD8    MOD9    MOD10    MOD11    MOD12    MOD13    MOD14    MOD15    MOD16
    111    Z10    D91.1    I50.9    E73.9                                

    The column values need to be shifted to left column for every de-duplication.

    Also for ID = 333
    IM Renderer Page     


  • 428 <> 428.0

  • both are different codes

    any help on this..
    Thanks

    [font="Comic Sans MS"]
    ---------------------------------------------------

    Thanks [/font]

  • Joe Torre

    SSChampion

    Points: 10243

    Try:

    SELECT DISTINCT ID, u.ValueX
    FROM (SELECT ID
         , MOD0
         , MOD1
         , MOD2
         , MOD3
         , MOD4
         , MOD5
         , MOD6
         , MOD7
         , MOD8
         , MOD9
         , MOD10
         , MOD11
         , MOD12
         , MOD13
         , MOD14
         , MOD15
         , MOD16 FROM #CODE) p
         UNPIVOT (ValueX FOR MODX IN (MOD0
         , MOD1
         , MOD2
         , MOD3
         , MOD4
         , MOD5
         , MOD6
         , MOD7
         , MOD8
         , MOD9
         , MOD10
         , MOD11
         , MOD12
         , MOD13
         , MOD14
         , MOD15
         , MOD16)) AS u
     WHERE u.ValueX <>'';
     DROP TABLE #CODE;

  • Mr. Brian Gale

    SSC-Insane

    Points: 22523

    Possibly not the most efficient code, but how about:
    CREATE TABLE #CODE
    (
    ID int ,
    MOD0  varchar  (20),
    MOD1  varchar  (20),
    MOD2  varchar  (20),
    MOD3  varchar  (20),
    MOD4  varchar  (20),
    MOD5  varchar  (20),
    MOD6  varchar  (20),
    MOD7  varchar  (20),
    MOD8  varchar  (20),
    MOD9  varchar  (20),
    MOD10  varchar  (20),
    MOD11  varchar  (20),
    MOD12  varchar  (20),
    MOD13  varchar  (20),
    MOD14  varchar  (20),
    MOD15  varchar  (20),
    MOD16  varchar  (20)

    )
    CREATE TABLE #CODE2
    (
    ID int ,
    MOD0  varchar  (20),
    MOD1  varchar  (20),
    MOD2  varchar  (20),
    MOD3  varchar  (20),
    MOD4  varchar  (20),
    MOD5  varchar  (20),
    MOD6  varchar  (20),
    MOD7  varchar  (20),
    MOD8  varchar  (20),
    MOD9  varchar  (20),
    MOD10  varchar  (20),
    MOD11  varchar  (20),
    MOD12  varchar  (20),
    MOD13  varchar  (20),
    MOD14  varchar  (20),
    MOD15  varchar  (20),
    MOD16  varchar  (20)

    )

    insert into #CODE
    VALUES (111,'Z10','Z10','D91.1','I50.9','E73.9','Z10',  'D91.1', '','E73.9','','','','','','','','')  
    insert into #CODE
    VALUES (222,'576.51','G58.61','403.90','','576.51','428','Z10',  '428', 'G58.61','E73.9','','','','','','','')  
    insert into #CODE
    VALUES (333,'788.20','788.20','600.01','250','252','276.8 ','427.31',  '428', '584.9 ','780.93','250.00','252.00','428.0 ','','','','')  
    insert into #CODE
    VALUES (444,'530.19','530.19','162.9 ','276.51','787.20','401.9','414.00',  '428.0 ', '272.0 ','288.00','285.9 ','287.5 ','783.21 ','','','','')  

    ;WITH cteDATA AS
    (SELECT ID, MODval, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY MODVal desc) AS ROWNUM
        FROM (
        SELECT ID, MOD0 AS MODval
        FROM [#CODE]
        UNION
        SELECT ID, MOD1 AS MODval
        FROM [#CODE]
        UNION
        SELECT ID, MOD2 AS MODval
        FROM [#CODE]
        UNION
            SELECT ID, MOD3 AS MODval
        FROM [#CODE]
        UNION
            SELECT ID, MOD4 AS MODval
        FROM [#CODE]
        UNION
            SELECT ID, MOD5 AS MODval
        FROM [#CODE]
        UNION
            SELECT ID, MOD6 AS MODval
        FROM [#CODE]
        UNION
            SELECT ID, MOD7 AS MODval
        FROM [#CODE]
        UNION
            SELECT ID, MOD8 AS MODval
        FROM [#CODE]
        UNION
            SELECT ID, MOD9 AS MODval
        FROM [#CODE]
        UNION
            SELECT ID, MOD10 AS MODval
        FROM [#CODE]
        UNION
            SELECT ID, MOD11 AS MODval
        FROM [#CODE]
        UNION
            SELECT ID, MOD12 AS MODval
        FROM [#CODE]
        UNION
            SELECT ID, MOD13 AS MODval
        FROM [#CODE]
        UNION
            SELECT ID, MOD14 AS MODval
        FROM [#CODE]
        UNION
            SELECT ID, MOD15 AS MODval
        FROM [#CODE]
        UNION
            SELECT ID, MOD16 AS MODval
        FROM [#CODE]
        )
        AS data)
    SELECT [cteDATA].[ID] ,
         [cteDATA].[MODval] ,
         [cteDATA].[ROWNUM]
    INTO #tempTable
    FROM [cteDATA]

    INSERT INTO [#CODE2]
            (
             [ID] ,
             [MOD0]
            )
    SELECT ID, MODVal
    FROM #tempTable
    WHERE ROWNUM = 1

    DECLARE @counter INT = 1
    DECLARE @strCounter VARCHAR(2)
    DECLARE @strCounterPlus1 VARCHAR(2)
    DECLARE @query VARCHAR(1000)
    WHILE (@counter < 17)
    BEGIN
    SELECT @strCounter= CAST(@counter AS VARCHAR(2))
    SELECT @strCounterPlus1= CAST((@counter+1) AS VARCHAR(2))
    SELECT @query = '
    UPDATE #CODE2
    SET MOD'+@strcounter+' = MODVAL
    FROM #tempTable
    WHERE ROWNUM = '+@strCounterPlus1+'
    AND #tempTable.ID = [#CODE2].[ID]
    AND MODVAL NOT LIKE ''''
    '
    EXEC (@query)
    SELECT @counter = @counter + 1
    END

    SELECT * FROM [#CODE2]
    drop table #code
    DROP TABLE [#CODE2]
    DROP TABLE #temptable

    That was kind of fun to work on.  I am sure there are more optimal ways to work on it, but that seems to meet your needs, no?
    Well, as long as reordering the column values is not an issue.  If that is an issue, then it is a much more complicated problem that will require several loops (as the best way I can think to do it anyways).
    If retaining the column order is a requirement (which I think it might be), let me know and I can take another stab at it.

  • Jacob Wilkins

    One Orange Chip

    Points: 27857

    Ok, so obligatory disclaimer: the order of attributes (columns) should not hold any significance. I point that out because of your desire to shift values "left".

    At any rate, the following query should do what you want.

    WITH split AS
    (
    SELECT ID,MODn, n=ROW_NUMBER() OVER (PARTITION BY ID ORDER BY MIN(n) ASC)-1
    FROM #CODE
     CROSS APPLY
     (VALUES
     (MOD0, 0),
     (MOD1, 1),
     (MOD2, 2),
     (MOD3, 3),
     (MOD4, 4),
     (MOD5, 5),
     (MOD6, 6),
     (MOD7, 7),
     (MOD8, 8),
     (MOD9, 9),
     (MOD10,10),
     (MOD11,11),
     (MOD12,12),
     (MOD13,13),
     (MOD14,14),
     (MOD15,15),
     (MOD16,16)
     )x(MODn,n)
    WHERE MODn<>'' --If you want to preserve an empty string's position, as in the row for ID=222, remove this line.
    GROUP BY ID, MODn

    SELECT ID,
     MOD0=MAX(CASE WHEN n=0 THEN MODn ELSE '' END),
     MOD1=MAX(CASE WHEN n=1 THEN MODn ELSE '' END),
     MOD2=MAX(CASE WHEN n=2 THEN MODn ELSE '' END),
     MOD3=MAX(CASE WHEN n=3 THEN MODn ELSE '' END),
     MOD4=MAX(CASE WHEN n=4 THEN MODn ELSE '' END),
     MOD5=MAX(CASE WHEN n=5 THEN MODn ELSE '' END),
     MOD6=MAX(CASE WHEN n=6 THEN MODn ELSE '' END),
     MOD7=MAX(CASE WHEN n=7 THEN MODn ELSE '' END),
     MOD8=MAX(CASE WHEN n=8 THEN MODn ELSE '' END),
     MOD9=MAX(CASE WHEN n=9 THEN MODn ELSE '' END),
     MOD10=MAX(CASE WHEN n=10 THEN MODn ELSE '' END),
     MOD11=MAX(CASE WHEN n=11 THEN MODn ELSE '' END),
     MOD12=MAX(CASE WHEN n=12 THEN MODn ELSE '' END),
     MOD13=MAX(CASE WHEN n=13 THEN MODn ELSE '' END),
     MOD14=MAX(CASE WHEN n=14 THEN MODn ELSE '' END),
     MOD15=MAX(CASE WHEN n=15 THEN MODn ELSE '' END),
     MOD16=MAX(CASE WHEN n=16 THEN MODn ELSE '' END)
    FROM split
    GROUP BY ID
    ORDER BY ID ASC;

    Cheers!

  • Mr. Brian Gale

    SSC-Insane

    Points: 22523

    Jacob, thanks for pointing out that those are blank not null as with my solution.

    Update to my solution, replace the first INSERT INTO [#code2] section with:
    INSERT INTO [#CODE2]
            (
             [ID] ,
             [MOD0] ,
             [MOD1] ,
             [MOD2] ,
             [MOD3] ,
             [MOD4] ,
             [MOD5] ,
             [MOD6] ,
             [MOD7] ,
             [MOD8] ,
             [MOD9] ,
             [MOD10] ,
             [MOD11] ,
             [MOD12] ,
             [MOD13] ,
             [MOD14] ,
             [MOD15] ,
             [MOD16]
            )
            SELECT ID, MODVal,          '' , -- MOD1 - varchar(20)
             '' , -- MOD2 - varchar(20)
             '' , -- MOD3 - varchar(20)
             '' , -- MOD4 - varchar(20)
             '' , -- MOD5 - varchar(20)
             '' , -- MOD6 - varchar(20)
             '' , -- MOD7 - varchar(20)
             '' , -- MOD8 - varchar(20)
             '' , -- MOD9 - varchar(20)
             '' , -- MOD10 - varchar(20)
             '' , -- MOD11 - varchar(20)
             '' , -- MOD12 - varchar(20)
             '' , -- MOD13 - varchar(20)
             '' , -- MOD14 - varchar(20)
             '' , -- MOD15 - varchar(20)
             '' -- MOD16 - varchar(20)

    FROM #tempTable
    WHERE ROWNUM = 1

    and then mine will put in blanks instead of nulls.  Mine still isn't retaining the order.  Yours retains the order and thus is doing a left shift of anything that is not a duplicate.  Nice!
    I was trying to figure out a nice clean way to do that when the Z10 in the row with ID 111 existed in the beginning and the middle.  I was thinking it was going to have to be a messy nested loop.  Your solution is a lot nicer!

  • sql_learner29

    SSCrazy Eights

    Points: 9107

    Thanks everyone!! it works..
    I appreciate your help !

    [font="Comic Sans MS"]
    ---------------------------------------------------

    Thanks [/font]

  • Viewing 6 posts - 1 through 6 (of 6 total)

    You must be logged in to reply to this topic. Login to reply