Query logic help please

  • Hi All,

    Consider the following:

    DECLARE @STR VARCHAR(200) = '101,102,103,108'

    -- that I can split to a table:

    DECLARE @t TABLE (Id INT)

    INSERT INTO @t

    SELECT 101 UNION ALL

    SELECT 102 UNION ALL

    SELECT 103 UNION ALL

    SELECT 108

    DECLARE @t2 TABLE (CategoryId INT, Id INT)

    INSERT INTO @t2

    SELECT 2, 50 UNION ALL

    SELECT 2, 51 UNION ALL

    SELECT 2, 52 UNION ALL

    SELECT 2, 59 UNION ALL

    SELECT 2, 60 UNION ALL

    SELECT 3, 101 UNION ALL

    SELECT 3, 102 UNION ALL

    SELECT 3, 103 UNION ALL

    SELECT 3, 108 UNION ALL

    SELECT 4, 109 UNION ALL

    SELECT 4, 120 UNION ALL

    SELECT 9, 125 UNION ALL

    SELECT 9, 166 UNION ALL

    SELECT 9, 169

    I need to return the CategoryId from @t2 only if each Id in the comma separated string has a matching value in @t2 (and if there is one and only one CategoryId for the Ids passed), so in this case, 3 should be returned.

    If I passed

    '101,102,103,108,109' (109 is Category 4)

    or

    '102,103,108' (101 is missing)

    etc...

    Then I would like to return NULL.

    Thanks

  • Quick solution for SQL Server 2012 and later, replaces also the split into @t

    😎

    USE tempdb;

    GO

    DECLARE @t2 TABLE (CategoryId INT, Id INT)

    INSERT INTO @t2

    SELECT 2, 50 UNION ALL

    SELECT 2, 51 UNION ALL

    SELECT 2, 52 UNION ALL

    SELECT 2, 59 UNION ALL

    SELECT 2, 60 UNION ALL

    SELECT 3, 101 UNION ALL

    SELECT 3, 102 UNION ALL

    SELECT 3, 103 UNION ALL

    SELECT 3, 108 UNION ALL

    SELECT 4, 109 UNION ALL

    SELECT 4, 120 UNION ALL

    SELECT 9, 125 UNION ALL

    SELECT 9, 166 UNION ALL

    SELECT 9, 169;

    DECLARE @INPUT_STR VARCHAR(50) = '101,102,103,108';--,109

    ;WITH INCOMING(DLM_STR) AS

    (SELECT CONCAT(CHAR(44),@INPUT_STR,CHAR(44)) AS DLM_STR)

    ,CATEGORY_COUNT AS

    (

    SELECT

    TT.CategoryId

    ,ROW_NUMBER() OVER

    (

    PARTITION BY TT.CategoryId

    ORDER BY (SELECT NULL)

    ) AS CAT_RID

    ,MIN(SIGN(CHARINDEX(CONCAT(CHAR(44),TT.Id,CHAR(44)),IC.DLM_STR,1))) OVER

    (

    PARTITION BY TT.CategoryId

    ) AS CAT_MISSING

    ,MAX(SIGN(CHARINDEX(CONCAT(CHAR(44),TT.Id,CHAR(44)),IC.DLM_STR,1))) OVER

    (

    PARTITION BY TT.CategoryId

    ) AS CAT_PRESENT

    FROM @t2 TT

    CROSS APPLY INCOMING IC

    )

    ,FINAL_SET AS

    (

    SELECT

    CC.CategoryId

    ,( CC.CAT_MISSING * CC.CAT_PRESENT ) -

    (MAX(CC.CategoryId) OVER

    (

    PARTITION BY (SELECT NULL)

    )

    - MIN(CC.CategoryId) OVER

    (

    PARTITION BY (SELECT NULL)

    )) AS VALID_FLAG

    FROM CATEGORY_COUNT CC

    WHERE CC.CAT_RID = 1

    AND CC.CAT_PRESENT = 1

    )

    SELECT

    1 AS RET_NO

    ,(

    SELECT

    FS.CategoryId

    FROM FINAL_SET FS

    WHERE FS.VALID_FLAG = 1

    ) AS CategoryId;

  • ...and another solution using the pre-split-string table

    😎

    USE tempdb;

    GO

    DECLARE @t TABLE (Id INT)

    INSERT INTO @t

    SELECT 101 UNION ALL

    SELECT 102 UNION ALL

    SELECT 103 UNION ALL

    SELECT 108;

    DECLARE @t2 TABLE (CategoryId INT, Id INT)

    INSERT INTO @t2

    SELECT 2, 50 UNION ALL

    SELECT 2, 51 UNION ALL

    SELECT 2, 52 UNION ALL

    SELECT 2, 59 UNION ALL

    SELECT 2, 60 UNION ALL

    SELECT 3, 101 UNION ALL

    SELECT 3, 102 UNION ALL

    SELECT 3, 103 UNION ALL

    SELECT 3, 108 UNION ALL

    SELECT 4, 109 UNION ALL

    SELECT 4, 120 UNION ALL

    SELECT 9, 125 UNION ALL

    SELECT 9, 166 UNION ALL

    SELECT 9, 169;

    ;WITH CATEGORY_COUNT AS

    (

    SELECT

    TT.CategoryId

    ,ROW_NUMBER() OVER

    (

    PARTITION BY TT.CategoryId

    ORDER BY (SELECT NULL)

    ) AS CAT_RID

    ,MIN(ISNULL(SIGN(IC.Id),0)) OVER

    (

    PARTITION BY TT.CategoryId

    ) AS CAT_MISSING

    ,MAX(ISNULL(SIGN(IC.Id),0)) OVER

    (

    PARTITION BY TT.CategoryId

    ) AS CAT_PRESENT

    FROM @t2 TT

    LEFT OUTER JOIN @t IC

    ON TT.Id = IC.Id

    )

    ,FINAL_SET AS

    (

    SELECT

    CC.CategoryId

    ,( CC.CAT_MISSING * CC.CAT_PRESENT ) -

    (MAX(CC.CategoryId) OVER

    (

    PARTITION BY (SELECT NULL)

    )

    - MIN(CC.CategoryId) OVER

    (

    PARTITION BY (SELECT NULL)

    )) AS VALID_FLAG

    FROM CATEGORY_COUNT CC

    WHERE CC.CAT_RID = 1

    AND CC.CAT_PRESENT = 1

    )

    SELECT

    1 AS RET_NO

    ,(

    SELECT

    FS.CategoryId

    FROM FINAL_SET FS

    WHERE FS.VALID_FLAG = 1

    ) AS CategoryId;

  • Thanks for your help. On table @t2, Id however is not primary key so if I had a the following data set, the solution would not work:

    DECLARE @t2 TABLE (CategoryId INT, Id INT)

    INSERT INTO @t2

    SELECT 2, 50 UNION ALL

    SELECT 2, 51 UNION ALL

    SELECT 2, 102 UNION ALL

    SELECT 2, 59 UNION ALL

    SELECT 2, 60 UNION ALL

    SELECT 3, 101 UNION ALL

    SELECT 3, 102 UNION ALL

    SELECT 3, 103 UNION ALL

    SELECT 3, 108 UNION ALL

    SELECT 4, 109 UNION ALL

    SELECT 4, 120 UNION ALL

    SELECT 9, 125 UNION ALL

    SELECT 9, 103 UNION ALL

    SELECT 9, 101;

    ALTER TABLE xyz ADD CONSTRAINT [PK_t2_CatId_Id] PRIMARY KEY CLUSTERED

    (

    CategoryId ASC,

    Id ASC

    )

    Thanks again for your help.

  • I'm currently trying something like this:

    DECLARE @t TABLE (Id INT)

    INSERT INTO @t

    SELECT 108 UNION ALL

    SELECT 102 UNION ALL

    SELECT 103 UNION ALL

    SELECT 101;

    DECLARE @t2 TABLE (CategoryId INT, Id INT)

    INSERT INTO @t2

    SELECT 2, 50 UNION ALL

    SELECT 2, 51 UNION ALL

    SELECT 2, 101 UNION ALL

    SELECT 2, 59 UNION ALL

    SELECT 2, 60 UNION ALL

    SELECT 3, 101 UNION ALL

    SELECT 3, 102 UNION ALL

    SELECT 3, 103 UNION ALL

    SELECT 3, 108 UNION ALL

    SELECT 4, 109 UNION ALL

    SELECT 4, 120 UNION ALL

    SELECT 9, 125 UNION ALL

    SELECT 9, 103 UNION ALL

    SELECT 9, 101;

    ;WITH a AS(

    SELECT x1.CategoryId

    , STUFF((SELECT ',' + convert(varchar(30), x2.Id ) AS [text()]

    FROM @t2 x2

    WHERE x1.CategoryId = x2.CategoryId

    ORDER BY x2.Id

    FOR XML PATH('')),1,1,'' ) AS "id_list"

    FROM @t2 x1

    GROUP BY CategoryId

    )

    SELECT CategoryId FROM a WHERE id_list = (

    SELECT DISTINCT STUFF((SELECT ',' + convert(varchar(30), x2.Id ) AS [text()]

    FROM @t x2

    ORDER BY x2.Id

    FOR XML PATH('')),1,1,'' ) AS "id_list"

    FROM @t x1

    )

    Your comments/ suggestions would be appreciated. Thank you.

  • Quick suggestions since Id's can be shared by Categories

    😎

    USE tempdb;

    GO

    DECLARE @t TABLE (Id INT)

    INSERT INTO @t

    SELECT 108 UNION ALL

    SELECT 102 UNION ALL

    SELECT 103 UNION ALL

    SELECT 101;

    DECLARE @t2 TABLE (CategoryId INT, Id INT)

    INSERT INTO @t2

    SELECT 2, 50 UNION ALL

    SELECT 2, 51 UNION ALL

    SELECT 2, 101 UNION ALL

    SELECT 2, 59 UNION ALL

    SELECT 2, 60 UNION ALL

    SELECT 3, 101 UNION ALL

    SELECT 3, 102 UNION ALL

    SELECT 3, 103 UNION ALL

    SELECT 3, 108 UNION ALL

    SELECT 4, 109 UNION ALL

    SELECT 4, 120 UNION ALL

    SELECT 9, 125 UNION ALL

    SELECT 9, 103 UNION ALL

    SELECT 9, 101;

    ;WITH INPUT_STR AS

    (SELECT (

    SELECT

    CONCAT(',', T.ID)

    FROM @t T

    ORDER BY T.Id

    FOR XML PATH(''), TYPE).value('.[1]','VARCHAR(100)') AS IN_STR

    )

    ,CATEGORY_STR AS

    (

    SELECT

    X.CategoryId

    ,(

    SELECT

    CONCAT(',', T.ID)

    FROM @t2 T

    WHERE T.CategoryId = X.CategoryId

    ORDER BY T.Id

    FOR XML PATH(''), TYPE).value('.[1]','VARCHAR(100)') AS X_STR

    FROM @t2 X

    GROUP BY X.CategoryId

    )

    SELECT

    C.CategoryId

    FROM CATEGORY_STR C

    INNER JOIN INPUT_STR I

    ON C.X_STR = I.IN_STR;

  • Excellent, thank you.

  • clayman (10/17/2014)


    Excellent, thank you.

    You are very welcome. Out of curiosity, what do you use for splitting the incoming parameters?

    😎

  • The reason why I split the string is that I want to order the Ids, eg.

    DECLARE @INPUT_STR VARCHAR(50) = '101,102,103,108';

    will be the same as

    DECLARE @INPUT_STR VARCHAR(50) = '103,102,101,108';

  • clayman (10/17/2014)


    The reason why I split the string is that I want to order the Ids, eg.

    DECLARE @INPUT_STR VARCHAR(50) = '101,102,103,108';

    will be the same as

    DECLARE @INPUT_STR VARCHAR(50) = '103,102,101,108';

    I realise the purpose, just curious on how you are actually splitting it.

    😎

  • Hi

    If t2 has large volume of data, there would be performance issue on your approach. My approach is below.

    USE tempdb;

    GO

    DECLARE @t TABLE (Id INT);

    INSERT INTO @t

    VALUES(108)

    , (102)

    , (103)

    , (101);

    IF OBJECT_ID('tempdb..#t2') IS NOT NULL

    DROP TABLE #t2;

    CREATE TABLE #t2

    (

    CategoryId INT

    , Id INT

    );

    CREATE INDEX __#t2_CategoryId_Id ON #t2(CategoryId, Id);

    CREATE INDEX __#t2_Id_CategoryId ON #t2(Id, CategoryId);

    INSERT INTO #t2

    VALUES(2, 50)

    ,(2, 51)

    ,(2, 101)

    ,(2, 59)

    ,(2, 60)

    ,(3, 101)

    ,(3, 102)

    ,(3, 103)

    ,(3, 108)

    ,(4, 109)

    ,(4, 120)

    ,(9, 125)

    ,(9, 103)

    ,(9, 101);

    DECLARE @NoOfIds int = (SELECT COUNT(*) FROM @t);

    WITH AllCategories

    AS

    (

    SELECTDISTINCT T2.CategoryId

    FROM#t2 T2

    INNER JOIN @t T ON (T.Id = T2.Id)

    )

    SELECTT2.CategoryId

    FROMAllCategories AC

    INNER JOIN #t2 T2 ON (T2.CategoryId = AC.CategoryId)

    LEFT OUTER JOIN @t T ON (T.Id = T2.Id)

    GROUP BY T2.CategoryId

    HAVING COUNT(T.Id) = @NoOfIds;

  • Good input @Seong-Bae Hwang, your approach is much more efficient as it bypasses the expensive XML table value functions and the string manipulation. Well done!

    Must admit that I got a little stuck inside the "match the string" box:pinch:

    😎

  • Seong-Bae Hwang (10/18/2014)


    Hi

    If t2 has large volume of data, there would be performance issue on your approach. My approach is below.

    USE tempdb;

    GO

    DECLARE @t TABLE (Id INT);

    INSERT INTO @t

    VALUES(108)

    , (102)

    , (103)

    , (101);

    IF OBJECT_ID('tempdb..#t2') IS NOT NULL

    DROP TABLE #t2;

    CREATE TABLE #t2

    (

    CategoryId INT

    , Id INT

    );

    CREATE INDEX __#t2_CategoryId_Id ON #t2(CategoryId, Id);

    CREATE INDEX __#t2_Id_CategoryId ON #t2(Id, CategoryId);

    INSERT INTO #t2

    VALUES(2, 50)

    ,(2, 51)

    ,(2, 101)

    ,(2, 59)

    ,(2, 60)

    ,(3, 101)

    ,(3, 102)

    ,(3, 103)

    ,(3, 108)

    ,(4, 109)

    ,(4, 120)

    ,(9, 125)

    ,(9, 103)

    ,(9, 101);

    DECLARE @NoOfIds int = (SELECT COUNT(*) FROM @t);

    WITH AllCategories

    AS

    (

    SELECTDISTINCT T2.CategoryId

    FROM#t2 T2

    INNER JOIN @t T ON (T.Id = T2.Id)

    )

    SELECTT2.CategoryId

    FROMAllCategories AC

    INNER JOIN #t2 T2 ON (T2.CategoryId = AC.CategoryId)

    LEFT OUTER JOIN @t T ON (T.Id = T2.Id)

    GROUP BY T2.CategoryId

    HAVING COUNT(T.Id) = @NoOfIds;

    Hi, thanks for your response. However, if I added for example " (3, 666) " to table #t2, then the query would return 3.. whereas it should return null.

  • Some code refactoring on the previous set based solution to fulfil all the requirements, several orders of magnitude faster than string manipulation although it has not been tested on very large set.

    😎

    USE tempdb;

    GO

    DECLARE @t TABLE (Id INT)

    INSERT INTO @t

    SELECT 101 UNION ALL

    SELECT 102 UNION ALL

    SELECT 103 UNION ALL

    SELECT 108;

    DECLARE @t2 TABLE (CategoryId INT, Id INT)

    INSERT INTO @t2

    SELECT 2, 50 UNION ALL

    SELECT 2, 51 UNION ALL

    SELECT 2, 52 UNION ALL

    SELECT 2, 59 UNION ALL

    SELECT 2, 60 UNION ALL

    SELECT 3, 101 UNION ALL

    SELECT 3, 102 UNION ALL

    SELECT 3, 103 UNION ALL

    SELECT 3, 108 UNION ALL

    SELECT 9, 108 UNION ALL

    SELECT 4, 108 UNION ALL

    SELECT 4, 109 UNION ALL

    SELECT 4, 120 UNION ALL

    SELECT 9, 125 UNION ALL

    SELECT 9, 166 UNION ALL

    SELECT 9, 169;

    ;WITH CATEGORY_CANDIDATES AS

    (

    SELECT DISTINCT

    T2.CategoryId

    FROM @t2 T2

    WHERE T2.Id IN (SELECT Id FROM @T)

    )

    ,BASE_SET AS

    (

    SELECT

    T2.CategoryId

    ,T2.Id

    ,COUNT(T2.Id) OVER

    (

    PARTITION BY T2.CategoryId

    ) AS T2_COUNT

    FROM @t2 T2

    INNER JOIN CATEGORY_CANDIDATES CC

    ON T2.CategoryId = CC.CategoryId

    )

    ,INCOMING_SET AS

    (

    SELECT

    T.Id

    ,COUNT(T.Id) OVER

    (

    PARTITION BY (SELECT NULL)

    ) AS T_COUNT

    FROM @t T

    )

    ,MATCHING_SET AS

    (

    SELECT

    BS.CategoryId

    ,COUNT(*) OVER

    (

    PARTITION BY BS.CategoryId

    ) AS T2M_COUNT

    ,T_COUNT

    ,T2_COUNT

    FROM INCOMING_SET INS

    INNER JOIN BASE_SET BS

    ON INS.Id = BS.Id

    )

    SELECT

    (

    SELECT TOP(1)

    MS.CategoryId

    FROM MATCHING_SET MS

    WHERE MS.T_COUNT = MS.T2_COUNT

    AND MS.T_COUNT = MS.T2M_COUNT

    ) AS CategoryId

  • Eirikur Eiriksson (10/20/2014)


    Some code refactoring on the previous set based solution to fulfil all the requirements, several orders of magnitude faster than string manipulation although it has not been tested on very large set.

    😎

    Careful now. If you have another CategoryID that does meet the requirements, it won't show.

    --Jeff Moden


    RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
    First step towards the paradigm shift of writing Set Based code:
    ________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

    Change is inevitable... Change for the better is not.


    Helpful Links:
    How to post code problems
    How to Post Performance Problems
    Create a Tally Function (fnTally)

Viewing 15 posts - 1 through 15 (of 17 total)

You must be logged in to reply to this topic. Login to reply