find records in a string and display

  • Hi All,

    I am having a column which is ntext and contains below type of data.

    { "running":"true", "all":{ }, "GPAs" : [ {"type":"item", "alias":"i_11111"} ,"GPA":"1.75" }, {"element": {"type":"item", "alias":"i_11111"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_33333"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_44444"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_22222"} ,"GPA":"1.42" }, {"element": {"type":"item", "alias":"i_55555"} ,"GPA":"15" }, {"element": {"type":"item", "alias":"i_66666"} ,"GPA":"10" } ]}

    above is a single row. There are many such rows are present in the table. Now I want to find all alias and GPA and display as below.

    idGPA

    111111.75

    111111.43

    333331.43

    444441.43

    In oracle it can be done by using REGEXP_SUBSTR. But how can we do this in SQL? Please help.

    Thanks,

    Abhas.

  • abhas (7/6/2015)


    Hi All,

    I am having a column which is ntext and contains below type of data.

    { "running":"true", "all":{ }, "GPAs" : [ {"type":"item", "alias":"i_11111"} ,"GPA":"1.75" }, {"element": {"type":"item", "alias":"i_11111"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_33333"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_44444"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_22222"} ,"GPA":"1.42" }, {"element": {"type":"item", "alias":"i_55555"} ,"GPA":"15" }, {"element": {"type":"item", "alias":"i_66666"} ,"GPA":"10" } ]}

    above is a single row. There are many such rows are present in the table. Now I want to find all alias and GPA and display as below.

    idGPA

    111111.75

    111111.43

    333331.43

    444441.43

    In oracle it can be done by using REGEXP_SUBSTR. But how can we do this in SQL? Please help.

    Thanks,

    Abhas.

    If the maximum length of the data is less than or equal to 8,000 characters for a single ntext field, then this should work:

    CREATE FUNCTION dbo.fnDelimitedSplit8K (

    @pString VARCHAR(8000),

    @pDelimiter CHAR(1)

    )

    --WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!

    RETURNS TABLE WITH SCHEMABINDING AS

    RETURN

    --===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...

    -- enough to cover VARCHAR(8000)

    WITH E1(N) AS (

    SELECT 1 UNION ALL

    SELECT 1 UNION ALL

    SELECT 1 UNION ALL

    SELECT 1 UNION ALL

    SELECT 1 UNION ALL

    SELECT 1 UNION ALL

    SELECT 1 UNION ALL

    SELECT 1 UNION ALL

    SELECT 1 UNION ALL

    SELECT 1

    ),--10E+1 or 10 rows

    E2(N) AS (

    SELECT 1

    FROM E1 AS a, E1 AS b

    ),--10E+2 or 100 rows

    E4(N) AS (

    SELECT 1

    FROM E2 AS a, E2 AS b

    ),--10E+4 or 10,000 rows max

    cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front

    -- for both a performance gain and prevention of accidental "overruns"

    SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))

    FROM E4

    ),

    cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)

    SELECT 1

    UNION ALL

    SELECT t.N+1

    FROM cteTally AS t

    WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter

    ),

    cteLen(N1,L1) AS (--==== Return start and length (for use in substring)

    SELECT s.N1, ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)

    FROM cteStart s

    )

    --===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.

    SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1), Item = SUBSTRING(@pString, l.N1, l.L1)

    FROM cteLen l;

    GO

    DECLARE @NTEXT_TABLE AS TABLE (

    NTEXT_COLUMN_NAME ntext

    );

    INSERT INTO @NTEXT_TABLE (NTEXT_COLUMN_NAME)

    VALUES ('{ "running":"true", "all":{ }, "GPAs" : [ {"type":"item", "alias":"i_11111"} ,"GPA":"1.75" }, {"element": {"type":"item", "alias":"i_11111"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_33333"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_44444"} ,"GPA":"1.43" }, {"element": {"type":"item", "alias":"i_22222"} ,"GPA":"1.42" }, {"element": {"type":"item", "alias":"i_55555"} ,"GPA":"15" }, {"element": {"type":"item", "alias":"i_66666"} ,"GPA":"10" } ]}');

    WITH SOURCE_DATA_ALIAS AS (

    SELECT DS.Item, DS.ItemNumber AS RN

    FROM @NTEXT_TABLE AS NT

    CROSS APPLY dbo.fnDelimitedSplit8K(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(CAST(NTEXT_COLUMN_NAME AS varchar(8000)), '{', ''), '}', ''), '[', ''), ']', ''), '"', ''), ' ', ''), ',') AS DS

    WHERE DS.ItemNumber > 3

    AND DS.Item LIKE 'alias%'

    ),

    SOURCE_DATA_GPA AS (

    SELECT DS.Item, DS.ItemNumber AS RN

    FROM @NTEXT_TABLE AS NT

    CROSS APPLY dbo.fnDelimitedSplit8K(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(CAST(NTEXT_COLUMN_NAME AS varchar(8000)), '{', ''), '}', ''), '[', ''), ']', ''), '"', ''), ' ', ''), ',') AS DS

    WHERE DS.ItemNumber > 3

    AND DS.Item LIKE 'GPA%'

    )

    SELECT RIGHT(A.Item, LEN(A.Item) - CHARINDEX(':', A.Item)) AS Alias, RIGHT(G.Item, LEN(G.Item) - CHARINDEX(':', G.Item)) AS GPA

    FROM SOURCE_DATA_ALIAS AS A

    INNER JOIN SOURCE_DATA_GPA AS G

    ON A.RN = G.RN - 1;

    It makes use of Jeff Moden's string splitter function, which is a highly performant piece of code that is ideal for this.

    Steve (aka sgmunson) 🙂 🙂 🙂
    Rent Servers for Income (picks and shovels strategy)

Viewing 2 posts - 1 through 1 (of 1 total)

You must be logged in to reply to this topic. Login to reply