Shred XML with 4 level hierarchy into SQL Server table

  • Hi,

    I have a xml document and I need to shred it and store it into a table. The difficulty for me is that each element in same level has different element name.

    Here is a part of the xml, which has 4 level hierarchy.

    <root>

    <elem10 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem10101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem101010>

    <elem101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem10102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102030 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102040 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102050 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem101020>

    </elem10>

    <elem15 Attr01="0.08" Attr02="-0.05" Attr03="9.00" Attr04="9.00" Attr05="7.00" Attr06="4.00">

    <elem151010 Attr01="0.03" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="1.00">

    <elem15101010 Attr01="0.02" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />

    <elem15101020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="0.00" />

    <elem15101030 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    <elem15101040 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    <elem15101050 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    </elem151010>

    <elem151020 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00">

    <elem15102010 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    </elem151020>

    <elem151030 Attr01="0.02" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="3.00" Attr06="1.00">

    <elem15103010 Attr01="0.01" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />

    <elem15103020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="0.00" />

    </elem151030>

    </elem15>

    <elem20 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem201010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem20101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem201010>

    <elem201020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem20102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem201020>

    </elem20>

    </root>

    Here is the expected table with result:(Sorry I cannot edit the expected output in good format, so I attached a image which is a well formatted output)

    ElementLV1ElementLV2ElementLV3Attr01Attr02Attr03Attr04Attr05Attr06

    elem10NULLNULL999999

    elem10elem101010NULL999999

    elem10elem101010elem10101010999999

    elem10elem101010elem10101020999999

    elem10elem101020NULL999999

    elem10elem101020elem10102010999999

    elem10elem101020elem10102020999999

    elem10elem101020elem10102030999999

    elem10elem101020elem10102040999999

    elem10elem101020elem10102050999999

    elem15NULLNULL0.08-0.059974

    elem15elem151010NULL0.03-0.019921

    elem15elem151010elem151010100.02-0.019911

    elem15elem151010elem151010200.0109910

    elem15elem151010elem15101030009900

    elem15elem151010elem15101040009900

    elem15elem151010elem15101050009900

    elem15elem151020NULL009900

    elem15elem151020elem15102010009900

    elem15elem151030NULL0.02-0.029931

    elem15elem151030elem151030100.01-0.029911

    elem15elem151030elem151030200.0109920

    elem20NULLNULL999999

    elem20elem201010NULL999999

    elem20NULLelem20101010999999

    elem20elem201020NULL999999

    elem20NULLelem20102010999999

    Could anyone help me out? Thanks,

    Tao

  • Hi, I've just come up with this... does this help with what you need:

    declare @xml xml = '<root>

    <elem10 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem10101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem101010>

    <elem101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem10102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102030 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102040 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102050 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem101020>

    </elem10>

    <elem15 Attr01="0.08" Attr02="-0.05" Attr03="9.00" Attr04="9.00" Attr05="7.00" Attr06="4.00">

    <elem151010 Attr01="0.03" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="1.00">

    <elem15101010 Attr01="0.02" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />

    <elem15101020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="0.00" />

    <elem15101030 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    <elem15101040 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    <elem15101050 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    </elem151010>

    <elem151020 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00">

    <elem15102010 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    </elem151020>

    <elem151030 Attr01="0.02" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="3.00" Attr06="1.00">

    <elem15103010 Attr01="0.01" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />

    <elem15103020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="0.00" />

    </elem151030>

    </elem15>

    <elem20 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem201010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem20101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem201010>

    <elem201020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem20102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem201020>

    </elem20>

    </root>'

    select l1.c.query('local-name(.)') as ElementLVL1

    , null as ElementLVL2

    , null as ElementLVL3

    , l1.c.value('@Attr01', 'decimal(12,2)') as Attr01

    , l1.c.value('@Attr02', 'decimal(12,2)') as Attr02

    , l1.c.value('@Attr03', 'decimal(12,2)') as Attr03

    , l1.c.value('@Attr04', 'decimal(12,2)') as Attr04

    , l1.c.value('@Attr05', 'decimal(12,2)') as Attr05

    , l1.c.value('@Attr06', 'decimal(12,2)') as Attr06

    from @xml.nodes('/root/*') l1(c)

    union all

    select l1.c.query('local-name(.)')

    , l2.c.query('local-name(.)')

    , null

    , l2.c.value('@Attr01', 'decimal(12,2)')

    , l2.c.value('@Attr02', 'decimal(12,2)')

    , l2.c.value('@Attr03', 'decimal(12,2)')

    , l2.c.value('@Attr04', 'decimal(12,2)')

    , l2.c.value('@Attr05', 'decimal(12,2)')

    , l2.c.value('@Attr06', 'decimal(12,2)')

    from @xml.nodes('/root/*') l1(c)

    outer apply l1.c.nodes('*') l2(c)

    union all

    select l1.c.query('local-name(.)')

    , l2.c.query('local-name(.)')

    , l3.c.query('local-name(.)')

    , l3.c.value('@Attr01', 'decimal(12,2)')

    , l3.c.value('@Attr02', 'decimal(12,2)')

    , l3.c.value('@Attr03', 'decimal(12,2)')

    , l3.c.value('@Attr04', 'decimal(12,2)')

    , l3.c.value('@Attr05', 'decimal(12,2)')

    , l3.c.value('@Attr06', 'decimal(12,2)')

    from @xml.nodes('/root/*') l1(c)

    outer apply l1.c.nodes('*') l2(c)

    outer apply l2.c.nodes('*') l3(c)

  • Here's a stored procedure that will shred the XML into a table. I had to add a set of <body></body> tags below the root but otherwise your XML parsed perfectly.

    [The code for creating stored procedure is below with attached png file output sample.]

    EXEC dbo.ParseXML_ByXMLInput

    N'<root>

    <body>

    <elem10 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem10101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem101010>

    <elem101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem10102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102030 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102040 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102050 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem101020>

    </elem10>

    <elem15 Attr01="0.08" Attr02="-0.05" Attr03="9.00" Attr04="9.00" Attr05="7.00" Attr06="4.00">

    <elem151010 Attr01="0.03" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="1.00">

    <elem15101010 Attr01="0.02" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />

    <elem15101020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="0.00" />

    <elem15101030 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    <elem15101040 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    <elem15101050 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    </elem151010>

    <elem151020 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00">

    <elem15102010 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    </elem151020>

    <elem151030 Attr01="0.02" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="3.00" Attr06="1.00">

    <elem15103010 Attr01="0.01" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />

    <elem15103020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="0.00" />

    </elem151030>

    </elem15>

    <elem20 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem201010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem20101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem201010>

    <elem201020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem20102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem201020>

    </elem20>

    </body>

    </root>'

    ,'root'

    ,'/body'

    ,1

    ,0

    [/code]

    CREATE PROCEDURE [dbo].[ParseXML_ByXMLInput]

    @strXML NVARCHAR(MAX)

    ,@schemanode NVARCHAR(255)

    ,@rootnode NVARCHAR(255)

    ,@showAll BIT = 0

    ,@debug BIT = 0

    AS

    BEGIN

    SET NOCOUNT ON

    DECLARE

    @idoc INT

    ,@id INT

    ,@parentid INT

    ,@SoapEnvOpen NVARCHAR(MAX)

    ,@SoapEnvClose NVARCHAR(MAX)

    ,@SoapBodyOpen NVARCHAR(MAX)

    ,@SoapBodyClose NVARCHAR(MAX)

    ,@SchemaNodeOpen NVARCHAR(MAX)

    ,@SchemaNodeClose NVARCHAR(MAX)

    ,@checkSoap INT

    ,@isSoap BIT = 0

    ,@checkSchema INT

    ,@isSchema BIT = 0

    ,@checkXMLHeader INT

    ,@isHeader BIT = 0

    ,@XMLHeader NVARCHAR(MAX)

    IF OBJECT_ID('tempdb..#ChildList') IS NOT NULL

    DROP TABLE #ChildList

    CREATE TABLE #ChildList (

    [RowNum] INT IDENTITY(1,1) NOT NULL,

    [parentid] INT NULL,

    [id] INT NULL,

    PRIMARY KEY (RowNum),

    UNIQUE (RowNum))

    IF OBJECT_ID('tempdb..#NodeList') IS NOT NULL

    DROP TABLE #NodeList

    CREATE TABLE #NodeList (

    [RowNum] INT NOT NULL,

    [id] INT NULL,

    [parentid] INT NULL,

    [nodetype] INT NULL,

    [localname] NVARCHAR(MAX) NULL,

    [text] NVARCHAR(MAX) NULL,

    PRIMARY KEY (RowNum),

    UNIQUE (RowNum))

    SET @id = 1

    SET @parentid = NULL

    /* Check to see if any XML exists */

    SET @strXML = NULLIF(@strXML,'')

    IF @strXML IS NULL

    BEGIN

    SELECT

    NULL AS RowNum

    ,NULL AS id

    ,NULL AS parentid

    ,NULL AS nodepath

    ,NULL AS nodetype

    ,NULL AS nodename

    ,NULL AS property

    ,NULL AS value

    ,NULL AS nodecontents

    ,'No XML to process' AS XMLStatus

    RETURN

    END

    ELSE

    BEGIN

    -- Get rid of tabs and extra spaces

    SELECT @strXML = dbo.svfRemoveExcessSpacesFromXML(@strXML)

    IF CHARINDEX('<',@strXML,1) > 0

    BEGIN

    SET @strXML = REPLACE(@strXML,'<','<')

    SET @strXML = REPLACE(@strXML,'>','>')

    IF @debug = 1

    SELECT @strXML AS ReplacedXML

    END

    END

    IF @debug = 1

    SELECT @strXML AS InputXML

    /* Check to see if the XML has a header */

    SET @checkXMLHeader = CHARINDEX('<?xml version',@strXML,0)

    IF @checkXMLHeader > 0

    SET @isHeader = 1

    /* If the XML has a header then remove it */

    IF @isHeader = 1

    BEGIN

    SET @XMLHeader = SUBSTRING(@strXML,CHARINDEX('<?xml version',@strXML,0),CHARINDEX('>',@strXML,0))

    SET @strXML = REPLACE(@strXML,@XMLHeader,'')

    END

    IF @debug = 1

    SELECT @XMLHeader AS XMLHeader

    /* Check to see if the XML has a SOAP wrapper */

    SET @checkSoap = CHARINDEX('<soapenv:Envelope',@strXML,0)

    IF @checkSoap > 0

    SET @isSoap = 1

    /* If the XML has a SOAP wrapper then remove it */

    IF @isSoap = 1

    BEGIN

    SET @SoapEnvOpen = SUBSTRING(@strXML,CHARINDEX('<soapenv:Envelope',@strXML,0),CHARINDEX('>',@strXML,0))

    SET @strXML = REPLACE(@strXML,@SoapEnvOpen,'')

    SET @SoapBodyOpen = SUBSTRING(@strXML,CHARINDEX('<soapenv:Body',@strXML,0),CHARINDEX('>',@strXML,0))

    SET @strXML = REPLACE(@strXML,@SoapBodyOpen,'')

    SET @SoapEnvClose = SUBSTRING(@strXML,CHARINDEX('</soapenv:Envelope>',@strXML,0),LEN('</soapenv:Envelope>'))

    SET @strXML = REPLACE(@strXML,@SoapEnvClose,'')

    SET @SoapBodyClose = SUBSTRING(@strXML,CHARINDEX('</soapenv:Body>',@strXML,0),LEN('</soapenv:Body>'))

    SET @strXML = REPLACE(@strXML,@SoapBodyClose,'')

    END

    IF @debug = 1

    BEGIN

    SELECT @SoapEnvOpen AS Soap_Wrapper

    SELECT @SoapBodyOpen AS Soap_Body

    SELECT @strXML AS XML_Without_SoapWrapper

    END

    --for testing

    --SELECT @strXML

    --RETURN

    /* Check to see if the XML has a schema definition node */

    SET @checkSchema = CHARINDEX('<'+@schemanode,@strXML,0)

    IF @checkSchema > 0

    SET @isSchema = 1

    /* If a schema definition node exists remove it */

    IF @isSchema = 1

    BEGIN

    SET @SchemaNodeOpen = SUBSTRING(@strXML,CHARINDEX('<'+@schemanode,@strXML,0),CHARINDEX('>',@strXML,0))

    SET @strXML = REPLACE(@strXML,@SchemaNodeOpen,'')

    SET @SchemaNodeClose = SUBSTRING(@strXML,CHARINDEX('</'+@schemanode+'>',@strXML,0),LEN('</'+@schemanode+'>'))

    SET @strXML = REPLACE(@strXML,@SchemaNodeClose,'')

    END

    BEGIN TRY

    EXEC sp_xml_preparedocument @idoc OUTPUT, @strXML

    END TRY

    BEGIN CATCH

    BEGIN

    IF @debug = 1

    BEGIN

    SELECT @strXML AS ModifiedXML

    SELECT CONVERT(XML,@strXML) AS FormattedXML

    END

    SELECT

    NULL AS RowNum

    ,NULL AS id

    ,NULL AS parentid

    ,NULL AS nodepath

    ,NULL AS nodetype

    ,NULL AS nodename

    ,NULL AS property

    ,NULL AS value

    ,NULL AS nodecontents

    ,'Invalid XML' AS XMLStatus

    RETURN

    END

    END CATCH

    ;WITH cte

    AS (

    SELECT

    CAST(p1.parentid AS INT) AS parentid

    ,CAST(p1.id AS INT) AS id

    FROM

    OPENXML (@idoc,@rootnode,2) AS p1

    UNION ALL

    SELECT

    CAST(p2.parentid AS INT) AS parentid

    ,CAST(p2.id AS INT) AS id

    FROM

    OPENXML (@idoc,@rootnode,2) AS p2

    JOIN

    cte

    ON CAST(cte.id AS INT) = CAST(p2.ParentID AS INT)

    WHERE

    CAST(p2.parentid AS INT) = @parentid

    )

    INSERT INTO #ChildList

    SELECT *

    FROM cte

    INSERT INTO #NodeList

    SELECT

    #ChildList.RowNum

    ,xmllist.id

    ,xmllist.parentid

    ,xmllist.nodetype

    ,xmllist.localname

    ,CAST(xmllist.[text] AS NVARCHAR(MAX)) AS [text]

    FROM #ChildList

    INNER JOIN

    OPENXML (@idoc,@rootnode,2) AS xmllist

    ON #ChildList.id = xmllist.id

    WHERE

    #ChildList.RowNum > 0

    --for testing

    --SELECT * FROM #NodeList

    --RETURN

    ;WITH RecursiveNodes(RowNum,id,parentid,nodepath,localname,[text],nodetype)

    AS (

    SELECT

    #NodeList.RowNum

    ,#NodeList.id

    ,#NodeList.parentid

    ,CAST('/' + REPLACE(REPLACE(REPLACE(REPLACE(#NodeList.localname,'&',''),'?',''),' ',''),'.','') AS NVARCHAR(255)) AS nodepath

    ,#NodeList.localname

    ,CAST(#NodeList.[text] AS NVARCHAR(MAX)) AS [text]

    ,0 AS nodetype

    FROM #ChildList

    INNER JOIN

    #NodeList

    ON #ChildList.id = #NodeList.id

    WHERE

    #NodeList.parentid IS NULL

    AND #ChildList.RowNum > 0

    AND #NodeList.RowNum > 0

    UNION ALL

    SELECT

    n.RowNum

    ,n.id

    ,n.parentid

    ,CAST(r.nodepath + '/'+ REPLACE(REPLACE(REPLACE(REPLACE(n.localname,'&',''),'?',''),' ',''),'.','') AS NVARCHAR(255)) AS nodepath

    ,n.localname

    ,n.[text]

    ,n.nodetype

    FROM #NodeList AS n

    INNER JOIN

    RecursiveNodes AS r

    ON n.parentid = r.id

    WHERE

    n.RowNum > 0

    AND r.RowNum > 0

    AND n.parentid >= 0

    )

    SELECT

    ROW_NUMBER() OVER (ORDER BY Result.RowNum) AS RowNum

    ,Result.id

    ,Result.parentid

    ,Result.nodepath

    ,Result.nodetype

    ,Result.nodename

    ,Result.property

    ,Result.value

    ,Result.nodecontents

    ,'OK' AS XMLStatus

    FROM

    (

    SELECT

    rn.RowNum

    ,rn.id

    ,rn.parentid

    ,rn.nodepath

    ,(CASE

    WHEN rn.nodetype = 0 THEN 'Root'

    WHEN rn.nodetype = 1 THEN 'Node'

    WHEN rn.nodetype = 2 THEN 'Property'

    ELSE 'Data'

    END) AS nodetype

    ,(CASE

    WHEN rn.nodetype = 0 THEN rn.localname

    WHEN rn.nodetype = 1 THEN rn.localname

    WHEN rn.nodetype = 2 THEN (SELECT TOP(1) localname FROM RecursiveNodes WHERE id = rn.parentid)

    ELSE NULL

    END) AS nodename

    ,(CASE

    WHEN rn.nodetype = 2 THEN rn.localname

    ELSE NULL

    END) AS property

    ,(CASE

    WHEN rn.nodetype = 2 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.id)

    ELSE NULL

    END) AS value

    ,(CASE

    WHEN rn.nodetype = 1 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.id)

    WHEN rn.nodetype = 2 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.parentid and [text] is not null)

    ELSE NULL

    END) AS nodecontents

    FROM

    RecursiveNodes AS rn

    WHERE

    rn.localname <> '#text'

    ) AS Result

    WHERE

    Result.id >= 0

    AND (@ShowAll = 0

    AND (Result.id = 0

    OR property IS NOT NULL

    OR value IS NOT NULL

    OR nodecontents IS NOT NULL))

    OR (@ShowAll = 1)

    END

     

     

  • Hi arthurolcot,

    Thanks, this solution works for me.

    Tao

  • Hi Steven,

    Thank you very much, the solution works for me.

    Steven Willis (7/8/2013)


    Here's a stored procedure that will shred the XML into a table. I had to add a set of <body></body> tags below the root but otherwise your XML parsed perfectly.

    [The code for creating stored procedure is below with attached png file output sample.]

    EXEC dbo.ParseXML_ByXMLInput

    N'<root>

    <body>

    <elem10 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem10101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem101010>

    <elem101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem10102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102030 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102040 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102050 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem101020>

    </elem10>

    <elem15 Attr01="0.08" Attr02="-0.05" Attr03="9.00" Attr04="9.00" Attr05="7.00" Attr06="4.00">

    <elem151010 Attr01="0.03" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="1.00">

    <elem15101010 Attr01="0.02" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />

    <elem15101020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="0.00" />

    <elem15101030 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    <elem15101040 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    <elem15101050 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    </elem151010>

    <elem151020 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00">

    <elem15102010 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    </elem151020>

    <elem151030 Attr01="0.02" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="3.00" Attr06="1.00">

    <elem15103010 Attr01="0.01" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />

    <elem15103020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="0.00" />

    </elem151030>

    </elem15>

    <elem20 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem201010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem20101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem201010>

    <elem201020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem20102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem201020>

    </elem20>

    </body>

    </root>'

    ,'root'

    ,'/body'

    ,1

    ,0

    [/code]

    CREATE PROCEDURE [dbo].[ParseXML_ByXMLInput]

    @strXML NVARCHAR(MAX)

    ,@schemanode NVARCHAR(255)

    ,@rootnode NVARCHAR(255)

    ,@showAll BIT = 0

    ,@debug BIT = 0

    AS

    BEGIN

    SET NOCOUNT ON

    DECLARE

    @idoc INT

    ,@id INT

    ,@parentid INT

    ,@SoapEnvOpen NVARCHAR(MAX)

    ,@SoapEnvClose NVARCHAR(MAX)

    ,@SoapBodyOpen NVARCHAR(MAX)

    ,@SoapBodyClose NVARCHAR(MAX)

    ,@SchemaNodeOpen NVARCHAR(MAX)

    ,@SchemaNodeClose NVARCHAR(MAX)

    ,@checkSoap INT

    ,@isSoap BIT = 0

    ,@checkSchema INT

    ,@isSchema BIT = 0

    ,@checkXMLHeader INT

    ,@isHeader BIT = 0

    ,@XMLHeader NVARCHAR(MAX)

    IF OBJECT_ID('tempdb..#ChildList') IS NOT NULL

    DROP TABLE #ChildList

    CREATE TABLE #ChildList (

    [RowNum] INT IDENTITY(1,1) NOT NULL,

    [parentid] INT NULL,

    [id] INT NULL,

    PRIMARY KEY (RowNum),

    UNIQUE (RowNum))

    IF OBJECT_ID('tempdb..#NodeList') IS NOT NULL

    DROP TABLE #NodeList

    CREATE TABLE #NodeList (

    [RowNum] INT NOT NULL,

    [id] INT NULL,

    [parentid] INT NULL,

    [nodetype] INT NULL,

    [localname] NVARCHAR(MAX) NULL,

    [text] NVARCHAR(MAX) NULL,

    PRIMARY KEY (RowNum),

    UNIQUE (RowNum))

    SET @id = 1

    SET @parentid = NULL

    /* Check to see if any XML exists */

    SET @strXML = NULLIF(@strXML,'')

    IF @strXML IS NULL

    BEGIN

    SELECT

    NULL AS RowNum

    ,NULL AS id

    ,NULL AS parentid

    ,NULL AS nodepath

    ,NULL AS nodetype

    ,NULL AS nodename

    ,NULL AS property

    ,NULL AS value

    ,NULL AS nodecontents

    ,'No XML to process' AS XMLStatus

    RETURN

    END

    ELSE

    BEGIN

    -- Get rid of tabs and extra spaces

    SELECT @strXML = dbo.svfRemoveExcessSpacesFromXML(@strXML)

    IF CHARINDEX('<',@strXML,1) > 0

    BEGIN

    SET @strXML = REPLACE(@strXML,'<','<')

    SET @strXML = REPLACE(@strXML,'>','>')

    IF @debug = 1

    SELECT @strXML AS ReplacedXML

    END

    END

    IF @debug = 1

    SELECT @strXML AS InputXML

    /* Check to see if the XML has a header */

    SET @checkXMLHeader = CHARINDEX('<?xml version',@strXML,0)

    IF @checkXMLHeader > 0

    SET @isHeader = 1

    /* If the XML has a header then remove it */

    IF @isHeader = 1

    BEGIN

    SET @XMLHeader = SUBSTRING(@strXML,CHARINDEX('<?xml version',@strXML,0),CHARINDEX('>',@strXML,0))

    SET @strXML = REPLACE(@strXML,@XMLHeader,'')

    END

    IF @debug = 1

    SELECT @XMLHeader AS XMLHeader

    /* Check to see if the XML has a SOAP wrapper */

    SET @checkSoap = CHARINDEX('<soapenv:Envelope',@strXML,0)

    IF @checkSoap > 0

    SET @isSoap = 1

    /* If the XML has a SOAP wrapper then remove it */

    IF @isSoap = 1

    BEGIN

    SET @SoapEnvOpen = SUBSTRING(@strXML,CHARINDEX('<soapenv:Envelope',@strXML,0),CHARINDEX('>',@strXML,0))

    SET @strXML = REPLACE(@strXML,@SoapEnvOpen,'')

    SET @SoapBodyOpen = SUBSTRING(@strXML,CHARINDEX('<soapenv:Body',@strXML,0),CHARINDEX('>',@strXML,0))

    SET @strXML = REPLACE(@strXML,@SoapBodyOpen,'')

    SET @SoapEnvClose = SUBSTRING(@strXML,CHARINDEX('</soapenv:Envelope>',@strXML,0),LEN('</soapenv:Envelope>'))

    SET @strXML = REPLACE(@strXML,@SoapEnvClose,'')

    SET @SoapBodyClose = SUBSTRING(@strXML,CHARINDEX('</soapenv:Body>',@strXML,0),LEN('</soapenv:Body>'))

    SET @strXML = REPLACE(@strXML,@SoapBodyClose,'')

    END

    IF @debug = 1

    BEGIN

    SELECT @SoapEnvOpen AS Soap_Wrapper

    SELECT @SoapBodyOpen AS Soap_Body

    SELECT @strXML AS XML_Without_SoapWrapper

    END

    --for testing

    --SELECT @strXML

    --RETURN

    /* Check to see if the XML has a schema definition node */

    SET @checkSchema = CHARINDEX('<'+@schemanode,@strXML,0)

    IF @checkSchema > 0

    SET @isSchema = 1

    /* If a schema definition node exists remove it */

    IF @isSchema = 1

    BEGIN

    SET @SchemaNodeOpen = SUBSTRING(@strXML,CHARINDEX('<'+@schemanode,@strXML,0),CHARINDEX('>',@strXML,0))

    SET @strXML = REPLACE(@strXML,@SchemaNodeOpen,'')

    SET @SchemaNodeClose = SUBSTRING(@strXML,CHARINDEX('</'+@schemanode+'>',@strXML,0),LEN('</'+@schemanode+'>'))

    SET @strXML = REPLACE(@strXML,@SchemaNodeClose,'')

    END

    BEGIN TRY

    EXEC sp_xml_preparedocument @idoc OUTPUT, @strXML

    END TRY

    BEGIN CATCH

    BEGIN

    IF @debug = 1

    BEGIN

    SELECT @strXML AS ModifiedXML

    SELECT CONVERT(XML,@strXML) AS FormattedXML

    END

    SELECT

    NULL AS RowNum

    ,NULL AS id

    ,NULL AS parentid

    ,NULL AS nodepath

    ,NULL AS nodetype

    ,NULL AS nodename

    ,NULL AS property

    ,NULL AS value

    ,NULL AS nodecontents

    ,'Invalid XML' AS XMLStatus

    RETURN

    END

    END CATCH

    ;WITH cte

    AS (

    SELECT

    CAST(p1.parentid AS INT) AS parentid

    ,CAST(p1.id AS INT) AS id

    FROM

    OPENXML (@idoc,@rootnode,2) AS p1

    UNION ALL

    SELECT

    CAST(p2.parentid AS INT) AS parentid

    ,CAST(p2.id AS INT) AS id

    FROM

    OPENXML (@idoc,@rootnode,2) AS p2

    JOIN

    cte

    ON CAST(cte.id AS INT) = CAST(p2.ParentID AS INT)

    WHERE

    CAST(p2.parentid AS INT) = @parentid

    )

    INSERT INTO #ChildList

    SELECT *

    FROM cte

    INSERT INTO #NodeList

    SELECT

    #ChildList.RowNum

    ,xmllist.id

    ,xmllist.parentid

    ,xmllist.nodetype

    ,xmllist.localname

    ,CAST(xmllist.[text] AS NVARCHAR(MAX)) AS [text]

    FROM #ChildList

    INNER JOIN

    OPENXML (@idoc,@rootnode,2) AS xmllist

    ON #ChildList.id = xmllist.id

    WHERE

    #ChildList.RowNum > 0

    --for testing

    --SELECT * FROM #NodeList

    --RETURN

    ;WITH RecursiveNodes(RowNum,id,parentid,nodepath,localname,[text],nodetype)

    AS (

    SELECT

    #NodeList.RowNum

    ,#NodeList.id

    ,#NodeList.parentid

    ,CAST('/' + REPLACE(REPLACE(REPLACE(REPLACE(#NodeList.localname,'&',''),'?',''),' ',''),'.','') AS NVARCHAR(255)) AS nodepath

    ,#NodeList.localname

    ,CAST(#NodeList.[text] AS NVARCHAR(MAX)) AS [text]

    ,0 AS nodetype

    FROM #ChildList

    INNER JOIN

    #NodeList

    ON #ChildList.id = #NodeList.id

    WHERE

    #NodeList.parentid IS NULL

    AND #ChildList.RowNum > 0

    AND #NodeList.RowNum > 0

    UNION ALL

    SELECT

    n.RowNum

    ,n.id

    ,n.parentid

    ,CAST(r.nodepath + '/'+ REPLACE(REPLACE(REPLACE(REPLACE(n.localname,'&',''),'?',''),' ',''),'.','') AS NVARCHAR(255)) AS nodepath

    ,n.localname

    ,n.[text]

    ,n.nodetype

    FROM #NodeList AS n

    INNER JOIN

    RecursiveNodes AS r

    ON n.parentid = r.id

    WHERE

    n.RowNum > 0

    AND r.RowNum > 0

    AND n.parentid >= 0

    )

    SELECT

    ROW_NUMBER() OVER (ORDER BY Result.RowNum) AS RowNum

    ,Result.id

    ,Result.parentid

    ,Result.nodepath

    ,Result.nodetype

    ,Result.nodename

    ,Result.property

    ,Result.value

    ,Result.nodecontents

    ,'OK' AS XMLStatus

    FROM

    (

    SELECT

    rn.RowNum

    ,rn.id

    ,rn.parentid

    ,rn.nodepath

    ,(CASE

    WHEN rn.nodetype = 0 THEN 'Root'

    WHEN rn.nodetype = 1 THEN 'Node'

    WHEN rn.nodetype = 2 THEN 'Property'

    ELSE 'Data'

    END) AS nodetype

    ,(CASE

    WHEN rn.nodetype = 0 THEN rn.localname

    WHEN rn.nodetype = 1 THEN rn.localname

    WHEN rn.nodetype = 2 THEN (SELECT TOP(1) localname FROM RecursiveNodes WHERE id = rn.parentid)

    ELSE NULL

    END) AS nodename

    ,(CASE

    WHEN rn.nodetype = 2 THEN rn.localname

    ELSE NULL

    END) AS property

    ,(CASE

    WHEN rn.nodetype = 2 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.id)

    ELSE NULL

    END) AS value

    ,(CASE

    WHEN rn.nodetype = 1 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.id)

    WHEN rn.nodetype = 2 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.parentid and [text] is not null)

    ELSE NULL

    END) AS nodecontents

    FROM

    RecursiveNodes AS rn

    WHERE

    rn.localname <> '#text'

    ) AS Result

    WHERE

    Result.id >= 0

    AND (@ShowAll = 0

    AND (Result.id = 0

    OR property IS NOT NULL

    OR value IS NOT NULL

    OR nodecontents IS NOT NULL))

    OR (@ShowAll = 1)

    END

     

     

Viewing 5 posts - 1 through 4 (of 4 total)

You must be logged in to reply to this topic. Login to reply