Home Forums Programming XML Shred XML with 4 level hierarchy into SQL Server table RE: Shred XML with 4 level hierarchy into SQL Server table

  • Hi Steven,

    Thank you very much, the solution works for me.

    Steven Willis (7/8/2013)


    Here's a stored procedure that will shred the XML into a table. I had to add a set of <body></body> tags below the root but otherwise your XML parsed perfectly.

    [The code for creating stored procedure is below with attached png file output sample.]

    EXEC dbo.ParseXML_ByXMLInput

    N'<root>

    <body>

    <elem10 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem10101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem101010>

    <elem101020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem10102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102030 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102040 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    <elem10102050 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem101020>

    </elem10>

    <elem15 Attr01="0.08" Attr02="-0.05" Attr03="9.00" Attr04="9.00" Attr05="7.00" Attr06="4.00">

    <elem151010 Attr01="0.03" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="1.00">

    <elem15101010 Attr01="0.02" Attr02="-0.01" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />

    <elem15101020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="0.00" />

    <elem15101030 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    <elem15101040 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    <elem15101050 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    </elem151010>

    <elem151020 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00">

    <elem15102010 Attr01="0.00" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="0.00" Attr06="0.00" />

    </elem151020>

    <elem151030 Attr01="0.02" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="3.00" Attr06="1.00">

    <elem15103010 Attr01="0.01" Attr02="-0.02" Attr03="9.00" Attr04="9.00" Attr05="1.00" Attr06="1.00" />

    <elem15103020 Attr01="0.01" Attr02="0.00" Attr03="9.00" Attr04="9.00" Attr05="2.00" Attr06="0.00" />

    </elem151030>

    </elem15>

    <elem20 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem201010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem20101010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem201010>

    <elem201020 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00">

    <elem20102010 Attr01="9.00" Attr02="9.00" Attr03="9.00" Attr04="9.00" Attr05="9.00" Attr06="9.00" />

    </elem201020>

    </elem20>

    </body>

    </root>'

    ,'root'

    ,'/body'

    ,1

    ,0

    [/code]

    CREATE PROCEDURE [dbo].[ParseXML_ByXMLInput]

    @strXML NVARCHAR(MAX)

    ,@schemanode NVARCHAR(255)

    ,@rootnode NVARCHAR(255)

    ,@showAll BIT = 0

    ,@debug BIT = 0

    AS

    BEGIN

    SET NOCOUNT ON

    DECLARE

    @idoc INT

    ,@id INT

    ,@parentid INT

    ,@SoapEnvOpen NVARCHAR(MAX)

    ,@SoapEnvClose NVARCHAR(MAX)

    ,@SoapBodyOpen NVARCHAR(MAX)

    ,@SoapBodyClose NVARCHAR(MAX)

    ,@SchemaNodeOpen NVARCHAR(MAX)

    ,@SchemaNodeClose NVARCHAR(MAX)

    ,@checkSoap INT

    ,@isSoap BIT = 0

    ,@checkSchema INT

    ,@isSchema BIT = 0

    ,@checkXMLHeader INT

    ,@isHeader BIT = 0

    ,@XMLHeader NVARCHAR(MAX)

    IF OBJECT_ID('tempdb..#ChildList') IS NOT NULL

    DROP TABLE #ChildList

    CREATE TABLE #ChildList (

    [RowNum] INT IDENTITY(1,1) NOT NULL,

    [parentid] INT NULL,

    [id] INT NULL,

    PRIMARY KEY (RowNum),

    UNIQUE (RowNum))

    IF OBJECT_ID('tempdb..#NodeList') IS NOT NULL

    DROP TABLE #NodeList

    CREATE TABLE #NodeList (

    [RowNum] INT NOT NULL,

    [id] INT NULL,

    [parentid] INT NULL,

    [nodetype] INT NULL,

    [localname] NVARCHAR(MAX) NULL,

    [text] NVARCHAR(MAX) NULL,

    PRIMARY KEY (RowNum),

    UNIQUE (RowNum))

    SET @id = 1

    SET @parentid = NULL

    /* Check to see if any XML exists */

    SET @strXML = NULLIF(@strXML,'')

    IF @strXML IS NULL

    BEGIN

    SELECT

    NULL AS RowNum

    ,NULL AS id

    ,NULL AS parentid

    ,NULL AS nodepath

    ,NULL AS nodetype

    ,NULL AS nodename

    ,NULL AS property

    ,NULL AS value

    ,NULL AS nodecontents

    ,'No XML to process' AS XMLStatus

    RETURN

    END

    ELSE

    BEGIN

    -- Get rid of tabs and extra spaces

    SELECT @strXML = dbo.svfRemoveExcessSpacesFromXML(@strXML)

    IF CHARINDEX('<',@strXML,1) > 0

    BEGIN

    SET @strXML = REPLACE(@strXML,'<','<')

    SET @strXML = REPLACE(@strXML,'>','>')

    IF @debug = 1

    SELECT @strXML AS ReplacedXML

    END

    END

    IF @debug = 1

    SELECT @strXML AS InputXML

    /* Check to see if the XML has a header */

    SET @checkXMLHeader = CHARINDEX('<?xml version',@strXML,0)

    IF @checkXMLHeader > 0

    SET @isHeader = 1

    /* If the XML has a header then remove it */

    IF @isHeader = 1

    BEGIN

    SET @XMLHeader = SUBSTRING(@strXML,CHARINDEX('<?xml version',@strXML,0),CHARINDEX('>',@strXML,0))

    SET @strXML = REPLACE(@strXML,@XMLHeader,'')

    END

    IF @debug = 1

    SELECT @XMLHeader AS XMLHeader

    /* Check to see if the XML has a SOAP wrapper */

    SET @checkSoap = CHARINDEX('<soapenv:Envelope',@strXML,0)

    IF @checkSoap > 0

    SET @isSoap = 1

    /* If the XML has a SOAP wrapper then remove it */

    IF @isSoap = 1

    BEGIN

    SET @SoapEnvOpen = SUBSTRING(@strXML,CHARINDEX('<soapenv:Envelope',@strXML,0),CHARINDEX('>',@strXML,0))

    SET @strXML = REPLACE(@strXML,@SoapEnvOpen,'')

    SET @SoapBodyOpen = SUBSTRING(@strXML,CHARINDEX('<soapenv:Body',@strXML,0),CHARINDEX('>',@strXML,0))

    SET @strXML = REPLACE(@strXML,@SoapBodyOpen,'')

    SET @SoapEnvClose = SUBSTRING(@strXML,CHARINDEX('</soapenv:Envelope>',@strXML,0),LEN('</soapenv:Envelope>'))

    SET @strXML = REPLACE(@strXML,@SoapEnvClose,'')

    SET @SoapBodyClose = SUBSTRING(@strXML,CHARINDEX('</soapenv:Body>',@strXML,0),LEN('</soapenv:Body>'))

    SET @strXML = REPLACE(@strXML,@SoapBodyClose,'')

    END

    IF @debug = 1

    BEGIN

    SELECT @SoapEnvOpen AS Soap_Wrapper

    SELECT @SoapBodyOpen AS Soap_Body

    SELECT @strXML AS XML_Without_SoapWrapper

    END

    --for testing

    --SELECT @strXML

    --RETURN

    /* Check to see if the XML has a schema definition node */

    SET @checkSchema = CHARINDEX('<'+@schemanode,@strXML,0)

    IF @checkSchema > 0

    SET @isSchema = 1

    /* If a schema definition node exists remove it */

    IF @isSchema = 1

    BEGIN

    SET @SchemaNodeOpen = SUBSTRING(@strXML,CHARINDEX('<'+@schemanode,@strXML,0),CHARINDEX('>',@strXML,0))

    SET @strXML = REPLACE(@strXML,@SchemaNodeOpen,'')

    SET @SchemaNodeClose = SUBSTRING(@strXML,CHARINDEX('</'+@schemanode+'>',@strXML,0),LEN('</'+@schemanode+'>'))

    SET @strXML = REPLACE(@strXML,@SchemaNodeClose,'')

    END

    BEGIN TRY

    EXEC sp_xml_preparedocument @idoc OUTPUT, @strXML

    END TRY

    BEGIN CATCH

    BEGIN

    IF @debug = 1

    BEGIN

    SELECT @strXML AS ModifiedXML

    SELECT CONVERT(XML,@strXML) AS FormattedXML

    END

    SELECT

    NULL AS RowNum

    ,NULL AS id

    ,NULL AS parentid

    ,NULL AS nodepath

    ,NULL AS nodetype

    ,NULL AS nodename

    ,NULL AS property

    ,NULL AS value

    ,NULL AS nodecontents

    ,'Invalid XML' AS XMLStatus

    RETURN

    END

    END CATCH

    ;WITH cte

    AS (

    SELECT

    CAST(p1.parentid AS INT) AS parentid

    ,CAST(p1.id AS INT) AS id

    FROM

    OPENXML (@idoc,@rootnode,2) AS p1

    UNION ALL

    SELECT

    CAST(p2.parentid AS INT) AS parentid

    ,CAST(p2.id AS INT) AS id

    FROM

    OPENXML (@idoc,@rootnode,2) AS p2

    JOIN

    cte

    ON CAST(cte.id AS INT) = CAST(p2.ParentID AS INT)

    WHERE

    CAST(p2.parentid AS INT) = @parentid

    )

    INSERT INTO #ChildList

    SELECT *

    FROM cte

    INSERT INTO #NodeList

    SELECT

    #ChildList.RowNum

    ,xmllist.id

    ,xmllist.parentid

    ,xmllist.nodetype

    ,xmllist.localname

    ,CAST(xmllist.[text] AS NVARCHAR(MAX)) AS [text]

    FROM #ChildList

    INNER JOIN

    OPENXML (@idoc,@rootnode,2) AS xmllist

    ON #ChildList.id = xmllist.id

    WHERE

    #ChildList.RowNum > 0

    --for testing

    --SELECT * FROM #NodeList

    --RETURN

    ;WITH RecursiveNodes(RowNum,id,parentid,nodepath,localname,[text],nodetype)

    AS (

    SELECT

    #NodeList.RowNum

    ,#NodeList.id

    ,#NodeList.parentid

    ,CAST('/' + REPLACE(REPLACE(REPLACE(REPLACE(#NodeList.localname,'&',''),'?',''),' ',''),'.','') AS NVARCHAR(255)) AS nodepath

    ,#NodeList.localname

    ,CAST(#NodeList.[text] AS NVARCHAR(MAX)) AS [text]

    ,0 AS nodetype

    FROM #ChildList

    INNER JOIN

    #NodeList

    ON #ChildList.id = #NodeList.id

    WHERE

    #NodeList.parentid IS NULL

    AND #ChildList.RowNum > 0

    AND #NodeList.RowNum > 0

    UNION ALL

    SELECT

    n.RowNum

    ,n.id

    ,n.parentid

    ,CAST(r.nodepath + '/'+ REPLACE(REPLACE(REPLACE(REPLACE(n.localname,'&',''),'?',''),' ',''),'.','') AS NVARCHAR(255)) AS nodepath

    ,n.localname

    ,n.[text]

    ,n.nodetype

    FROM #NodeList AS n

    INNER JOIN

    RecursiveNodes AS r

    ON n.parentid = r.id

    WHERE

    n.RowNum > 0

    AND r.RowNum > 0

    AND n.parentid >= 0

    )

    SELECT

    ROW_NUMBER() OVER (ORDER BY Result.RowNum) AS RowNum

    ,Result.id

    ,Result.parentid

    ,Result.nodepath

    ,Result.nodetype

    ,Result.nodename

    ,Result.property

    ,Result.value

    ,Result.nodecontents

    ,'OK' AS XMLStatus

    FROM

    (

    SELECT

    rn.RowNum

    ,rn.id

    ,rn.parentid

    ,rn.nodepath

    ,(CASE

    WHEN rn.nodetype = 0 THEN 'Root'

    WHEN rn.nodetype = 1 THEN 'Node'

    WHEN rn.nodetype = 2 THEN 'Property'

    ELSE 'Data'

    END) AS nodetype

    ,(CASE

    WHEN rn.nodetype = 0 THEN rn.localname

    WHEN rn.nodetype = 1 THEN rn.localname

    WHEN rn.nodetype = 2 THEN (SELECT TOP(1) localname FROM RecursiveNodes WHERE id = rn.parentid)

    ELSE NULL

    END) AS nodename

    ,(CASE

    WHEN rn.nodetype = 2 THEN rn.localname

    ELSE NULL

    END) AS property

    ,(CASE

    WHEN rn.nodetype = 2 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.id)

    ELSE NULL

    END) AS value

    ,(CASE

    WHEN rn.nodetype = 1 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.id)

    WHEN rn.nodetype = 2 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.parentid and [text] is not null)

    ELSE NULL

    END) AS nodecontents

    FROM

    RecursiveNodes AS rn

    WHERE

    rn.localname <> '#text'

    ) AS Result

    WHERE

    Result.id >= 0

    AND (@ShowAll = 0

    AND (Result.id = 0

    OR property IS NOT NULL

    OR value IS NOT NULL

    OR nodecontents IS NOT NULL))

    OR (@ShowAll = 1)

    END