|
|
|
Forum Newbie
      
Group: General Forum Members
Last Login: Tuesday, February 19, 2013 5:27 AM
Points: 6,
Visits: 20
|
|
I have this code to import a XML with 50mb, that contains around 26.280 entities. But this is taking to long, is with 50 minutes and still running, is that ok? or this code can be optimizing ?
INSERT INTO OSUSR_DFP_PEP_ENTITIES (ENT_ID, NAME, FIRSTNAME, LASTNAME, PREFIX, SUFFIX, AKA, NAMESOURCE, PARENTID, GOVDESIGNATION, ENTRYTYPE, ENTRYCATEGORY, ENTRYSUBCATEGORY, ORGANIZATION, POSITIONS, REMARKS, DOB, POB, COUNTRY, EXPIRATIONDATE, EFFECTIVEDATE, PICTUREFILE, LINKEDTO, RELATED_ID, SOURCEWEBLINK, TOUCHDATE, DIRECTID, PASSPORTID, NATIONALID, OTHERID, DOB2, ENTLEVEL, MASTERID, WATCH, RELATIONSHIPS) SELECT X.product.query('Ent_ID').value('.', 'INT'), X.product.query('Name').value('.', 'nvarchar(1000)'), X.product.query('FirstName').value('.', 'nvarchar(500)'), X.product.query('LastName').value('.', 'nvarchar(500)'), X.product.query('Prefix').value('.', 'nvarchar(500)'), X.product.query('Suffix').value('.', 'nvarchar(500)'), X.product.query('Aka').value('.', 'nvarchar(500)'), X.product.query('NameSource').value('.', 'nvarchar(500)'), X.product.query('ParentID').value('.', 'INT'), X.product.query('GovDesignation').value('.', 'nvarchar(500)'), X.product.query('EntryType').value('.', 'nvarchar(500)'), X.product.query('EntryCategory').value('.', 'nvarchar(500)'), X.product.query('EntrySubCategory').value('.', 'nvarchar(500)'), X.product.query('Organization').value('.', 'nvarchar(500)'), X.product.query('Positions').value('.', 'nvarchar(500)'), X.product.query('Remarks').value('.', 'nvarchar(max)'), X.product.query('DOB').value('.', 'nvarchar(500)'), X.product.query('POB').value('.', 'nvarchar(500)'), X.product.query('Country').value('.', 'nvarchar(500)'), X.product.query('ExpirationDate').value('.', 'nvarchar(500)'), X.product.query('EffectiveDate').value('.', 'nvarchar(500)'), X.product.query('PictureFile').value('.', 'nvarchar(500)'), X.product.query('LinkedTo').value('.', 'nvarchar(500)'), X.product.query('Related_ID').value('.', 'INT'), X.product.query('SourceWebLink').value('.', 'nvarchar(max)'), X.product.query('TouchDate').value('.', 'nvarchar(500)'), X.product.query('DirectID').value('.', 'nvarchar(500)'), X.product.query('PassportID').value('.', 'nvarchar(500)'), X.product.query('NationalID').value('.', 'nvarchar(500)'), X.product.query('OtherID').value('.', 'nvarchar(50)'), X.product.query('DOB2').value('.', 'nvarchar(500)'), X.product.query('EntLevel').value('.', 'nvarchar(500)'), X.product.query('MasterID').value('.', 'int'), X.product.query('Watch').value('.', 'bit'), X.product.query('Relationships').value('.', 'bit') FROM ( SELECT CAST(x AS XML) FROM OPENROWSET( BULK 'C:\temp\teste.xml', SINGLE_BLOB) AS T(x) ) AS T(x) CROSS APPLY x.nodes('Export/Entities') AS X(product);
|
|
|
|
|
Ten Centuries
      
Group: General Forum Members
Last Login: 2 days ago @ 4:46 PM
Points: 1,287,
Visits: 3,850
|
|
Just a couple of questions/suggestions
1. How long does this take ? SELECT CAST(x AS XML) FROM OPENROWSET( BULK 'C:\temp\teste.xml', SINGLE_BLOB) AS T(x)
2. Have you considered changing all the .value('.','varchar(whatever)') to .value('(./text())[1]','varchar(whatever)') ? In many cases this tiny tweak can make a big difference.
MM
|
|
|
|
|
Forum Newbie
      
Group: General Forum Members
Last Login: Tuesday, February 19, 2013 5:27 AM
Points: 6,
Visits: 20
|
|
Hi, to the first question, it takes 2 seconds.
To the other question, I'm testing, and return sooner with the answer i'm testing with this code:
DECLARE @X XML
SELECT @X = CAST(x AS XML) FROM OPENROWSET( BULK 'C:\temp\teste.xml', SINGLE_BLOB) AS T(x)
INSERT INTO OSUSR_DFP_PEP_ENTITIES (ENT_ID, NAME, FIRSTNAME, LASTNAME, PREFIX, SUFFIX, AKA, NAMESOURCE, PARENTID, GOVDESIGNATION, ENTRYTYPE, ENTRYCATEGORY, ENTRYSUBCATEGORY, ORGANIZATION, POSITIONS, REMARKS, DOB, POB, COUNTRY, EXPIRATIONDATE, EFFECTIVEDATE, PICTUREFILE, LINKEDTO, RELATED_ID, SOURCEWEBLINK, TOUCHDATE, DIRECTID, PASSPORTID, NATIONALID, OTHERID, DOB2, ENTLEVEL, MASTERID, WATCH, RELATIONSHIPS) SELECT X.product.query('Ent_ID').value('(./text())[1]', 'INT'), X.product.query('Name').value('(./text())[1]', 'nvarchar(1000)'), X.product.query('FirstName').value('(./text())[1]', 'nvarchar(500)'), X.product.query('LastName').value('(./text())[1]', 'nvarchar(500)'), X.product.query('Prefix').value('(./text())[1]', 'nvarchar(500)'), X.product.query('Suffix').value('(./text())[1]', 'nvarchar(500)'), X.product.query('Aka').value('(./text())[1]', 'nvarchar(500)'), X.product.query('NameSource').value('(./text())[1]', 'nvarchar(500)'), X.product.query('ParentID').value('(./text())[1]', 'INT'), X.product.query('GovDesignation').value('(./text())[1]', 'nvarchar(500)'), X.product.query('EntryType').value('(./text())[1]', 'nvarchar(500)'), X.product.query('EntryCategory').value('(./text())[1]', 'nvarchar(500)'), X.product.query('EntrySubCategory').value('(./text())[1]', 'nvarchar(500)'), X.product.query('Organization').value('(./text())[1]', 'nvarchar(500)'), X.product.query('Positions').value('(./text())[1]', 'nvarchar(500)'), X.product.query('Remarks').value('(./text())[1]', 'nvarchar(max)'), X.product.query('DOB').value('(./text())[1]', 'nvarchar(500)'), X.product.query('POB').value('(./text())[1]', 'nvarchar(500)'), X.product.query('Country').value('(./text())[1]', 'nvarchar(500)'), X.product.query('ExpirationDate').value('(./text())[1]', 'nvarchar(500)'), X.product.query('EffectiveDate').value('(./text())[1]', 'nvarchar(500)'), X.product.query('PictureFile').value('(./text())[1]', 'nvarchar(500)'), X.product.query('LinkedTo').value('(./text())[1]', 'nvarchar(500)'), X.product.query('Related_ID').value('(./text())[1]', 'INT'), X.product.query('SourceWebLink').value('(./text())[1]', 'nvarchar(max)'), X.product.query('TouchDate').value('(./text())[1]', 'nvarchar(500)'), X.product.query('DirectID').value('(./text())[1]', 'nvarchar(500)'), X.product.query('PassportID').value('(./text())[1]', 'nvarchar(500)'), X.product.query('NationalID').value('(./text())[1]', 'nvarchar(500)'), X.product.query('OtherID').value('(./text())[1]', 'nvarchar(50)'), X.product.query('DOB2').value('(./text())[1]', 'nvarchar(500)'), X.product.query('EntLevel').value('(./text())[1]', 'nvarchar(500)'), X.product.query('MasterID').value('(./text())[1]', 'int'), X.product.query('Watch').value('(./text())[1]', 'bit'), X.product.query('Relationships').value('(./text())[1]', 'bit') FROM @X.nodes('Export/Entities') AS X(product);
|
|
|
|
|
Forum Newbie
      
Group: General Forum Members
Last Login: Tuesday, February 19, 2013 5:27 AM
Points: 6,
Visits: 20
|
|
Its working with this way, takes 42 seconds to import 27.000 entities
DECLARE @X XML
SELECT @X = CAST(x AS XML) FROM OPENROWSET( BULK 'C:\temp\teste.xml', SINGLE_BLOB) AS T(x)
INSERT INTO OSUSR_DFP_PEP_ENTITIES (ENT_ID, NAME, FIRSTNAME, LASTNAME, PREFIX, SUFFIX, AKA, NAMESOURCE, PARENTID, GOVDESIGNATION, ENTRYTYPE, ENTRYCATEGORY, ENTRYSUBCATEGORY, ORGANIZATION, POSITIONS, REMARKS, DOB, POB, COUNTRY, EXPIRATIONDATE, EFFECTIVEDATE, PICTUREFILE, LINKEDTO, RELATED_ID, SOURCEWEBLINK, TOUCHDATE, DIRECTID, PASSPORTID, NATIONALID, OTHERID, DOB2, ENTLEVEL, MASTERID, WATCH, RELATIONSHIPS) SELECT X.product.value('(Ent_ID/text())[1]', 'INT'), X.product.value('(Name/text())[1]', 'nvarchar(1000)'), X.product.value('(FirstName/text())[1]', 'nvarchar(500)'), X.product.value('(LastName/text())[1]', 'nvarchar(500)'), X.product.value('(Prefix/text())[1]', 'nvarchar(500)'), X.product.value('(Suffix/text())[1]', 'nvarchar(500)'),
X.product.value('(Aka/text())[1]', 'nvarchar(500)'), X.product.value('(NameSource/text())[1]', 'nvarchar(500)'), X.product.value('(ParentID/text())[1]', 'INT'), X.product.value('(GovDesignation/text())[1]', 'nvarchar(500)'), X.product.value('(EntryType/text())[1]', 'nvarchar(500)'), X.product.value('(EntryCategory/text())[1]', 'nvarchar(500)'), X.product.value('(EntrySubCategory/text())[1]', 'nvarchar(500)'),
X.product.value('(Organization/text())[1]', 'nvarchar(500)'), X.product.value('(Positions/text())[1]', 'nvarchar(500)'), X.product.value('(Remarks/text())[1]', 'nvarchar(max)'), X.product.value('(DOB/text())[1]', 'nvarchar(500)'), X.product.value('(POB/text())[1]', 'nvarchar(500)'), X.product.value('(Country/text())[1]', 'nvarchar(500)'), X.product.value('(ExpirationDate/text())[1]', 'nvarchar(500)'), X.product.value('(EffectiveDate/text())[1]', 'nvarchar(500)'),
X.product.value('(PictureFile/text())[1]', 'nvarchar(500)'), X.product.value('(LinkedTo/text())[1]', 'nvarchar(500)'), X.product.value('(Related_ID/text())[1]', 'INT'), X.product.value('(SourceWebLink/text())[1]', 'nvarchar(max)'), X.product.value('(TouchDate/text())[1]', 'nvarchar(500)'), X.product.value('(DirectID/text())[1]', 'nvarchar(500)'), X.product.value('(PassportID/text())[1]', 'nvarchar(500)'),
X.product.value('(NationalID/text())[1]', 'nvarchar(500)'), X.product.value('(OtherID/text())[1]', 'nvarchar(50)'), X.product.value('(DOB2/text())[1]', 'nvarchar(500)'), X.product.value('(EntLevel/text())[1]', 'nvarchar(500)'), X.product.value('(MasterID/text())[1]', 'INT'), X.product.value('(Watch/text())[1]'), 'bit', X.product.value('(Relationships/text())[1]', 'bit')
FROM @X.nodes('Export/Entities') AS X(product);
|
|
|
|
|
Ten Centuries
      
Group: General Forum Members
Last Login: 2 days ago @ 4:46 PM
Points: 1,287,
Visits: 3,850
|
|
piortasd (1/14/2013) Its working with this way, takes 42 seconds to import 27.000 entities [quote]DECLARE @X XML
SELECT @X = CAST(x AS XML) FROM OPENROWSET( BULK 'C:\temp\teste.xml', SINGLE_BLOB) AS T(x)
INSERT INTO OSUSR_DFP_PEP_ENTITIES (ENT_ID, NAME, FIRSTNAME, LASTNAME, PREFIX, SUFFIX, AKA, NAMESOURCE, PARENTID, GOVDESIGNATION, ENTRYTYPE, ENTRYCATEGORY, ENTRYSUBCATEGORY, ORGANIZATION, POSITIONS, REMARKS, DOB, POB, COUNTRY, EXPIRATIONDATE, EFFECTIVEDATE, PICTUREFILE, LINKEDTO, RELATED_ID, SOURCEWEBLINK, TOUCHDATE, DIRECTID, PASSPORTID, NATIONALID, OTHERID, DOB2, ENTLEVEL, MASTERID, WATCH, RELATIONSHIPS) SELECT X.product.value('(Ent_ID/text())[1]', 'INT'), X.product.value('(Name/text())[1]', 'nvarchar(1000)'), X.product.value('(FirstName/text())[1]', 'nvarchar(500)'), X.product.value('(LastName/text())[1]', 'nvarchar(500)'), X.product.value('(Prefix/text())[1]', 'nvarchar(500)'), X.product.value('(Suffix/text())[1]', 'nvarchar(500)'), ...
Well spotted! I hadn't looked closely enough to see the .query(...).value clauses, but good to know you have it solved
MM
|
|
|
|
|
SSC Veteran
      
Group: General Forum Members
Last Login: Yesterday @ 11:23 AM
Points: 283,
Visits: 1,237
|
|
For future reference, you may want to parse the entire XML string into its own table in one pass and then construct your query around that to join with the proper columns.
This procedure will parse your XML into a table using the OPENXML method:
CREATE PROCEDURE dbo.ParseXML
@strXML AS XML ,@rootnode NVARCHAR(255)
AS BEGIN
/*
EXEC dbo.ParseXML '<AccountDetailsRsp AccountNum="1" AccountStatus="AccountStatus1"> <PlayerInfo PlayerID="1" FirstName="FirstName1" LastName="LastName1"> <AddressList> <PlayerAddress> <Address AddressType="primary" City="City1" State="State1" Zip="Zip1"/> <FutureUse>Example Text1</FutureUse> <Phone PhoneNumber="PhoneNumber1" PhoneType="Type1" /> <Phone PhoneNumber="PhoneNumber2" PhoneType="Type2" /> </PlayerAddress> <PlayerAddress> <Address AddressType="billing" City="City1" State="State1" Zip="Zip1"/> <FutureUse>Example Text2</FutureUse> <Phone PhoneNumber="PhoneNumber1" PhoneType="Type1" /> <Phone PhoneNumber="PhoneNumber2" PhoneType="Type2" /> </PlayerAddress> </AddressList> </PlayerInfo> </AccountDetailsRsp>' ,'AccountDetailsRsp'
EXEC dbo.ParseXML '<items> <item id="0001" type="Donut"> <name>Cake</name> <ppu>0.55</ppu> <batter id="1001">Regular</batter> <batter id="1002">Chocolate</batter> <batter id="1003">Blueberry</batter> <topping id="5001">None</topping> <topping id="5002">Glazed</topping> <topping id="5005">Sugar</topping> <topping id="5006">Sprinkles</topping> <topping id="5003">Chocolate</topping> <topping id="5004">Maple</topping> </item> </items>' ,'items'
*/ SET NOCOUNT ON
DECLARE @strText AS NVARCHAR(MAX) ,@idoc INT ,@id INT ,@parentid INT
IF OBJECT_ID('tempdb..#ChildList') IS NOT NULL DROP TABLE #ChildList
CREATE TABLE #ChildList ( [RowNum] INT IDENTITY(1,1) NOT NULL, [parentid] INT NULL, [id] INT NULL, PRIMARY KEY (RowNum), UNIQUE (RowNum))
IF OBJECT_ID('tempdb..#NodeList') IS NOT NULL DROP TABLE #NodeList
CREATE TABLE #NodeList ( [RowNum] INT NOT NULL, [id] INT NULL, [parentid] INT NULL, [nodetype] INT NULL, [localname] NVARCHAR(MAX) NULL, [text] NVARCHAR(MAX) NULL, PRIMARY KEY (RowNum), UNIQUE (RowNum))
SET @id = 1 SET @parentid = NULL
/* Get rid of tabs and extra spaces */
SET @strText = CAST(@strXML AS NVARCHAR(MAX))
SET @strText = REPLACE( REPLACE( REPLACE( REPLACE( @strText ,' ',' '+CHAR(7)) ,CHAR(7)+' ','') ,CHAR(7),'') ,CHAR(9),' ') SET @strXML = CONVERT(XML,@strText)
/* Validate the XML */ EXEC sp_xml_preparedocument @idoc OUTPUT, @strXML /* Parse the XML data */
;WITH cte AS ( SELECT CAST(p1.parentid AS INT) AS parentid ,CAST(p1.id AS INT) AS id FROM OPENXML (@idoc,@rootnode,2) AS p1
UNION ALL SELECT CAST(p2.parentid AS INT) AS parentid ,CAST(p2.id AS INT) AS id FROM OPENXML (@idoc,@rootnode,2) AS p2 JOIN cte ON CAST(cte.id AS INT) = CAST(p2.ParentID AS INT) WHERE CAST(p2.parentid AS INT) = @parentid ) INSERT INTO #ChildList SELECT * FROM cte INSERT INTO #NodeList SELECT #ChildList.RowNum ,xmllist.id ,xmllist.parentid ,xmllist.nodetype ,xmllist.localname ,CAST(xmllist.[text] AS NVARCHAR(MAX)) AS [text] FROM #ChildList INNER JOIN OPENXML (@idoc,@rootnode,2) AS xmllist ON #ChildList.id = xmllist.id WHERE #ChildList.RowNum > 0 /* Display the results */ ;WITH RecursiveNodes(RowNum,id,parentid,nodepath,localname,[text],nodetype) AS ( SELECT #NodeList.RowNum ,#NodeList.id ,#NodeList.parentid ,CAST('/' + REPLACE(REPLACE(REPLACE(REPLACE(#NodeList.localname,'&',''),'?',''),' ',''),'.','') AS NVARCHAR(255)) AS nodepath ,#NodeList.localname ,CAST(#NodeList.[text] AS NVARCHAR(MAX)) AS [text] ,0 AS nodetype FROM #ChildList INNER JOIN #NodeList ON #ChildList.id = #NodeList.id WHERE #NodeList.parentid IS NULL AND #ChildList.RowNum > 0 AND #NodeList.RowNum > 0 UNION ALL
SELECT n.RowNum ,n.id ,n.parentid ,CAST(r.nodepath + '/'+ REPLACE(REPLACE(REPLACE(REPLACE(n.localname,'&',''),'?',''),' ',''),'.','') AS NVARCHAR(255)) AS nodepath ,n.localname ,n.[text] ,n.nodetype FROM #NodeList AS n INNER JOIN RecursiveNodes AS r ON n.parentid = r.id WHERE n.RowNum > 0 AND r.RowNum > 0 AND n.parentid >= 0 ) SELECT ROW_NUMBER() OVER (ORDER BY Result.RowNum) AS RowNum ,Result.id ,Result.parentid ,Result.nodepath ,Result.nodetype ,Result.nodename ,Result.property ,Result.value ,Result.nodecontents FROM ( SELECT rn.RowNum ,rn.id ,rn.parentid ,rn.nodepath ,(CASE WHEN rn.nodetype = 0 THEN 'Root' WHEN rn.nodetype = 1 THEN 'Node' WHEN rn.nodetype = 2 THEN 'Property' ELSE 'Data' END) AS nodetype ,(CASE WHEN rn.nodetype = 0 THEN rn.localname WHEN rn.nodetype = 1 THEN rn.localname WHEN rn.nodetype = 2 THEN (SELECT TOP(1) localname FROM RecursiveNodes WHERE id = rn.parentid) ELSE NULL END) AS nodename ,(CASE WHEN rn.nodetype = 2 THEN rn.localname ELSE NULL END) AS property ,(CASE WHEN rn.nodetype = 2 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.id) ELSE NULL END) AS value ,(CASE WHEN rn.nodetype = 1 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.id) WHEN rn.nodetype = 2 THEN (SELECT TOP(1) [text] FROM RecursiveNodes WHERE parentid = rn.parentid and [text] is not null) ELSE NULL END) AS nodecontents FROM RecursiveNodes AS rn WHERE rn.localname <> '#text' ) AS Result WHERE Result.id >= 0 AND (Result.id = 0 OR property IS NOT NULL OR value IS NOT NULL OR nodecontents IS NOT NULL) END
Sample output for the second XML string in the examples above (sorry that the cols don't line up):
RowNum id parentid nodepath nodetype nodename property value nodecontents 1 0 NULL /items Root items NULL NULL NULL 2 3 2 /items/item/id Property item id 0001 NULL 3 4 2 /items/item/type Property item type Donut NULL 4 5 2 /items/item/name Node name NULL NULL Cake 5 6 2 /items/item/ppu Node ppu NULL NULL 0.55 6 8 7 /items/item/batter/id Property batter id 1001 Regular 7 11 10 /items/item/batter/id Property batter id 1002 Chocolate 8 14 13 /items/item/batter/id Property batter id 1003 Blueberry 9 17 16 /items/item/topping/id Property topping id 5001 None 10 20 19 /items/item/topping/id Property topping id 5002 Glazed 11 23 22 /items/item/topping/id Property topping id 5005 Sugar 12 26 25 /items/item/topping/id Property topping id 5006 Sprinkles 13 29 28 /items/item/topping/id Property topping id 5003 Chocolate 14 32 31 /items/item/topping/id Property topping id 5004 Maple
|
|
|
|