Help needed to decode comma delimited string and insert into tables

Question

Help needed to decode comma delimited string and insert into tables

JJH82

SSC Enthusiast

Points: 134
More actions
February 17, 2011 at 5:55 pm

#246897

Hi all,
Recently, I was working on turning structured audit string into a table
sample string 1:
,2 Column_Name "Col1" "Col2" , 2 New_Column_Value "A" "B" , 2 Old_Column_Value "a" "b"
sample result 1:
field_type field_name new_value old_value
column Col1 A a
column Col2 B b
sample string 2:
,2 Field_Name "F1" "F2", 2 New_Field_Name "F1" "F2", 2 Old_Field_Name "f1" "f2" ,1 Column_Name "Col3" ,1 New_Column_Value "C", 1 Old_Column_Value "c"
sample result 2:
field_type field_name new_value old_value
field F1 F1 f1
field F2 F2 f2
column Col3 C c
the string does not limit the number of field or column changes, so it can be 100,000 :sick:
I have a script on hand that uses cursor which takes an awful long time. I also tried tally table, but it does not fit.
Has anyone faced this issue before? or can anyone help me? thanks

Viewing 15 posts - 1 through 15 (of 40 total)

You must be logged in to reply to this topic. Login to reply

Jeff Moden SSC Guru Points: 1003851 More actions · Answer 1

jinghua.ji (2/17/2011)
Hi all,
Recently, I was working on turning structured audit string into a table
sample string 1:
,2 Column_Name "Col1" "Col2" , 2 New_Column_Value "A" "B" , 2 Old_Column_Value "a" "b"
sample result 1:
field_type field_name new_value old_value
column Col1 A a
column Col2 B b
sample string 2:
,2 Field_Name "F1" "F2", 2 New_Field_Name "F1" "F2", 2 Old_Field_Name "f1" "f2" ,1 Column_Name "Col3" ,1 New_Column_Value "C", 1 Old_Column_Value "c"
sample result 2:
field_type field_name new_value old_value
field F1 F1 f1
field F2 F2 f2
column Col3 C c
the string does not limit the number of field or column changes, so it can be 100,000 :sick:
I have a script on hand that uses cursor which takes an awful long time. I also tried tally table, but it does not fit.
Has anyone faced this issue before? or can anyone help me? thanks

No problem. Please post your Tally Table based code (because it will more closely identify the goal of the code) and let's have a go at it.

--Jeff Moden

RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
First step towards the paradigm shift of writing Set Based code:
________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

Change is inevitable... Change for the better is not.

Helpful Links:
How to post code problems
How to Post Performance Problems
Create a Tally Function (fnTally)

JJH82 SSC Enthusiast Points: 134 More actions · Answer 2

a table called CLOB contains

ROW_ID (pk)

PARSE_CLOB (where the strings sit)

--=============================================================================

-- Setup

--=============================================================================

USE TestDB --DB that everyone has where we can cause no harm

SET NOCOUNT ON --Supress the auto-display of rowcounts for appearance/speed

DECLARE @StartTime DATETIME --Timer to measure total duration

SET @StartTime = GETDATE() --Start the timer

--=============================================================================

-- Create and populate a Tally table

--=============================================================================

--===== Conditionally drop

IF OBJECT_ID('dbo.Tally') IS NOT NULL

DROP TABLE dbo.Tally

--===== Create and populate the Tally table on the fly

SELECT TOP 11000 --equates to more than 30 years of dates

IDENTITY(int,1,1) AS N INTO dbo.Tally

FROM Master.dbo.SysColumns sc1,

Master.dbo.SysColumns sc2

--===== Add a Primary Key to maximize performance

ALTER TABLE dbo.Tally

ADD CONSTRAINT PK_Tally_N

PRIMARY KEY CLUSTERED (N) WITH FILLFACTOR = 100

--===== Let the public use it

-- GRANT SELECT, REFERENCES ON dbo.Tally TO PUBLIC

--===== Display the total duration

SELECT STR(DATEDIFF(ms,@StartTime,GETDATE())) + ' Milliseconds duration'

select * from Tally

-----------------------------------------------------------

IF EXISTS (

SELECT *

FROM sysobjects

WHERE id = object_id(N'[dbo].[SPLIT_CLOB]'))

drop table [dbo].[SPLIT_CLOB]

create table SPLIT_CLOB

(ROW_ID nvarchar(15),

SPLIT_CLOB nvarchar(max))

insert into SPLIT_CLOB

SELECT clob.ROW_ID,

SUBSTRING(clob.PARSE_CLOB+',',N+1,CHARINDEX(',',clob.PARSE_CLOB+',',N+1)-N-1) AS Value

FROM dbo.Tally t

CROSS JOIN CLOB clob

WHERE N < LEN(clob.PARSE_CLOB+',')

AND SUBSTRING(clob.PARSE_CLOB+',',N,1) = ','

Jeff Moden SSC Guru Points: 1003851 More actions · Answer 3

I got a late start on this one. It's after 2AM here and I have to be up-and-at-em in 3 hours. I apologize but, unless someone else wants to jump in, I won't be able to get to this until afte 7PM later on today.

--Jeff Moden

RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
First step towards the paradigm shift of writing Set Based code:
________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

Change is inevitable... Change for the better is not.

Helpful Links:
How to post code problems
How to Post Performance Problems
Create a Tally Function (fnTally)

kumar20 SSC Veteran Points: 261 More actions · Answer 4

Not sure abt performance. Replace table variables with temp tables with indexes for large data. Try this SET NOCOUNT ON

DECLARE @T_Data TABLE

(

ID INT IDENTITY(1, 1), Data NVARCHAR(MAX)

)

INSERT INTO @T_Data

SELECT ',2 Column_Name "Col1" "Col2" , 2 New_Column_Value "A" "B", 2 Old_Column_Value "a" "b"'

UNION

SELECT ',2 Field_Name "F1" "F2", 2 New_Field_Name "F1" "F2", 2 Old_Field_Name "f1" "f2" ,1 Column_Name "Col3" ,1 New_Column_Value "C", 1 Old_Column_Value "c"'

DECLARE @T_FirstLevel TABLE

(

Lvl1ID INT IDENTITY(1, 1), ID INT, Lvl1Data NVARCHAR(MAX)

)

INSERT INTO @T_FirstLevel

SELECT ID, LTRIM(T2.Split.value('.', 'VARCHAR(100)')) AS 'Data'

FROM (SELECT ID, CONVERT(XML, '<d>' + REPLACE(Data, ',', '</d><d>') + '</d>') AS XMLData FROM @T_Data) A

CROSS APPLY XMLData.nodes('/d') as T2(Split)

WHERE LTRIM(T2.Split.value('.', 'VARCHAR(100)')) <> ''

DECLARE @T_SecondLevel TABLE

(

Lvl2ID INT IDENTITY(1, 1), Lvl1ID INT, ID INT, Lvl2Data NVARCHAR(MAX),

ColRow INT

)

INSERT INTO @T_SecondLevel

SELECT Lvl1ID, ID, LTRIM(T2.Split.value('.', 'VARCHAR(100)')) AS 'Data'

, ROW_NUMBER() OVER (PARTITION BY Lvl1ID ORDER BY Lvl1ID, ID) AS ColRow

FROM (SELECT Lvl1ID, ID, CONVERT(XML, '<d>' + REPLACE(Lvl1Data, ' ', '</d><d>') + '</d>') AS XMLData FROM @T_FirstLevel) A

CROSS APPLY XMLData.nodes('/d') as T2(Split)

WHERE LTRIM(T2.Split.value('.', 'VARCHAR(100)')) <> ''

DECLARE @T_Final TABLE

(

Lvl2ID INT, Lvl1ID INT, ID INT, Lvl2Data NVARCHAR(MAX), ColRow INT,

Cnt INT, GrpCnt INT, Batch INT

)

INSERT INTO @T_Final

SELECT T.*, (N.number + 1) AS GrpCnt, NULL AS Batch

FROM (SELECT

Lvl2ID, Lvl1ID, ID, Lvl2Data

, CASE WHEN ColRow > 3 THEN 3 ELSE ColRow END AS ColRow

, COUNT(CASE WHEN PATINDEX('%"%', Lvl2Data) > 0 THEN 1 ELSE NULL END) OVER (PARTITION BY lvl1ID) AS Cnt

FROM@T_SecondLevel) T

LEFT JOIN master..spt_values N ON (N.type = 'p' AND N.number <= (T.Cnt - 1) AND T.ColRow < 3)

ORDER BY T.Lvl2ID

UPDATE F1

SET GrpCnt = F2.Row

FROM @T_Final F1

INNER JOIN (SELECT *, ROW_NUMBER() OVER (PARTITION BY lvl1ID ORDER BY Lvl2ID) AS Row

FROM @T_Final WHERE GrpCnt IS NULL) F2 ON F1.Lvl2ID = F2.Lvl2ID

WHERE F1.GrpCnt IS NULL

UPDATE F1

SET Batch = F2.Lvl2Data

FROM @T_Final F1

INNER JOIN @T_Final F2 ON F1.lvl1ID = F2.lvl1ID

AND F2.ColRow = 1

SELECT F1.Batch

, MAX(CASE WHEN F1.Lvl2Data LIKE 'Column%' THEN 'Column'

WHEN F1.Lvl2Data LIKE 'Field%' THEN 'Field' END) as 'FieldType'

, MAX(CASE WHEN F1.Lvl2Data IN ('Column_Name', 'Field_Name') AND F2.ColRow = 3 THEN F2.Lvl2Data END) as 'FieldName'

, MAX(CASE WHEN F1.Lvl2Data IN ('New_Column_Value', 'New_Field_Name') AND F2.ColRow = 3 THEN F2.Lvl2Data END) AS 'NewValue'

, MAX(CASE WHEN F1.Lvl2Data IN ('Old_Column_Value', 'Old_Field_Name') AND F2.ColRow = 3 THEN F2.Lvl2Data END) AS 'OldValue'

FROM @T_Final F1

INNER JOIN @T_Final F2 ON F1.Lvl1ID = F2.Lvl1ID AND F1.GrpCnt = F2.GrpCnt AND F1.Batch = F2.Batch

GROUP BY F1.ID, F1.GrpCnt, F1.Batch

ORDER BY 1, 2

JJH82 SSC Enthusiast Points: 134 More actions · Answer 5

thanks kumar20, you code is working, However, I forgot to mention the fact that new value and old value can contain multiple words.

e.g. "new value" "Old value"

I am modifying your code to suit my needs. I will test the performance after that. 🙂

---------------

updates: there is a logic error, when there are equal number of field and colummn changes, the procedure will ignore the column changes.

I suspect that it is caused by MAX().

sample string 3:

,1 Field_Name "F3", 1 New_Field_Name "F3", 1 Old_Field_Name "f3" ,1 Column_Name "Col6" ,1 New_Column_Value "F", 1 Old_Column_Value "f"

kumar20 SSC Veteran Points: 261 More actions · Answer 6

I modified the code to fix the issue. ( Assumption is after every 3 commas(,) new set begins)

SET NOCOUNT ON

DECLARE @T_Data TABLE

(

ID INT IDENTITY(1, 1), Data NVARCHAR(MAX)

)

INSERT INTO @T_Data

SELECT ',2 Column_Name "Col1" "Col2" , 2 New_Column_Value "A" "B", 2 Old_Column_Value "a" "b"'

UNION

SELECT ',2 Field_Name "F1" "F2", 2 New_Field_Name "F1" "F2", 2 Old_Field_Name "f1" "f2" ,1 Column_Name "Col3" ,1 New_Column_Value "C", 1 Old_Column_Value "c"'

UNION

SELECT ',1 Field_Name "F3", 1 New_Field_Name "F3", 1 Old_Field_Name "f3" ,1 Column_Name "Col6" ,1 New_Column_Value "F", 1 Old_Column_Value "f"'

DECLARE @T_FirstLevel TABLE

(

Lvl1ID INT IDENTITY(1, 1), ID INT, Lvl1Data NVARCHAR(MAX)

)

INSERT INTO @T_FirstLevel

SELECT ID, LTRIM(T2.Split.value('.', 'VARCHAR(100)')) AS 'Data'

FROM (SELECT ID, CONVERT(XML, '<d>' + REPLACE(Data, ',', '</d><d>') + '</d>') AS XMLData FROM @T_Data) A

CROSS APPLY XMLData.nodes('/d') as T2(Split)

WHERE LTRIM(T2.Split.value('.', 'VARCHAR(100)')) <> ''

DECLARE @T_SecondLevel TABLE

(

Lvl2ID INT IDENTITY(1, 1), Lvl1ID INT, ID INT, Lvl2Data NVARCHAR(MAX),

ColRow INT

)

INSERT INTO @T_SecondLevel

SELECT Lvl1ID, ID, LTRIM(T2.Split.value('.', 'VARCHAR(100)')) AS 'Data'

, ROW_NUMBER() OVER (PARTITION BY Lvl1ID ORDER BY Lvl1ID, ID) AS ColRow

FROM (SELECT Lvl1ID, ID, CONVERT(XML, '<d>' + REPLACE(Lvl1Data, ' ', '</d><d>') + '</d>') AS XMLData FROM @T_FirstLevel) A

CROSS APPLY XMLData.nodes('/d') as T2(Split)

WHERE LTRIM(T2.Split.value('.', 'VARCHAR(100)')) <> ''

DECLARE @T_Final TABLE

(

Lvl2ID INT, Lvl1ID INT, ID INT, Lvl2Data NVARCHAR(MAX), ColRow INT,

Cnt INT, GrpCnt INT, Batch INT

)

INSERT INTO @T_Final

SELECT T.*, (N.number + 1) AS GrpCnt, NULL AS Batch

FROM (SELECT

Lvl2ID, Lvl1ID, ID, Lvl2Data

, CASE WHEN ColRow > 3 THEN 3 ELSE ColRow END AS ColRow

, COUNT(CASE WHEN PATINDEX('%"%', Lvl2Data) > 0 THEN 1 ELSE NULL END) OVER (PARTITION BY Lvl1ID) AS Cnt

FROM @T_SecondLevel) T

LEFT JOIN master..spt_values N ON (N.type = 'p' AND N.number <= (T.Cnt - 1) AND T.ColRow < 3)

ORDER BY T.Lvl2ID

UPDATE F1

SET GrpCnt = F2.Row

FROM @T_Final F1

INNER JOIN (SELECT *, ROW_NUMBER() OVER (PARTITION BY Lvl1ID ORDER BY Lvl2ID) AS Row

FROM @T_Final WHERE GrpCnt IS NULL) F2 ON F1.Lvl2ID = F2.Lvl2ID

WHERE F1.GrpCnt IS NULL

/*

UPDATE F1

SET Batch = F2.Lvl2Data

FROM @T_Final F1

INNER JOIN @T_Final F2 ON F1.Lvl1ID = F2.Lvl1ID

AND F2.ColRow = 1

*/

UPDATE F1

SET Batch = N.Number

FROM @T_Final F1

INNER JOIN master..spt_values N ON F1.Lvl1ID BETWEEN N.number AND (N.number * 3)

WHERE N.number <> 0

--SELECT * FROM @T_Final

SELECT F1.Batch

, MAX(CASE WHEN F1.Lvl2Data LIKE 'Column%' THEN 'Column'

WHEN F1.Lvl2Data LIKE 'Field%' THEN 'Field' END) as 'FieldType'

, MAX(CASE WHEN F1.Lvl2Data IN ('Column_Name', 'Field_Name') AND F2.ColRow = 3 THEN F2.Lvl2Data END) as 'FieldName'

, MAX(CASE WHEN F1.Lvl2Data IN ('New_Column_Value', 'New_Field_Name') AND F2.ColRow = 3 THEN F2.Lvl2Data END) AS 'NewValue'

, MAX(CASE WHEN F1.Lvl2Data IN ('Old_Column_Value', 'Old_Field_Name') AND F2.ColRow = 3 THEN F2.Lvl2Data END) AS 'OldValue'

FROM @T_Final F1

INNER JOIN @T_Final F2 ON F1.Lvl1ID = F2.Lvl1ID AND F1.GrpCnt = F2.GrpCnt AND F1.Batch = F2.Batch

GROUP BY F1.ID, F1.GrpCnt, F1.Batch

ORDER BY 1, 2

Jeff Moden SSC Guru Points: 1003851 More actions · Answer 7

JJH82 (2/20/2011)
thanks kumar20, you code is working, However, I forgot to mention the fact that new value and old value can contain multiple words.
e.g. "new value" "Old value"
I am modifying your code to suit my needs. I will test the performance after that. 🙂
---------------
updates: there is a logic error, when there are equal number of field and colummn changes, the procedure will ignore the column changes.
I suspect that it is caused by MAX().
sample string 3:
,1 Field_Name "F3", 1 New_Field_Name "F3", 1 Old_Field_Name "f3" ,1 Column_Name "Col6" ,1 New_Column_Value "F", 1 Old_Column_Value "f"

If you're getting an error, please post your code.

If you want really good answers, please post your data possibilities in a readily consumable format. Don't assume that you know how to do that. Please see the first link in my signature line below.

--Jeff Moden

RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
First step towards the paradigm shift of writing Set Based code:
________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

Change is inevitable... Change for the better is not.

Helpful Links:
How to post code problems
How to Post Performance Problems
Create a Tally Function (fnTally)

JJH82 SSC Enthusiast Points: 134 More actions · Answer 8

I appologise for the troubles I caused.

Here is a detailed structure of the source data:

1. the length of each row of string is not fixed

2. the data contained in the string is comma seperated and start with a comma,

e.g. , a ,b ,c

3. every 3 commas represent a set of information,

e.g. , a1 , a2 , a3 , b1 , b2 , b3

4. each section between 2 commas contains a set of fixed format string,

e.g. ,1 New_Field "Field 1" , 1 New_Field_Name "New Field" , 1 Old_Field_Name "Old Field"

5. there are 2 types of value changes the audit table captures, Field and Column .

Field name may contain space, but column name is a whole string with "_",

new and old value may contain space

all data values are stored in "", empty value are represented by ""

6. each row of record may contain both field and column changes, or it may contain column changes only

7. some real data sample:

,1 Field_Name "Service Type Name" ,1 New_Field_Name "Disability" ,1 Old_Field_Name "" ,2 Column_Name "EFF_END_DATE" "EFF_START_DT" ,2 New_Column_Value "2014-10-31" "2010-10-01" ,2 Old_Column_Value "" ""

,2 Column_Name "X_ALLOW_CASE_CLAIMS" "X_CASE_TYPE" ,2 New_Column_Value "N" "Agreement Schedule" ,2 Old_Column_Value "" ""

,9 Column_Name "NAME" "TAX_IDEN_NUM" "X_GST_REG_FLG" "REL_NAME" "X_PAYMENT_METHOD" "X_PAYMENT_TERM" "CUST_STAT_CD" "NAME_1" "X_WITHHOLD_TAX_EXEMPT_FLG" ,9 New_Column_Value "ER15 ORG Test" "48123123124" "Y" "ER15 ORG Test" "None" "Immediate" "Draft" "ER15 ORG Test" "Y" ,9 Old_Column_Value "" "" "" "" "" "" "" "" ""

,1 Column_Name "ATTRIB_07" ,1 New_Column_Value "No" ,1 Old_Column_Value ""

,1 Column_Name "STATUS_CD" ,1 New_Column_Value "Submitted" ,1 Old_Column_Value "Draft"

p.s. at the moment, I am trying the code provided by kumar20, it breaks when it hits space between string. I am working to overcome it now

Jeff Moden SSC Guru Points: 1003851 More actions · Answer 9

JJH82 (2/21/2011)
I appologise for the troubles I caused.
Here is a detailed structure of the source data:
1. the length of each row of string is not fixed
2. the data contained in the string is comma seperated and start with a comma,
e.g. , a ,b ,c
3. every 3 commas represent a set of information,
e.g. , a1 , a2 , a3 , b1 , b2 , b3
4. each section between 2 commas contains a set of fixed format string,
e.g. ,1 New_Field "Field 1" , 1 New_Field_Name "New Field" , 1 Old_Field_Name "Old Field"
5. there are 2 types of value changes the audit table captures, Field and Column .
Field name may contain space, but column name is a whole string with "_",
new and old value may contain space
all data values are stored in "", empty value are represented by ""
6. each row of record may contain both field and column changes, or it may contain column changes only
7. some real data sample:
,1 Field_Name "Service Type Name" ,1 New_Field_Name "Disability" ,1 Old_Field_Name "" ,2 Column_Name "EFF_END_DATE" "EFF_START_DT" ,2 New_Column_Value "2014-10-31" "2010-10-01" ,2 Old_Column_Value "" ""
,2 Column_Name "X_ALLOW_CASE_CLAIMS" "X_CASE_TYPE" ,2 New_Column_Value "N" "Agreement Schedule" ,2 Old_Column_Value "" ""
,9 Column_Name "NAME" "TAX_IDEN_NUM" "X_GST_REG_FLG" "REL_NAME" "X_PAYMENT_METHOD" "X_PAYMENT_TERM" "CUST_STAT_CD" "NAME_1" "X_WITHHOLD_TAX_EXEMPT_FLG" ,9 New_Column_Value "ER15 ORG Test" "48123123124" "Y" "ER15 ORG Test" "None" "Immediate" "Draft" "ER15 ORG Test" "Y" ,9 Old_Column_Value "" "" "" "" "" "" "" "" ""
,1 Column_Name "ATTRIB_07" ,1 New_Column_Value "No" ,1 Old_Column_Value ""
,1 Column_Name "STATUS_CD" ,1 New_Column_Value "Submitted" ,1 Old_Column_Value "Draft"
p.s. at the moment, I am trying the code provided by kumar20, it breaks when it hits space between string. I am working to overcome it now

It's no trouble on our end. We're just trying to provide a solution. I missed the part about multiple fields being in a single section. 😛 Thank you for the update.

I believe that kumar20's code is a step in the right direction. You may have to suffix each row with an extra space to get it to work for multiple values within a field selection.

That, not withstanding, I'll take a look at a different method.

--Jeff Moden

RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
First step towards the paradigm shift of writing Set Based code:
________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

Change is inevitable... Change for the better is not.

Helpful Links:
How to post code problems
How to Post Performance Problems
Create a Tally Function (fnTally)

Jeff Moden SSC Guru Points: 1003851 More actions · Answer 10

Just a quick question, JJH82... does the number right after each coma identify how many elements there are for a given section?

--Jeff Moden

RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
First step towards the paradigm shift of writing Set Based code:
________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

Change is inevitable... Change for the better is not.

Helpful Links:
How to post code problems
How to Post Performance Problems
Create a Tally Function (fnTally)

JJH82 SSC Enthusiast Points: 134 More actions · Answer 11

yes, it does

, 8 Field_Name "1" "2" "3" "4" "5" "6" "7" "8"

thank you in advance

Jeff Moden SSC Guru Points: 1003851 More actions · Answer 12

JJH82 (2/21/2011)
yes, it does
, 8 Field_Name "1" "2" "3" "4" "5" "6" "7" "8"
thank you in advance

Ok... thanks... I'll try to take a stab at this tonight.

--Jeff Moden

RBAR is pronounced "ree-bar" and is a "Modenism" for Row-By-Agonizing-Row.
First step towards the paradigm shift of writing Set Based code:
________Stop thinking about what you want to do to a ROW... think, instead, of what you want to do to a COLUMN.

Change is inevitable... Change for the better is not.

Helpful Links:
How to post code problems
How to Post Performance Problems
Create a Tally Function (fnTally)

JJH82 SSC Enthusiast Points: 134 More actions · Answer 13

hi kumar20, the script cannot process some name characters, e.g. '&', '<'

',2 Column_Names "FIELD_NAME" "READ_FLG" ,2 Old_Column_Values "Rural & Remote" "N" ,2 New_Column_Values "" ""'

'+2 Column_Names "ATTRIB_VALUE" "SCORE" +2 New_Column_Values "<5 volunteers:1 staff" "0" +2 Old_Column_Values "" ""'

is there a way to overcome that?

kumar20 SSC Veteran Points: 261 More actions · Answer 14

As i have used XML for splitting the values it breaks if there are any special chars. I modified the code to split based on numbers table rather xml.

And the code breaks if there is space between values bcoz each section is separated by space i am splitting based on space for section values, c secondlevel temp table insert. If you can replace dat with some other char it ll work. SET NOCOUNT ON

DECLARE @T_Data TABLE

(

ID INT IDENTITY(1, 1), Data NVARCHAR(MAX)

)

INSERT INTO @T_Data

SELECT ',2 Column_Name "Col1" "Col2" , 2 New_Column_Value "A" "B", 2 Old_Column_Value "a" "b"'

UNION

SELECT ',2 Field_Name "F1" "F2", 2 New_Field_Name "F1" "F2", 2 Old_Field_Name "f1" "f2" ,1 Column_Name "Col3" ,1 New_Column_Value "C", 1 Old_Column_Value "c"'

UNION

SELECT ',1 Field_Name "F3", 1 New_Field_Name "F3", 1 Old_Field_Name "f3" ,1 Column_Name "Col6" ,1 New_Column_Value "1<", 1 Old_Column_Value "2&"'

DECLARE @T_FirstLevel TABLE

(

Lvl1ID INT IDENTITY(1, 1), ID INT, Lvl1Data NVARCHAR(MAX)

)

INSERT INTO @T_FirstLevel

SELECT ID, LTRIM(SUBSTRING(Data, number+1, CHARINDEX(',', Data, number+1)-number - 1) ) AS 'Data'

FROM (SELECT ID, Data + ',' AS Data FROM @T_Data) A

INNER JOIN master..spt_values N ON N.number < LEN(A.Data)

AND N.type = 'p'

WHERE LTRIM(SUBSTRING(Data, number+1, CHARINDEX(',', Data, number+1)-number - 1) ) <> ''

AND SUBSTRING(Data, number, 1) = ','

DECLARE @T_SecondLevel TABLE

(

Lvl2ID INT IDENTITY(1, 1), Lvl1ID INT, ID INT, Lvl2Data NVARCHAR(MAX),

ColRow INT

)

INSERT INTO @T_SecondLevel

SELECT Lvl1ID, ID, LTRIM(SUBSTRING(Lvl1Data, number+1, CHARINDEX(' ', Lvl1Data, number+1)-number - 1) ) AS 'Data'

, ROW_NUMBER() OVER (PARTITION BY Lvl1ID ORDER BY Lvl1ID, ID) AS ColRow

FROM (SELECT Lvl1ID, ID, ' ' + Lvl1Data + ' ' AS Lvl1Data FROM @T_FirstLevel) A

INNER JOIN master..spt_values N ON N.number < LEN(A.Lvl1Data)

AND N.type = 'p'

WHERE LTRIM(SUBSTRING(Lvl1Data, number+1, CHARINDEX(' ', Lvl1Data, number+1)-number - 1) ) <> ''

AND SUBSTRING(Lvl1Data, number, 1) = ' '

DECLARE @T_Final TABLE

(

Lvl2ID INT, Lvl1ID INT, ID INT, Lvl2Data NVARCHAR(MAX), ColRow INT,

Cnt INT, GrpCnt INT, Batch INT

)

INSERT INTO @T_Final

SELECT T.*, (N.number + 1) AS GrpCnt, NULL AS Batch

FROM (SELECT

Lvl2ID, Lvl1ID, ID, Lvl2Data

, CASE WHEN ColRow > 3 THEN 3 ELSE ColRow END AS ColRow

, COUNT(CASE WHEN PATINDEX('%"%', Lvl2Data) > 0 THEN 1 ELSE NULL END) OVER (PARTITION BY Lvl1ID) AS Cnt

FROM @T_SecondLevel) T

LEFT JOIN master..spt_values N ON (N.type = 'p' AND N.number <= (T.Cnt - 1) AND T.ColRow < 3)

ORDER BY T.Lvl2ID

UPDATE F1

SET GrpCnt = F2.Row

FROM @T_Final F1

INNER JOIN (SELECT *, ROW_NUMBER() OVER (PARTITION BY Lvl1ID ORDER BY Lvl2ID) AS Row

FROM @T_Final WHERE GrpCnt IS NULL) F2 ON F1.Lvl2ID = F2.Lvl2ID

WHERE F1.GrpCnt IS NULL

/*

UPDATE F1

SET Batch = F2.Lvl2Data

FROM @T_Final F1

INNER JOIN @T_Final F2 ON F1.Lvl1ID = F2.Lvl1ID

AND F2.ColRow = 1

*/

UPDATE F1

SET Batch = N.Number

FROM @T_Final F1

INNER JOIN master..spt_values N ON F1.Lvl1ID BETWEEN N.number AND (N.number * 3)

WHERE N.number <> 0

--SELECT * FROM @T_Final

SELECT F1.Batch

, MAX(CASE WHEN F1.Lvl2Data LIKE 'Column%' THEN 'Column'

WHEN F1.Lvl2Data LIKE 'Field%' THEN 'Field' END) as 'FieldType'

, MAX(CASE WHEN F1.Lvl2Data IN ('Column_Name', 'Field_Name') AND F2.ColRow = 3 THEN F2.Lvl2Data END) as 'FieldName'

, MAX(CASE WHEN F1.Lvl2Data IN ('New_Column_Value', 'New_Field_Name') AND F2.ColRow = 3 THEN F2.Lvl2Data END) AS 'NewValue'

, MAX(CASE WHEN F1.Lvl2Data IN ('Old_Column_Value', 'Old_Field_Name') AND F2.ColRow = 3 THEN F2.Lvl2Data END) AS 'OldValue'

FROM @T_Final F1

INNER JOIN @T_Final F2 ON F1.Lvl1ID = F2.Lvl1ID AND F1.GrpCnt = F2.GrpCnt AND F1.Batch = F2.Batch

GROUP BY F1.ID, F1.GrpCnt, F1.Batch

ORDER BY 1, 2