Technical Article

Data Merge Builder

,

I have a recurring need to load application domain data and test data into newly created databases.  I needed a simple and reliable process to extract data, place this data under source control and have it be re-runnable.  I cooked up this script and it's been working like a champ for a couple of years.  I keep it in the MASTER database so that it's available for any database.  Yeah, I know that's not the best, but it works for my situation. ๐Ÿ˜‰

USEAGE:

exec dbo.sp_DataMergeBuilder

    @table   = 'schema.TableName'  -- 

,   @includeData = 1 -- use 0 for a simple listing of all columns with a null

,   @whereClause = ''  --  add a where clause in case you want to filter the table data

I refactored the code so that it no longer has a dependency on a string splitting routine.  I also cleaned up some latent bugs related to strings containing apostrophes and encrypted and/or binary data (I exclude it).

use master;
go
if object_id('dbo.sp_DataMergeBuilder','P') is null
exec sys.sp_executesql N'create proc dbo.sp_DataMergeBuilder as return 0;'
go
alter procedure dbo.sp_DataMergeBuilder
@tablesysname
,@includeDatabit= 0
,@whereClausenvarchar(1000)= ''
as
/*
โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”
ยฉ 2000-15 ยท NightOwl Development ยท All rights Reserved
โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”
Purpose:Generates bolier plate merge statement for use in creating a generic merge statement.
Returns:merge script for the specified table
Notes:this is a work in progress
History:
   DateDeveloperWork ItemModification
โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”
2011-12-30P. Hunter0Object created.
โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”โ€”
*/
set nocount on;

declare
@baseNamesysname
,@cmdnvarchar(max)= ''
,@crlfchar(2)= char(13) + char(10)
,@datanvarchar(max)= ''
,@dbNamesysname= db_name()
,@errMsgnvarchar(1000)
,@hasIdentbit= 0
,@maxColumnint
,@objectIdint
,@propNamesysname
,@rowsint
,@tabchar(1)= char(9)
,@userNamesysname= '''' + suser_sname() + ''''
,@xmlxml
;

set @cmd = 'use ' + @dbName + ';
select@objectId= object_id
,@baseName= t.name
,@propName= schema_name(t.schema_id) + ''.'' + t.name
,@maxColumn= t.max_column_id_used
,@hasIdent= (select count(1) from sys.identity_columns ic where ic.object_id = t.object_id)
fromsys.tables t
wheret.object_id = object_id(@table);';

exec sys.sp_executesql
@cmd
,N'@objectId int out, @basename sysname out, @propName sysname out, @maxColumn int out, @table sysname, @hasIdent bit out'
,@objectId out, @baseName out, @propName out, @maxColumn out, @table, @hasIdent out;

if @objectId is null
begin
set @errMsg = 'The table name "' + isnull(@table, 'nothing provided') + '" could not be located in the ' + @dbName + 'database.';
raiserror(@errMsg, 15, 1);
end;

create table #excludeColumns
(name sysname primary key
,defaultValue sysname
);
insert#excludeColumns ( name, defaultValue )
values( 'CreatedBy', @userName)
,( 'CreatedDate' , 'getdate()')
,( 'CreatedOn', 'getdate()')
,( 'ModifiedBy', @userName)
,( 'ModifiedDate', 'getdate()')
,( 'ModifiedOn', 'getdate()')
,( 'UpdatedBy', @userName)
,( 'UpdatedDate' , 'getdate()')
,( 'UpdatedOn', 'getdate()')
;

create table #columns
(column_idsmallint primary key
,namesysname
,isIdentitybit
,isNullablebit
,isExcludedbit
,tabstinyint
,remdrtinyint
,minIdsmallint
,maxIdsmallint
,lastMergesmallint
,maxTabstinyint
,typeNamesysname
,definitionnvarchar(max)
);

set @cmd = 'use ' + @dbName + ';
insert#columns
selects.column_id
,s.name
,s.is_identity
,s.is_nullable
,isExcluded= case s.name when x.name then 1 else 0 end
,tabs= (a.maxTabs - ((len(s.name)) / 4))
,remdr= (len(s.name) % 4)
,a.minId
,a.maxId
,a.lastMerge
,a.maxTabs
,typeName= t.name
,definition= case left(d.definition, 2)
when ''(('' then replace(replace(d.definition, ''(('', ''''), ''))'', '''')
else replace(substring(definition, 2, len(d.definition) - 2), ''(1)'', ''1'')
  end
fromsys.columnss
joinsys.typest
ons.user_type_id = t.user_type_id
ands.system_type_id = t.system_type_id
andt.namenot like ''%binary''
left
join#excludeColumns x
onx.name = s.name
left
joinsys.default_constraints d
ond.parent_object_id = s.object_id
andd.parent_column_id = s.column_id
outer
apply(selectminId= min(c.column_id)
,maxId= max(c.column_id)
,lastMerge= max(case c.name when ec.name then 0 else c.column_id end)
,maxTabs= max(len(c.name) + 4) / 4
fromsys.columns c
left
join#excludeColumns ec
onec.name = c.name
wherec.object_id= s.object_id
andc.is_computed= 0
)a
wheres.object_id= @objectId
ands.is_computed= 0';

exec sys.sp_executesql @cmd, N'@objectId int', @objectId;

--build the join criteria to prefer a "natural" unique key over a surogate key (identity) where available
create table #join
(scriptnvarchar(max)
,rowIdint identity primary key
);

set @cmd = 'use ' + @dbName + ';
insert#join ( script )
selectscript= case i.key_ordinal
when 1 then ''ontgt.''
else ''andtgt.''
  end + c.name + replicate(@tab, c.tabs) + ''= src.'' + c.name
from#columnsc
join(selectjoinOn= rank() over (order by i.index_id)
,ic.*
fromsys.index_columns ic
joinsys.indexes i
oni.object_id = ic.object_id
andi.index_id= ic.index_id
andi.is_unique= 1
whereic.object_id = @objectId
) ioni.column_id = c.column_id
andi.joinOn= 1;';

exec sys.sp_executesql @cmd, N'@objectId int, @tab char(1)', @objectId, @tab;

--they want to include data, this checks to see if there is data to extract...
if @includeData = 1
begin
set @cmd = 'select top 1 @includeData = count(1) from ' + @dbName + '.' + @propName
exec sys.sp_executesql @cmd, N'@includeData bit out', @includeData out;
end;

create table #cteData
(scriptnvarchar(max)
,idint identity primary key
);

/*
**extract the data for the table - OR - create a stub of the columns to be used...
*/--first, define the cte with column names
insert#cteData ( script )
selectcase c.column_id
when m.minId
then 'with cte_' + @baseName + @crlf + @tab + '(' + @tab + c.name
else @tab + ',' + @tab + c.name
+ case c.column_id when m.maxId then @crlf + @tab + ')' + @crlf + '  as' + @crlf + '(' else '' end
end
from#columnsc
cross
apply(selectminId = min(c.column_id)
,maxId = max(c.column_id)
from#columns c
wherec.name not in (select name from #excludeColumns)
)m
left
join#excludeColumnsx
on x.name = c.name
wherex.name is null
order by c.column_id;

if @includeData = 1
begin
--now extract the data for the table...
set @cmd = '';

--build a dynamic query that builds the extract command
select@cmd= @cmd
+ case c.column_id when 1
then @crlf + 'selectscript = ''select '' + replace(replace('
else ' + '', '' + '
  end + 'isnull('
+ case
when c.typeNamelike '%char'then '''#|#'' + ' + c.name + ' + ''#|#'''
when c.typeNamelike '%date%'
or c.typeNamelike '%time%'then '''#|#'' + convert(varchar(50), ' + c.name + ', 121) + ''#|#'''
when c.typeNamelike '%int'
or c.typeNamelike 'bit' 
or c.typeNamelike 'dec%' 
or c.typeNamelike 'num%' 
or c.typeNamelike 'flo%' 
or c.typeNamelike 'rea%'then 'convert(varchar(50), ' + c.name + ')'
else c.name
  end
+ ', ''null'')'
from#columnsc
cross
apply(selectminId = min(c.column_id)
,maxId = max(c.column_id)
from#columns c
wherec.name not in (select name from #excludeColumns)
)m
left
join#excludeColumnsx
on x.name = c.name
wherex.name is null
order by c.column_id;

set@cmd = @cmd + ', '''''''' ,'''''''''''' ), ''#|#'','''''''') + ''' + @crlf + @tab + @tab + 'union all'''

set @cmd = 'use ' + @dbName + ';' + @crlf
 + 'insert#cteData ( script )' + @cmd + @crlf
 + 'from' + @propName + @crlf
 + case when @whereClause > '' then @whereClause + + @crlf else '' end
 + 'order by ' + (select top 1 name from #columns order by isIdentity desc, column_id) + ';'

--execute the resulting query
exec sys.sp_executesql @cmd;

set @rows = @@rowcount;

update#cteData
setscript = replace(script, @tab + @tab + 'union all', ')')
whereId = (select max(id) from #cteData);
end;

if @includeData = 0--build a stub select statement or...
or @rows = 0--no rows generated from the data
begin
--either there's no data to include or the data isn't suppoed to be included
set @includeData = 0;

--create a stub for each column 
insert#cteData ( script )
selectscript= case c.column_id
when m.minId
then 'select'
else ','
end + @tab + c.name + replicate(@tab, c.tabs) + '= null'
+ case c.column_id when m.maxId then @crlf + ')' else '' end
from#columnsc
cross
apply(selectminId = min(c.column_id)
,maxId = max(c.column_id)
from#columns c
wherec.name not in (select name from #excludeColumns)
)m
left
join#excludeColumnsx
on x.name = c.name
wherex.name is null
order by c.column_id;
end;

/*
**begin building the output script...
*/create table #output
(scriptnvarchar(max)
,rowIdint identity primary key
);

if @hasIdent = 1
begin
insert#output(script)
selectscript= '--This table has an identity column so preserve those number on insert'
union all
selectscript= 'set identity_insert ' + @propName + ' on;' + @crlf + 'go';
end;

--setup the stub cte that will hold the data to be merged
insert#output(script)
selectscript= '--create cte containing the source data to be merged into the target table...';

--provide the cteData data or "fake data"
insert#output(script)
selectscript
from#cteData
order by Id;

--create top half of except clause to get the differences between the cte and target table
insert#output(script)
selectscript= '--using an EXCEPT statement generates the true diferences between the cte source and the target table you want to merge into...';

--create top half of except clause to get the differences between the cte and target table
insert#output(script)
selectscript= case c.column_id
when c.minId
then 'merge' + @propName + 'tgt' + @crlf
+ 'using(select'
else replicate(@tab, 3) + ','
  end + @tab + c.name
+ case c.column_id
when c.lastMerge
then @crlf + 'fromcte_'+ @baseName + @crlf
+ case @includeData
when 0 then 'where' + (select top 1 name from #columns c
 order by c.isIdentity desc, c.column_id) + ' is not null' + @crlf
else ''
  end
+ 'EXCEPT--return only true differences'
else '' end
from#columns c
wherec.isExcluded = 0;

--create bottom half of except clause to get the differences between the cte and target table
insert#output(script)
selectscript= case c.column_id
when c.minId
then 'select'
else replicate(@tab, 3) + ','
  end + @tab + c.name
+ case c.column_id
when c.lastMerge
then @crlf + 'from'+ @propName + @crlf
+ ')src'
else '' end
from#columns c
wherec.isExcluded = 0;

--add the join criteria
insert#output(script)
selectscript
from#join j;


--create the update portion
insert#output(script)
selectscript= case c.column_id
when b.minId
then 'whenmatched' + @crlf
+ 'thenupdate' + @crlf
+ 'set'
else replicate(@tab, 3) + ','
  end + @tab + c.name + replicate(@tab, c.tabs) + '= '
+ case c.name
when x.name then x.defaultValue
else 'src.' + c.name
  end
from#columns c
cross
join(selectminId = min(column_id)
,maxId = max(column_id)
from#columns m
wherem.name not like 'Created[BD][ya]%'
andm.isIdentity = 0 ) b
left
join#excludeColumns x
onx.name = c.name
wherec.name not like 'Created[BD][ya]%'
andc.isIdentity = 0
order by c.column_id;

--create the top part of the insert
insert#output(script)
selectscript= case c.column_id
when c.minId
then 'whennot matched' + @crlf
+ 'theninsert' + @crlf
+ '('
else replicate(@tab, 3) + ','
  end + @tab + c.name
+ case c.column_id when c.maxId then @crlf + ')' else '' end
from#columns c
left
join#excludeColumns x
onx.name = c.name
order by c.column_id;

--create the bottom/values part of the insert
insert#output(script)
selectscript= case c.column_id
when c.minId
then 'values' + @crlf
+ '('
else replicate(@tab, 3) + ','
  end + @tab
+ case
when c.name = x.name
then x.defaultValue + '--'  + c.name
when c.definition > ''
then 'isnull(src.' + c.name + ', ' + c.definition + ')' 
else 'src.' + c.name end
+ case c.column_id when c.maxId then @crlf + ');' else '' end
from#columns c
left
join#excludeColumns x
onx.name = c.name
order by c.column_id;

--complete the script with a batch separator
insert#output(script) values ( 'go' );

if @hasIdent = 1
begin
insert#output(script)
selectscript= '--reset the identity engine and reseed the identity value'
union all
selectscript= 'set identity_insert ' + @propName + ' off;' + @crlf + 'go'
union all
selectscript= 'dbcc checkident(''' + @propName + ''', reseed);' + @crlf + 'go' + @crlf;
end;

--return the results
select script from #output o order by o.rowId;
go

Rate

โ˜… โ˜… โ˜… โ˜… โ˜… โ˜… โ˜… โ˜… โ˜… โ˜…

Share

Share

Rate

โ˜… โ˜… โ˜… โ˜… โ˜… โ˜… โ˜… โ˜… โ˜… โ˜…