Identify Primary Key Violation Prior to Insert (Updated)

,

This script allows for testing if rows will cause a primary key violations prior to inserting the data.  Offending rows are stored in a table so they can be used in subsequent queries or used to debug bad data issues.

This updated version adds a timestamp to the duplicates table so that identifying when the duplicate occurred is easier. This is really helpful if you have scheduled data uploads and can track when a bulk insert from a file was performed.

This can be called from any database and will identify duplicates in the select statement as well as the target table

Split and PrimaryKeyColumns functions are called by the stored procedure and are included in the post.

IF exists (SELECT * from dbo.sysobjects 
	WHERE id = object_id(N'[dbo].[Split]') 
	AND OBJECTPROPERTY(id, N'IsTableFunction') = 1)
DROP FUNCTION [dbo].[Split]
GO


GO
CREATE FUNCTION dbo.Split (	@vcDelimitedString 		varchar(8000),
				@vcDelimiter			varchar(100) )
/**************************************************************************
DESCRIPTION: Accepts a delimited string and splits it at the specified
		delimiter points.  Returns the individual items as a table data
		type with the ElementID field as the array index and the Element
		field as the data

PARAMETERS:
		@vcDelimitedString		- The string to be split
		@vcDelimiter			- String containing the delimiter where
							delimited string should be split

RETURNS:
		Table data type containing array of strings that were split with
		the delimiters removed from the source string

USAGE:
		SELECT ElementID, Element FROM Split('11111,22222,3333', ',') ORDER BY ElementID

AUTHOR:	Karen Gayda

DATE: 	05/31/2001

MODIFICATION HISTORY:
	WHO		DATE		DESCRIPTION
	---		----------	---------------------------------------------------

***************************************************************************/
RETURNS @tblArray TABLE 
   (
	ElementID	smallint	IDENTITY(1,1),  --Array index
   	Element		varchar(1000)			--Array element contents
   )
AS
BEGIN

	DECLARE 
	@siIndex					smallint,
	@siStart					smallint,
	@siDelSize					smallint


	SET @siDelSize	= LEN(@vcDelimiter)
	--loop through source string and add elements to destination table array
	WHILE LEN(@vcDelimitedString) > 0
	BEGIN
		SET @siIndex = CHARINDEX(@vcDelimiter, @vcDelimitedString)
		IF @siIndex = 0
		BEGIN
			INSERT INTO @tblArray VALUES(@vcDelimitedString)
			BREAK
		END
		ELSE
		BEGIN
			INSERT INTO @tblArray VALUES(SUBSTRING(@vcDelimitedString, 1,@siIndex - 1))
			SET @siStart = @siIndex + @siDelSize
			SET @vcDelimitedString = SUBSTRING(@vcDelimitedString, @siStart , LEN(@vcDelimitedString) - @siStart + 1)
		END
	END
	
	RETURN
END
GO





IF exists (SELECT * from dbo.sysobjects 
	WHERE id = object_id(N'[dbo].[PrimaryKeyColumns]') 
	AND OBJECTPROPERTY(id, N'IsTableFunction') = 1)
DROP FUNCTION [dbo].[PrimaryKeyColumns]
GO


GO
CREATE FUNCTION dbo.PrimaryKeyColumns (	@sysTableName 		sysname )
/**************************************************************************
DESCRIPTION:    Finds the name and column id of primary key columns for a
		table.

PARAMETERS:
		@sysTableName	- The name of the table for which primary
					key info is sought		

RETURNS:
		Table data type list of primary key column names

USAGE:		USE pubs
		SELECT * from dbo.PrimaryKeyColumns( 'authors')

AUTHOR:	Karen Gayda

DATE: 	10/24/2003

MODIFICATION HISTORY:
	WHO		DATE		DESCRIPTION
	---		----------	---------------------------------------------------

***************************************************************************/
RETURNS @tblCols TABLE 
   (
   	name		sysname,		-- Name of primary key column
	colid		smallint		-- System id for column
   )
AS
BEGIN
	
	INSERT INTO @tblCols
		SELECT  c.name, c.colid
		FROM      sysindexes i
		  INNER JOIN sysobjects t
			ON i.id = t.id
		  INNER JOIN sysindexkeys k
		        ON i.indid = k.indid
		        AND i.id = k.ID
		  INNER JOIN syscolumns c
			ON c.id = t.id
		        AND c.colid = k.colid
		WHERE  i.id = t.id
		 AND      i.indid BETWEEN 1 And 254 
		 AND      (i.status & 2048) = 2048
		 AND t.id = OBJECT_ID(@sysTableName)
		ORDER BY k.KeyNo


	RETURN
END
GO

SET QUOTED_IDENTIFIER OFF 
GO
SET ANSI_NULLS ON 
GO

IF EXISTS(SELECT * FROM dbo.sysobjects 
		WHERE id = object_id(N'[dbo].[sp_GetKeyViolations]') 
		AND OBJECTPROPERTY(id, N'IsProcedure') = 1
	)
	DROP PROCEDURE [dbo].[sp_GetKeyViolations]
GO

CREATE  PROCEDURE dbo.sp_GetKeyViolations
	@vcSelectStatement	varchar (8000),
	@sysTargetTable		sysname,
	@sysViolationTable	sysname,
	@sysTargetDatabase	sysname,
	@bitEmptyViolationTable bit	= 0
	
/***********************************************************************************
DESCRIPTION:
       Evaluates whether a select statement will cause Primary Key violations
	if data is inserted into a given target table. Key violation rows are
	saved to the designated violation table in the target database.

PARAMETERS:
	@vcSelectStatement - SQL statement that selects primary key columns for 
			records being tested for key violations.  
			IMPORTANT: Only key columns should
				be included in select.  If source table 
				columns are named differently than in the
				target then they must be aliased with the
				same name as the target.  These restrictions
				were necessary to make the procedure generic.

	@sysTargetTable - table where data will be inserted if record is
		not a duplicate.

	@sysViolationTable - table where key violation rows will be saved

	@sysTargetDatabase - Database where target table is located and where 
		key violation table exists and/or will be created
	
	@bitEmptyViolationTable - Optional, indicates whether pre-existing key violation
		rows should be deleted from violation table before adding new
		records.  If omitted, records will be preserved.

	
	
USAGE: 

exec sp_GetKeyViolations 'SELECT TOP 10 CustomerID as CustomerID FROM Northwind.dbo.Customers WHERE ContactTitle LIKE ''Sales%''',  
			'Customers', --target table
			'DupCustomers', -- table to store violation rows
			'NORTHWIND', -- target database
			0 -- do not clear violation table prior to insert
			

REMARKS: This is a very generic function that allows for identifying key violations
	for data that will be inserted into a target table before the insert takes
	place.  The offending rows are saved to a caller-specified table so that the
	records can be analyzed or used by a subsequent query to exclude those rows
	from being inserted.

	Duplicates are identified in the source select as well as duplicates that already exist in target.

DEPENDANCIES:
	dbo.Split function must be installed in master database
	dbo.PrimaryKeyColumns function must be installed in each target database
	
	
AUTHOR: Karen Gayda

DATE: 10/24/2003

MODIFICATIONS:
 
	Who	Date		What
	---	----------	----------------------------------------------------
	KMG	10/30/2003	Added ability to run this proc. from any database
	KMG	10/31/2003	Added check for duplicates in source select in 
					addition to checking already existing data
	KMG 	01/04/2004	Added timestamp column to duplicates table for easier
				tracking of data
************************************************************************************/
AS

SET NOCOUNT ON

DECLARE @vcSQL 		varchar(8000),
	@vcColumnList 	varchar(300),
	@sysColName	sysname
	
SET @vcSQL = 'DECLARE crCols CURSOR STATIC FOR SELECT DISTINCT
	 	name  FROM ' + @sysTargetDatabase + '.dbo.PrimaryKeyColumns(''' + @sysTargetTable + ''')'
EXEC(@vcSQL)
OPEN crCols  

--For Each Column in PrimaryKey, concatonate to column list
SET @vcColumnList = ''
FETCH NEXT FROM crCols INTO @sysColName
WHILE(@@FETCH_STATUS = 0) 
BEGIN
	SET @vcColumnList = @vcColumnList + @sysColName + ','
	FETCH NEXT FROM crCols INTO @sysColName
END --Get next column
SET @vcColumnList = SUBSTRING(@vcColumnList,1,LEN(@vcColumnList) -1) --remove trailing comma
CLOSE crCols


--Get select rows and save to temporary table
IF EXISTS (select * from sysobjects where id = object_id('##tempSourceRecords') 
	and OBJECTPROPERTY(id, N'IsTable') = 1)
  DROP TABLE ##tempSourceRecords

SET @vcSQL = (SELECT Element FROM master.dbo.Split(@vcSelectStatement, 'FROM') WHERE ElementID = 1) 
		+   ' INTO ##tempSourceRecords FROM ' 
		+ (SELECT Element FROM master.dbo.Split(@vcSelectStatement, 'FROM') WHERE ElementID = 2)

PRINT(@vcSQL)
EXEC (@vcSQL)

	
--If key violation table does not already exist, create it
DECLARE	@nvcSQL			nvarchar(4000),
	@nvcParmDef		nvarchar(500),
	@bitExists		bit,
	@vcDate 		varchar(25)
	SET @vcDate = CONVERT(varchar(20),GETDATE(), 109)

	
        SET @nvcSQL = 'IF EXISTS (select * from '+ @sysTargetDatabase+ '.dbo.sysobjects 
				where name = ''' + @sysViolationTable + ''' AND
				xtype=''U'') ' +
			'SET @bitExistsOUT = 1 ' +
			'ELSE ' +
			'SET @bitExistsOUT = 0'
	
	SET @nvcParmDef = N'@bitExistsOUT bit OUTPUT'

	EXEC sp_executesql @nvcSQL, @nvcParmDef, @bitExistsOUT=@bitExists OUTPUT
        
	IF @bitExists = 0
	BEGIN
	--Create new table based upon supplied select list if needed
		SET @vcSQL = 'SELECT TOP 0 ' + @vcColumnList + ',''' + @vcDate + ''' as ProcessedDate ' +
		+ ' INTO ' + @sysTargetDatabase + '.dbo.'+ @sysViolationTable + 
		' FROM ##tempSourceRecords ' 
		PRINT @vcSQL
		EXEC (@vcSQL)
	END

--Empty violation table if caller indicates it should be cleared
IF @bitEmptyViolationTable = 1
BEGIN
	SET @vcSQL = 'DELETE FROM ' + @sysTargetDatabase + '.dbo.' +  @sysViolationTable
	PRINT @vcSQL
	EXEC (@vcSQL)
END

--Insert duplicate rows from target into key violation table
SET @vcSQL = 'INSERT INTO ' + @sysTargetDatabase + '.dbo.' + @sysViolationTable +
	' SELECT a.' + REPLACE(@vcColumnList, ',', ',a.') + ', ''' + @vcDate + '''' +
	' FROM ##tempSourceRecords a' +
	' INNER JOIN ' + @sysTargetDatabase + '.dbo.' + @sysTargetTable + ' b ' +
	' ON '

OPEN crCols
FETCH NEXT FROM crCols INTO @sysColName --get inner join columns to add to SQL insert statement
WHILE(@@FETCH_STATUS = 0) 
BEGIN
	SET @vcSQL  = @vcSQL + 'a.' + @sysColName + '=b.' + @sysColName + ' AND '
	FETCH NEXT FROM crCols INTO @sysColName
END --Get next column
SET @vcSQL = SUBSTRING(@vcSQL,1,LEN(@vcSQL) -4) --remove trailing AND

--Add rows that are duplicate in the source select that may cause violation
SET @vcSQL = @vcSQL + ' UNION SELECT ' + @vcColumnList + + ',''' + @vcDate + ''' as ProcessedDate ' 
	+ ' FROM ##TempSourceRecords ' +
	'GROUP BY ' + @vcColumnList + ' HAVING COUNT(*) > 1'

CLOSE crCOLS
DEALLOCATE crCols

PRINT @vcSQL
EXEC (@vcSQL)

DROP TABLE ##tempSourceRecords

RETURN (0)


GO

Rate

4 (2)

Share

Share

Rate

4 (2)