Technical Article

Identify Primary Key Violation Prior to Insert (Updated)

,

This script allows for testing if rows will cause a primary key violations prior to inserting the data.  Offending rows are stored in a table so they can be used in subsequent queries or used to debug bad data issues.

This updated version adds a timestamp to the duplicates table so that identifying when the duplicate occurred is easier. This is really helpful if you have scheduled data uploads and can track when a bulk insert from a file was performed.

This can be called from any database and will identify duplicates in the select statement as well as the target table

Split and PrimaryKeyColumns functions are called by the stored procedure and are included in the post.

IF exists (SELECT * from dbo.sysobjects 
WHERE id = object_id(N'[dbo].[Split]') 
AND OBJECTPROPERTY(id, N'IsTableFunction') = 1)
DROP FUNCTION [dbo].[Split]
GO


GO
CREATE FUNCTION dbo.Split (@vcDelimitedString varchar(8000),
@vcDelimitervarchar(100) )
/**************************************************************************
DESCRIPTION: Accepts a delimited string and splits it at the specified
delimiter points.  Returns the individual items as a table data
type with the ElementID field as the array index and the Element
field as the data

PARAMETERS:
@vcDelimitedString- The string to be split
@vcDelimiter- String containing the delimiter where
delimited string should be split

RETURNS:
Table data type containing array of strings that were split with
the delimiters removed from the source string

USAGE:
SELECT ElementID, Element FROM Split('11111,22222,3333', ',') ORDER BY ElementID

AUTHOR:Karen Gayda

DATE: 05/31/2001

MODIFICATION HISTORY:
WHODATEDESCRIPTION
----------------------------------------------------------------

***************************************************************************/RETURNS @tblArray TABLE 
   (
ElementIDsmallintIDENTITY(1,1),  --Array index
   Elementvarchar(1000)--Array element contents
   )
AS
BEGIN

DECLARE 
@siIndexsmallint,
@siStartsmallint,
@siDelSizesmallint


SET @siDelSize= LEN(@vcDelimiter)
--loop through source string and add elements to destination table array
WHILE LEN(@vcDelimitedString) > 0
BEGIN
SET @siIndex = CHARINDEX(@vcDelimiter, @vcDelimitedString)
IF @siIndex = 0
BEGIN
INSERT INTO @tblArray VALUES(@vcDelimitedString)
BREAK
END
ELSE
BEGIN
INSERT INTO @tblArray VALUES(SUBSTRING(@vcDelimitedString, 1,@siIndex - 1))
SET @siStart = @siIndex + @siDelSize
SET @vcDelimitedString = SUBSTRING(@vcDelimitedString, @siStart , LEN(@vcDelimitedString) - @siStart + 1)
END
END

RETURN
END
GO





IF exists (SELECT * from dbo.sysobjects 
WHERE id = object_id(N'[dbo].[PrimaryKeyColumns]') 
AND OBJECTPROPERTY(id, N'IsTableFunction') = 1)
DROP FUNCTION [dbo].[PrimaryKeyColumns]
GO


GO
CREATE FUNCTION dbo.PrimaryKeyColumns (@sysTableName sysname )
/**************************************************************************
DESCRIPTION:    Finds the name and column id of primary key columns for a
table.

PARAMETERS:
@sysTableName- The name of the table for which primary
key info is sought

RETURNS:
Table data type list of primary key column names

USAGE:USE pubs
SELECT * from dbo.PrimaryKeyColumns( 'authors')

AUTHOR:Karen Gayda

DATE: 10/24/2003

MODIFICATION HISTORY:
WHODATEDESCRIPTION
----------------------------------------------------------------

***************************************************************************/RETURNS @tblCols TABLE 
   (
   namesysname,-- Name of primary key column
colidsmallint-- System id for column
   )
AS
BEGIN

INSERT INTO @tblCols
SELECT  c.name, c.colid
FROM      sysindexes i
  INNER JOIN sysobjects t
ON i.id = t.id
  INNER JOIN sysindexkeys k
        ON i.indid = k.indid
        AND i.id = k.ID
  INNER JOIN syscolumns c
ON c.id = t.id
        AND c.colid = k.colid
WHERE  i.id = t.id
 AND      i.indid BETWEEN 1 And 254 
 AND      (i.status & 2048) = 2048
 AND t.id = OBJECT_ID(@sysTableName)
ORDER BY k.KeyNo


RETURN
END
GO

SET QUOTED_IDENTIFIER OFF 
GO
SET ANSI_NULLS ON 
GO

IF EXISTS(SELECT * FROM dbo.sysobjects 
WHERE id = object_id(N'[dbo].[sp_GetKeyViolations]') 
AND OBJECTPROPERTY(id, N'IsProcedure') = 1
)
DROP PROCEDURE [dbo].[sp_GetKeyViolations]
GO

CREATE  PROCEDURE dbo.sp_GetKeyViolations
@vcSelectStatementvarchar (8000),
@sysTargetTablesysname,
@sysViolationTablesysname,
@sysTargetDatabasesysname,
@bitEmptyViolationTable bit= 0

/***********************************************************************************
DESCRIPTION:
       Evaluates whether a select statement will cause Primary Key violations
if data is inserted into a given target table. Key violation rows are
saved to the designated violation table in the target database.

PARAMETERS:
@vcSelectStatement - SQL statement that selects primary key columns for 
records being tested for key violations.  
IMPORTANT: Only key columns should
be included in select.  If source table 
columns are named differently than in the
target then they must be aliased with the
same name as the target.  These restrictions
were necessary to make the procedure generic.

@sysTargetTable - table where data will be inserted if record is
not a duplicate.

@sysViolationTable - table where key violation rows will be saved

@sysTargetDatabase - Database where target table is located and where 
key violation table exists and/or will be created

@bitEmptyViolationTable - Optional, indicates whether pre-existing key violation
rows should be deleted from violation table before adding new
records.  If omitted, records will be preserved.



USAGE: 

exec sp_GetKeyViolations 'SELECT TOP 10 CustomerID as CustomerID FROM Northwind.dbo.Customers WHERE ContactTitle LIKE ''Sales%''',  
'Customers', --target table
'DupCustomers', -- table to store violation rows
'NORTHWIND', -- target database
0 -- do not clear violation table prior to insert


REMARKS: This is a very generic function that allows for identifying key violations
for data that will be inserted into a target table before the insert takes
place.  The offending rows are saved to a caller-specified table so that the
records can be analyzed or used by a subsequent query to exclude those rows
from being inserted.

Duplicates are identified in the source select as well as duplicates that already exist in target.

DEPENDANCIES:
dbo.Split function must be installed in master database
dbo.PrimaryKeyColumns function must be installed in each target database


AUTHOR: Karen Gayda

DATE: 10/24/2003

MODIFICATIONS:
 
WhoDateWhat
-----------------------------------------------------------------
KMG10/30/2003Added ability to run this proc. from any database
KMG10/31/2003Added check for duplicates in source select in 
addition to checking already existing data
KMG 01/04/2004Added timestamp column to duplicates table for easier
tracking of data
************************************************************************************/AS

SET NOCOUNT ON

DECLARE @vcSQL varchar(8000),
@vcColumnList varchar(300),
@sysColNamesysname

SET @vcSQL = 'DECLARE crCols CURSOR STATIC FOR SELECT DISTINCT
 name  FROM ' + @sysTargetDatabase + '.dbo.PrimaryKeyColumns(''' + @sysTargetTable + ''')'
EXEC(@vcSQL)
OPEN crCols  

--For Each Column in PrimaryKey, concatonate to column list
SET @vcColumnList = ''
FETCH NEXT FROM crCols INTO @sysColName
WHILE(@@FETCH_STATUS = 0) 
BEGIN
SET @vcColumnList = @vcColumnList + @sysColName + ','
FETCH NEXT FROM crCols INTO @sysColName
END --Get next column
SET @vcColumnList = SUBSTRING(@vcColumnList,1,LEN(@vcColumnList) -1) --remove trailing comma
CLOSE crCols


--Get select rows and save to temporary table
IF EXISTS (select * from sysobjects where id = object_id('##tempSourceRecords') 
and OBJECTPROPERTY(id, N'IsTable') = 1)
  DROP TABLE ##tempSourceRecords

SET @vcSQL = (SELECT Element FROM master.dbo.Split(@vcSelectStatement, 'FROM') WHERE ElementID = 1) 
+   ' INTO ##tempSourceRecords FROM ' 
+ (SELECT Element FROM master.dbo.Split(@vcSelectStatement, 'FROM') WHERE ElementID = 2)

PRINT(@vcSQL)
EXEC (@vcSQL)


--If key violation table does not already exist, create it
DECLARE@nvcSQLnvarchar(4000),
@nvcParmDefnvarchar(500),
@bitExistsbit,
@vcDate varchar(25)
SET @vcDate = CONVERT(varchar(20),GETDATE(), 109)


        SET @nvcSQL = 'IF EXISTS (select * from '+ @sysTargetDatabase+ '.dbo.sysobjects 
where name = ''' + @sysViolationTable + ''' AND
xtype=''U'') ' +
'SET @bitExistsOUT = 1 ' +
'ELSE ' +
'SET @bitExistsOUT = 0'

SET @nvcParmDef = N'@bitExistsOUT bit OUTPUT'

EXEC sp_executesql @nvcSQL, @nvcParmDef, @bitExistsOUT=@bitExists OUTPUT
        
IF @bitExists = 0
BEGIN
--Create new table based upon supplied select list if needed
SET @vcSQL = 'SELECT TOP 0 ' + @vcColumnList + ',''' + @vcDate + ''' as ProcessedDate ' +
+ ' INTO ' + @sysTargetDatabase + '.dbo.'+ @sysViolationTable + 
' FROM ##tempSourceRecords ' 
PRINT @vcSQL
EXEC (@vcSQL)
END

--Empty violation table if caller indicates it should be cleared
IF @bitEmptyViolationTable = 1
BEGIN
SET @vcSQL = 'DELETE FROM ' + @sysTargetDatabase + '.dbo.' +  @sysViolationTable
PRINT @vcSQL
EXEC (@vcSQL)
END

--Insert duplicate rows from target into key violation table
SET @vcSQL = 'INSERT INTO ' + @sysTargetDatabase + '.dbo.' + @sysViolationTable +
' SELECT a.' + REPLACE(@vcColumnList, ',', ',a.') + ', ''' + @vcDate + '''' +
' FROM ##tempSourceRecords a' +
' INNER JOIN ' + @sysTargetDatabase + '.dbo.' + @sysTargetTable + ' b ' +
' ON '

OPEN crCols
FETCH NEXT FROM crCols INTO @sysColName --get inner join columns to add to SQL insert statement
WHILE(@@FETCH_STATUS = 0) 
BEGIN
SET @vcSQL  = @vcSQL + 'a.' + @sysColName + '=b.' + @sysColName + ' AND '
FETCH NEXT FROM crCols INTO @sysColName
END --Get next column
SET @vcSQL = SUBSTRING(@vcSQL,1,LEN(@vcSQL) -4) --remove trailing AND

--Add rows that are duplicate in the source select that may cause violation
SET @vcSQL = @vcSQL + ' UNION SELECT ' + @vcColumnList + + ',''' + @vcDate + ''' as ProcessedDate ' 
+ ' FROM ##TempSourceRecords ' +
'GROUP BY ' + @vcColumnList + ' HAVING COUNT(*) > 1'

CLOSE crCOLS
DEALLOCATE crCols

PRINT @vcSQL
EXEC (@vcSQL)

DROP TABLE ##tempSourceRecords

RETURN (0)


GO

Rate

4 (2)

You rated this post out of 5. Change rating

Share

Share

Rate

4 (2)

You rated this post out of 5. Change rating