OK, I have a solution for you. 😀
This script will automatically handle generating the SQL statements to clean up your tables and foreign keys.
There is a table variable called @WorkingList that will contain all the unresolved users that might be duplicates.
To fix them, in the source data, fix the names to be the same. In the case of roy heaney, if all three rows are in fact the same user, set the first and last name to be roy heaney in all three rows.
Run the script below in your development database ( or anywhere you want to play with it ) to see exactly how it would work.
If you have any questions or need help, feel free to ask.
/*
This will create a table called acz_users, which has a userid, first and last name,
and a last login date.
There are duplicate users that we want to merge in this table, based on this criteria:
When a user is unique, leave them alone.
When a user is clearly a duplicate, keep the row that has the most recent last login.
When a user is not unique, and possibly a duplicate, set them aside for someone
to determine what should be done.
This script will generate the SQL statements to clean up the a user table as provided,
and also generate some statements to keep other files foreign keys in sync
*/
IF EXISTS (SELECT * FROM dbo.sysobjects WHERE id = OBJECT_ID(N'[dbo].[acz_users]') AND OBJECTPROPERTY(id, N'IsUserTable') = 1)
DROP TABLE [dbo].[acz_users]
CREATE TABLE [dbo].[acz_users](
[pkiuser] [varchar](50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[FirstName] [varchar](50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[LastName] [varchar](50) COLLATE SQL_Latin1_General_CP1_CI_AS NULL,
[LastLogin] [datetime] NULL
) ON [PRIMARY]
Insert into acz_users( pkiuser, FirstName, LastName, LastLogin ) values ( '123','roy','heaney','01/01/00')
Insert into acz_users( pkiuser, FirstName, LastName, LastLogin ) values ( 'TS345','','heaney','08/01/00')
Insert into acz_users( pkiuser, FirstName, LastName, LastLogin ) values ( 'TS456','roy','','03/01/00')
Insert into acz_users( pkiuser, FirstName, LastName, LastLogin ) values ( '564','Bob','Noodle','05/01/00')
Insert into acz_users( pkiuser, FirstName, LastName, LastLogin ) values ( '789','Andy','Griffith','06/01/00')
Insert into acz_users( pkiuser, FirstName, LastName, LastLogin ) values ( 'TS358','Andy','Griffith','04/01/00')
Insert into acz_users( pkiuser, FirstName, LastName, LastLogin ) values ( 'TS765','','Outsideguy','07/01/00')
Insert into acz_users( pkiuser, FirstName, LastName, LastLogin ) values ( 'TS234','Andy','','09/01/00')
Insert into acz_users( pkiuser, FirstName, LastName, LastLogin ) values ( '901','Roy','Fokker','04/01/00')
Insert into acz_users( pkiuser, FirstName, LastName, LastLogin ) values ( 'TS735','Roy','Fokker','07/01/00')
-- Set Up Tables
-- @WorkingList holds all and has them removed as they are cleared
declare @WorkingList table(
useridvarchar(50),
WorkingFirstNamevarchar(50),
WorkingLastNamevarchar(50),
WorkingLastLogindatetime,
WorkingFullNamevarchar(100) ,
CountFirstnumeric,
CountLastnumeric,
CountFullnumeric,
CountTotalnumeric )
-- @RenumList holds rows that are going to be renumbered
declare @RenumList table(
useridvarchar(50),
RenumFirstNamevarchar(50),
RenumLastNamevarchar(50),
RenumLastLogindatetime,
RenumFullNamevarchar(100) )
-- @RenumList holds rows that are going to be renumbered
declare @Renum2List table(
userid2varchar(50),
Renum2FirstNamevarchar(50),
Renum2LastNamevarchar(50),
Renum2LastLogindatetime,
Renum2FullNamevarchar(100) )
-- @CleanList holds records that will not be purged
declare @CleanList table(
useridvarchar(50),
CleanFirstNamevarchar(50),
CleanLastNamevarchar(50),
CleanLastLogindatetime,
CleanFullNamevarchar(100) )
-- @PurgeList holds the records that will be deleted from production
declare @PurgeList table(
useridvarchar(50),
PurgeFirstNamevarchar(50),
PurgeLastNamevarchar(50),
PurgeLastLogindatetime,
PurgeFullNamevarchar(100) )
-- @Output holds the sql statements to update fk tables
declare @Output table(
ResolveFkLearningSQLvarchar(1000),
ResolveFkSessionSQLvarchar(1000),
CleanUserTableSQLvarchar(1000),
CleanUserDetailTableSQLvarchar(1000)
)
-- @Xref holds old key to new key translation
declare @Xref table(
OldUserIdvarchar(50),
OldFullNamevarchar(100),
NewUserIdvarchar(50),
NewFullNamevarchar(100) )
-- Load the working list
Insert into @WorkingList( userid, WorkingFirstName, WorkingLastName, WorkingLastLogin, WorkingFullName )
Select pkiuser, FirstName, LastName, LastLogin, IsNull( FirstName, '' ) + ' ' + IsNull( LastName, '' )
From acz_users
-- Determine what automatically goes to the CleanList because it is unique
Update @WorkingList
Set CountFirst = ( Select Count( FirstName ) from acz_users where WorkingFirstName = FirstName and FirstName <> '' )
Update @WorkingList
Set CountLast = ( Select Count( LastName ) from acz_users where WorkingLastName = LastName and LastName <> '')
Update @WorkingList
Set CountFull = ( Select Count( FirstName + ' ' + LastName ) from acz_users where FirstName + ' ' + LastName = WorkingFullName )
-- Good full name that is unique
Update @WorkingList
set CountTotal = 1 where
CountFirst = 1 and CountLast = 1
-- Unique first name with no last name ( would never be able to resolve )
Update @WorkingList
set CountTotal = 1 where
CountFirst = 1 and CountLast = 0
-- Unique last name with no first name ( would never be able to resolve )
Update @WorkingList
set CountTotal = 1 where
CountFirst = 0 and CountLast = 1
Update @WorkingList
Set CountTotal = 0 where CountTotal is null
-- Move all the resolved ones to the ClearList and clear out the WorkingList
Insert into @CleanList ( userid, CleanFirstName, CleanLastName, CleanLastLogin, CleanFullName )
Select userid, WorkingFirstName, WorkingLastName, WorkingLastLogin, WorkingFullName from @WorkingList
where CountTotal = 1
Delete from @WorkingList where CountTotal = 1
-- Handle Matching Full Names
Insert into @RenumList( userid, RenumFirstName, RenumLastName, RenumLastLogin, RenumFullName )
Select userid, WorkingFirstName, WorkingLastName, WorkingLastLogin, WorkingFullName from @WorkingList
where CountFull = 2
-- Setup the duplicate renumlist
Insert into @Renum2List
Select * from @RenumList
Delete from @WorkingList where CountFull = 2
-- For the duplicate full names, get the max last login date and move to clean list
Insert into @CleanList ( userid, CleanFirstName, CleanLastName, CleanLastLogin, CleanFullName )
Select userid, RenumFirstName, RenumLastName, RenumLastLogin, RenumFullName from @RenumList
where RenumLastLogin = ( select Renum2LastLogin from @Renum2List
where Renum2LastLogin = ( select max( Renum2LastLogin ) from @Renum2List where
Renum2FullName = RenumFullName ) )
delete from @RenumList where userid in ( select userid from @CleanList )
delete from @Renum2List where userid2 in ( select userid from @CleanList )
-- Build the Xref file for the renumbers
Insert into @Xref( OldUserId, OldFullName )
select userid, RenumFullName from @RenumList
Update @Xref
set NewUserId = ( select userid from @CleanList where CleanFullName = OldFullName )
Update @Xref
set NewFullName = ( select CleanFullName from @CleanList where CleanFullName = OldFullName )
-- Now Move the @RenumList to the @PurgeList
Insert into @PurgeList( userid, PurgeFirstName, PurgeLastName, PurgeLastLogin, PurgeFullName )
Select userid, RenumFirstName, RenumLastName, RenumLastLogin, RenumFullName from @RenumList
Delete from @RenumList
Delete from @Renum2List
-- Create the SQL Statements to update the other tables
Insert into @Output( ResolveFkLearningSQL )
Select 'Update tblUserLearningObjectSession set userid = ' + NewUserId + ' where userid = ' + OldUserId
from @Xref
Insert into @Output( ResolveFkSessionSQL )
Select 'Update tblUserSessionMap set userid = ' + NewUserId + ' where userid = ' + OldUserId
from @Xref
Insert into @Output( CleanUserTableSQL )
Select 'Delete from tbllmsuser where userid = ' + userid
from @PurgeList
Insert into @Output( CleanUserDetailTableSQL )
Select 'Delete from tbllmsuserdetail where userid = ' + userid
from @PurgeList
Select * from acz_users
Select * from @WorkingList -- These are the ones that need to be resolved
Select * from @CleanList -- These will be left alone
Select * from @RenumList -- Should be empty
Select * from @Xref -- Shows who is being renumbered to what
Select * from @PurgeList -- Shows who is getting deleted
Select * from @Output -- Contains the SQL Statements to clean up the files