Scrub Personal Information Data

  • q

  • Does this example helps you?

    Note that the code is non deterministic, so the results aren't guaranteed to be the same on each run.

    CREATE TABLE PersonalInformation(

    PersonID int,

    Name varchar(200),

    SomeNumber char(9)

    );

    INSERT INTO PersonalInformation

    VALUES

    (1, 'Clark Kent', '123456789'),

    (2, 'Bruce Wayne', '456789123'),

    (5, 'Diana Prince', '798456123');

    CREATE TABLE Names(

    Name varchar(200)

    );

    INSERT INTO Names

    VALUES

    ('Superman' ),

    ('Batman' ),

    ('Wonder Woman');

    WITH ctePersonalInformation AS(

    SELECT *, ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) rn

    FROM PersonalInformation

    ),

    cteNames AS(

    SELECT *, ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) rn

    FROM Names

    )

    UPDATE p SET

    Name = n.name

    FROM ctePersonalInformation p

    JOIN cteNames n ON p.rn = n.rn;

    SELECT *

    FROM PersonalInformation;

    DROP TABLE PersonalInformation, Names;

    Luis C.
    General Disclaimer:
    Are you seriously taking the advice and code from someone from the internet without testing it? Do you at least understand it? Or can it easily kill your server?

    How to post data/code on a forum to get the best help: Option 1 / Option 2
  • scrubbing data is really nothing more than an update statement, but you have to come up with a pattern for your updates.

    in my case, I've got a suite of tables i pulled of of various list websites (list of animal names, list of colors, list of nouns, list of movie stars. i use those lists to generate fake data.

    so i like to generate a table from those, and update from that.

    the basiscs:

    UPDATE MyTarget

    SET MyTarget.FirstName = MySource.FirstName,

    MyTarget.LastName = MySource.LastName,

    MyTarget.SSN = MySource.[SocialSecurity]

    FROM SomeTable MyTarget

    INNER JOIN [#ToBeImported] MySource

    ON MyTarget.ID = MySource.JoiningID

    --allowing duplicates: every 10K get the same values

    -- *MyTarget.ID % 10000 + 1 = MySource.JoiningID

    and here is a solid example of generating some sample fake data, which you can use to update from.

    this snippet generates 1,560,896 rows.

    IF OBJECT_ID('tempdb.[dbo].[#ToBeImported]') IS NOT NULL

    DROP TABLE [dbo].[#ToBeImported]

    --#################################################################################################

    -- Fake Data select * from [MovieStarNames]

    --#################################################################################################

    IF OBJECT_ID('[dbo].[MovieStarNames]') IS NOT NULL

    DROP TABLE [dbo].[MovieStarNames]

    GO

    CREATE TABLE [dbo].[MovieStarNames] (

    [StarFirstName] VARCHAR(50) NOT NULL,

    [StarLastName] VARCHAR(50) NOT NULL,

    [GENDER] VARCHAR(1) NULL)

    GO

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Al','Pacino','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Alec','Guinness','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Anthony','Hopkins','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Ben','Kingsley','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Bill','Murray','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Brad','Pitt','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Charlton','Heston','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Christian','Bale','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Christopher','Walken','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Clint','Eastwood','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Colin','Firth','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Cuba','Gooding Jr.','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Danny','DeVito','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Denzel','Washington','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Don','Cheadle','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Dustin','Hoffman','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Ed','Harris','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Edward','Norton','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Forest','Whitaker','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Gary','Oldman','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Gene','Hackman','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('George','Clooney','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Harrison','Ford','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Jack','Lemmon','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Jack','Nicholson','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('James','Stewart','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Jeff','Bridges','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Jeremy','Irons','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Joaquin','Phoenix','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Joe','Pesci','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('John','Hurt','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('John','Malkovich','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('John','Wayne','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Johnny','Depp','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Jon','Voight','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Kevin','Kline','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Kevin','Spacey','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Leonardo','DiCaprio','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Liam','Neeson','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Marlon','Brando','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Martin','Sheen','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Matt','Damon','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Mel','Gibson','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Michael','Caine','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Michael','Douglas','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Morgan','Freeman','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Philip','Seymour Hoffman','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Ralph','Fiennes','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Robert','De Niro','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Robert','Duvall','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Robert','Redford','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Robin','Williams','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Russell','Crowe','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Samuel','L. Jackson','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Sean','Connery','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Sean','Penn','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Steve','McQueen','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Tim','Robbins','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Tom','Cruise','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Tom','Hanks','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Tommy','Lee Jones','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Will','Smith','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Jamie','Foxx','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Heath','Ledger','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Ian','McKellen','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Christoph','Waltz','M')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Bette','Davis','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Katharine','Hepburn','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Marilyn','Monroe','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Ingrid','Bergman','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Meryl','Streep','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Vivien','Leigh','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Olivia','de Havilland','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Audrey','Hepburn','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Barbara','Stanwyck','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Kate','Winslet','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Ava','Gardner','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Rita','Hayworth','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Elizabeth','Taylor','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Judy','Garland','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Claudette','Colbert','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Joan','Crawford','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Joan','Fontaine','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Jodie','Foster','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Natalie','Wood','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Cate','Blanchett','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Judi','Dench','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Julie','Andrews','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Maureen','O’Hara','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Grace','Kelly','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Lauren','Bacall','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Sandra','Bullock','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Sally','Field','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Julia','Roberts','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Deborah','Kerr','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Shirley','MacLaine','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Greta','Garbo','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Myrna','Loy','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Natalie','Portman','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Nicole','Kidman','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Sophia','Loren','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Susan','Hayward','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Diane','Keaton','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Marlene','Dietrich','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Carole','Lombard','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Ginger','Rogers','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Jean','Arthur','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Jean','Harlow','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Doris','Day','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Jane','Fonda','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Reese','Witherspoon','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Mary','Pickford','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Angelina','Jolie','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Mae','West','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Barbra','Streisand','F')

    INSERT INTO [MovieStarNames] ([StarFirstName],[StarLastName],[GENDER])VALUES('Halle','Berry','F')

    GO

    --#################################################################################################

    --random data base don movie stars

    --#################################################################################################

    --our fake data from randomized functions:

    SELECT

    row_number() over(order by mv.[StarLastName]) + 5555 As JoiningID,

    convert(int,NULL) AS PrID,

    RIGHT('0000000000' + CONVERT(varchar,abs(checksum(newid()))),10) AS [CompanyIdentifier],

    mv.[StarFirstName] AS [FirstName],

    lv.[StarLastName] AS [LastName],

    'A' AS [MiddleName],

    CONVERT(varchar,ABS(CHECKSUM(NEWID()))%50000+1) + ' North Beach Place' AS [Address],

    'Suite ' + CONVERT(varchar,ABS(CHECKSUM(NEWID()))%500+1) AS [Address2],

    'Hollywood' AS [City],

    'FL' AS [State],

    ABS(CHECKSUM(NEWID()))%50000+1 AS [Zip],

    RIGHT('9542345678' + CONVERT(varchar,abs(checksum(newid()))),10) AS [HomePhone],

    RIGHT('7542345678' + CONVERT(varchar,abs(checksum(newid()))),10) AS [CellPhone],

    mv.[StarFirstName] + '.' + mv.[StarLastName] + '@fakemail.com' AS [EMailAddress],

    RIGHT('222222222' + CONVERT(varchar,abs(checksum(newid()))),9) AS [SocialSecurity],

    mv.Gender AS [Gender],

    dateadd(dd, -1 * ABS(CHECKSUM(NEWID()))%40000+1, convert(date,getdate())) As [BirthDate],

    ABS(checksum(mv.StarLastName)) % 2 AS [IsUsCitizen],

    CASE ABS(checksum(mv.StarLastName)) % 4 + 1

    WHEN 4 THEN 'Widow'

    WHEN 3 THEN 'Divorced'

    WHEN 2 THEN 'Married'

    WHEN 1 THEN 'Single'

    END As [MaritalStatus],

    NULL AS [CRM_DocID],

    NULL AS [PhySrv_DocID],

    'FL' AS [DriverLicenseState],

    LEFT(mv.[StarLastName],1) + RIGHT('9542345678' + CONVERT(varchar,abs(checksum(newid()))),12) AS [DriverLicenseNumber],

    DATEADD(dd,ABS(CHECKSUM(NEWID()))%2000+1,convert(date,getdate())) AS [DriverLicenseExpiration],

    CASE ABS(checksum(mv.StarLastName)) % 2 WHEN 1 THEN 'FullTime' ELSE 'PartTime' END AS [FullPartTime],

    CASE ABS(checksum(mv.StarLastName)) % 3 WHEN 1 THEN 1 ELSE 0 END AS [IsRemote],

    CASE ABS(checksum(mv.StarLastName)) % 5 WHEN 0 THEN 1 ELSE 0 END AS [IsSupervisor],

    'M' + RIGHT(CONVERT(varchar,abs(checksum(newid()))),5) As LicenseNumber,

    'FL' As MedicalLicenseState,

    CONVERT(varchar,ABS(CHECKSUM(NEWID()))%50000+1) + ' South Beach Road' As ReferenceAddress,

    'Hollywood' As ReferenceCity,

    'FL' As ReferenceState,

    '33324' As ReferenceZip,

    RIGHT('9542345678' + CONVERT(varchar,abs(checksum(newid()))),10) As ReferencePhone

    INTO #ToBeImported

    FROM MovieStarNames mv

    CROSS JOIN MovieStarNames lv

    CROSS JOIN MovieStarNames lv2

    --CROSS JOIN MovieStarNames lv3

    --order by newid() SLOWS THINGS DOWN severely, but consecutive rows don't have same values

    Lowell


    --help us help you! If you post a question, make sure you include a CREATE TABLE... statement and INSERT INTO... statement into that table to give the volunteers here representative data. with your description of the problem, we can provide a tested, verifiable solution to your question! asking the question the right way gets you a tested answer the fastest way possible!

Viewing 3 posts - 1 through 3 (of 3 total)

You must be logged in to reply to this topic. Login to reply