brickpack (7/11/2013)
I'm trying to split a full name column up into First / Middle / Last....but some of the names are like "Joe B. W. LastName" or "Jenny MiddleName MaidenName MarriedName" and so on...In the past I've used parsename() and charindex() to solve this with "cleaner" data. How would I go about handling these?
Thanks
For any large list it's a nightmare! :crazy:
However, here's a community-inspired function thats been kicking around for years (not mine so I can't take any credit for it) that will make a good start on parsing names. Usually it will still take a bit of tweaking to cover unusual combinations (which can be added to the function ad infinitum). And then several passes through the data as well as visual inspection...like I said: a nightmare.
ALTER FUNCTION [dbo].[svfFormatName]
(
@NameString VARCHAR(100)
,@NameFormat VARCHAR(20)
)
RETURNS VARCHAR(100)
AS
BEGIN
--svfFormatName decodes a NameString into its component parts and returns it in a requested format.
--@NameString is the raw value to be parsed.
--@NameFormat is a string that defines the output format. Each letter in the string represents
--a component of the name in the order that it is to be returned.
-- [H] = Full honorific
-- [h] = Abbreviated honorific
-- [F] = First name
-- [f] = First initial
-- [M] = Middle name
-- [m] = Middle initial
-- [L] = Last name
-- [l] = Last initial
-- = Full suffix
-- = Abbreviated suffix
-- [.] = Period
-- [,] = Comma
-- [ ] = Space
--Sample Syntax : select dbo.svfFormatName('President Barack Hussein Obama Senior', 'h. F m. L s.')
--Returns : 'Pres. Barack H. Obama Sr.'
DECLARE @Honorific VARCHAR(20)
DECLARE @FirstName VARCHAR(20)
DECLARE @MiddleName VARCHAR(30)
DECLARE @LastName VARCHAR(30)
DECLARE @Suffix VARCHAR(20)
DECLARE @TempString VARCHAR(100)
DECLARE @TempString2 VARCHAR(100)
DECLARE @IgnorePeriod CHAR(1)
--Prepare the string
--Make sure each period is followed by a space character.
SET @NameString = RTRIM(LTRIM(REPLACE(@NameString,'.','. ')))
--Replace numeric suffixes
SET @NameString = REPLACE(@NameString,'2nd','II')
SET @NameString = REPLACE(@NameString,'3rd','III')
SET @NameString = REPLACE(@NameString,'4th','IV')
--Remove disallowed characters
DECLARE @PatternString VARCHAR(50)
SET @NameString = REPLACE(@NameString,'-','¬') --Replace dashes we want to save, as patindex does not allow escaping characters.
SET @PatternString = '%[^a-z ¬()`,'''']%~' --'''' includes single quote in permitted character list.
WHILE PATINDEX(@PatternString,@NameString) > 0
SET @NameString = STUFF(@NameString,PATINDEX(@PatternString,@NameString),1,' ')
SET @NameString = REPLACE(@NameString,'¬','-') --Put the dashes back
--Remove telephone ext
SET @NameString = LTRIM(RTRIM(REPLACE(' ' + @NameString + ' ',' EXT ',' ')))
--Make sure there is at least one space after commas
SET @NameString = REPLACE(@NameString,',',', ')
--Eliminate double-spaces.
WHILE CHARINDEX(' ',@NameString) > 0
SET @NameString = REPLACE(@NameString,' ',' ')
--Eliminate periods
WHILE CHARINDEX('.',@NameString) > 0
SET @NameString = REPLACE(@NameString,'.','')
--Remove spaces around hyphenated names
SET @NameString = REPLACE(REPLACE(@NameString,'- ','-'),' -','-')
--Join Irish surnames
SET @NameString = REPLACE(@NameString,'O'' ','O''')
--Remove commas before suffixes
SET @NameString = REPLACE(@NameString,', CLU',' CLU')
SET @NameString = REPLACE(@NameString,', CNP',' CNP') --Certified Notary Public
SET @NameString = REPLACE(@NameString,', ESQ',' ESQ')
SET @NameString = REPLACE(@NameString,', Jr',' Jr')
SET @NameString = REPLACE(@NameString,', LPN',' LPN')
SET @NameString = REPLACE(@NameString,', RN',' RN')
SET @NameString = REPLACE(@NameString,', Sr',' Sr')
SET @NameString = REPLACE(@NameString,', II',' II')
SET @NameString = REPLACE(@NameString,', III',' III')
SET @NameString = REPLACE(@NameString,', IV',' IV')
--Temporarily join multi-word firstnames
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Ann Marie ',' Ann~Marie '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Anna Marie ',' Anna~Marie '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Barbara Jo ',' Barbara~Jo '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Betty Lou ',' Betty~Lou '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Billie Jo ',' Billie~Jo '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Bobbi Jo ',' Bobbi~Jo '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Dee Dee ',' Dee~Dee '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Fannie Mae ',' Fannie~Mae '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Lisa Marie ',' Lisa~Marie '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Mary Beth ',' Mary~Beth '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Mary Ellen ',' Mary~Ellen '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Mary Jane ',' Mary~Jane '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Mary Jo ',' Mary~Jo '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Mary Lou ',' Mary~Lou '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Rose Mary ',' Rose~Mary '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Rose Marie ',' Rose~Marie '))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Sugar Rae ',' Sugar~Rae '))
--For compound names ending in Ann, also include Anne variation.
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Beth Ann',' Beth~Ann'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Dee Ann',' Dee~Ann'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Dorothy Ann',' Dorothy~Ann'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Ellen Ann',' Ellen~Ann'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Jo Ann',' Jo~Ann'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Lea Ann',' Lea~Ann'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Lee Ann',' Lee~Ann'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Leigh Ann',' Leigh~Ann'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Lu Ann',' Lu~Ann'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Mary Ann',' Mary~Ann'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Rae Ann',' Rae~Ann'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Ruth Ann',' Ruth~Ann'))
--Temporarily join multi-word surnames
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Dos ',' Dos~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' St ',' St.~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' St. ',' St.~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Da ',' Da~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Di ',' Di~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Del ',' Del~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Des ',' Des~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Vanden ',' Vanden~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Van De ',' Van~De~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Van Den ',' Van~Den~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Vander ',' Vander~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Van ',' Van~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Ver ',' Ver~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Van Der ',' Van~Der~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Von Der ',' Von~Der~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Von ',' Von~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Mc ',' Mc~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' Mac ',' Mac~'))
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' La ',' La~')) --Must be checked before "De", to handle "De La [Surname]"s.
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' De ',' De~'))
--Temporarily join 2nd lastname and nicknames defined within parethesis
SET @NameString = LTRIM(REPLACE(' ' + @NameString,' (','~('))
--If the lastname is listed first, strip it off.
SET @TempString = RTRIM(LEFT(@NameString,CHARINDEX(' ',@NameString)))
--Search for suffixes trailing the LastName
SET @TempString2 = LTRIM(RIGHT(@NameString,LEN(@NameString) - LEN(@TempString)))
SET @TempString2 = RTRIM(LEFT(@TempString2,CHARINDEX(' ',@TempString2)))
IF RIGHT(@TempString2,1) = ','
BEGIN
SET @Suffix = LEFT(@TempString2,LEN(@TempString2) - 1)
SET @LastName = LEFT(@TempString,LEN(@TempString))
END
IF RIGHT(@TempString,1) = ','
SET @LastName = LEFT(@TempString,LEN(@TempString) - 1)
IF LEN(@LastName) > 0
SET @NameString = LTRIM(RIGHT(@NameString,LEN(@NameString) - LEN(@TempString)))
IF LEN(@Suffix) > 0
SET @NameString = LTRIM(RIGHT(@NameString,LEN(@NameString) - LEN(@TempString2)))
--Get rid of any remaining commas
WHILE CHARINDEX(',',@NameString) > 0
SET @NameString = REPLACE(@NameString,',','')
--Get Honorific and strip it out of the string
SET @TempString = RTRIM(LEFT(@NameString,CHARINDEX(' ',@NameString + ' ')))
IF @TempString IN ('Admiral','Adm','Captain','Cpt','Capt','Commander','Cmd','Corporal','Cpl','Doctor','Dr','Father','Fr','General','Gen','Governor','Gov','Honorable','Hon','Lieutenant','Lt','Madam','Mdm','Madame','Mme','Mademoiselle','Mlle','Major','Maj','Miss','Ms','Mr','Mrs','President','Pres','Private','Pvt','Professor','Prof','Rabbi','Reverend','Rev','Senior','Sr','Seniora','Sra','Seniorita','Srta','Sergeant','Sgt','Sir','Sister')
SET @Honorific = @TempString
IF LEN(@Honorific) > 0
SET @NameString = LTRIM(RIGHT(@NameString,LEN(@NameString) - LEN(@TempString)))
--Get Suffix and strip it out of the string
IF @Suffix IS NULL
BEGIN
SET @TempString = LTRIM(RIGHT(@NameString,CHARINDEX(' ',REVERSE(@NameString) + ' ')))
WHILE @TempString IN ('Attorney','Att','Atty','BA','BS','CPA','CNP', --Certified Notary Public
'DDS','DVM','Esquire','Esq','II','III','IV','Junior','Jr','LPN','MBA','MD','OD','PHD','RN','Senior','Sr','ASA','SRA','CLU'--Realestate Certifictaions
)
BEGIN
SET @Suffix = @TempString + COALESCE(' ' + @Suffix,'')
SET @NameString = RTRIM(LEFT(@NameString,LEN(@NameString) - LEN(@TempString)))
SET @TempString = LTRIM(RIGHT(@NameString,CHARINDEX(' ',REVERSE(@NameString) + ' ')))
END
END
IF @LastName IS NULL
BEGIN
--Get LastName and strip it out of the string
SET @LastName = LTRIM(RIGHT(@NameString,CHARINDEX(' ',REVERSE(@NameString) + ' ')))
SET @NameString = RTRIM(LEFT(@NameString,LEN(@NameString) - LEN(@LastName)))
END
--Get FirstName and strip it out of the string
SET @FirstName = RTRIM(LEFT(@NameString,CHARINDEX(' ',@NameString + ' ')))
SET @NameString = LTRIM(RIGHT(@NameString,LEN(@NameString) - LEN(@FirstName)))
--Anything remaining is MiddleName
SET @MiddleName = @NameString
--Create the output string
SET @TempString = ''
WHILE LEN(@NameFormat) > 0
BEGIN
IF @IgnorePeriod = 'F'
OR LEFT(@NameFormat,1) <> '.'
BEGIN
SET @IgnorePeriod = 'F'
SET @TempString = @TempString
+ CASE ASCII(LEFT(@NameFormat,1))
WHEN '32' THEN
CASE RIGHT(@TempString,1) --Space
WHEN ' ' THEN ''
ELSE ' '
END
WHEN '44' THEN
CASE RIGHT(@TempString,1) --Comma
WHEN ' ' THEN ''
ELSE ','
END
WHEN '46' THEN
CASE RIGHT(@TempString,1) --Period
WHEN ' ' THEN ''
ELSE '.'
END
WHEN '70' THEN ISNULL(@FirstName,'') --F
WHEN '72' THEN
CASE @Honorific --H
WHEN 'Adm' THEN 'Admiral'
WHEN 'Capt' THEN 'Captain'
WHEN 'Cmd' THEN 'Commander'
WHEN 'Cpl' THEN 'Corporal'
WHEN 'Cpt' THEN 'Captain'
WHEN 'Dr' THEN 'Doctor'
WHEN 'Fr' THEN 'Father'
WHEN 'Gen' THEN 'General'
WHEN 'Gov' THEN 'Governor'
WHEN 'Hon' THEN 'Honorable'
WHEN 'Lt' THEN 'Lieutenant'
WHEN 'Maj' THEN 'Major'
WHEN 'Mdm' THEN 'Madam'
WHEN 'Mlle' THEN 'Mademoiselle'
WHEN 'Mme' THEN 'Madame'
WHEN 'Ms' THEN 'Miss'
WHEN 'Pres' THEN 'President'
WHEN 'Prof' THEN 'Professor'
WHEN 'Pvt' THEN 'Private'
WHEN 'Sr' THEN 'Senior'
WHEN 'Sra' THEN 'Seniora'
WHEN 'Srta' THEN 'Seniorita'
WHEN 'Rev' THEN 'Reverend'
WHEN 'Sgt' THEN 'Sergeant'
ELSE ISNULL(@Honorific,'')
END
WHEN '76' THEN ISNULL(@LastName,'') --L
WHEN '77' THEN ISNULL(@MiddleName,'') --M
WHEN '83' THEN
CASE @Suffix --S
WHEN 'Att' THEN 'Attorney'
WHEN 'Atty' THEN 'Attorney'
WHEN 'Esq' THEN 'Esquire'
WHEN 'Jr' THEN 'Junior'
WHEN 'Sr' THEN 'Senior'
ELSE ISNULL(@Suffix,'')
END
WHEN '102' THEN ISNULL(LEFT(@FirstName,1),'') --f
WHEN '104' THEN
CASE @Honorific --h
WHEN 'Admiral' THEN 'Adm'
WHEN 'Captain' THEN 'Capt'
WHEN 'Commander' THEN 'Cmd'
WHEN 'Corporal' THEN 'Cpl'
WHEN 'Doctor' THEN 'Dr'
WHEN 'Father' THEN 'Fr'
WHEN 'General' THEN 'Gen'
WHEN 'Governor' THEN 'Gov'
WHEN 'Honorable' THEN 'Hon'
WHEN 'Lieutenant' THEN 'Lt'
WHEN 'Madam' THEN 'Mdm'
WHEN 'Madame' THEN 'Mme'
WHEN 'Mademoiselle' THEN 'Mlle'
WHEN 'Major' THEN 'Maj'
WHEN 'Miss' THEN 'Ms'
WHEN 'President' THEN 'Pres'
WHEN 'Private' THEN 'Pvt'
WHEN 'Professor' THEN 'Prof'
WHEN 'Reverend' THEN 'Rev'
WHEN 'Senior' THEN 'Sr'
WHEN 'Seniora' THEN 'Sra'
WHEN 'Seniorita' THEN 'Srta'
WHEN 'Sergeant' THEN 'Sgt'
ELSE ISNULL(@Honorific,'')
END
WHEN '108' THEN ISNULL(LEFT(@LastName,1),'') --l
WHEN '109' THEN ISNULL(LEFT(@MiddleName,1),'') --m
WHEN '115' THEN
CASE @Suffix --s
WHEN 'Attorney' THEN 'Atty'
WHEN 'Esquire' THEN 'Esq'
WHEN 'Junior' THEN 'Jr'
WHEN 'Senior' THEN 'Sr'
ELSE ISNULL(@Suffix,'')
END
ELSE ''
END
--The following honorifics and suffixes have no further abbreviations, and so should not be followed by a period:
IF (
(
ASCII(LEFT(@NameFormat,1)) = 72
AND @Honorific IN ('Rabbi','Sister')
)
OR (
ASCII(LEFT(@NameFormat,1)) = 115
AND @Suffix IN ('ASA','BA','BS','CLU','CNP','DDS','DVM','II','III','IV','V','MBA','MD','PHD','RN','LPN','SRA')
)
)
SET @IgnorePeriod = 'T'
--If the FirstName or MiddleName is not an initial, then do not follow with a period.
IF ASCII(LEFT(@NameFormat,1)) = '70'
AND LEN(@FirstName) > 1
SET @IgnorePeriod = 'T'
IF ASCII(LEFT(@NameFormat,1)) = '77'
AND LEN(@MiddleName) > 1
SET @IgnorePeriod = 'T'
END
SET @NameFormat = RIGHT(@NameFormat,LEN(@NameFormat) - 1)
END
--select replace(@TempString, '~', ' ')
RETURN REPLACE(@TempString, '~', ' ')
END