Jeff,
Thanks for that link too. I've updated all the iterations with your tally table method as well as other parts of the code. It's pretty cool to do it that way. Here is the altered code of the modified algorithm.
USE [master]
GO
Create function [dbo].[fn_Phon_NYSIIS_ver2] (@Word varchar(30))
RETURNS varchar(30)
BEGIN
Declare
@NYSIIS varchar(30),
@FirstCharacter char(1),
@Length int,
@FirstVowel char(1)
/*
Remove all non-alpha characters, trim trailing/leading whitespace and set to upper case
*/
select @Word = (
select Case When substring(@Word,t.n,1)
in ('A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z')
Then SUBSTRING(@Word,t.N,1) Else '' END
From master.dbo.Tally t
Where t.N <= len(@Word)
Order By t.N For XML Path(''),Type
).value('(./text())[1]','varchar(max)')
SET @Word = rtrim(ltrim(@Word))
SET @Word = Upper(@Word)
SET @Length = len(@Word)
/*
1.if the first character of the name is a vowel, remember it
*/
SET @FirstVowel = ''
IF left(@Word,1) in ('A','E','I','O','U')BEGIN SET @FirstVowel = left(@Word,1) END
/*
2. remove all 'S' and 'Z' chars from the end of the name
*/
select @Word =
(
select substring(@Word,t.N,1)
From master.dbo.Tally t
Where t.N <= DATALENGTH(@Word) and t.N <=
(
select max(Case When SUBSTRING(@Word,t.n,1) not in ('S','Z') Then t.N Else 0 END)
From master.dbo.Tally t Where t.N <= Datalength(@Word)
)
Order By t.N FOR XML PATH(''),TYPE
).value('(./text())[1]','varchar(max)')
/*
3. Transcode Initial Strings
*/
SET @Length = len(@Word)
IF left(@Word,3) = 'MAC' BEGIN SET @Word = 'MCC' + right(@Word,@Length-3) END
IF left(@Word,2) = 'PF' BEGIN SET @Word = 'F' + right(@Word,@Length-2) END
/*
4. Transcode trailing strings as follows
IX » IC
EX » EC
YE » Y
EE » Y
IE » Y
DT » D
RT » D
RD » D
NT » D
ND » D
*/
SET @Length = len(@Word)
IF RIGHT(@Word,2) = 'IX' BEGIN SET @Word = left(@Word,@Length-2) + 'IC' END
IF RIGHT(@Word,2) = 'EX' BEGIN SET @Word = left(@Word,@Length-2) + 'EC' END
IF RIGHT(@Word,2) in ('YE','EE','IE') BEGIN SET @Word = left(@Word,@Length-2) + 'Y' END
IF RIGHT(@Word,2) in ('DT','RT','RD','NT','ND') BEGIN SET @Word = left(@Word,@Length-2) + 'D' END
/*
5. transcode 'EV' to 'EF' if not at start of name
*/
SET @Length = len(@Word)
SET @Word = left(@Word,2) + REPLACE(SUBSTRING(@Word,3,@Length-2),'EV','EF')
/*
6. use first character of name as first character of key
*/
SET @FirstCharacter = Left(@Word,1)
/*
7. remove any 'W' that follows a vowel
*/
select @Word =
(
select
Case When substring(@Word,t.N,1) = 'W' and SUBSTRING(@Word,t.N-1,1) not in ('A','E','I','O','U')
Then SUBSTRING(@Word,t.N,1)
When substring(@Word,t.N,1) <> 'W' Then Substring(@Word,t.N,1) END
From master.dbo.Tally t
Where t.N <= DATALENGTH(@Word)
Order By t.N FOR XML PATH(''),TYPE
).value('(./text())[1]','varchar(max)')
/*
8. replace all vowels with 'A' and collapse all strings of repeated 'A' to one
*/
SET @Word = replace(@Word,'E','A')
SET @Word = replace(@Word,'I','A')
SET @Word = replace(@Word,'O','A')
SET @Word = replace(@Word,'U','A')
--Collapse all A's
select @Word =
(
Select Case When SUBSTRING(@Word,t.N,1) = 'A' and SUBSTRING(@Word,t.N+1,1) = 'A'
Then 'A'
Else SUBSTRING(@Word,t.N,1)
END
From master.dbo.Tally t
Where substring(@Word,t.N,1) <> substring(@Word,t.N-1,1) and t.N <= datalength(@Word)
Order By t.N FOR XML PATH(''),TYPE
).value('(./text())[1]','varchar(max)')
/*
9.transcode 'GHT' to 'GT'
*/
SET @Word = REPLACE(@WOrd,'GHT','GT')
/*
10 transcode 'DG' to 'G'
*/
SET @Word = REPLACE(@WOrd,'DG','G')
/*
11.transcode 'PH' to 'F'
*/
SET @Word = REPLACE(@Word,'PH','F')
/*
12.if not first character, eliminate all 'H' preceded or followed by a vowel
*/
select @Word =
(
select
Case When substring(@Word,t.N,1) = 'H' and t.N > 1 and
(SUBSTRING(@Word,t.N-1,1) in ('A','E','I','O','U') or SUBSTRING(@Word,t.N+1,1) in ('A','E','I','O','U'))
Then '' Else substring(@Word,t.N,1) END
From master.dbo.Tally t
Where t.N <= DATALENGTH(@Word)
Order By t.N FOR XML PATH(''),TYPE
).value('(./text())[1]','varchar(max)')
/*
13.change 'KN' to 'N', else 'K' to 'C'
*/
SET @Word = REPLACE(@Word,'KN','N')
SET @Word = REPLACE(@Word,'K','C')
/*
14.if not first character, change 'M' to 'N'
*/
SET @Length = len(@Word)
SET @Word = left(@Word,1) + REPLACE(right(@Word,@Length-1),'M','N')
/*
15.if not first character, change 'Q' to 'G'
*/
SET @Length = len(@Word)
SET @Word = left(@Word,1) + REPLACE(right(@Word,@Length-1),'Q','G')
/*
16.change 'SH' to 'S'
*/
SET @Word = REPLACE(@Word,'SH','S')
/*
17.change 'SCH' to 'S'
*/
SET @Word = REPLACE(@Word,'SCH','S')
/*
18.change 'SCH' to 'S'
*/
SET @Word = REPLACE(@Word,'YW','Y')
/*
19.if not first or last character, change 'Y' to 'A'
*/
SET @Length = len(@Word)
SET @Word = left(@Word,1) + replace(substring(@Word,2,@Length-2),'Y','A') + RIGHT(@Word,1)
/*
20.change 'WR' to 'R'
*/
SET @Word = REPLACE(@Word,'WR','R')
/*
21.if not first character, change 'Z' to 'S'
*/
SET @Length = len(@Word)
SET @Word = left(@Word,1) + REPLACE(right(@Word,@Length-1),'Z','S')
/*
22.change 'AY' to 'Y'
*/
SET @Word = REPLACE(@Word,'AY','Y')
/*
23. remove all trailing vowels
*/
select @Word =
(
select substring(@Word,t.N,1)
From master.dbo.Tally t
Where t.N <= DATALENGTH(@Word) and t.N <=
(
select max(Case When SUBSTRING(@Word,t.n,1) not in ('A','E','I','O','U') Then t.N Else 0 END)
From master.dbo.Tally t Where t.N <= Datalength(@Word)
)
Order By t.N FOR XML PATH(''),TYPE
).value('(./text())[1]','varchar(max)')
/*
24.collapse all strings of repeated characters
*/
select @Word =
(
Select substring(@Word,t.N,1)
From master.dbo.Tally t
Where substring(@Word,t.N,1) <> substring(@Word,t.N-1,1) and t.N <= datalength(@Word)
Order By t.N FOR XML PATH(''),TYPE
).value('(./text())[1]','varchar(max)')
/*
25.If first character of original name is a vowel, prepend to code (or replace first transcoded 'A')
*/
IF @FirstVowel > '' and LEFT(@Word,1) = 'A'
BEGIN
SET @Word = STUFF(@Word,1,1,@FirstVowel)
END
SET @NYSIIS = @Word
RETURN (@NYSIIS)
END