June 8, 2010 at 6:32 am
Hi everyone,
In the constant search for a "perfect" propercase function, I came across the following code at http://www.tek-tips.com/viewthread.cfm?qid=1604419&page=2
use adventureworks
go
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO
create FUNCTION fn_ProperCase (@Str varchar(max)) returns varchar(max) as
BEGIN
/*
Purpose: Propercase function that handles Mc and Mac surnames correctly, as well as exception list for items
to leave as lower case, upper case and exceptions to tha Mc/Mac rules.
Adding items to the three exception strings to suit your needs.
Author: Clayton_Groom@hotmail.com. 2004-03-02
Loosly based on a function by Tim Raster. Link: http://www.houseoffusion.com/cf_lists/index.cfm?method=messages&ThreadID=541&forumid=6ܒ
Added logic to handle the first or only word in a string correctly. was not handling exceptions properly.
Added exception cases for items to be excluded and and items to keep in upper case
Changed to use patindex to search for delimiters instead of looping on each delimiter.
2004-03-04 Added numbers and additional special characters to delimiter search criteria
2004-03-05 Added code to convert html char() tag for apostrophe character, added more exceptions
2004-03-11 CDG added more delimiters and html exceptions
2004-03-19 CDG Added '&' delimiter and replace for ''' html string.
2004-06-23 CDG added a couple more replaces for html coded characters
*/
If @STR > ''
begin
declare @DelimPos smallint
,@NextDelim smallint
,@StrLen smallint
,@WordLen smallint
declare @w_LOWERCASE_EXCEPTIONS varchar(4000)
declare @w_ALLCAPS_EXCEPTIONS varchar(4000)
declare @w_MAC_EXCEPTIONS varchar(4000)
declare @w_MIXEDCAPS_EXCEPTIONS varchar(4000)
set @w_LOWERCASE_EXCEPTIONS = '|with|when|and|or|an|the|from|to|on|as|of|in|at|for|will|'
set @w_ALLCAPS_EXCEPTIONS = '|LLP|LLC|DDS|MD|JD|MC|OB|JJ|NMMA|RRT|US|USA|PO|SW|SE|NW|NE|N|E|W|S|II|III|IV|VI|VII|VIII|IX|RV|LP|RR|HC|NRA|SCUBA|NAACP|NH|CA|HCI|SS|KC|RC|MC|DL|'
set @w_MAC_EXCEPTIONS = '|Macon|Maceo|Mackinac|Mackinac Island|Mackinaw|Mackinaw City|Macks Creek|Macks Inn|Macom|Macomb|Macombtownship|Macon|Macy|'
set @w_MIXEDCAPS_EXCEPTIONS = '|PhD|'
-- clean up html tags that sometimes get into name/address data from web forms
set @STR= replace(@str, ''','''')
set @STR= replace(@str, ''','''')
set @STR= replace(@str, ''','''')
set @STR= replace(@str, '&','')
set @STR= replace(@str, '''','''')
set @STR= replace(@str, ''','''')
-- looping logic requires the string start and end with a valid delimiter to avoid having to code for special cases
set @STR = '|' + lower(@str) + '|'
set @Strlen = len (@Str)
set @delimpos = patindex('%[ 0-9/\.''"`(){}<>+--_|#$*:;?&%]%', @STR)
set @nextDelim = @delimpos + patindex('%[ 0-9/\.''"`(){}<>+--_|#$*:;?&%]%', substring(@str, @delimpos+1, @strlen - @delimpos ))
set @wordlen = (@NextDelim - @DelimPos) -1
while @DelimPos < @NextDelim
begin
set @STR = case
-- 's condition. the 's' following a possessive case should not be upper cased...
when substring(@str,@delimpos ,1) = ''''
and substring(@str, @DelimPos + 1, @WordLen) = 's'
and @nextDelim = @DelimPos +2
then Left(@Str, @DelimPos)
+ lower(substring(@Str, @DelimPos + 1, 1))
+ Right(@Str, @StrLen - @DelimPos - 1)
-- handle "S.A.". "A.". '|a|' is the only single characther in the lowercase exception list...
when substring(@str, @delimpos, (@nextDelim - @DelimPos) + 1 ) = ' a '
then Left(@Str, @DelimPos)
+ lower(substring(@Str, @DelimPos + 1, 1))
+ Right(@Str, @StrLen - @DelimPos - 1)
-- mixed caps exceptions
when charindex('|' + substring(@Str, @DelimPos + 1, @WordLen)+ '|', @w_MIXEDCAPS_EXCEPTIONS) > 0
then Left(@Str, @DelimPos)
+ substring(@w_MIXEDCAPS_EXCEPTIONS
,charindex('|' + substring(@Str, @DelimPos + 1, @WordLen)+ '|', @w_MIXEDCAPS_EXCEPTIONS)+ 1
, @WordLen)
+ substring(@Str, @DelimPos + 1 + @WordLen, @StrLen - (@DelimPos + @WordLen))
-- ALL caps exceptions
when charindex('|' + substring(@Str, @DelimPos + 1, @WordLen)+ '|', @w_ALLCAPS_EXCEPTIONS) > 0
then Left(@Str, @DelimPos)
+ upper(substring(@Str, @DelimPos + 1, @WordLen))
+ substring(@Str, @DelimPos + 1 + @WordLen, @StrLen - (@DelimPos + @WordLen))
-- no-caps exceptions
when charindex('|' + substring(@Str, @DelimPos + 1, @WordLen) + '|', @w_LOWERCASE_EXCEPTIONS) > 0
then Left(@Str, @DelimPos)
+ lower(substring(@Str, @DelimPos + 1, @WordLen))
+ substring(@Str, @DelimPos + 1 + @WordLen, @StrLen - (@DelimPos + @WordLen))
-- Mc exceptions
when substring(@Str, @DelimPos + 1, 2) = 'Mc'
and charindex('|' + substring(@Str, @DelimPos + 1, @WordLen)+ '|', @w_MAC_EXCEPTIONS) = 0
and @WordLen > 2
then Left(@Str, @DelimPos)
+ upper(substring(@Str, @DelimPos + 1, 1 ))
+ lower (substring(@Str, @DelimPos + 2, 1 ))
+ upper(substring(@Str, @DelimPos + 3,1 ))
+ substring(@Str, @DelimPos + 4, @WordLen -3)
+ substring(@Str, @DelimPos + 1 + @WordLen, @StrLen - (@DelimPos + @WordLen))
-- Mac exceptions
when substring(@Str, @DelimPos + 1, 3) = 'Mac'
and charindex('|' + substring(@Str, @DelimPos + 1, @WordLen)+'|', @w_MAC_EXCEPTIONS) = 0
and @WordLen > 3
then Left(@Str, @DelimPos)
+ upper(substring(@Str, @DelimPos + 1, 1 ))
+ lower (substring(@Str, @DelimPos + 2, 2 ))
+ upper(substring(@Str, @DelimPos + 4,1 ))
+ substring(@Str, @DelimPos + 5, @WordLen -4)
+ substring(@Str, @DelimPos + 1 + @WordLen, @StrLen - (@DelimPos + @WordLen))
--normal case
else Left(@Str, @DelimPos)
+ upper(substring(@Str, @DelimPos + 1, 1))
+ Right(@Str, @StrLen - @DelimPos - 1)
end
set @DelimPos = @NextDelim
set @NextDelim = @DelimPos + patindex('%[ 0-9/\.''"`(){}<>+--_|#$*:;?&%]%', substring(@Str, @DelimPos+1, @Strlen - @DelimPos ))
set @wordlen = (@NextDelim - @DelimPos) -1
While @wordlen = 0 -- skip processing delimiters as words
begin
set @DelimPos = @NextDelim
set @NextDelim = @DelimPos + patindex('%[ 0-9/\.''"`(){}<>+--_|#$*:;?&%]%', substring(@Str, @DelimPos+1, @Strlen - @DelimPos ))
set @wordlen = (@NextDelim - @DelimPos) -1
end
end
end
-- set first letter of string to upper case. handles lines beginning with an excluded preposition, and trim delimiters
set @STR = upper(substring(@Str,2,1)) + substring(@Str,3, @Strlen -3)
return @STR
END
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO
--To convert the string 'william h gates' to proper case:
SELECT dbo.fn_ProperCase('william h gates')
--To convert the Notes field of titles table in pubs database to proper case:
SELECT dbo.fn_ProperCase(FirstName + ' ' + LastName) FROM adventureworks.Person.contact
set nocount on
select dbo.fn_ProperCase('macarthur')
select dbo.fn_ProperCase('Robert mcnamara and ewan macintosh mcgregor, dds, llp')
select dbo.fn_ProperCase('BOB AND RITA MCEWAN')
select dbo.fn_ProperCase('BOB And ROBERT MCCLINTOCK DDS')
select dbo.fn_ProperCase('BOB & ROBERT MCCLINTOCK-DDS')
select dbo.fn_ProperCase('Nh Coach & Camper/gilmans')
select dbo.fn_ProperCase('paul o''neal')
select dbo.fn_ProperCase('paul o''neal''s yachts')
select dbo.fn_ProperCase('in motion marine')
select dbo.fn_ProperCase('H & h Marine')
select dbo.fn_ProperCase('Endless Summer Rv''S')
select dbo.fn_ProperCase('104 brockhaven court apt 10b')
select dbo.fn_ProperCase('104 brockhaven court apt 10a room 11')
select dbo.fn_ProperCase('this is a test and only a test eh? arthur a. dent')
Now I wasn't expecting this to be perfect (I am sure for example that the code will be a little slow to run through thousands of records), but this is not even capitalising words after the spaces. Is there something strange about my system (could this relate to collation for example?)
When I run the last select I get ONLY the first letter of the field capitalised. eg Robert mcnamara and ewan macintosh mcgregor, dds, llp.
Can anyone see what the problem is? I'd appreciate the assistance.
Paul
June 8, 2010 at 7:03 am
Paul i think you are on the right track about the collation issue;
your function and examples worked fine; i got the correct proper cas for all the examples you posted.
I did a select * from sys.columns, and all my columns in my sandbox database are collated as "SQL_Latin1_General_CP1_CI_AS"
can you do the same for your db?
Lowell
June 9, 2010 at 1:36 am
Hi Lowell,
Thanks for your reply. Yes, my columns are collated as Latin1_General_CI_AS.
Do you think that is the reason? Pity, because it looked like a useful function for our system.
Paul
June 9, 2010 at 5:35 am
Paul_Harvey (6/9/2010)
Hi Lowell,Thanks for your reply. Yes, my columns are collated as Latin1_General_CI_AS.
Do you think that is the reason? Pity, because it looked like a useful function for our system.
Paul
Nah... you can add Latin1_General_CI_AS collation clauses to the function to make it usable. Just be aware that the code is going to be relatively very slow because of all the WHILE loops and the fact that it's a scalar function instead of a CROSS APPLYed Inline Table Value Function (iTVF). I haven't studied the code in depth but consider using a Tally Table to replace the loops and consider spending a bit of extra time to warp the code into a single query so the it can be used as an iTVF.
For information on how to build a Tally Table, what it is, and how to use it to replace a WHILE loop in this case, please see the following article...
http://www.sqlservercentral.com/articles/T-SQL/62867/
--Jeff Moden
Change is inevitable... Change for the better is not.
June 9, 2010 at 6:02 am
Thanks Jeff.
I took a look at your article, and can see that I will need to have a bit more tie on my hands to get my head around it! I have bookmarked it and will take a look. Sounds interesting. Unfortunately just a bit beyond my current SQL knowledge.
Can you assist with adding the collation clauses at all?
Paul
Viewing 5 posts - 1 through 4 (of 4 total)
You must be logged in to reply to this topic. Login to reply