Extracting words with removal of punctuations

,

Extracting words of a string with removal of unwanted punctuations.

This is a flexible function to extract words from a string and tested on both SQL 2000 and 2005
As you see the logic of this function is very simple. Maybe the update statement seems strange
to beginners because it does not update any field of underlying table; it removes all unwanted
punctuations from @s. In fact the replace function will execute for each punctuation that is in
@UnwantedPunctuations. The WHILE loop will execute repeatedly to extract words and put them into output table (@out).

create  function dbo.ufnSplitWords(@s varchar(8000))
returns @out table  (wrd  varchar(8000))
as
begin

/**********************************************************

  Written By  	: yousef ekhtiari
  Email         :yousef_ekhtiari@Hotmail.com
  Create  Date  : 24 May 2007
  Modified Date :
  Description 	: Extracting words of a string with removal
                 of  unwanted punctuations
  USAGE:
        select * from dbo.ufnSplitWords('This is a  test.')
	select * from dbo.ufnSplitWords('This,,  is a    test')
        select * from dbo.ufnSplitWords('Do    you need more example?')

**********************************************************/

declare 
		@pos int,
		@tmp varchar(8000),
		@wrd varchar(50)
		
declare @UnwantedPunctuations table (punc char(1))

/*keep unwanted punctuations in a table.
you may customise these punctuation as you wish */

insert @UnwantedPunctuations
select '.' union all select ',' union all select '?' union all select ':' 
	
--Removing unwanted punctuations from the input string
update  @UnwantedPunctuations
set @s=replace(@s,punc,' ')

set @s=ltrim(rtrim(@s))+' '

	while len(@s)>0
	begin
		set  @pos=CHARINDEX(' ',@s,2)
		set @wrd=ltrim(left(@s,@pos))
		--Removing unwanted punctuations 
		update  @UnwantedPunctuations
		set @wrd=replace(@wrd,punc,'')
		
		insert  @out values(  rtrim(@wrd))
				
		set @s=ltrim(stuff(@s,1,@pos ,''))
	
	end
return
end

Rate

5 (2)

Share

Share

Rate

5 (2)