Technical Article

String Tokenizer with Complex Search

,

Sample solution to update line numbers in parser expressions

/*
Sample solution to update line numbers in parser expressions
*/--===================================================================
--Create and populate helper tables and functions for StringTokenizer
--===================================================================

if object_id('Numbers') is not null
 drop table Numbers
go
if object_id('Delimeters') is not null
 drop table Delimeters
go
if object_id('udfn_GetDelimeterPositions') is not null
 drop function udfn_GetDelimeterPositions
go
if object_id('udfn_GetTokenPositions') is not null
 drop function udfn_GetTokenPositions
go 

if object_id('udfn_ReplaceParserExpression') is not null
 drop function udfn_ReplaceParserExpression
go
if object_id('tempdb..#expressions') is not null
 drop table #expressions
go
SET NOCOUNT ON
go

--===========================================
--Create table Numbers
--We will use this table to extract tokens from
--the given string with delimeters
--===========================================

Create procedure #udsp_CreateNumberTable 
@size int=10000
as
BEGIN

IF NOT EXISTS (SELECT * FROM dbo.sysobjects 
 WHERE id = OBJECT_ID(N'[dbo].[Numbers]') 
 AND OBJECTPROPERTY(id, N'IsUserTable') = 1)
    BEGIN
    CREATE TABLE [dbo].[Numbers](
     [number] [int],
     CONSTRAINT [XPKNumbers] PRIMARY KEY CLUSTERED 
    (
     [number] ASC
    ) ON [PRIMARY]
    ) ON [PRIMARY]

    Declare @ii int
    Select @ii=1
    while (@ii<=@size)
     BEGIN
     INSERT INTO Numbers(number) SELECT @ii
     SELECT @ii=@ii+1
     END
    end
end

go
--=========================================================
--Create table Delimeters
--We will use this table to store all possible delimeters
--that are allowed by the expression
--=========================================================

create procedure #udsp_CreateDelimeterTable
as
begin
IF NOT EXISTS (SELECT * FROM dbo.sysobjects 
 WHERE id = OBJECT_ID(N'[dbo].[Delimeters]') 
 AND OBJECTPROPERTY(id, N'IsUserTable') = 1)
BEGIN
    CREATE TABLE [dbo].[Delimeters](
     [id] [int],
 delimeter nvarchar(1)
     CONSTRAINT [XPKDelimeters] PRIMARY KEY CLUSTERED 
    (
     [id] ASC
    ) ON [PRIMARY]
    ) ON [PRIMARY]
 insert into Delimeters (id,delimeter) values (1, ':')
 insert into Delimeters (id,delimeter) values (2, '+')
 insert into Delimeters (id,delimeter) values (3, '-')
 insert into Delimeters (id,delimeter) values (4, '(')
 insert into Delimeters (id,delimeter) values (5, ')')
 insert into Delimeters (id,delimeter) values (6, '/')
 insert into Delimeters (id,delimeter) values (7, '*')
 insert into Delimeters (id,delimeter) values (8, '>')
 insert into Delimeters (id,delimeter) values (9, '<')
 insert into Delimeters (id,delimeter) values (10, '=')
 insert into Delimeters (id,delimeter) values (11, '^')
 insert into Delimeters (id,delimeter) values (12, ',')
 insert into Delimeters (id,delimeter) values (13, '.')
 insert into Delimeters (id,delimeter) values (14, '!')

END
end
go

--========================================
--execute functions to create tables
--========================================
exec #udsp_CreateNumberTable
exec #udsp_CreateDelimeterTable
drop procedure #udsp_CreateNumberTable
drop procedure #udsp_CreateDelimeterTable
go

--test
--select count(*) from Numbers
--select * from Delimeters
--==========================================================
--Create functions for the StringTokenizer
--==========================================================
/*
Function: udfn_GetDelimeterPositions
Parameters: 
 @string - input string
 @delimeter - common delimeter
Returns: a table of delimeter positions in the input string
*/create FUNCTION [dbo].[udfn_GetDelimeterPositions]
(
@string varchar(8000),
@delimeter Varchar(255)
)
RETURNS
@results TABLE
(
 pos int,
 token varchar(8000) 
)
AS
begin
 insert into @results (pos, token)
 select charindex(@delimeter, @string, number), @delimeter
 from Numbers
 where number <=len(@string)
 AND SUBSTRING(@delimeter + @string, 
             number, 
             len(replace(@delimeter,' ','|'))) = @delimeter
 and charindex(@delimeter, @string, number) > 0
 ORDER BY number
 return 
END
go

/*
Function: udfn_GetTokenPositions
Parameters: 
 @string - input string
 @@delimeter - string delimeter 
Returns: table with tokens and their positions extracted from the input string 
Description: Use this function to extract all tokens delimited by common delimeter.
In this context token is defined as part of the input string between delimeters,
for example, 
string AA143-(AA65:AA68) will be parsed into tokens:
AA143
AA65
AA68
*/
Create FUNCTION [dbo].[udfn_GetTokenPositions]
(
@string varchar(8000),
@delimeter Varchar(255)
)
RETURNS
@results TABLE
(
pos int,
token varchar(8000)
)
AS
begin
INSERT INTO @results (pos,token)
select number, SUBSTRING(@string+@delimeter, number, 
 CHARINDEX(@delimeter, @string+@delimeter, number) - number)
FROM Numbers
where number <= len(replace(@string,' ','|'))
AND SUBSTRING(@delimeter + @string, 
            number, 
            len(replace(@delimeter,' ','|'))) = @delimeter
AND SUBSTRING(@string+@delimeter, number, 
 CHARINDEX(@delimeter, @string+@delimeter, number) - number) <> ''
ORDER BY number RETURN
END
go

/*
--Test
declare @str varchar(500)
set @str = '|100|||AA160|AA164|||AB160|AB164||||AB160|AB164|'
select * from dbo.udfn_GetTokenPositions(@str,'|')
*//*
Function: udfn_ReplaceParserExpression
Parameters:
 @str - input string
 @search_pattern - consists of the search pattern and the exact number, for example, '%[A,B,D,G,J][A-Z][1][8][0]%'
 @str_replace - string to replace the input string, this is new number
Description: This is the main function to search and replace parser expression string
1. Get all delimeter positions in the parser expression string and save them in the temp array(table)
2. Replace all delimeters in the parser expression with a common delimeter
3. Get all tokens and their positions and save them in the temp array(table)
4. Search the tokens and replace the number by using pattern search and T-SQL update statement
5. Reconstruct the string by using saved delimeter and token positions.
*/
create function udfn_ReplaceParserExpression(
 @str varchar(500),
 @search_pattern varchar(100), 
 @str_replace varchar(100) = '')
returns varchar(500)
as
begin
declare @tokens table (pos int, token varchar(100))
declare 
 @ii int,
 @max_delimeters int,
 @delimeter varchar(1),
 @str_temp varchar(500),
 @common_delimeter varchar(1),
 @str_token varchar(100),
 @s varchar(50),
 @ret varchar(500)

 set @common_delimeter = '|'
 set @str_temp = @str
 set @ii = 1
 select @max_delimeters = count(id) from Delimeters
 while @ii<=@max_delimeters
 begin
 select @delimeter = delimeter from Delimeters where id = @ii
 insert into @tokens (pos, token)
 select pos, token from dbo.udfn_GetDelimeterPositions(@str_temp, @delimeter)
 --replace delimeter with common delimeter
 set @str_temp = replace(@str_temp,@delimeter ,@common_delimeter)
 set @ii = @ii + 1
 end
 insert into @tokens (pos, token)
 select pos, token from dbo.udfn_GetTokenPositions(@str_temp,@common_delimeter)
 
 --search and replace tokens
 update @tokens
 set token = substring(token, 1, patindex('%[0-9]%', token) -1) + @str_replace
 where patindex(@search_pattern, replace(token, ' ', '')) > 0
 
 --reconstruct the string 
 --cursor solution
-- set @ret = ''
-- declare tokens_cursor cursor for
-- select token from @tokens order by pos
-- open tokens_cursor
-- fetch next from tokens_cursor into @str_token
-- while @@fetch_status = 0
-- begin
-- set @ret = @ret + @str_token 
-- fetch next from tokens_cursor into @str_token
-- end
-- close tokens_cursor
-- deallocate tokens_cursor
-- return @ret

 --set solution
 select @ret = isnull(@ret, '') + token 
 from @tokens
 order by pos
 return @ret
end
go

set nocount off
go

--create test table
create table #expressions (parserexpression nvarchar(200))
insert into #expressions values ('(AA165+AA179:AA180)')
insert into #expressions values ('(AA165+AA179:AA180)/AA158*100')
insert into #expressions values ('(AA165+AA179+AA180)')
insert into #expressions values ('(AA165+AA179+AA180)/AA158*100')
insert into #expressions values ('(AA167-AB167)*(AB180:AB187/AB167)')
insert into #expressions values ('(AA218+AA165+AA179+AA180)/AA80:AA85')
insert into #expressions values ('(AA76:AA85 + AA115:AA120)/(AA207 + AA197 + AA198 + AA165 + AA179 + AA180)')
insert into #expressions values ('@IF(AA58-AB58+AA179+AA165<=0,-(AA58-AB58+AA179+AA165+AA37:AA44-AB37:AB44+AA60:AA72-AB60:AB72-AA180),0)')
insert into #expressions values (=0,-(AB37:AB44-AA37:AA44+AB60:AB72-AA60:AA72-AA180),0)'">'@IF(AA58-AB58+AA179+AA165>=0,-(AB37:AB44-AA37:AA44+AB60:AB72-AA60:AA72-AA180),0)')
insert into #expressions values (=0.000000,-(AB37:AB44-AA37:AA44+AB60:AB72-AA60:AA72-AA180),0)'">'@IF(AA58-AB58+AA179+AA165>=0.000000,-(AB37:AB44-AA37:AA44+AB60:AB72-AA60:AA72-AA180),0)')
insert into #expressions values ('100*((AA165+AA179+AA180)-(AB165+AB179+AB180)) /(AB165+AB179+AB180)')
insert into #expressions values ('AA165 + AA179 + AA180')
insert into #expressions values ('AA165+AA179+AA180')
insert into #expressions values ('AA167*(AB180:AB187/AB167)-AA180:AA187')
insert into #expressions values ('AA179 + AA165 + AA180')
insert into #expressions values ('AA180')
insert into #expressions values ('AA180 + AA181')
insert into #expressions values ('AA180:AA181')
insert into #expressions values ('AA180:AA187')
insert into #expressions values ('AA180:AA187/AA167*100')
insert into #expressions values ('AB165+AB179+AB180')
insert into #expressions values ('AB180:AB187-AA180:AA187')

/*
TEST
For example, we want to search all possible tokens containing number 180 and replace it 
with number 000
The input string will be the parser expression: for example, 
100*((AA165+AA179+AA180)-(AB165+AB179+AB180)) /(AB165+AB179+AB180)
For example, the search pattern will be '[A,D,G,J][A-Z][1][8][0]%' 
The replace string will be 000
*/
declare 
 @str_search varchar(100),
 @str_replace varchar(100),
 @search_pattern varchar(50),
 @s varchar(50),
 @ii int

set @str_search = '180'
set @str_replace = '000'
set @search_pattern = '[A,D,G,J][A-Z]' 
--add square brakets to @string_search 
set @s = ''
set @ii = 1
while @ii <= len(@str_search)
begin
 set @s = @s + '[' + substring(@str_search, @ii, 1) + ']'
 set @ii = @ii + 1
end 
--create line number search pattern 
set @search_pattern = '%' + @search_pattern + @s + '%'
--display the search and replace results
select parserexpression str_in, 
 dbo.udfn_ReplaceParserExpression(parserexpression, @search_pattern, @str_replace) str_out,
 @search_pattern str_search, 
 @str_replace str_replace
from #expressions
where patindex(@search_pattern, parserexpression) > 0
go

if object_id('Numbers') is not null
 drop table Numbers
go
if object_id('Delimeters') is not null
 drop table Delimeters
go
if object_id('udfn_GetDelimeterPositions') is not null
 drop function udfn_GetDelimeterPositions
go
if object_id('udfn_GetTokenPositions') is not null
 drop function udfn_GetTokenPositions
go 

if object_id('udfn_ReplaceParserExpression') is not null
 drop function udfn_ReplaceParserExpression
go
if object_id('tempdb..#expressions') is not null
 drop table #expressions
go

Rate

3 (3)

You rated this post out of 5. Change rating

Share

Share

Rate

3 (3)

You rated this post out of 5. Change rating