Marcus Farrugia (3/25/2015)
Hi I have a table with a varchar column that is delimited by char(227)+char(228)ie,
OrNo=7807ãäSeqNo=1ãäPStopType=PãäPCity=TillsonburgãäPSt=ONãäPZIP=N4G4J1ãäPAdr1=10 ROUSE STREETãäPAdr2=ãäPLat=42.8625ãäPLon=-80.7267ãäDStopType=DãäDCity=GaffneyãäDSt=SCãäDZIP=29341ãäDAdr1=121 PAN AMERICAN DRIVE
would anyone be able to point me to a split function that accepts a multiple character value as a delimter?
Thank you!
Hopefully these rows aren't permanently saved that way, which means you could do a one-time replace of the double-delimiter with a single character delimiter.
What would probably be even faster, though, is to export the data to a file and reimport it using ãä as the delimiter.
--Jeff Moden
Change is inevitable... Change for the better is not.
Quick thought, the most efficient way is probably splitting on one of the two delimiter characters and remove the other delimiter character from the output. Manipulating the input is very costly, more costly than double splitting, i.e. splitting on one and then the other. Consider these examples below.
USE tempdb;
GO
SET NOCOUNT ON;
DECLARE @DATASTR VARCHAR(250) = 'OrNo=7807ãäSeqNo=1ãäPStopType=PãäPCity=TillsonburgãäPSt=ONãäPZIP=N4G4J1ãäPAdr1=10 ROUSE STREETãäPAdr2=ãäPLat=42.8625ãäPLon=-80.7267ãäDStopType=DãäDCity=GaffneyãäDSt=SCãäDZIP=29341ãäDAdr1=121 PAN AMERICAN DRIVE';
DECLARE @ROW_COUNT INT = 10000;
DECLARE @INT_BUCKET01 INT = 0;
DECLARE @INT_BUCKET02 INT = 0;
DECLARE @STR_BUCKET01 VARCHAR(250) = '';
DECLARE @TIMER TABLE (T_TEXT VARCHAR(150) NOT NULL, T_TS DATETIME2(7) NOT NULL DEFAULT (SYSDATETIME()));
IF OBJECT_ID('dbo.TBL_DOUBLECHAR_DELIM') IS NOT NULL DROP TABLE dbo.TBL_DOUBLECHAR_DELIM;
CREATE TABLE dbo.TBL_DOUBLECHAR_DELIM
(
DD_ID INT IDENTITY(1,1) NOT NULL CONSTRAINT PK_DBO_TBL_DOUBLECHAR_DELIM_DD_ID PRIMARY KEY CLUSTERED
,DD_STR VARCHAR(250) NOT NULL
);
;WITH GEN_INS(GN_STR) AS
(
SELECT TOP(@ROW_COUNT)
@DATASTR AS GN_STR
FROM sys.all_columns S1, sys.all_columns S2, sys.all_columns S3, sys.all_columns S4
)
INSERT INTO TBL_DOUBLECHAR_DELIM(DD_STR)
SELECT
GI.GN_STR
FROM GEN_INS GI;
INSERT INTO @TIMER (T_TEXT) VALUES('Dry Run');
SELECT
@INT_BUCKET01 = DD.DD_ID
,@STR_BUCKET01 = DD.DD_STR
FROM dbo.TBL_DOUBLECHAR_DELIM DD
INSERT INTO @TIMER (T_TEXT) VALUES('Dry Run');
INSERT INTO @TIMER (T_TEXT) VALUES('Replace delimiter with a single in the input');
/* Replace ãä with | */
SELECT
@INT_BUCKET01 = DD.DD_ID
,@INT_BUCKET02 = D1.ItemNumber
,@STR_BUCKET01 = D1.Item
FROM dbo.TBL_DOUBLECHAR_DELIM DD
CROSS APPLY dbo.DelimitedSplitL8K(REPLACE(DD.DD_STR,'ãä','|'),'|') D1
INSERT INTO @TIMER (T_TEXT) VALUES('Replace delimiter with a single in the input');
INSERT INTO @TIMER (T_TEXT) VALUES('Remove one part of the delimiter in the input');
/* Remove one part of the delimiter in the input */
SELECT
@INT_BUCKET01 = DD.DD_ID
,@INT_BUCKET02 = D1.ItemNumber
,@STR_BUCKET01 = D1.Item
FROM dbo.TBL_DOUBLECHAR_DELIM DD
CROSS APPLY dbo.DelimitedSplitL8K(REPLACE(DD.DD_STR,'ä',''),'ã') D1;
INSERT INTO @TIMER (T_TEXT) VALUES('Remove one part of the delimiter in the input');
INSERT INTO @TIMER (T_TEXT) VALUES('Remove one part of the delimiter in the output');
/* Remove one part of the delimiter in the output */
SELECT
@INT_BUCKET01 = DD.DD_ID
,@INT_BUCKET02 = D1.ItemNumber
,@STR_BUCKET01 = REPLACE(D1.Item,'ä','')
FROM dbo.TBL_DOUBLECHAR_DELIM DD
CROSS APPLY dbo.DelimitedSplitL8K(DD.DD_STR,'ã') D1;
INSERT INTO @TIMER (T_TEXT) VALUES('Remove one part of the delimiter in the output');
INSERT INTO @TIMER (T_TEXT) VALUES('Double split');
/* Double split */
SELECT
@INT_BUCKET01 = DD.DD_ID
,@INT_BUCKET02 = D1.ItemNumber
,@STR_BUCKET01 = D2.Item
FROM dbo.TBL_DOUBLECHAR_DELIM DD
CROSS APPLY dbo.DelimitedSplitL8K(DD.DD_STR,'ã') D1
CROSS APPLY dbo.DelimitedSplitL8K(D1.Item,'ä') D2
WHERE D2.Item > '';
INSERT INTO @TIMER (T_TEXT) VALUES('Double split');
SELECT
TT.T_TEXT
,DATEDIFF(MICROSECOND,MIN(TT.T_TS),MAX(TT.T_TS)) AS DURATION
FROM @TIMER TT
GROUP BY TT.T_TEXT
ORDER BY DURATION;
Results
T_TEXT DURATION
------------------------------------------------ -----------
Dry Run 3000
Remove one part of the delimiter in the output 379021
Double split 922053
Remove one part of the delimiter in the input 3539203
Replace delimiter with a single in the input 3653209
Hi, just wanted to close this topic off, I apologize for taking too long to respond at all. Got caught in a rabbit hole ... Anyway, a colleague helped me out by showing me this function which worked for my purpose. Thanks to all for responding.
/*
********************************************************************************
Purpose: Parse values from a delimited string
& return the result as an indexed table
Copyright 1996, 1997, 2000, 2003 Clayton Groom (<A href="mailto:Clayton_Groom@hotmail.com">Clayton_Groom@hotmail.com</A>)
Posted to the public domain Aug, 2004
06-17-03 Rewritten as SQL 2000 function.
Reworked to allow for delimiters > 1 character in length
and to convert Text values to numbers
********************************************************************************
*/
create function [dbo].[fn_ParseText2Table]
(
@p_SourceText varchar(8000)
,@p_Delimeter varchar(100) = ',' --default to comma delimited.
)
RETURNS @retTable TABLE
(
Position int identity(1,1)
,Int_Value int
,Num_value Numeric(18,3)
,txt_value varchar(2000)
)
AS
BEGIN
DECLARE @w_Continue int
,@w_StartPos int
,@w_Length int
,@w_Delimeter_pos int
,@w_tmp_int int
,@w_tmp_num numeric(18,3)
,@w_tmp_txt varchar(2000)
,@w_Delimeter_Len tinyint
if len(@p_SourceText) = 0
begin
SET @w_Continue = 0 -- force early exit
end
else
begin
-- parse the original @p_SourceText array into a temp table
SET @w_Continue = 1
SET @w_StartPos = 1
SET @p_SourceText = RTRIM( LTRIM( @p_SourceText))
SET @w_Length = DATALENGTH( RTRIM( LTRIM( @p_SourceText)))
SET @w_Delimeter_Len = len(@p_Delimeter)
end
WHILE @w_Continue = 1
BEGIN
SET @w_Delimeter_pos = CHARINDEX( @p_Delimeter
,(SUBSTRING( @p_SourceText, @w_StartPos
,((@w_Length - @w_StartPos) + @w_Delimeter_Len)))
)
IF @w_Delimeter_pos > 0 -- delimeter(s) found, get the value
BEGIN
SET @w_tmp_txt = LTRIM(RTRIM( SUBSTRING( @p_SourceText, @w_StartPos
,(@w_Delimeter_pos - 1)) ))
if isnumeric(@w_tmp_txt) = 1
begin
set @w_tmp_int = cast( cast(@w_tmp_txt as numeric) as int)
set @w_tmp_num = cast( @w_tmp_txt as numeric(18,3))
end
else
begin
set @w_tmp_int = null
set @w_tmp_num = null
end
SET @w_StartPos = @w_Delimeter_pos + @w_StartPos + (@w_Delimeter_Len- 1)
END
ELSE -- No more delimeters, get last value
BEGIN
SET @w_tmp_txt = LTRIM(RTRIM( SUBSTRING( @p_SourceText, @w_StartPos
,((@w_Length - @w_StartPos) + @w_Delimeter_Len)) ))
if isnumeric(@w_tmp_txt) = 1
begin
set @w_tmp_int = cast( cast(@w_tmp_txt as numeric) as int)
set @w_tmp_num = cast( @w_tmp_txt as numeric(18,3))
end
else
begin
set @w_tmp_int = null
set @w_tmp_num = null
end
SELECT @w_Continue = 0
END
INSERT INTO @retTable VALUES( @w_tmp_int, @w_tmp_num, @w_tmp_txt )
END
RETURN
END
GO
Thank you for taking time to respond: Eirikur, Jeff, Magoo ...
No problem. You're happy with the performance of the While loop then?
--Jeff Moden
Change is inevitable... Change for the better is not.
Viewing 8 posts - 1 through 8 (of 8 total)
You must be logged in to reply to this topic. Login to reply