m.t.cleary (9/10/2012)
Paul's CLR splitter has two features which mean it shouldn't be compared directly to DelimitedSplit8K based splitters.
More than two (it supports very long strings and Unicode) but overall the comparison is a fair one.
The first is that it doesn't handle the trailing delimiter correctly. Using comma as a delimiter, the string '55555,' returns one item "55555", not two "55555" and "". Using Jeff's functional test cases (17 strings) it returns 45 rows and not 52.
I have never found that way of treating the trailing-delimiter case intuitive or useful, but it is a trivial code change if you do need it to work that way:
CREATE ASSEMBLY Split
FROM 0x4D5A90000300000004000000FFFF0000B800000000000000400000000000000000000000000000000000000000000000000000000000000000000000800000000E1FBA0E00B409CD21B8014CCD21546869732070726F6772616D2063616E6E6F742062652072756E20696E20444F53206D6F64652E0D0D0A2400000000000000504500004C010300F7F34D500000000000000000E00002210B010800000E00000006000000000000AE2D0000002000000040000000004000002000000002000004000000000000000400000000000000008000000002000000000000030040850000100000100000000010000010000000000000100000000000000000000000602D00004B000000004000006803000000000000000000000000000000000000006000000C00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000200000080000000000000000000000082000004800000000000000000000002E74657874000000B40D000000200000000E000000020000000000000000000000000000200000602E7273726300000068030000004000000004000000100000000000000000000000000000400000402E72656C6F6300000C0000000060000000020000001400000000000000000000000000004000004200000000000000000000000000000000902D0000000000004800000002000500E42100007C0B00000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000A2026F1300000A2D0E026F1400000A0373040000062B0C168D140000011673040000068C030000022A000000133002001A0000000100001102A5040000020A031200280800000654041200280A000006512A1E02281500000A2AC202037D0100000402047D0300000402027B010000048E697D02000004027C05000004FE150400000202167D040000042A0013300500C400000002000011027B04000004027B020000043102162A027B040000040A2B56027B010000040693027B030000043342027C0500000425280800000617582809000006027C05000004027B01000004027B0400000406027B0400000459731700000A280B000006020617587D04000004172A0617580A06027B0200000432A1027C0500000425280800000617582809000006027C05000004027B01000004027B04000004027B02000004027B0400000459731700000A280B00000602027B0200000417587D04000004172A32027B050000048C040000022A1A731800000A7A1E027B060000042A2202037D060000042A1E027B070000042A2202037D070000042A000042534A4201000100000000000C00000076322E302E35303732370000000005006C000000CC030000237E0000380400009004000023537472696E677300000000C80800000800000023555300D0080000100000002347554944000000E00800009C02000023426C6F6200000000000000020000015717A2030902000000FA253300160000010000001900000004000000070000000B00000009000000010000001900000012000000020000000200000003000000050000000300000001000000020000000200000000000A00010000000000060052004B00060059004B000600760063000A00A3008E000A002D02120206007502560206009F028D020600B6028D020600D3028D020600F2028D0206000B038D02060024038D0206003F038D0206005A038D02060073035602060087038D020600C003A0030600E003A0030A000404120206002E044B00060033045602060049045602060054044B0006005B044B0006007304A003000000000100000000000100010001001000140000000500010001000B011000290000000900010004000B01100039000000090006000800210057012E0021005D01320021006401350001006E0132000100740138000100CE0132000100E80152005020000000009600AC000A0001007C20000000009600B60012000300A220000000008618BE001B000600AA20000000008318BE001F000600DC2000000000E101C40026000800AC2100000000E109F5002A000800B92100000000E1012C011B000800C021000000008308A20140000800C821000000008308AF0144000800D121000000008308BC0149000900D921000000008308C5014D000900000001000C02000002003F02000001004902020002004D02020003008202000001000C02000002003F0200000100870200000100870203000D001900EC002600190020012A00190051011B002900BE001B003100BE001B003900BE004D004100BE004D004900BE004D005100BE004D005900BE004D006100BE004D006900BE004D007100BE004D007900BE0070008100BE004D008900BE0044009100BE001B009900BE001B00210019042600210024040D020900BE001B00A900BE001702B900BE001D02C100BE001B00C900BE001B00200093007500240023005D002E0033002E022E0043003D022E008B007C022E004B0043022E0053002E022E0073003D022E003B003D022E00830073022E005B0052022E0063003D02C100CB002902E100CB0029020001CB0029022001CB0029024001CB0029026001CB00290212022502030001000400020000007B013C000000FE01550000000702590002000600030001000900050002000800050002000A00070001000B00070003000A00030003000C00050003000E00070004800000010000001D12E315000000000000FE03000002000000000000000000000001004200000000000200000000000000000000000100820000000000030002000400020000000000003C4D6F64756C653E0053706C69742E646C6C0055736572446566696E656446756E6374696F6E730053706C6974456E756D657261746F720053706C6974526F77006D73636F726C69620053797374656D004F626A6563740056616C7565547970650053797374656D2E436F6C6C656374696F6E730049456E756D657261746F720053797374656D2E446174610053797374656D2E446174612E53716C54797065730053716C43686172730053706C6974746572420046696C6C526F77002E63746F720053797374656D2E436F6C6C656374696F6E732E49456E756D657261746F722E4D6F76654E657874004D6F76654E6578740053797374656D2E436F6C6C656374696F6E732E49456E756D657261746F722E6765745F43757272656E74006765745F43757272656E740053797374656D2E436F6C6C656374696F6E732E49456E756D657261746F722E526573657400526573657400696E707574006C656E6774680064656C696D69746572007374617274007265636F72640053797374656D2E436F6C6C656374696F6E732E49456E756D657261746F722E43757272656E74006765745F53657175656E6365007365745F53657175656E6365006765745F4974656D007365745F4974656D003C53657175656E63653E6B5F5F4261636B696E674669656C64003C4974656D3E6B5F5F4261636B696E674669656C640053657175656E6365004974656D00496E707574004D6963726F736F66742E53716C5365727665722E5365727665720053716C46616365744174747269627574650044656C696D69746572006F626A0073657175656E63650053797374656D2E52756E74696D652E496E7465726F705365727669636573004F7574417474726962757465006974656D0076616C75650053797374656D2E5265666C656374696F6E00417373656D626C795469746C6541747472696275746500417373656D626C794465736372697074696F6E41747472696275746500417373656D626C79436F6E66696775726174696F6E41747472696275746500417373656D626C79436F6D70616E7941747472696275746500417373656D626C7950726F6475637441747472696275746500417373656D626C79436F7079726967687441747472696275746500417373656D626C7954726164656D61726B41747472696275746500417373656D626C7943756C7475726541747472696275746500436F6D56697369626C6541747472696275746500417373656D626C7956657273696F6E4174747269627574650053797374656D2E52756E74696D652E436F6D70696C6572536572766963657300436F6D70696C6174696F6E52656C61786174696F6E734174747269627574650052756E74696D65436F6D7061746962696C6974794174747269627574650053706C69740053716C46756E6374696F6E417474726962757465006765745F49734E756C6C006765745F56616C75650043686172005374727563744C61796F7574417474726962757465004C61796F75744B696E6400537472696E67004E6F74496D706C656D656E746564457863657074696F6E00436F6D70696C657247656E6572617465644174747269627574650000000003200000000000769A3CD63DE4C742A197817A9DEE94EC0008B77A5C561934E089070002120D121103080003011C1008100E03200001062002011D0303032000020320001C03061D03020608020603030611100328001C0320000804200101080320000E042001010E02060E032800080328000E12010001005408074D617853697A65FFFFFFFF04200101028196010006005455794D6963726F736F66742E53716C5365727665722E5365727665722E446174614163636573734B696E642C2053797374656D2E446174612C2056657273696F6E3D322E302E302E302C2043756C747572653D6E65757472616C2C205075626C69634B6579546F6B656E3D623737613563353631393334653038390A446174614163636573730000000054557F4D6963726F736F66742E53716C5365727665722E5365727665722E53797374656D446174614163636573734B696E642C2053797374656D2E446174612C2056657273696F6E3D322E302E302E302C2043756C747572653D6E65757472616C2C205075626C69634B6579546F6B656E3D623737613563353631393334653038391053797374656D446174614163636573730000000054020F497344657465726D696E69737469630154020949735072656369736501540E1146696C6C526F774D6574686F644E616D650746696C6C526F77540E0F5461626C65446566696E6974696F6E2173657175656E636520494E542C206974656D204E564152434841522834303030290420001D030407011110052001011159072003011D0308080307010804010000000E01000953706C69747465724200000501000000000E0100094D6963726F736F667400002001001B436F7079726967687420C2A9204D6963726F736F6674203230313100000801000800000000001E01000100540216577261704E6F6E457863657074696F6E5468726F77730100882D000000000000000000009E2D0000002000000000000000000000000000000000000000000000902D00000000000000005F436F72446C6C4D61696E006D73636F7265652E646C6C0000000000FF250020400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001001000000018000080000000000000000000000000000001000100000030000080000000000000000000000000000001000000000048000000584000000C03000000000000000000000C0334000000560053005F00560045005200530049004F004E005F0049004E0046004F0000000000BD04EFFE0000010000000100E3151D1200000100E3151D123F000000000000000400000002000000000000000000000000000000440000000100560061007200460069006C00650049006E0066006F00000000002400040000005400720061006E0073006C006100740069006F006E00000000000000B0046C020000010053007400720069006E006700460069006C00650049006E0066006F00000048020000010030003000300030003000340062003000000034000A00010043006F006D00700061006E0079004E0061006D006500000000004D006900630072006F0073006F006600740000003C000A000100460069006C0065004400650073006300720069007000740069006F006E0000000000530070006C0069007400740065007200420000003C000E000100460069006C006500560065007200730069006F006E000000000031002E0030002E0034003600330037002E003500360030003300000034000A00010049006E007400650072006E0061006C004E0061006D0065000000530070006C00690074002E0064006C006C0000005C001B0001004C006500670061006C0043006F007000790072006900670068007400000043006F0070007900720069006700680074002000A90020004D006900630072006F0073006F006600740020003200300031003100000000003C000A0001004F0072006900670069006E0061006C00460069006C0065006E0061006D0065000000530070006C00690074002E0064006C006C00000034000A000100500072006F0064007500630074004E0061006D00650000000000530070006C00690074007400650072004200000040000E000100500072006F006400750063007400560065007200730069006F006E00000031002E0030002E0034003600330037002E003500360030003300000044000E00010041007300730065006D0062006C0079002000560065007200730069006F006E00000031002E0030002E0034003600330037002E0035003600300033000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002000000C000000B03D00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
WITH PERMISSION_SET = SAFE;
GO
CREATE FUNCTION dbo.SplitterB
(
@Input nvarchar(max),
@Delimiter nchar(1)
)
RETURNS TABLE
(
sequence int NULL,
item nvarchar(4000) NULL
)
AS
EXTERNAL NAME Split.UserDefinedFunctions.SplitterB;
GO
Source:
using System.Collections;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
public partial class UserDefinedFunctions
{
/**
* How SQL Server SQLCLR table-valued functions work:
*
* 1. SQL Server passes the input parameters in to the function and receives an enumeration object in return
* 2. SQL Server calls the MoveNext() method on the enumeration object
* 3. If the MoveNext() call returns true, SQL Server calls the Current() method to get an object for the current row
* 4. SQL Server calls the FillRow method to obtain column values for the current row
* 5. Repeat from step 2, until MoveNext() returns false
*
* */
[SqlFunction
(
DataAccess = DataAccessKind.None, // No user data access by this function
SystemDataAccess = SystemDataAccessKind.None, // No system data access by this function
IsDeterministic = true, // This function is deterministic
IsPrecise = true, // This function is precise
FillRowMethodName = "FillRow", // The method called by SQL Server to obtain the next row
TableDefinition =
"sequence INT, item NVARCHAR(4000)" // Returned table definition
)
]
// 1. SQL Server passes input parameters and receives an enumration object
public static IEnumerator SplitterB
(
[SqlFacet(MaxSize = -1)] SqlChars Input,
char Delimiter
)
{
return Input.IsNull ?
new SplitEnumerator(new char[0], char.MinValue) :
new SplitEnumerator(Input.Value, Delimiter);
}
// The enumeration object
struct SplitEnumerator : IEnumerator
{
// Constructor (called once when the object is created)
internal SplitEnumerator(char[] Input, char Delimiter)
{
// Save references
input = Input;
delimiter = Delimiter;
// Remember the length of the character array
length = input.Length;
// Structure holding split rows
record = new SplitRow();
// Starting at the first character
start = 0;
}
// Enumerator implementation
#region IEnumerator Methods
// 2. SQL Server calls the MoveNext() method on the enumeration object
bool IEnumerator.MoveNext()
{
// No more rows?
if (start > length) { return false; }
// Find the next delimiter
for (int i = start; i < length; i++)
{
if (input == delimiter)
{
// Increment the sequence number
record.Sequence++;
// Save the split element
record.Item = new string(input, start, i - start);
// Set the next element search start point
start = i + 1;
return true;
}
}
// Last item
record.Sequence++;
record.Item = new string(input, start, length - start);
start = length + 1;
return true;
}
// 3. SQL Server calls the Current() method to get an object for the current row
// (We pack the current row data in an OutputRecord structure)
object IEnumerator.Current
{
get { return record; }
}
// Required by the IEnumerator interface, but not needed for this implementation
void IEnumerator.Reset()
{
throw new System.NotImplementedException();
}
#endregion
readonly char[] input; // Reference to the string to be split
readonly int length; // Length of the input string
readonly char delimiter; // The delimiter character
int start; // Current search start position
SplitRow record; // Each row to be returned
}
// 4. SQL Server calls the FillRow method to obtain column values for the current row
public static void FillRow(object obj, out int sequence, out string item)
{
// The passed-in object is an OutputRecord
var r = (SplitRow)obj;
// Set the output parameter values
sequence = r.Sequence;
item = r.Item;
}
// Structure used to hold each row
struct SplitRow
{
internal int Sequence { get; set; } // Sequence of the element
internal string Item { get; set; } // The element
}
};
The second is that it doesn't use collations for the comparison of string to delimiter [...] I can't see any use for this behaviour besides confusing yourself and others so I recommend using a binary collation.
Yes that's why the SQLCLR uses binary comparisons. It is easy to carry over the collation settings from the caller, but I have never felt the need to do this for string splitting.
Paul White
SQLPerformance.com
SQLkiwi blog
@SQL_Kiwi