• Paul White

    SSC Guru

    Points: 150341

    m.t.cleary (9/10/2012)


    Paul's CLR splitter has two features which mean it shouldn't be compared directly to DelimitedSplit8K based splitters.

    More than two (it supports very long strings and Unicode) but overall the comparison is a fair one.

    The first is that it doesn't handle the trailing delimiter correctly. Using comma as a delimiter, the string '55555,' returns one item "55555", not two "55555" and "". Using Jeff's functional test cases (17 strings) it returns 45 rows and not 52.

    I have never found that way of treating the trailing-delimiter case intuitive or useful, but it is a trivial code change if you do need it to work that way:

    CREATE ASSEMBLY Split

    FROM 0x4D5A90000300000004000000FFFF0000B800000000000000400000000000000000000000000000000000000000000000000000000000000000000000800000000E1FBA0E00B409CD21B8014CCD21546869732070726F6772616D2063616E6E6F742062652072756E20696E20444F53206D6F64652E0D0D0A2400000000000000504500004C010300F7F34D500000000000000000E00002210B010800000E00000006000000000000AE2D0000002000000040000000004000002000000002000004000000000000000400000000000000008000000002000000000000030040850000100000100000000010000010000000000000100000000000000000000000602D00004B000000004000006803000000000000000000000000000000000000006000000C00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000200000080000000000000000000000082000004800000000000000000000002E74657874000000B40D000000200000000E000000020000000000000000000000000000200000602E7273726300000068030000004000000004000000100000000000000000000000000000400000402E72656C6F6300000C0000000060000000020000001400000000000000000000000000004000004200000000000000000000000000000000902D0000000000004800000002000500E42100007C0B00000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000A2026F1300000A2D0E026F1400000A0373040000062B0C168D140000011673040000068C030000022A000000133002001A0000000100001102A5040000020A031200280800000654041200280A000006512A1E02281500000A2AC202037D0100000402047D0300000402027B010000048E697D02000004027C05000004FE150400000202167D040000042A0013300500C400000002000011027B04000004027B020000043102162A027B040000040A2B56027B010000040693027B030000043342027C0500000425280800000617582809000006027C05000004027B01000004027B0400000406027B0400000459731700000A280B000006020617587D04000004172A0617580A06027B0200000432A1027C0500000425280800000617582809000006027C05000004027B01000004027B04000004027B02000004027B0400000459731700000A280B00000602027B0200000417587D04000004172A32027B050000048C040000022A1A731800000A7A1E027B060000042A2202037D060000042A1E027B070000042A2202037D070000042A000042534A4201000100000000000C00000076322E302E35303732370000000005006C000000CC030000237E0000380400009004000023537472696E677300000000C80800000800000023555300D0080000100000002347554944000000E00800009C02000023426C6F6200000000000000020000015717A2030902000000FA253300160000010000001900000004000000070000000B00000009000000010000001900000012000000020000000200000003000000050000000300000001000000020000000200000000000A00010000000000060052004B00060059004B000600760063000A00A3008E000A002D02120206007502560206009F028D020600B6028D020600D3028D020600F2028D0206000B038D02060024038D0206003F038D0206005A038D02060073035602060087038D020600C003A0030600E003A0030A000404120206002E044B00060033045602060049045602060054044B0006005B044B0006007304A003000000000100000000000100010001001000140000000500010001000B011000290000000900010004000B01100039000000090006000800210057012E0021005D01320021006401350001006E0132000100740138000100CE0132000100E80152005020000000009600AC000A0001007C20000000009600B60012000300A220000000008618BE001B000600AA20000000008318BE001F000600DC2000000000E101C40026000800AC2100000000E109F5002A000800B92100000000E1012C011B000800C021000000008308A20140000800C821000000008308AF0144000800D121000000008308BC0149000900D921000000008308C5014D000900000001000C02000002003F02000001004902020002004D02020003008202000001000C02000002003F0200000100870200000100870203000D001900EC002600190020012A00190051011B002900BE001B003100BE001B003900BE004D004100BE004D004900BE004D005100BE004D005900BE004D006100BE004D006900BE004D007100BE004D007900BE0070008100BE004D008900BE0044009100BE001B009900BE001B00210019042600210024040D020900BE001B00A900BE001702B900BE001D02C100BE001B00C900BE001B00200093007500240023005D002E0033002E022E0043003D022E008B007C022E004B0043022E0053002E022E0073003D022E003B003D022E00830073022E005B0052022E0063003D02C100CB002902E100CB0029020001CB0029022001CB0029024001CB0029026001CB00290212022502030001000400020000007B013C000000FE01550000000702590002000600030001000900050002000800050002000A00070001000B00070003000A00030003000C00050003000E00070004800000010000001D12E315000000000000FE03000002000000000000000000000001004200000000000200000000000000000000000100820000000000030002000400020000000000003C4D6F64756C653E0053706C69742E646C6C0055736572446566696E656446756E6374696F6E730053706C6974456E756D657261746F720053706C6974526F77006D73636F726C69620053797374656D004F626A6563740056616C7565547970650053797374656D2E436F6C6C656374696F6E730049456E756D657261746F720053797374656D2E446174610053797374656D2E446174612E53716C54797065730053716C43686172730053706C6974746572420046696C6C526F77002E63746F720053797374656D2E436F6C6C656374696F6E732E49456E756D657261746F722E4D6F76654E657874004D6F76654E6578740053797374656D2E436F6C6C656374696F6E732E49456E756D657261746F722E6765745F43757272656E74006765745F43757272656E740053797374656D2E436F6C6C656374696F6E732E49456E756D657261746F722E526573657400526573657400696E707574006C656E6774680064656C696D69746572007374617274007265636F72640053797374656D2E436F6C6C656374696F6E732E49456E756D657261746F722E43757272656E74006765745F53657175656E6365007365745F53657175656E6365006765745F4974656D007365745F4974656D003C53657175656E63653E6B5F5F4261636B696E674669656C64003C4974656D3E6B5F5F4261636B696E674669656C640053657175656E6365004974656D00496E707574004D6963726F736F66742E53716C5365727665722E5365727665720053716C46616365744174747269627574650044656C696D69746572006F626A0073657175656E63650053797374656D2E52756E74696D652E496E7465726F705365727669636573004F7574417474726962757465006974656D0076616C75650053797374656D2E5265666C656374696F6E00417373656D626C795469746C6541747472696275746500417373656D626C794465736372697074696F6E41747472696275746500417373656D626C79436F6E66696775726174696F6E41747472696275746500417373656D626C79436F6D70616E7941747472696275746500417373656D626C7950726F6475637441747472696275746500417373656D626C79436F7079726967687441747472696275746500417373656D626C7954726164656D61726B41747472696275746500417373656D626C7943756C7475726541747472696275746500436F6D56697369626C6541747472696275746500417373656D626C7956657273696F6E4174747269627574650053797374656D2E52756E74696D652E436F6D70696C6572536572766963657300436F6D70696C6174696F6E52656C61786174696F6E734174747269627574650052756E74696D65436F6D7061746962696C6974794174747269627574650053706C69740053716C46756E6374696F6E417474726962757465006765745F49734E756C6C006765745F56616C75650043686172005374727563744C61796F7574417474726962757465004C61796F75744B696E6400537472696E67004E6F74496D706C656D656E746564457863657074696F6E00436F6D70696C657247656E6572617465644174747269627574650000000003200000000000769A3CD63DE4C742A197817A9DEE94EC0008B77A5C561934E089070002120D121103080003011C1008100E03200001062002011D0303032000020320001C03061D03020608020603030611100328001C0320000804200101080320000E042001010E02060E032800080328000E12010001005408074D617853697A65FFFFFFFF04200101028196010006005455794D6963726F736F66742E53716C5365727665722E5365727665722E446174614163636573734B696E642C2053797374656D2E446174612C2056657273696F6E3D322E302E302E302C2043756C747572653D6E65757472616C2C205075626C69634B6579546F6B656E3D623737613563353631393334653038390A446174614163636573730000000054557F4D6963726F736F66742E53716C5365727665722E5365727665722E53797374656D446174614163636573734B696E642C2053797374656D2E446174612C2056657273696F6E3D322E302E302E302C2043756C747572653D6E65757472616C2C205075626C69634B6579546F6B656E3D623737613563353631393334653038391053797374656D446174614163636573730000000054020F497344657465726D696E69737469630154020949735072656369736501540E1146696C6C526F774D6574686F644E616D650746696C6C526F77540E0F5461626C65446566696E6974696F6E2173657175656E636520494E542C206974656D204E564152434841522834303030290420001D030407011110052001011159072003011D0308080307010804010000000E01000953706C69747465724200000501000000000E0100094D6963726F736F667400002001001B436F7079726967687420C2A9204D6963726F736F6674203230313100000801000800000000001E01000100540216577261704E6F6E457863657074696F6E5468726F77730100882D000000000000000000009E2D0000002000000000000000000000000000000000000000000000902D00000000000000005F436F72446C6C4D61696E006D73636F7265652E646C6C0000000000FF250020400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001001000000018000080000000000000000000000000000001000100000030000080000000000000000000000000000001000000000048000000584000000C03000000000000000000000C0334000000560053005F00560045005200530049004F004E005F0049004E0046004F0000000000BD04EFFE0000010000000100E3151D1200000100E3151D123F000000000000000400000002000000000000000000000000000000440000000100560061007200460069006C00650049006E0066006F00000000002400040000005400720061006E0073006C006100740069006F006E00000000000000B0046C020000010053007400720069006E006700460069006C00650049006E0066006F00000048020000010030003000300030003000340062003000000034000A00010043006F006D00700061006E0079004E0061006D006500000000004D006900630072006F0073006F006600740000003C000A000100460069006C0065004400650073006300720069007000740069006F006E0000000000530070006C0069007400740065007200420000003C000E000100460069006C006500560065007200730069006F006E000000000031002E0030002E0034003600330037002E003500360030003300000034000A00010049006E007400650072006E0061006C004E0061006D0065000000530070006C00690074002E0064006C006C0000005C001B0001004C006500670061006C0043006F007000790072006900670068007400000043006F0070007900720069006700680074002000A90020004D006900630072006F0073006F006600740020003200300031003100000000003C000A0001004F0072006900670069006E0061006C00460069006C0065006E0061006D0065000000530070006C00690074002E0064006C006C00000034000A000100500072006F0064007500630074004E0061006D00650000000000530070006C00690074007400650072004200000040000E000100500072006F006400750063007400560065007200730069006F006E00000031002E0030002E0034003600330037002E003500360030003300000044000E00010041007300730065006D0062006C0079002000560065007200730069006F006E00000031002E0030002E0034003600330037002E0035003600300033000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002000000C000000B03D00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

    WITH PERMISSION_SET = SAFE;

    GO

    CREATE FUNCTION dbo.SplitterB

    (

    @Input nvarchar(max),

    @Delimiter nchar(1)

    )

    RETURNS TABLE

    (

    sequence int NULL,

    item nvarchar(4000) NULL

    )

    AS

    EXTERNAL NAME Split.UserDefinedFunctions.SplitterB;

    GO

    Source:

    using System.Collections;

    using System.Data.SqlTypes;

    using Microsoft.SqlServer.Server;

    public partial class UserDefinedFunctions

    {

    /**

    * How SQL Server SQLCLR table-valued functions work:

    *

    * 1. SQL Server passes the input parameters in to the function and receives an enumeration object in return

    * 2. SQL Server calls the MoveNext() method on the enumeration object

    * 3. If the MoveNext() call returns true, SQL Server calls the Current() method to get an object for the current row

    * 4. SQL Server calls the FillRow method to obtain column values for the current row

    * 5. Repeat from step 2, until MoveNext() returns false

    *

    * */

    [SqlFunction

    (

    DataAccess = DataAccessKind.None, // No user data access by this function

    SystemDataAccess = SystemDataAccessKind.None, // No system data access by this function

    IsDeterministic = true, // This function is deterministic

    IsPrecise = true, // This function is precise

    FillRowMethodName = "FillRow", // The method called by SQL Server to obtain the next row

    TableDefinition =

    "sequence INT, item NVARCHAR(4000)" // Returned table definition

    )

    ]

    // 1. SQL Server passes input parameters and receives an enumration object

    public static IEnumerator SplitterB

    (

    [SqlFacet(MaxSize = -1)] SqlChars Input,

    char Delimiter

    )

    {

    return Input.IsNull ?

    new SplitEnumerator(new char[0], char.MinValue) :

    new SplitEnumerator(Input.Value, Delimiter);

    }

    // The enumeration object

    struct SplitEnumerator : IEnumerator

    {

    // Constructor (called once when the object is created)

    internal SplitEnumerator(char[] Input, char Delimiter)

    {

    // Save references

    input = Input;

    delimiter = Delimiter;

    // Remember the length of the character array

    length = input.Length;

    // Structure holding split rows

    record = new SplitRow();

    // Starting at the first character

    start = 0;

    }

    // Enumerator implementation

    #region IEnumerator Methods

    // 2. SQL Server calls the MoveNext() method on the enumeration object

    bool IEnumerator.MoveNext()

    {

    // No more rows?

    if (start > length) { return false; }

    // Find the next delimiter

    for (int i = start; i < length; i++)

    {

    if (input == delimiter)

    {

    // Increment the sequence number

    record.Sequence++;

    // Save the split element

    record.Item = new string(input, start, i - start);

    // Set the next element search start point

    start = i + 1;

    return true;

    }

    }

    // Last item

    record.Sequence++;

    record.Item = new string(input, start, length - start);

    start = length + 1;

    return true;

    }

    // 3. SQL Server calls the Current() method to get an object for the current row

    // (We pack the current row data in an OutputRecord structure)

    object IEnumerator.Current

    {

    get { return record; }

    }

    // Required by the IEnumerator interface, but not needed for this implementation

    void IEnumerator.Reset()

    {

    throw new System.NotImplementedException();

    }

    #endregion

    readonly char[] input; // Reference to the string to be split

    readonly int length; // Length of the input string

    readonly char delimiter; // The delimiter character

    int start; // Current search start position

    SplitRow record; // Each row to be returned

    }

    // 4. SQL Server calls the FillRow method to obtain column values for the current row

    public static void FillRow(object obj, out int sequence, out string item)

    {

    // The passed-in object is an OutputRecord

    var r = (SplitRow)obj;

    // Set the output parameter values

    sequence = r.Sequence;

    item = r.Item;

    }

    // Structure used to hold each row

    struct SplitRow

    {

    internal int Sequence { get; set; } // Sequence of the element

    internal string Item { get; set; } // The element

    }

    };

    The second is that it doesn't use collations for the comparison of string to delimiter [...] I can't see any use for this behaviour besides confusing yourself and others so I recommend using a binary collation.

    Yes that's why the SQLCLR uses binary comparisons. It is easy to carry over the collation settings from the caller, but I have never felt the need to do this for string splitting.