• Here's my C# SQL CLR implementation- just deploy to an SQL server.

    In my opinion, for anything involving significant string manipulation, a CLR language is vastly superior to SQL server almost every time (as well as being a lot more elegant).

    The JaroWinkler algorithm was ported from the java lingpipe version, so I'd advise checking the output before committing to production.

    Also, it's very fast, I was getting 36k rows processing and sorting on JaroWinkler scores in under a second.

    JaroWinkler.cs

    using System;

    using System.Collections.Generic;

    using System.Text;

    namespace JaroWinkler

    {

    class JaroWinklerDistance

    {

    private double mWeightThreshold;

    private double mNumChars;

    /**

    * Construct a basic Jaro string distance without the Winkler

    * modifications. See the class documentation above for more information

    * on the exact algorithm and its parameters.

    */

    public JaroWinklerDistance() {

    mNumChars=Double.PositiveInfinity;

    mWeightThreshold=0;

    }

    /**

    * Construct a Winkler-modified Jaro string distance with the

    * specified weight threshold for refinement and an initial number

    * of characters over which to reweight. See the class

    * documentation above for more information on the exact algorithm

    * and its parameters.

    */

    public JaroWinklerDistance(double weightThreshold, int numChars) {

    mNumChars = numChars;

    mWeightThreshold = weightThreshold;

    }

    /**

    * Returns the Jaro-Winkler distance between the specified character

    * sequences. Teh distance is symmetric and will fall in the

    * range <code>0</code> (perfect match) to <code>1</code> (no overlap).

    * See the class definition above for formal definitions.

    *

    *

    This method is defined to be:

    *

    * <pre>

    * distance(cSeq1,cSeq2) = 1 - proximity(cSeq1,cSeq2)</code></pre>

    *

    * @param cSeq1 First character sequence to compare.

    * @param cSeq2 Second character sequence to compare.

    * @return The Jaro-Winkler comparison value for the two character

    * sequences.

    */

    public double distance(String cSeq1, String cSeq2)

    {

    return 1.0 - proximity(cSeq1,cSeq2);

    }

    /**

    * Return the Jaro-Winkler comparison value between the specified

    * character sequences. The comparison is symmetric and will fall

    * in the range <code>0</code> (no match) to <code>1</code>

    * (perfect match)inclusive. See the class definition above for

    * an exact definition of Jaro-Winkler string comparison.

    *

    *

    The method {@link #distance(CharSequence,CharSequence)} returns

    * a distance measure that is one minus the comparison value.

    *

    * @param cSeq1 First character sequence to compare.

    * @param cSeq2 Second character sequence to compare.

    * @return The Jaro-Winkler comparison value for the two character

    * sequences.

    */

    public double proximity(String cSeq1, String cSeq2) {

    int len1 = cSeq1.Length;

    int len2 = cSeq2.Length;

    if (len1 == 0)

    return len2 == 0 ? 1.0 : 0.0;

    int searchRange = Math.Max(0,Math.Max(len1,len2)/2 - 1);

    bool[] matched1 = new bool[len1];

    matched1.Initialize();

    //Arrays.Fill(matched1,false);

    bool[] matched2 = new bool[len2];

    matched2.Initialize();

    //Arrays.fill(matched2,false);

    int numCommon = 0;

    for (int i = 0; i < len1; ++i) {

    int start = Math.Max(0,i-searchRange);

    int end = Math.Min(i+searchRange+1,len2);

    for (int j = start; j < end; ++j) {

    if (matched2[j]) continue;

    if (cSeq1 != cSeq2[j])

    continue;

    matched1 = true;

    matched2[j] = true;

    ++numCommon;

    break;

    }

    }

    if (numCommon == 0) return 0.0;

    int numHalfTransposed = 0;

    int k = 0;

    for (int i = 0; i < len1; ++i) {

    if (!matched1) continue;

    while (!matched2[k]) { ++k; }

    if (cSeq1 != cSeq2[k])

    {

    ++numHalfTransposed;

    }

    ++k;

    }

    // System.out.println("numHalfTransposed=" + numHalfTransposed);

    int numTransposed = numHalfTransposed/2;

    // System.out.println("numCommon=" + numCommon

    // + " numTransposed=" + numTransposed);

    double numCommonD = numCommon;

    double weight = (numCommonD/len1

    + numCommonD/len2

    + (numCommon - numTransposed)/numCommonD)/3.0;

    if (weight <= mWeightThreshold) return weight;

    double max = Math.Min(mNumChars,Math.Min(cSeq1.Length,cSeq2.Length));

    int pos = 0;

    while (pos < max && cSeq1[pos] == cSeq2[pos])

    {

    ++pos;

    }

    if (pos == 0) return weight;

    return weight + 0.1 * pos * (1.0 - weight);

    }

    /**

    * A constant for the Jaro distance. The value is the same as

    * would be returned by the nullary constructor

    * <code>JaroWinklerDistance()</code>.

    *

    *

    Instances are thread safe, so this single distance instance

    * may be used for all comparisons within an application.

    */

    //public static JaroWinklerDistance JARO_DISTANCE = new JaroWinklerDistance();

    /**

    * A constant for the Jaro-Winkler distance with defaults set as

    * in Winkler's papers. The value is the same as would be

    * returned by the nullary constructor

    * <code>JaroWinklerDistance(0.7,4)</code>.

    *

    *

    Instances are thread safe, so this single distance instance

    * may be used for all comparisons within an application.

    */

    //public static JaroWinklerDistance JARO_WINKLER_DISTANCE = new JaroWinklerDistance(0.70,4);

    }

    }

    JaroWinklerWrapper.cs

    using System;

    using System.Data;

    using System.Data.SqlClient;

    using System.Data.SqlTypes;

    using Microsoft.SqlServer.Server;

    using JaroWinkler;

    public partial class UserDefinedFunctions

    {

    [Microsoft.SqlServer.Server.SqlFunction]

    public static SqlDouble JaroWinklerDistance(string s, string t)

    {

    JaroWinklerDistance a = new JaroWinklerDistance();

    return a.distance(s, t);

    }

    [Microsoft.SqlServer.Server.SqlFunction]

    public static SqlDouble JaroWinklerProximity(string s, string t)

    {

    JaroWinklerDistance a = new JaroWinklerDistance();

    return a.proximity(s, t);

    }

    };

    Then call it from your code something like this

    select

    Name,

    'TestString',

    dbo.JaroWinklerDistance('TestString',Name)

    from

    Names

    WHERE

    Name is not null