I've had some good results from this in recent years:
USE [Matching]
GO
/****** Object: UserDefinedFunction [dbo].[IF_Levenshtein01] Script Date: 25/07/2013 10:00:08 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- this will score around 10,000 word pairs per second on 2010 laptop technology
alter FUNCTION [dbo].[IF_Levenshtein02]
(
@Reference VARCHAR(20), @Target VARCHAR(20)
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
( -- output query
SELECT [Score %] = CASE
WHEN @Reference = @Target THEN CAST(100 AS NUMERIC(5,2))
WHEN 0 = 1 THEN CAST(100 AS NUMERIC(5,2))-- placeholder for any other shortcuts
ELSE
(SELECT
[Score %] = CAST(SUM(LetterScore)*100.0/MAX(WordLength*WordLength) AS NUMERIC(5,2))
FROM ( -- do
SELECT
seq = t1.n,
ref.Letter,
v.WordLength,
LetterScore = v.WordLength - ISNULL(MIN(tgt.n),v.WordLength)
FROM ( -- v
SELECT
Reference = LEFT(@Reference + REPLICATE('_',WordLength),WordLength),
Target = LEFT(@Target + REPLICATE('_',WordLength),WordLength),
WordLength = WordLength
FROM ( -- di
SELECT WordLength = MAX(WordLength)
FROM (VALUES (DATALENGTH(@Reference)),(DATALENGTH(@Target))) d (WordLength)
) di
) v
CROSS APPLY ( -- t1
SELECT TOP(WordLength) n
FROM (VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12),(13),(14),(15),(16),(17),(18),(19),(20)) t2 (n)
) t1
CROSS APPLY (SELECT Letter = SUBSTRING(Reference,t1.n,1)) ref
OUTER APPLY ( -- tgt
SELECT TOP(WordLength) n = ABS(t1.n - t2.n)
FROM (VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12),(13),(14),(15),(16),(17),(18),(19),(20)) t2 (n)
WHERE SUBSTRING(@Target,t2.n,1) = ref.Letter
) tgt
GROUP BY t1.n, ref.Letter, v.WordLength
) do
)
END
) -- output query
GO
SELECT * FROM [dbo].[IF_Levenshtein02] ('summer day','Sommarskog')
Score %
--------
52.00
For better assistance in answering your questions, please read this[/url].
Hidden RBAR: Triangular Joins[/url] / The "Numbers" or "Tally" Table: What it is and how it replaces a loop[/url] Jeff Moden[/url]