I was really surprised at how poorly the RegEx did compared to MailAddress. I am now wondering what MailAddress uses to determine validity.
Here are a couple TVFs to go along with the SVFs:
[font="Courier New"]using System;
using System.Data;
using System.Data.SqlClient;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
using System.Text.RegularExpressions;
using System.Collections;
using System.Net.Mail;
public partial class UserDefinedFunctions
{
private static readonly Regex re = new Regex("^([0-9a-zA-Z]([-.\\w]*[0-9a-zA-Z])*@([0-9a-zA-Z][-\\w]*[0-9a-zA-Z]\\.)+[a-zA-Z]{2,9})$");
[Microsoft.SqlServer.Server.SqlFunction]
public static SqlBoolean IsValidEmailRegEx(SqlString email)
{
return re.IsMatch(email.ToString());
}
[SqlFunction(FillRowMethodName = "EmailIsValidRegEx",
TableDefinition = "EmailIsValid bit")]
public static IEnumerable EmailValidityRegEx(SqlString emailAddress)
{
ArrayList EmailCollection = new ArrayList();
EmailCollection.Add(emailAddress);
return EmailCollection;
}
//FillRow method. The method name has been specified above as
//a SqlFunction attribute property
public static void EmailIsValidRegEx(object emailAddress,
out SqlBoolean emailIsValid)
{
emailIsValid = re.IsMatch(emailAddress.ToString());
}
};
[/font]
[font="Courier New"]using System;
using System.Data;
using System.Data.SqlClient;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
using System.Collections;
using System.Net.Mail;
public partial class UserDefinedFunctions
{
[Microsoft.SqlServer.Server.SqlFunction]
public static SqlBoolean IsValidEmailSystemNet(SqlString email)
{
try
{
System.Net.Mail.MailAddress ma = new System.Net.Mail.MailAddress(email.ToString());
return (SqlBoolean)true;
}
catch
{
return (SqlBoolean)false;
}
}
[SqlFunction(FillRowMethodName = "EmailIsValidSystemNet",
TableDefinition = "EmailIsValid bit")]
public static IEnumerable EmailValiditySystemNet(SqlString emailAddress)
{
ArrayList EmailCollection = new ArrayList();
EmailCollection.Add(emailAddress);
return EmailCollection;
}
//FillRow method. The method name has been specified above as
//a SqlFunction attribute property
public static void EmailIsValidSystemNet(object emailAddress,
out SqlBoolean emailIsValid)
{
try
{
MailAddress ma = new MailAddress(emailAddress.ToString());
emailIsValid = (SqlBoolean)true;
}
catch
{
emailIsValid = (SqlBoolean)false;
}
}
};
[/font]
On my lowly 32-bit 2.4 GHz Intel Core 2 Duo laptop w/4GB RAM the MailAddress SVF took the prize although I wonder if on a machine with more CPUs if there would be a tipping where the TVF might finish in a shorter amount of time:
SQL Server parse and compile time:
CPU time = 0 ms, elapsed time = 0 ms.
SQL Server Execution Times:
CPU time = 0 ms, elapsed time = 0 ms.
(1000 row(s) affected)
--------------------------------------------------------------------------------
CLEAR CACHE SO NEITHER QUERY GETS AN ADVANTAGE
--------------------------------------------------------------------------------
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========== BASELINE ==========
SQL Server Execution Times:
CPU time = 0 ms, elapsed time = 0 ms.
================================================================================
--------------------------------------------------------------------------------
CLEAR CACHE SO NEITHER QUERY GETS AN ADVANTAGE
--------------------------------------------------------------------------------
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========== Using Mail Object Function ==========
SQL Server Execution Times:
CPU time = 16 ms, elapsed time = 61 ms.
(1000 row(s) affected)
================================================================================
--------------------------------------------------------------------------------
CLEAR CACHE SO NEITHER QUERY GETS AN ADVANTAGE
--------------------------------------------------------------------------------
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========== Using RegEx Function ==========
SQL Server Execution Times:
CPU time = 7145 ms, elapsed time = 7437 ms.
(1000 row(s) affected)
================================================================================
--------------------------------------------------------------------------------
CLEAR CACHE SO NEITHER QUERY GETS AN ADVANTAGE
--------------------------------------------------------------------------------
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========== Using Table-Valued Mail Object Function ==========
SQL Server Execution Times:
CPU time = 16 ms, elapsed time = 80 ms.
(1000 row(s) affected)
================================================================================
--------------------------------------------------------------------------------
CLEAR CACHE SO NEITHER QUERY GETS AN ADVANTAGE
--------------------------------------------------------------------------------
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
========== Using Table-Valued RegEx Function ==========
SQL Server Execution Times:
CPU time = 7207 ms, elapsed time = 7429 ms.
(1000 row(s) affected)
================================================================================
--------------------------------------------------------------------------------
CLEAR CACHE SO NEITHER QUERY GETS AN ADVANTAGE
--------------------------------------------------------------------------------
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
DBCC execution completed. If DBCC printed error messages, contact your system administrator.
USE test;
SET STATISTICS TIME OFF;
IF OBJECT_ID(N'tempdb..#MySampleEmails') IS NOT NULL
DROP TABLE #MySampleEmails;
GO
WITH cteBase
AS (
--=== Gen enough 8 character random codes
SELECT TOP 1000
LEFT(NEWID(), 8) AS RandomCode
FROM Master.sys.All_Columns ac1
CROSS JOIN Master.sys.All_Columns ac2
),
cteManip
AS (
--=== Number the codes so we can select unique random codes
SELECT RandomCode + CASE WHEN RandomCode LIKE '[A-Z]%' THEN '@'
ELSE ''
END + RandomCode + CASE WHEN RandomCode LIKE '[A-Z]%' THEN '.com'
ELSE ''
END AS RandomCode
FROM cteBase
)
--=== Select 1 million random codes
SELECT IDENTITY( INT,1,1 ) AS ID,
RandomCode
INTO #MySampleEmails
FROM cteManip;
---------------------------------------
IF OBJECT_ID(N'tempdb..#deleteme1') IS NOT NULL
DROP TABLE #deleteme1;
GO
IF OBJECT_ID(N'tempdb..#deleteme2') IS NOT NULL
DROP TABLE #deleteme2;
GO
IF OBJECT_ID(N'tempdb..#deleteme3') IS NOT NULL
DROP TABLE #deleteme3;
GO
IF OBJECT_ID(N'tempdb..#deleteme4') IS NOT NULL
DROP TABLE #deleteme4;
GO
DECLARE @HOLDER VARCHAR(20);
PRINT REPLICATE('-', 80);
PRINT 'CLEAR CACHE SO NEITHER QUERY GETS AN ADVANTAGE';
PRINT REPLICATE('-', 80);
DBCC FREEPROCCACHE;
DBCC DROPCLEANBUFFERS;
PRINT '========== BASELINE ==========';
SET STATISTICS TIME ON;
SELECT @HOLDER = COUNT(*)
FROM #MySampleEmails;
SET STATISTICS TIME OFF;
PRINT REPLICATE('=', 80);
PRINT REPLICATE('-', 80);
PRINT 'CLEAR CACHE SO NEITHER QUERY GETS AN ADVANTAGE';
PRINT REPLICATE('-', 80);
DBCC FREEPROCCACHE;
DBCC DROPCLEANBUFFERS;
PRINT '========== Using Mail Object Function ==========';
SET STATISTICS TIME ON;
SELECT *,
dbo.IsValidEmailSystemNet(RandomCode) AS IsValidEmail
INTO #deleteme1
FROM #MySampleEmails
OPTION (MAXDOP 0);
SET STATISTICS TIME OFF;
PRINT REPLICATE('=', 80);
PRINT REPLICATE('-', 80);
PRINT 'CLEAR CACHE SO NEITHER QUERY GETS AN ADVANTAGE';
PRINT REPLICATE('-', 80);
DBCC FREEPROCCACHE;
DBCC DROPCLEANBUFFERS;
PRINT '========== Using RegEx Function ==========';
SET STATISTICS TIME ON;
SELECT *,
dbo.IsValidEmailRegEx(RandomCode) AS IsValidEmail
INTO #deleteme2
FROM #MySampleEmails;
SET STATISTICS TIME OFF;
PRINT REPLICATE('=', 80);
PRINT REPLICATE('-', 80);
PRINT 'CLEAR CACHE SO NEITHER QUERY GETS AN ADVANTAGE';
PRINT REPLICATE('-', 80);
DBCC FREEPROCCACHE;
DBCC DROPCLEANBUFFERS;
PRINT '========== Using Table-Valued Mail Object Function ==========';
SET STATISTICS TIME ON;
SELECT mse.ID,
mse.RandomCode,
x.EmailIsValid
INTO #deleteme4
FROM #MySampleEmails mse
CROSS APPLY dbo.EmailValiditySystemNet(mse.RandomCode) x;
SET STATISTICS TIME OFF;
PRINT REPLICATE('=', 80);
PRINT REPLICATE('-', 80);
PRINT 'CLEAR CACHE SO NEITHER QUERY GETS AN ADVANTAGE';
PRINT REPLICATE('-', 80);
DBCC FREEPROCCACHE;
DBCC DROPCLEANBUFFERS;
PRINT '========== Using Table-Valued RegEx Function ==========';
SET STATISTICS TIME ON;
SELECT mse.ID,
mse.RandomCode,
x.EmailIsValid
INTO #deleteme3
FROM #MySampleEmails mse
CROSS APPLY dbo.EmailValidityRegEx(mse.RandomCode) x;
SET STATISTICS TIME OFF;
PRINT REPLICATE('=', 80);
PRINT REPLICATE('-', 80);
PRINT 'CLEAR CACHE SO NEITHER QUERY GETS AN ADVANTAGE';
PRINT REPLICATE('-', 80);
DBCC FREEPROCCACHE;
DBCC DROPCLEANBUFFERS;
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
IF OBJECT_ID(N'tempdb..#deleteme1') IS NOT NULL
DROP TABLE #deleteme1;
GO
IF OBJECT_ID(N'tempdb..#deleteme2') IS NOT NULL
DROP TABLE #deleteme2;
GO
IF OBJECT_ID(N'tempdb..#deleteme3') IS NOT NULL
DROP TABLE #deleteme3;
GO
IF OBJECT_ID(N'tempdb..#deleteme4') IS NOT NULL
DROP TABLE #deleteme4;
GO
IF OBJECT_ID(N'tempdb..#MySampleEmails') IS NOT NULL
DROP TABLE #MySampleEmails;
GO
There are no special teachers of virtue, because virtue is taught by the whole community.
--Plato