This is what we use (VB.NET):
' Rewrites each mailto: anchor found in Html as an inline <script> that emits the
' anchor through document.write, so the address does not appear verbatim in the
' page source. The replacement is applied to Result.
' NOTE(review): Html and Result are declared outside this fragment; presumably
' Result starts out as a copy of Html -- confirm against the enclosing method.
' NOTE(review): ConvertToAsciiHex and ConvertToHtmlEntites are defined elsewhere;
' they are assumed to return the encoded form of the string they are given.

' Loose pattern: any anchor whose opening tag contains "mailto:".
Dim rxEmailLink As New Regex("<a\b[^>]*mailto:\b[^>]*>(.*?)</a>")
' Detailed pattern that splits one anchor into four groups:
'   1 = opening tag up to and including "mailto:", 2 = the address,
'   3 = the rest of the opening tag, 4 = the link text.
' Built once here instead of on every loop iteration; the pattern never changes.
Dim rxLink As New Regex("(<a\b[^>]*mailto:)([\w.\-_^@]*@[\w.\-_^@]*)(\b[^>]*?)>(.*?)</a>")
Dim m As Match = rxEmailLink.Match(Html)
While m.Success
    Dim strEntireLinkOrig As String = m.Value
    ' Replace any single quotes with double quotes so the anchor can sit inside
    ' the single-quoted JavaScript string literal without terminating it.
    Dim strEntireLink As String = strEntireLinkOrig.Replace("'", """")
    Dim rxLinkMatch As Match = rxLink.Match(strEntireLink)
    ' The loose pattern also matches anchors the detailed pattern cannot split
    ' (for example a mailto: with no "@" in it). All groups would then be empty
    ' and the link would be overwritten with a script that writes only ">", so
    ' such anchors are left untouched instead.
    If rxLinkMatch.Success Then
        Dim strReplace As String = String.Format("<script language=""JavaScript"">document.write('{0}{1}{2}>{3}</a>');</script>", _
            RandomlyChopStringJS(rxLinkMatch.Groups(1).Value), _
            ConvertToAsciiHex(rxLinkMatch.Groups(2).Value), _
            rxLinkMatch.Groups(3).Value, _
            ConvertToHtmlEntites(rxLinkMatch.Groups(4).Value))
        Result = Result.Replace(strEntireLinkOrig, strReplace)
    End If
    m = m.NextMatch()
End While
and the helper it calls:
''' <summary>
''' Breaks a string into pieces of 1 to 6 characters and joins the pieces with
''' the three-character sequence '+' so that, when placed inside a single-quoted
''' JavaScript string literal, the text becomes a concatenation of short literals.
''' </summary>
''' <param name="s">Text to chop; may be Nothing or empty.</param>
''' <returns>The chopped text, or an empty string when s is Nothing or empty.</returns>
Public Function RandomlyChopStringJS(ByVal s As String) As String
    ' The first piece length is drawn before the empty check so that Rnd() is
    ' consumed once per call regardless of the input.
    Dim nextBreak As Integer = Int(6 * Rnd()) + 1
    If String.IsNullOrEmpty(s) Then
        Return ""
    End If

    Dim pieces As New System.Text.StringBuilder()
    Dim runLength As Integer = 0
    For i As Integer = 0 To s.Length - 1
        ' Current piece is full: emit the separator and draw the next length.
        If runLength = nextBreak Then
            pieces.Append("'+'")
            nextBreak = Int(6 * Rnd()) + 1
            runLength = 0
        End If
        pieces.Append(s.Chars(i))
        runLength += 1
    Next
    Return pieces.ToString()
End Function
We override Render and run the outgoing HTML through this before it is sent to the client. The result is email links that display normally in the browser but look like this in the page source:
<script language="JavaScript">document.write('<a '+'clas'+'s='+'"Mail'+'Link'+'" hr'+'ef'+'="ma'+'ilto:%69%6E%66%6F%40%62%69%63%75%73%61%2E%6F%72%67">info@bicusa.org</a>');</script>
This is obviously not foolproof, but it should cut down on a certain amount of harvesting without making things harder for the visitor.