Fastest and easiest to read (IMHO):
// Strings are immutable: the call returns a new string, so keep the result.
var stripped = s.StripPunctuation();
To implement it as an extension method:
/// <summary>
/// String helpers for removing punctuation characters.
/// </summary>
public static class StringExtension
{
    /// <summary>
    /// Returns a copy of <paramref name="s"/> without the characters that
    /// <see cref="char.IsPunctuation(char)"/> reports as punctuation.
    /// </summary>
    /// <remarks>
    /// Only Unicode punctuation categories are removed; symbol characters such as
    /// <c>$</c> or <c>&gt;</c> are not punctuation and are kept.
    /// </remarks>
    /// <param name="s">The text to filter.</param>
    /// <returns>A new string with the remaining characters in their original order.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="s"/> is null.</exception>
    public static string StripPunctuation(this string s)
    {
        // Extension methods can be invoked on a null receiver; fail with a clear
        // argument error rather than a NullReferenceException from the loop.
        if (s == null)
        {
            throw new System.ArgumentNullException("s");
        }

        // The result is never longer than the input, so reserving s.Length up
        // front avoids regrowing the buffer while appending.
        var sb = new StringBuilder(s.Length);
        foreach (char c in s)
        {
            if (!char.IsPunctuation(c))
            {
                sb.Append(c);
            }
        }

        return sb.ToString();
    }
}
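I tested several of the ideas posted here. Hades32's solution (a StringBuilder filled in a foreach loop) was the fastest, although the three StringBuilder variants are within measurement noise of one another. Timings are for 1,000,000 calls on a short string: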
stringbuilder with foreach ( 1059 ms )
stringbuilder with foreach wrapped in extension ( 1056 ms )
stringbuilder with for loop ( 1061 ms )
string concat with foreach ( 2254 ms )
where with new string ( 1333 ms )
where with aggregate ( 2884 ms )
compiled regex ( 2481 ms )
This isn't a very realistic benchmark. Here is the code if you'd like to improve it:
/// <summary>
/// Times several ways of removing punctuation from a string by passing each
/// candidate implementation to <c>Measure</c> under a descriptive label.
/// </summary>
[Test]
public void MeasureStripPunctionationTest()
{
    Measure("stringbuilder with foreach", s =>
    {
        var sb = new StringBuilder();
        foreach (char c in s)
        {
            if (!char.IsPunctuation(c))
            {
                sb.Append(c);
            }
        }
        return sb.ToString();
    });

    // Goes through the StripPunctuation extension method so that this case
    // measures the wrapped call its label describes, not another inline loop.
    Measure("stringbuilder with foreach wrapped in extension", s => s.StripPunctuation());

    Measure("stringbuilder with for", s =>
    {
        var sb = new StringBuilder();
        for (int i = 0; i < s.Length; i++)
        {
            if (!char.IsPunctuation(s[i]))
            {
                sb.Append(s[i]);
            }
        }
        return sb.ToString();
    });

    // Deliberately naive: every += allocates a new string.
    Measure("string concat with foreach", s =>
    {
        var result = "";
        foreach (char c in s)
        {
            if (!char.IsPunctuation(c))
            {
                result += c;
            }
        }
        return result;
    });

    Measure("where with new string", s => new string(s.Where(item => !char.IsPunctuation(item)).ToArray()));

    Measure("where with aggregate", s => s.Where(item => !char.IsPunctuation(item))
        .Aggregate(string.Empty, (result, c) => result + c));

    // The regex is built once, outside the lambda, so its construction is not
    // repeated on every call.
    var stripRegex = new Regex(@"\p{P}+", RegexOptions.Compiled);
    Measure("compiled regex", s => stripRegex.Replace(s, ""));
}
/// <summary>
/// Invokes a punctuation-stripping delegate one million times on a fixed sample
/// string, with the loop enclosed in a <c>PerformanceTimer</c> created from
/// <paramref name="name"/>.
/// </summary>
/// <param name="name">Label passed to the <c>PerformanceTimer</c> constructor.</param>
/// <param name="stripPunctation">Candidate implementation to call repeatedly.</param>
private void Measure(string name, Func<string, string> stripPunctation)
{
    const string sample = "a !@#$ short >{}*' string";
    const int iterations = 1000000;

    // NOTE(review): presumably PerformanceTimer reports the elapsed time when
    // it is disposed at the end of the using block; confirm against its source.
    using (new PerformanceTimer(name))
    {
        int remaining = iterations;
        while (remaining-- > 0)
        {
            // Only the cost of the call matters here; the result is discarded.
            stripPunctation(sample);
        }
    }
}