I believe this extension method for TextReader
would do the trick:
/// <summary>
/// Extension methods for reading delimiter-separated, typed tokens from a
/// <see cref="TextReader"/>.
/// </summary>
public static class TextReaderTokenizer
{
    // Token separators; adjust as needed. -1 is what Peek() returns at end of input,
    // so including it here lets the token loop stop at EOF.
    private static readonly int[] whitespace = { -1, ' ', '\r', '\n', '\t' };

    /// <summary>
    /// Reads the next token from <paramref name="reader"/> and converts it to
    /// <typeparamref name="T"/> with <see cref="Convert.ChangeType(object, Type)"/>.
    /// </summary>
    /// <typeparam name="T">Type the token text is converted to.</typeparam>
    /// <param name="reader">Reader to consume characters from.</param>
    /// <returns>
    /// The converted token. When only separators (or nothing) remain, the token text
    /// is the empty string, which is then passed to the conversion as-is.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
    public static T ReadToken<T>(this TextReader reader)
    {
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }

        // Skip separators in front of the token. Without this, the separator that
        // ended the previous token is never consumed and every later call would
        // return an empty token. EOF (-1) is in the table but must not be "read".
        int next = reader.Peek();
        while (next >= 0 && Array.IndexOf(whitespace, next) >= 0)
        {
            reader.Read();
            next = reader.Peek();
        }

        // Collect characters up to (not including) the next separator or EOF.
        StringBuilder sb = new StringBuilder();
        while (Array.IndexOf(whitespace, next) < 0)
        {
            sb.Append((char)reader.Read());
            next = reader.Peek();
        }

        return (T)Convert.ChangeType(sb.ToString(), typeof(T));
    }
}
It can be used thus:
// The using statement disposes the reader, and with it the underlying file handle,
// as soon as the tokens have been read.
using (TextReader reader = File.OpenText("foo.txt"))
{
    int n = reader.ReadToken<int>();
    string s = reader.ReadToken<string>();
}
[EDIT] As requested in the question comments, here's an instance wrapper version of the above that is parameterized with delimiters and a CultureInfo:
/// <summary>
/// Reads delimiter-separated tokens from a <see cref="TextReader"/> and converts
/// them to a requested type using a caller-supplied culture.
/// </summary>
public class TextTokenizer
{
    private readonly TextReader reader;
    private readonly Predicate<char> isDelim;
    private readonly CultureInfo cultureInfo;

    /// <summary>
    /// Creates a tokenizer whose delimiters are decided by a predicate.
    /// </summary>
    /// <param name="reader">Reader to consume characters from.</param>
    /// <param name="isDelim">Returns true for characters that separate tokens.</param>
    /// <param name="cultureInfo">Format provider passed to the token conversion.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="reader"/> or <paramref name="isDelim"/> is null.
    /// </exception>
    public TextTokenizer(TextReader reader, Predicate<char> isDelim, CultureInfo cultureInfo)
    {
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }
        if (isDelim == null)
        {
            throw new ArgumentNullException("isDelim");
        }

        this.reader = reader;
        this.isDelim = isDelim;
        this.cultureInfo = cultureInfo;
    }

    /// <summary>
    /// Creates a tokenizer whose delimiters are the characters in <paramref name="delims"/>.
    /// </summary>
    /// <param name="reader">Reader to consume characters from.</param>
    /// <param name="delims">Characters that separate tokens.</param>
    /// <param name="cultureInfo">Format provider passed to the token conversion.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="reader"/> or <paramref name="delims"/> is null.
    /// </exception>
    public TextTokenizer(TextReader reader, char[] delims, CultureInfo cultureInfo)
    {
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }
        if (delims == null)
        {
            throw new ArgumentNullException("delims");
        }

        this.reader = reader;
        this.isDelim = c => Array.IndexOf(delims, c) >= 0;
        this.cultureInfo = cultureInfo;
    }

    /// <summary>Gets the reader this tokenizer consumes characters from.</summary>
    public TextReader BaseReader
    {
        get { return reader; }
    }

    /// <summary>
    /// Reads the next token and converts it to <typeparamref name="T"/> using the
    /// culture supplied at construction.
    /// </summary>
    /// <typeparam name="T">Type the token text is converted to.</typeparam>
    /// <returns>
    /// The converted token. When only delimiters (or nothing) remain, the token text
    /// is the empty string, which is then passed to the conversion as-is.
    /// </returns>
    public T ReadToken<T>()
    {
        // Skip delimiters in front of the token. Without this, the delimiter that
        // ended the previous token is never consumed and every later call would
        // return an empty token. A run of delimiters counts as one separator.
        int c = reader.Peek();
        while (c >= 0 && isDelim((char)c))
        {
            reader.Read();
            c = reader.Peek();
        }

        // Collect characters up to (not including) the next delimiter or EOF (Peek() < 0).
        StringBuilder sb = new StringBuilder();
        while (c >= 0 && !isDelim((char)c))
        {
            sb.Append((char)reader.Read());
            c = reader.Peek();
        }

        // Pass the configured culture so parsing honours it; the two-argument
        // overload would ignore the cultureInfo field entirely.
        return (T)Convert.ChangeType(sb.ToString(), typeof(T), cultureInfo);
    }
}
Sample usage:
// The using statement disposes the reader, and with it the underlying file handle,
// as soon as the tokens have been read.
using (TextReader reader = File.OpenText("foo.txt"))
{
    TextTokenizer tokenizer = new TextTokenizer(
        reader,
        new[] { ' ', '\r', '\n', '\t' },
        CultureInfo.InvariantCulture);
    int n = tokenizer.ReadToken<int>();
    string s = tokenizer.ReadToken<string>();
}