In the following code you'll see a simple lexer that conforms to the following regular expression:
\d*(\.\d*)?([eE]([+-]\d+|\d+))?
If I were to use this design for something more complex, all of the anonymous delegates would be a nightmare to maintain. The biggest challenge I am facing is what to name the methods that would act as choice points in the state machine. In the variable `exponentPart`, the last anonymous delegate passed to `MatchOne` will decide whether we have a signed integer, an integer, or a false match. Please post any ideas on how I can organize such a project, assuming a complex language with lots of shared symbols.
/// <summary>
/// Composes three lexers (exponent, fraction, decimal literal) with the fluent
/// <c>Lex</c> builder, runs the decimal-literal lexer over a sample input,
/// prints the slice of the input between the reported start and end indices
/// (end inclusive), and then blocks on <c>Console.ReadLine</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Exponent: an 'e' / 'E' marker, then a choice point on the character after it.
    var exponentPart =
        Lex.Start()
            .MatchOne(marker => marker.Continue(marker.Current == 'e' || marker.Current == 'E'))
            .MatchOne(
                state => // Choice point: unsigned exponent, signed exponent, or no match.
                {
                    // Unsigned exponent. The current character is already known to be a
                    // digit, so any further digits are optional.
                    // NOTE(review): Continue(true) presumably steps past the current
                    // character - confirm against the Lex implementation.
                    if (char.IsDigit(state.Current))
                    {
                        var trailingDigits =
                            Lex.Start().MatchZeroOrMore(digit => digit.Continue(char.IsDigit(digit.Current)));
                        return trailingDigits(state.Continue(true));
                    }

                    // Signed exponent. At least one digit has to follow the sign.
                    if (state.Current == '+' || state.Current == '-')
                    {
                        var requiredDigits =
                            Lex.Start().MatchOneOrMore(digit => digit.Continue(char.IsDigit(digit.Current)));
                        return requiredDigits(state.Continue(true));
                    }

                    // Neither a digit nor a sign: hand back the remembered state.
                    return state.RememberedState();
                });

    // Fraction: '.', one or more digits, a Remember() call, then the exponent lexer.
    var fractionalPart =
        Lex.Start()
            .MatchOne(dot => dot.Continue(dot.Current == '.'))
            .MatchOneOrMore(digit => digit.Continue(char.IsDigit(digit.Current)))
            .Remember()
            .MatchOne(exponentPart);

    // Decimal literal: one or more digits, a Remember() call, then a choice point
    // that dispatches on the character following the integer digits.
    var decimalLiteral =
        Lex.Start()
            .MatchOneOrMore(digit => digit.Continue(char.IsDigit(digit.Current)))
            .Remember()
            .MatchOne(
                state => // Choice point: fraction, exponent, or stop at the remembered state.
                {
                    switch (state.Current)
                    {
                        case '.':
                            return fractionalPart(state);
                        case 'e':
                        case 'E':
                            return exponentPart(state);
                        default:
                            return state.RememberedState();
                    }
                });

    var input = "999.999e+999";
    var result = decimalLiteral(new LexState(input, 0, 0, 0, true));

    // EndIndex is treated as inclusive, hence the +1 when computing the length.
    var matchLength = result.EndIndex - result.StartIndex + 1;
    var matchedText = result.Value.Substring(result.StartIndex, matchLength);
    Console.WriteLine(matchedText);

    Console.ReadLine();
}