views:

146

answers:

2

I just finished writing a date parser for my ECMAScript implementation. Previously I had written a regular expressions compiler and I was really impressed with the way the spec described the process. Essentially the input is passed through a series of continuations that test each portion of the string. My date parser is loosely based around the idea and I really want to know what it is called.

Note: I have only left the core of the parser to reduce noise.

/// <summary>
/// Parses a date/time token stream of the shape
/// <c>YYYY[-MM[-DD]][THH:mm[:ss[.sss]][Z|(+|-)HH:mm]]</c> (a bare <c>THH:mm...</c> is also accepted).
/// </summary>
/// <remarks>
/// The parser is a chain of continuations: every handler consumes one token and returns a
/// <see cref="Result"/> naming the handler for the next token. As interpreted by <see cref="Parse"/>:
/// <list type="bullet">
/// <item><description><c>Success</c>: token accepted, more input is required.</description></item>
/// <item><description><c>Optional</c>: token accepted, the input may legally end here.</description></item>
/// <item><description><c>Complete</c>: parsing stops immediately and the result is built.</description></item>
/// <item><description><c>CanFail</c>: token rejected; the parse still succeeds if the previous step was <c>Optional</c>.</description></item>
/// <item><description><c>MustFail</c>: token rejected; the parse yields NaN.</description></item>
/// </list>
/// </remarks>
public sealed class DateParser
{
    // Upper bound (exclusive) for the millisecond field.
    private const double MillisecondsPerSecond = 1000.0;

    // +1 for "Z" and "+hh:mm", -1 for "-hh:mm". Tracked separately because adding a
    // positive amount to -0.0 yields a positive number, which would drop the sign.
    private double _offsetSign = 1.0;

    /// <summary>
    /// Runs the token stream through the handler chain.
    /// </summary>
    /// <returns>The value produced by <c>Complete()</c>, or <see cref="double.NaN"/> when the input is rejected.</returns>
    public double Parse()
    {
        using (var tokens = Tokenize().GetEnumerator())
        {
            var previous = new Result(ResultType.Success, HandleFirst);
            while (tokens.MoveNext())
            {
                var next = previous.Continuation(tokens.Current);
                if (next.Type == ResultType.Complete)
                {
                    return Complete();
                }
                if (next.Type == ResultType.MustFail)
                {
                    return double.NaN;
                }
                if (next.Type == ResultType.CanFail)
                {
                    return CompleteIfOptional(previous);
                }
                previous = next;
            }

            // Out of tokens: only acceptable if the last step allowed the input to end.
            return CompleteIfOptional(previous);
        }
    }

    /// <summary>Builds the result when <paramref name="step"/> was optional; otherwise yields NaN.</summary>
    private double CompleteIfOptional(Result step)
    {
        if (step.Type == ResultType.Optional)
        {
            return Complete();
        }
        return double.NaN;
    }

    /// <summary>Creates the result that makes <see cref="Parse"/> return NaN.</summary>
    private static Result Fail()
    {
        return new Result(ResultType.MustFail, null);
    }

    /// <summary>
    /// Reads an integer token consisting of exactly <paramref name="digits"/> digits.
    /// </summary>
    /// <returns><c>false</c> when the token is not an integer, has the wrong length, or is not purely digits.</returns>
    private static bool TryReadDigits(DateToken token, int digits, out double value)
    {
        value = 0.0;
        if (token.Type != DateTokenType.Integer || token.Value.Length != digits)
        {
            return false;
        }

        // Digits only, invariant culture: the outcome must not depend on the machine's locale,
        // and malformed text becomes a failed parse instead of an exception.
        return double.TryParse(
            token.Value,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out value);
    }

    /// <summary>Entry point: the input starts with either a year or a <c>T</c>.</summary>
    private Result HandleFirst(DateToken token)
    {
        switch (token.Type)
        {
            case DateTokenType.Integer:
                return HandleYear(token);
            case DateTokenType.T:
                return HandleT(token);
            default:
                return Fail();
        }
    }

    /// <summary>Four-digit year; the input may end after it.</summary>
    private Result HandleYear(DateToken token)
    {
        double year;
        if (!TryReadDigits(token, 4, out year))
        {
            return Fail();
        }

        _year = year;
        return new Result(ResultType.Optional, HandleMonthHyphen);
    }

    /// <summary>Hyphen introducing the month; any other token stops parsing with <c>Complete</c>.</summary>
    private Result HandleMonthHyphen(DateToken token)
    {
        if (token.Type == DateTokenType.Hyphen)
        {
            return new Result(ResultType.Success, HandleMonth);
        }
        return new Result(ResultType.Complete, null);
    }

    /// <summary>Two-digit month in the range 1..12; the input may end after it.</summary>
    private Result HandleMonth(DateToken token)
    {
        double month;
        if (!TryReadDigits(token, 2, out month))
        {
            return Fail();
        }
        if (month < 1 || month > 12)
        {
            _month = null;
            return Fail();
        }

        _month = month;
        return new Result(ResultType.Optional, HandleDayHyphen);
    }

    /// <summary>Hyphen introducing the day.</summary>
    private Result HandleDayHyphen(DateToken token)
    {
        if (token.Type == DateTokenType.Hyphen)
        {
            return new Result(ResultType.Success, HandleDay);
        }
        return new Result(ResultType.CanFail, null);
    }

    /// <summary>Two-digit day in the range 1..31; the input may end after it.</summary>
    private Result HandleDay(DateToken token)
    {
        double day;
        if (!TryReadDigits(token, 2, out day))
        {
            return Fail();
        }
        if (day < 1 || day > 31)
        {
            _day = null;
            return Fail();
        }

        _day = day;
        return new Result(ResultType.Optional, HandleT);
    }

    /// <summary>The <c>T</c> separator introducing the time part.</summary>
    private Result HandleT(DateToken token)
    {
        if (token.Type == DateTokenType.T)
        {
            return new Result(ResultType.Success, HandleHour);
        }
        return new Result(ResultType.CanFail, null);
    }

    /// <summary>Two-digit hour below <c>DatePrototype.HoursPerDay</c>; a colon must follow.</summary>
    private Result HandleHour(DateToken token)
    {
        double hour;
        if (!TryReadDigits(token, 2, out hour))
        {
            return Fail();
        }
        if (hour >= DatePrototype.HoursPerDay)
        {
            _hour = null;
            return Fail();
        }

        _hour = hour;
        return new Result(ResultType.Success, HandleHourColon);
    }

    /// <summary>Mandatory colon between hour and minute.</summary>
    private Result HandleHourColon(DateToken token)
    {
        if (token.Type == DateTokenType.Colon)
        {
            return new Result(ResultType.Success, HandleMinute);
        }
        return Fail();
    }

    /// <summary>Two-digit minute below <c>DatePrototype.MinutesPerHour</c>; the input may end after it.</summary>
    private Result HandleMinute(DateToken token)
    {
        double minute;
        if (!TryReadDigits(token, 2, out minute))
        {
            return Fail();
        }
        if (minute >= DatePrototype.MinutesPerHour)
        {
            _minute = null;
            return Fail();
        }

        _minute = minute;
        return new Result(ResultType.Optional, HandleSecondColonOrOffset);
    }

    /// <summary>After the minute: a colon introduces seconds, otherwise an offset is required.</summary>
    private Result HandleSecondColonOrOffset(DateToken token)
    {
        if (token.Type == DateTokenType.Colon)
        {
            return new Result(ResultType.Success, HandleSecond);
        }
        return HandleRequiredOffset(token);
    }

    /// <summary>Two-digit second below <c>DatePrototype.SecondsPerMinute</c>; the input may end after it.</summary>
    private Result HandleSecond(DateToken token)
    {
        double second;
        if (!TryReadDigits(token, 2, out second))
        {
            return Fail();
        }
        if (second >= DatePrototype.SecondsPerMinute)
        {
            _second = null;
            return Fail();
        }

        _second = second;
        return new Result(ResultType.Optional, HandleSecondDotOrOffset);
    }

    /// <summary>After the second: a dot introduces milliseconds, otherwise an offset is required.</summary>
    private Result HandleSecondDotOrOffset(DateToken token)
    {
        if (token.Type == DateTokenType.Dot)
        {
            return new Result(ResultType.Success, HandleMillisecond);
        }
        return HandleRequiredOffset(token);
    }

    /// <summary>Three-digit millisecond field; the input may end after it.</summary>
    private Result HandleMillisecond(DateToken token)
    {
        double millisecond;
        if (!TryReadDigits(token, 3, out millisecond))
        {
            return Fail();
        }
        // Three digits cannot reach this bound; the check is kept for symmetry with the other fields.
        if (millisecond >= MillisecondsPerSecond)
        {
            _millisecond = null;
            return Fail();
        }

        _millisecond = millisecond;
        return new Result(ResultType.Optional, HandleOffset);
    }

    /// <summary>
    /// Like <see cref="HandleOffset"/>, but a token that is not an offset is a hard failure
    /// rather than a soft one.
    /// </summary>
    private Result HandleRequiredOffset(DateToken token)
    {
        var result = HandleOffset(token);
        if (result.Type == ResultType.CanFail)
        {
            return Fail();
        }
        return result;
    }

    /// <summary>
    /// Start of the time-zone designator: <c>Z</c>, or a sign followed by <c>hh:mm</c>.
    /// </summary>
    private Result HandleOffset(DateToken token)
    {
        switch (token.Type)
        {
            case DateTokenType.Z:
                _offsetSign = 1.0;
                _offset = 0.0;
                // "Z" is a whole designator on its own, so parsing is finished here. Returning
                // Success with no continuation would make Parse reject the input when it ends
                // at "Z", or dereference a null continuation if another token followed.
                return new Result(ResultType.Complete, null);
            case DateTokenType.Plus:
                _offsetSign = 1.0;
                _offset = 0.0;
                return new Result(ResultType.Success, HandleOffsetHour);
            case DateTokenType.Hyphen:
                _offsetSign = -1.0;
                _offset = -0.0;
                return new Result(ResultType.Success, HandleOffsetHour);
            default:
                return new Result(ResultType.CanFail, null);
        }
    }

    /// <summary>Two-digit offset hour, added to the offset in milliseconds with the designator's sign.</summary>
    private Result HandleOffsetHour(DateToken token)
    {
        double hours;
        if (!TryReadDigits(token, 2, out hours))
        {
            return Fail();
        }

        _offset += _offsetSign * hours * DatePrototype.MsPerHour;
        return new Result(ResultType.Success, HandleOffsetHourColon);
    }

    /// <summary>Mandatory colon between offset hour and offset minute.</summary>
    private Result HandleOffsetHourColon(DateToken token)
    {
        if (token.Type == DateTokenType.Colon)
        {
            return new Result(ResultType.Success, HandleOffsetMinute);
        }
        return Fail();
    }

    /// <summary>Two-digit offset minute; this is the last field, so parsing completes.</summary>
    private Result HandleOffsetMinute(DateToken token)
    {
        double minutes;
        if (!TryReadDigits(token, 2, out minutes))
        {
            return Fail();
        }

        _offset += _offsetSign * minutes * DatePrototype.MsPerMinute;
        return new Result(ResultType.Complete, null);
    }
} 
+2  A: 

I think the term you're looking for is Finite State Machine, which is essentially something like:

while( tokensAvailable ){
    // look at current token
    // do something maybe relating to state
    // loop
}
Noon Silk
Is this a general term for the design? Could there be a more specific term I could use to guide my research?
ChaosPandion
Yes. And why not start your research from the wikipedia article, or by any other strategy you can think of starting from the provided term.
Noon Silk
@silky - I'll do that. One of the biggest disadvantages of having no formal computer science education is not knowing the lingo... Oh and that damn piece of paper. :)
ChaosPandion
@ChaosPandion: Sure, I don't have a computer science background either. Just educate yourself.
Noon Silk
@silky - I always try to educate myself. Even when I was in school for electronics. I just need a nice reference point to begin.
ChaosPandion
+1  A: 

Try recursive descent parser.

There's a free ebook (pdf) by Niklaus Wirth that gives a good introduction to some different techniques.

Jordão
http://en.wikipedia.org/wiki/Bottom-up_parsing better
lukas
Thanks, always looking for some good reading material.
ChaosPandion
@lukas: I'm just describing what I see in the code. Although, for such a simple "grammar", it doesn't seem to recurse. :-) Maybe just a top-down parsing then.
Jordão
@Jordão - The grammar is so simple they didn't even bother to formalize it with BNF notation. *Yep, I have done some research on parsing. :)*
ChaosPandion