views:

854

answers:

3

Is anyone aware of an online resource where I can find out how to write a simple expression parser using Boost::Spirit?.

I do not necessarily need to evaluate the expression, but I need to parse it and be able to return a boolean to indicate whether the expression is parsable or not (e.g. brackets not matching etc).

I need the parser to be able recognise function names (e.g. foo and foobar), so this would also be a useful example to help me learn writing BNF notation.

The expressions will be normal arithmetic equations, i.e. comprising of the following symbols:

  1. opening/closing brackets
  2. arithmetic operators
  3. recognized function names, and check for their required arguments
+1  A: 

Here's some old Spirit prototype code I had laying around:

#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
#include <exception>
#include <iterator>
#include <sstream>
#include <list>

#include <boost/spirit.hpp>
#include <boost/shared_ptr.hpp>

using namespace std;
using namespace boost::spirit;
using namespace boost;

void g(unsigned int i)
{   
    cout << "row: " << i << endl;
}

struct u
{
    u(const char* c): s(c) {}
    void operator()(const char* first, const char* last) const
    {
        cout << s << ": " << string(first, last) << endl;   
    }
private:
    string s;
};


struct Exp
{
};

struct Range: public Exp
{
};

struct Index: public Exp
{
};

struct String: public Exp
{
};

struct Op
{
    virtual ~Op() = 0;
    virtual string name() = 0;
};

Op::~Op() {}

struct CountIf: public Op
{
    string name() { return "CountIf"; }
};

struct Sum: public Op
{
    string name() { return "Sum"; }
};

struct Statement
{
    virtual ~Statement() = 0;
    virtual void print() = 0;
};

Statement::~Statement() {}

struct Formula: public Statement
{
    Formula(const char* first, const char* last): s(first, last), op(new CountIf)
    {
        typedef rule<phrase_scanner_t> r_t;

        r_t r_index     = (+alpha_p)[u("col")] >> uint_p[&g];
        r_t r_range     = r_index >> ':' >> r_index;
        r_t r_string    = ch_p('\"') >> *alnum_p >> '\"';
        r_t r_exp       = r_range | r_index | r_string; // will invoke actions for index twice due to range
        r_t r_list      = !(r_exp[u("arg")] % ',');
        r_t r_op        = as_lower_d["countif"] | as_lower_d["sum"];
        r_t r_formula   = r_op >> '(' >> r_list >> ')';

        cout << s << ": matched: " << boolalpha << parse(s.c_str(), r_formula, space_p).full << endl; 
    }
    void print() { cout << "Formula: " << s << " / " << op->name() << endl; }
private:
    string s;
    shared_ptr<Op> op;
    list<shared_ptr<Exp> > exp_list;
};

struct Comment: public Statement
{
    Comment(const char* first, const char* last): comment(first, last) {}
    void print() {cout << "Comment: " << comment << endl; }
private:
    string comment;
};


struct MakeFormula
{
    MakeFormula(list<shared_ptr<Statement> >& list_): list(list_) {}
    void operator()(const char* first, const char* last) const
    {
        cout << "MakeFormula: " << string(first, last) << endl;
        list.push_back(shared_ptr<Statement>(new Formula(first, last)));
    }
private:
    list<shared_ptr<Statement> >& list;
};

struct MakeComment
{
    MakeComment(list<shared_ptr<Statement> >& list_): list(list_) {}
    void operator()(const char* first, const char* last) const
    {
        cout << "MakeComment: " << string(first, last) << endl;
        list.push_back(shared_ptr<Statement>(new Comment(first, last)));
    }
private:
    list<shared_ptr<Statement> >& list;
};


int main(int argc, char* argv[])
try
{
    //typedef vector<string> v_t;
    //v_t v(argv + 1, argv + argc);
    // copy(v.begin(), v.end(), ostream_iterator<v_t::value_type>(cout, "\n"));

    string s;
    getline(cin, s);

    //        =COUNTIF(J2:J36, "Abc")

    typedef list<shared_ptr<Statement> > list_t;
    list_t list;

    typedef rule<phrase_scanner_t> r_t;

    r_t r_index     = (+alpha_p)[u("col")] >> uint_p[&g];
    r_t r_range     = r_index >> ':' >> r_index;
    r_t r_string    = ch_p('\"') >> *alnum_p >> '\"';
    r_t r_exp       = r_range | r_index | r_string; // will invoke actions for index twice due to range
    r_t r_list      = !(r_exp[u("arg")] % ',');
    r_t r_op        = as_lower_d["countif"] | as_lower_d["sum"];
    r_t r_formula   = r_op >> '(' >> r_list >> ')';
    r_t r_statement = (ch_p('=')  >> r_formula   [MakeFormula(list)])
                    | (ch_p('\'') >> (*anychar_p)[MakeComment(list)])
                    ;

    cout << s << ": matched: " << boolalpha << parse(s.c_str(), r_statement, space_p).full << endl; 

    for (list_t::const_iterator it = list.begin(); it != list.end(); ++it)
    {
        (*it)->print();
    }
}
catch(const exception& ex)
{
    cerr << "Error: " << ex.what() << endl;
}

Try running it and entering a line like:

=COUNTIF(J2:J36, "Abc")
John Zwinck
Nice example! Well, but is this really _simple_?
Vlad
Well, it's not completely trivial, but it's still only a page (maybe double-sided!) worth of printed code that I wrote on a domestic flight. :)
John Zwinck
A: 

I'm not sure if this qualifies as simple either, but I've used this uri-grammar available at http://code.google.com/p/uri-grammar/source/browse/trunk/src/uri/grammar.hpp. It may not be trivial, but at least its parsing something that you probably understand already (URIs). When reading these grammars, its best to read from the bottom up, since that's where the most generic tokens tend to be defined.

Peter Kovacs
+1  A: 

The current version of Spirit (V2.x) contains a whole series of calculator examples from the very simple to a full fledged mini-c interpreter. You should have a look there as those are a perfect starting point for writing your own expression parser.

hkaiser