views:

105

answers:

4

I have been using the following Win32 console code to try to parse a B Machine grammar embedded within C++ using the Boost Spirit grammar template. I am a relatively new Boost user. The code compiles, but when I run the .exe file produced by VC++ 2008, the program only partially parses the input file. I believe the problem is with my grammar definition or with the functions attached as semantic actions.

The code is given below:

// BIFAnalyser.cpp : Defines the entry point for the console application.
//
//
/*=============================================================================
    Copyright (c) Temitope Jos Onunkun 2010 
    http://www.dcs.kcl.ac.uk/pg/onun/

    Use, modification and distribution is subject to the Boost Software
    License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
////////////////////////////////////////////////////////////////////////////
//                                                                        //
//  B Machine parser using the Boost "Grammar" and "Semantic Actions".    // 
//                                                                        //
////////////////////////////////////////////////////////////////////////////

#include <boost/spirit/core.hpp>
#include <boost/tokenizer.hpp>
#include <iostream>
#include <string>
#include <fstream>
#include <vector>
#include <utility>

///////////////////////////////////////////////////////////////////////////////////////////
using namespace std;
using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////////////////
//
//  Semantic actions
//
////////////////////////////////////////////////////////////////////////////
// Operand names collected while parsing: do_noint appends to this vector and
// the arithmetic actions (do_add, do_subt, do_mult, do_div) print its whole
// contents. Nothing in the code shown here ever clears it, so entries
// accumulate across successive parse() calls.
vector<string> strVect;


namespace 
{
 // Semantic action on an individual operand lexeme.
 //
 // str/end delimit the text matched by the parser (half-open range). A
 // lexeme that is not an integer literal is appended to strVect and echoed
 // as "PUSH(<lexeme>)"; integer literals and empty ranges are ignored.
    void    do_noint(char const* str, char const* end)
    {
        const std::string s(str, end);

        // Classify using only the matched range. The previous atoi(str) test
        // ignored `end` and returned 0 for literals such as "0", so a zero
        // operand was recorded as if it were a variable name.
        const bool isIntegerLiteral =
            s.find_first_not_of("0123456789") == std::string::npos;

        if (s.empty() || isIntegerLiteral)
            return;

        strVect.push_back(s);
        std::cout << "PUSH(" << s << ')' << std::endl;
    }

 // Semantic action attached to the "'+' term" alternative of the expression
 // rule. The matched range is not used: the action prints "ADD" and then
 // every entry currently held in strVect, each followed by a space.
void do_add(char const*, char const*)
{
    std::cout << "ADD" << std::endl;

    const std::vector<std::string>::size_type count = strVect.size();
    for (std::vector<std::string>::size_type idx = 0; idx != count; ++idx)
    {
        std::cout << strVect[idx] << " ";
    }
}

 // Semantic action attached to the "'-' term" alternative of the expression
 // rule. The matched range is not used: the action prints "SUBTRACT" and then
 // every entry currently held in strVect, each followed by a space.
void do_subt(char const*, char const*)
{
    std::cout << "SUBTRACT" << std::endl;

    const std::vector<std::string>::size_type count = strVect.size();
    for (std::vector<std::string>::size_type idx = 0; idx != count; ++idx)
    {
        std::cout << strVect[idx] << " ";
    }
}

 // Semantic action attached to the "'*' factor" alternative of the term rule.
 // The matched range is not used: the action prints "MULTIPLY" on its own
 // line, then every entry of strVect followed by a space, then a newline.
void do_mult(char const*, char const*)
{
    std::cout << "\nMULTIPLY" << std::endl;

    std::vector<std::string>::const_iterator entry = strVect.begin();
    const std::vector<std::string>::const_iterator stop = strVect.end();
    while (entry != stop)
    {
        std::cout << *entry << " ";
        ++entry;
    }

    std::cout << "\n";
}

 // Semantic action attached to the "'/' factor" alternative of the term rule.
 // The matched range is not used: the action prints "DIVIDE" on its own line
 // and then every entry of strVect, each followed by a space.
void do_div(char const*, char const*)
{
    std::cout << "\nDIVIDE" << std::endl;

    std::vector<std::string>::const_iterator entry = strVect.begin();
    const std::vector<std::string>::const_iterator stop = strVect.end();
    while (entry != stop)
    {
        std::cout << *entry << " ";
        ++entry;
    }
}

 // Semantic action on a simple substitution ("Identifier := expression").
 //
 // str/end delimit the text matched by the simple_subst rule. The text is
 // split into operand tokens; the first token is taken as the assigned
 // variable and one "lhs|->rhs " dependency pair is printed for every
 // non-numeric token that follows it.
void do_sSubst(char const* str, char const* end)
{
    const std::string s(str, end);

    // Operators, brackets and whitespace all act as dropped delimiters, so
    // the tokens are bare operand names with no stray blanks around them.
    typedef boost::tokenizer<boost::char_separator<char> > Tokenizer;
    boost::char_separator<char> sep("-+/*:=() \t\r\n");
    Tokenizer tok(s, sep);

    // Save all tokens so they can be addressed by position.
    std::vector<std::string> dx;
    for (Tokenizer::iterator tok_iter = tok.begin(); tok_iter != tok.end(); ++tok_iter)
    {
        dx.push_back(*tok_iter);
    }

    std::vector<std::string> d_hat; // stores the set of dependency pairs

    // Guard against an empty token list: the old "dx.size()-1" bound wrapped
    // around in unsigned arithmetic and the first token was dereferenced
    // unconditionally.
    if (!dx.empty())
    {
        const std::string& lhs = dx.front();

        // Visit every right-hand operand. The old loop always dropped the
        // last token to hide a trailing number, which also discarded a
        // trailing variable; numeric literals are now filtered explicitly.
        for (std::vector<std::string>::size_type i = 1; i < dx.size(); ++i)
        {
            const bool isNumber =
                dx[i].find_first_not_of("0123456789") == std::string::npos;
            if (isNumber)
                continue;

            d_hat.push_back(lhs + "|->" + dx[i] + " ");
        }
    }

    std::cout << "PUSH(" << s << ')' << std::endl;

    for (std::vector<std::string>::size_type i = 0; i < d_hat.size(); ++i)
        std::cout << "\n...\n" << d_hat[i] << " ";

    std::cout << "\nSIMPLE SUBSTITUTION\n";
}

    //semantic action function on multiple substitution
void do_mSubst(char const* str, char const* end)    
{ 
  string  s(str, end);

  //use boost tokenizer to break down tokens
  typedef boost::tokenizer<boost::char_separator<char> > Tok;
  boost::char_separator<char> sep("-+/*:=()"); // default char separator
  Tok tok(s, sep);
  Tok::iterator tok_iter = tok.begin();

 // string start = *tok.begin();

  vector<string> mx;

  for( ; tok_iter != tok.end(); ++tok_iter)  //save all tokens in vector
  {
 mx.push_back(*tok_iter );
  }

 mx.push_back("END\n");  //add a marker "end" 

 for(unsigned int i=0; i<mx.size(); i++)
 {
  // if(mx.at(i) == "END" || mx.at(i) == "||" )
  //  break;
  // else if( mx.at(i) == "||")
  //   do_sSubst(str, end); 
  // else 
  // {
  //   do_sSubst(str, end); 

  // }

  cout << "\nTokens ... " << mx.at(i) << " ";
 }


  cout << "PUSH(" << s << ')' << endl;
  cout << "MULTIPLE SUBSTITUTION\n";
 } 

} 

////////////////////////////////////////////////////////////////////////////
//
//  Simple Substitution Grammar
//
////////////////////////////////////////////////////////////////////////////

//  Simple substitution grammar parser with integer values removed
//
//  Spirit grammar whose start rule, multi_subst, matches two or more simple
//  substitutions ("Identifier := expression") joined by "||". The right-hand
//  side is an arithmetic expression over names, unsigned integers,
//  parenthesised sub-expressions and unary '+', with the operators + - * /.
//  The semantic actions attached to the rules are the free functions
//  do_noint, do_add, do_subt, do_mult, do_div, do_sSubst and do_mSubst.
struct Substitution : public grammar<Substitution>
{
    template <typename ScannerT>
    struct definition
    {
        definition(Substitution const& )
        {
            // do_mSubst fires once for every simple substitution in the list.
            multi_subst
                =   simple_subst[&do_mSubst]
                    >> +( str_p("||") >> simple_subst[&do_mSubst] )
                ;

            simple_subst
                =   (Identifier >> str_p(":=") >> expression)[&do_sSubst]
                ;

            // lexeme_d keeps the whitespace skipper from running between the
            // characters of a name (otherwise "de bt" is accepted as one
            // identifier). *alnum_p, rather than +alnum_p, also admits
            // single-letter names such as "x".
            Identifier
                =   lexeme_d[ alpha_p >> *alnum_p ]
                ;

            expression
                =   term
                    >> *(   ('+' >> term)[&do_add]
                        |   ('-' >> term)[&do_subt]
                        )
                ;

            term
                =   factor
                    >> *(   ('*' >> factor)[&do_mult]
                        |   ('/' >> factor)[&do_div]
                        )
                ;

            // An operand is a name or an unsigned integer; do_noint receives
            // the lexeme either way. Names use the same one-or-more-character
            // form as Identifier.
            factor
                =   lexeme_d[( (alpha_p >> *alnum_p) | +digit_p )[&do_noint]]
                |   '(' >> expression >> ')'
                |   ('+' >> factor)
                ;
        }

        rule<ScannerT> expression, term, factor, Identifier, simple_subst,
                       multi_subst;

        // Entry rule used when the grammar object is passed to parse().
        rule<ScannerT> const&
        start() const
        {
            return multi_subst;
        }
    };
};

////////////////////////////////////////////////////////////////////////////
//
//  Main program
//
////////////////////////////////////////////////////////////////////////////
int
main()
{
    cout << "************************************************************\n\n";
    cout << "\t\t...Machine Parser...\n\n";
    cout << "************************************************************\n\n";
   // cout << "Type an expression...or [q or Q] to quit\n\n";

//prompt for file name to be input
cout << "Please enter a filename...or [q or Q] to quit:\n\n "; 
char strFilename[256]; //file name store as a string object
cin >> strFilename;

ifstream inFile(strFilename); // opens file object for reading
  //output file for truncated machine (operations only)


Substitution elementary_subst;  //  Simple substitution parser object

string str, next;
// inFile.open(strFilename);
while (inFile >> str)
  {
  getline(cin, next);

  str += next;

        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        parse_info<> info = parse(str.c_str(), elementary_subst, space_p);

        if (info.full)
        {
            cout << "\n-------------------------\n";
            cout << "Parsing succeeded\n";
            cout << "\n-------------------------\n";
        }
        else
        {
            cout << "\n-------------------------\n";
            cout << "Parsing failed\n";
            cout << "stopped at: \": " << info.stop << "\"\n";
            cout << "\n-------------------------\n";
        }
    }
    cout << "Please enter a filename...or [q or Q] to quit\n";
    cin >> strFilename;


    return 0;
}

The contents of the file I tried to parse, which I named "mf7.txt" is given below:

debt:=(LoanRequest+outstandingLoan1)*20 || newDebt := loanammount-paidammount


The output when I execute the program is:

************************************************************
                ...Machine Parser...
************************************************************
Please enter a filename...or [q or Q] to quit: 
c:\tplat\mf7.txt
PUSH(LoanRequest)
PUSH(outstandingLoan1)
ADD
LoanRequest outstandingLoan1
MULTIPLY
LoanRequest outstandingLoan1
PUSH(debt:=(LoanRequest+outstandingLoan1)*20)
...
debt|->LoanRequest
...
debt|->outstandingLoan1
SIMPLE SUBSTITUTION
Tokens ... debt
Tokens ... LoanRequest
Tokens ... outstandingLoan1
Tokens ... 20
Tokens ... END 
PUSH(debt:=(LoanRequest+outstandingLoan1)*20)
MULTIPLE SUBSTITUTION
-------------------------
Parsing failedstopped at: ": "
-------------------------

My intention is to capture only the variables in the file, which I managed to do up to the "||" string. Clearly, the program is not parsing beyond the "||" string in the input file. I will appreciate assistance to fix the grammar. SOS, please.

+3  A: 

I believe everything with your code is just fine. The parse fails at the trailing space character after all the input has been properly matched. The reason is that the parse() function does not execute a post-skip step (i.e. it does not invoke the skipping parser after the last matching parser). The easiest way to work around this is to append a !end_p to your grammar:

parse_info<> info = parse(str.c_str(), elementary_subst >> !end_p, space_p); 

which forces the required skipping and makes sure your input has been matched in full.

hkaiser
Thanks hkaiser. I have made the modification: adding the ">> !end_p" to my grammar object. Unfortunately, the output is still the same as it was. It appears I still have some problem either with the grammar definition or the appended functions.
Decmanc04
Now I know what's wrong: a) you need to call inFile.unsetf(std::ios::skipws); in order to avoid the file stream skipping your whitespace, and b) the getline needs to read from the file and not cin: getline(inFile, next);. That and the fix I suggested above makes your parser succeed.
hkaiser
Ta hkaiser. I have applied your suggestions and with a slight modification of the grammar, it works fine now, and all the variables are captured. The key part of the output is:debt|->LoanRequest...debt|->outstandingLoan1...debt|->outstandingLoan1SIMPLE SUBSTITUTIONPUSH(loanammount)PUSH(paidammount)SUBTRACTPUSH(newDebt:=loanammount-paidammount)...newDebt|->loanammount...newDebt |->paidammountSIMPLE SUBSTITUTIONAlthough the dependency pair "debt|->outstandingLoan1" is duplicated, I should be able to fix that on inspection of the code later. Once again THANKS.
Decmanc04
Hey, hkaiser, I'm afraid I've hit some problem again. After fixing the duplication of the dependency pair "debt|->outstandingLoan1", I scaled up the grammar to include, in part, machine_subst = ( (simple_subst | multi_subst | choice | multi_choice | select | conditional | preconditional) >> *(ch_p(';') >> machine_subst) );and used "machine_subst" as the start symbol. Then the parser failed on "||" again. When I changed the start symbol back to "multi_subst", it passed, but I need to use "machine_subst".
Decmanc04
Please give me your new code, otherwise I'm not able to spot what's wrong.
hkaiser
A: 

Hi guys, for some reason I'm not certain of, the grammar parsed correctly when I changed the machine_subst rule to ... "machine_subst = ( ((simple_subst) | (multi_subst) | (choice) | (multi_choice) | (select) | (conditional) | (preconditional))". I suspect that putting each of the sub-rules in parentheses explicitly specifies that the alternatives are the sub-rules themselves and not parts thereof. Any idea why the grammar definition behaves this way would be appreciated, but it's not critical to my current project since it works for me as it is.

Decmac04
A: 
Decmac04
+1  A: 

I have redesigned the grammar as follows, and that seems to have fixed the problem:

subst = multi_choice | machine_subst ;

        multi_choice 
                = machine_subst
                >> +( str_p("[]") >> machine_subst )
                ;


        machine_subst
                = (  multi_subst 
                | simple_subst
                | if_select_pre_subst
                | unbounded_choice )[&do_machSubst]
                ;

... ...

        multi_subst
                = ( simple_subst
                >> +( str_p("||") >> simple_subst ) ) [&do_mSubst]
                ;

        simple_subst
                = (identifier
                >> str_p(":=") >> arith_expr) [&do_sSubst]
                ;

        expression
                = predicate
                | logic_expr
                | arith_expr
                ;

        predicate
                = ( logic_expr 
                >> +( ( str_p("&") >> logic_expr )
                |     ( str_p("OR") >> logic_expr ) ) )[&do_predicate]
                ;

        logic_expr 
                = ( identifier
                >> ( (str_p("<") >> arith_expr)
                | (str_p(">")  >> arith_expr)
                | (str_p("/:")  >> arith_expr)
                | (str_p("<:")  >> arith_expr)
                | (str_p("/<:")  >> arith_expr)
                | (str_p("<<:")  >> arith_expr)
                | (str_p("/<<:")  >> arith_expr)
                | (str_p("<=")  >> arith_expr)
                | (str_p("=")  >> arith_expr)
                | (str_p(">=")  >> arith_expr)
                | (str_p("=>")  >> arith_expr) )
                )  [&do_logicExpr]
                ;

... ... I now use the start rule "subst" for files f1.txt and f2.txt and "expression" for f3.txt and f4.txt.

start() const
{
    return subst;
    //return machine_subst;
    //return expression;
    //return if_select_pre_subst;
    //return multi_choice;
    //return unbounded_choice;
}

I am still building the grammar up, so if I have any further issues, I will post it.

Decmac04