views:

629

answers:

8

Hallo Rudi, I dont get the error, please can you help me out, here is the .l and .y file.thanks.

%{
#include "ifanw.tab.h"
extern int yylval;
%}
%%
"="      { return EQ; }
"!="     { return NE; }
"<"      { return LT; }
"<="     { return LE; }
">"      { return GT; }
">="     { return GE; }
"+"      { return PLUS; }
"-"      { return MINUS; }
"*"      { return MULT; }
"/"      { return DIVIDE; }
")"      { return RPAREN; }
"("      { return LPAREN; }
":="     { return ASSIGN; }
";"      { return SEMICOLON; }
"IF"     { return IF; }
"THEN"   { return THEN; }
"ELSE"   { return ELSE; }
"FI"     { return FI; }
"WHILE"  { return WHILE; }
"DO"     { return DO; }
"OD"     { return OD; }
"PRINT"  { return PRINT; }
[0-9]+   { yylval = atoi(yytext); return NUMBER; }
[a-z]    { yylval = yytext[0] - 'a'; return NAME; }   
\        { ; }
\n       { nextline(); }
\t       { ; }
"//".*\n { nextline(); }
.        { yyerror("illegal token"); }
%%

Yacc-file

%start ROOT

%token EQ
%token NE
%token LT
%token LE
%token GT
%token GE
%token PLUS
%token MINUS
%token MULT
%token DIVIDE
%token RPAREN
%token LPAREN
%token ASSIGN
%token SEMICOLON
%token IF
%token THEN
%token ELSE
%token FI
%token WHILE
%token DO
%token OD
%token PRINT
%token NUMBER
%token NAME

%%

ROOT:
   stmtseq { execute($1); } 
   ;

statement:
     designator ASSIGN expression { $$ = assignment($1, $3); } 
   | PRINT expression { $$ = print($2); } 
   | IF expression THEN stmtseq ELSE stmtseq FI
    { $$ = ifstmt($2, $4, $6); }
   | IF expression THEN stmtseq FI
    { $$ = ifstmt($2, $4, empty()); }
   | WHILE expression DO stmtseq OD { $$ = whilestmt($2, $4); }   
   ;

stmtseq:
     stmtseq SEMICOLON statement { $$ = seq($1, $3); }
   | statement { $$ = $1; }
   ;

expression:
 expr2 { $$ = $1; } 
   | expr2 EQ expr2 { $$ = eq($1, $3); }
   | expr2 NE expr2 { $$ = ne($1, $3); }
   | expr2 LT expr2 { $$ = le($1, $3); }
   | expr2 LE expr2 { $$ = le($1, $3); }
   | expr2 GT expr2 { $$ = gt($1, $3); }
   | expr2 GE expr2 { $$ = gt($1, $3); }
   ;

expr2:
     expr3 { $$ == $1; }
   | expr2 PLUS expr3 { $$ = plus($1, $3); }
   | expr2 MINUS expr3 { $$ = minus($1, $3); }
   ;

expr3:
     expr4 { $$ = $1; }
   | expr3 MULT expr4 { $$ = mult($1, $3); }
   | expr3 DIVIDE expr4 { $$ = divide ($1, $3); }
   ;

expr4:
     PLUS expr4 { $$ = $2; }
   | MINUS expr4 { $$ = neg($2); }
   | LPAREN expression RPAREN { $$ = $2; }
   | NUMBER { $$ = number($1); }
   | designator { $$ = $1; }
   ;

designator:
     NAME { $$ = name($1); }
  ;
%%
+4  A: 

EDIT: I put the old answer to the end

Here is the promised more detailed example:

usually I begin with an example file of the desired language:

# example.toy
begin # example of the simple toy language
    x = 23;
    while x > 0 do begin
        x = x - 1;
        print(x*x);
    end;
end;

The next step is to create a lexer+parser combination where the previous file passes.

Here comes the lexer (generate the source with flex -o lexer.c lexer.l). Also note that the lexer source depends on the parser sources (because of the TOKEN_* constants), so bison must be run before compiling the lexer source:

%option noyywrap

%{
#include "parser.h"
#include <stdlib.h>
%}

%%

"while" return TOKEN_WHILE;
"begin" return TOKEN_BEGIN;
"end"   return TOKEN_END;
"do"    return TOKEN_DO;
[a-zA-Z_][a-zA-Z0-9_]* {yylval.name = strdup(yytext); return TOKEN_ID;}
[-]?[0-9]+    {yylval.val = atoi(yytext); return TOKEN_NUMBER;}
[()=;]  {return *yytext;}
[*/+-<>] {yylval.op = *yytext; return TOKEN_OPERATOR;}
[ \t\n] {/* suppress the output of the whitespaces from the input file to stdout */}
#.* {/* one-line comment */}

and the parser (compile with bison -d -o parser.c parser.y, the -d tells bison to create the parser.h header file with some stuff the lexer needs)

%error-verbose /* instruct bison to generate verbose error messages*/
%{
/* enable debugging of the parser: when yydebug is set to 1 before the
 * yyparse call the parser prints a lot of messages about what it does */
#define YYDEBUG 1
%}

%union {
    int val;
    char op;
    char* name;
}

%token TOKEN_BEGIN TOKEN_END TOKEN_WHILE TOKEN_DO TOKEN_ID TOKEN_NUMBER TOKEN_OPERATOR
%start program

%{
/* Forward declarations */
void yyerror(const char* const message);


%}

%%

program: statement';';

block: TOKEN_BEGIN statements TOKEN_END;

statements:
    | statements statement ';'
    | statements block';';

statement: 
      assignment
    | whileStmt
    | block
    | call;

assignment: TOKEN_ID '=' expression;

expression: TOKEN_ID
    | TOKEN_NUMBER
    | expression TOKEN_OPERATOR expression;

whileStmt: TOKEN_WHILE expression TOKEN_DO statement;

call: TOKEN_ID '(' expression ')';

%%

#include <stdlib.h>

void yyerror(const char* const message)
{
    fprintf(stderr, "Parse error:%s\n", message);
    exit(1);
}

int main()
{
    yydebug = 0;
    yyparse();
}

After gcc parser.c lexer.c -o toylang-noop the call of toylang-noop < example.toy must run without any error. So now the parser itself works and can parse the example script.

The next step is to create a so called abstract syntax tree of the grammar. At this point I start with the augmenting of the parser by defining different types to the tokens and rules, as well as inserting rules to each parsing step.

%error-verbose /* instruct bison to generate verbose error messages*/
%{
#include "astgen.h"
#define YYDEBUG 1

/* Since the parser must return the AST, it must get a parameter where
 * the AST can be stored. The type of the parameter will be void*. */
#define YYPARSE_PARAM astDest
%}

%union {
    int val;
    char op;
    char* name;
    struct AstElement* ast; /* this is the new member to store AST elements */
}

%token TOKEN_BEGIN TOKEN_END TOKEN_WHILE TOKEN_DO
%token<name> TOKEN_ID
%token<val> TOKEN_NUMBER
%token<op> TOKEN_OPERATOR
%type<ast> program block statements statement assignment expression whileStmt call
%start program

%{
/* Forward declarations */
void yyerror(const char* const message);


%}

%%

program: statement';' { (*(struct AstElement**)astDest) = $1; };

block: TOKEN_BEGIN statements TOKEN_END{ $$ = $2; };

statements: {$$=0;}
    | statements statement ';' {$$=makeStatement($1, $2);}
    | statements block';' {$$=makeStatement($1, $2);};

statement: 
      assignment {$$=$1;}
    | whileStmt {$$=$1;}
    | block {$$=$1;}
    | call {$$=$1;}

assignment: TOKEN_ID '=' expression {$$=makeAssignment($1, $3);}

expression: TOKEN_ID {$$=makeExpByName($1);}
    | TOKEN_NUMBER {$$=makeExpByNum($1);}
    | expression TOKEN_OPERATOR expression {$$=makeExp($1, $3, $2);}

whileStmt: TOKEN_WHILE expression TOKEN_DO statement{$$=makeWhile($2, $4);};

call: TOKEN_ID '(' expression ')' {$$=makeCall($1, $3);};

%%

#include "astexec.h"
#include <stdlib.h>

void yyerror(const char* const message)
{
    fprintf(stderr, "Parse error:%s\n", message);
    exit(1);
}

int main()
{
    yydebug = 0;
    struct AstElement *a;
    yyparse(&a);
}

As you can see, the main part when generating the AST is to create the nodes of the AST when a certain rule of the parser was passed. Since bison maintains a stack of the current parsing process itself, is is only needed to assign the current parsing status to the elements of the stack (these are the $$=foo(bar) lines)

The target is the following structure in memory:

ekStatements
  .count = 2
  .statements
    ekAssignment
      .name = "x"
      .right
        ekNumber
          .val = 23
    ekWhile
      .cond
        ekBinExpression
        .left
          ekId
            .name = "x"
        .right
          ekNumber
            .val=0
        .op = '>'
      .statements
        ekAssignment
          .name = "x"
          .right
            ekBinExpression
              .left
                ekId
                  .name = "x"
              .right
                ekNumber
                  .val = 1
              .op = '-'
        ekCall
          .name = "print"
          .param
            ekBinExpression
              .left
                ekId
                  .name = "x"
              .right
                ekId
                  .name = "x"
              .op = '*'

To get this graph, there is the generating code needed, astgen.h:

#ifndef ASTGEN_H
#define ASTGEN_H

struct AstElement
{
    enum {ekId, ekNumber, ekBinExpression, ekAssignment, ekWhile, ekCall, ekStatements, ekLastElement} kind;
    union
    {
        int val;
        char* name;
        struct
        {
            struct AstElement *left, *right;
            char op;
        }expression;
        struct
        {
            char*name;
            struct AstElement* right;
        }assignment;
        struct
        {
            int count;
            struct AstElement** statements;
        }statements;
        struct
        {
            struct AstElement* cond;
            struct AstElement* statements;
        } whileStmt;
        struct
        {
            char* name;
            struct AstElement* param;
        }call;
    } data;
};

struct AstElement* makeAssignment(char*name, struct AstElement* val);
struct AstElement* makeExpByNum(int val);
struct AstElement* makeExpByName(char*name);
struct AstElement* makeExp(struct AstElement* left, struct AstElement* right, char op);
struct AstElement* makeStatement(struct AstElement* dest, struct AstElement* toAppend);
struct AstElement* makeWhile(struct AstElement* cond, struct AstElement* exec);
struct AstElement* makeCall(char* name, struct AstElement* param);
#endif

astgen.c:

#include "astgen.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

static void* checkAlloc(size_t sz)
{
    void* result = calloc(sz, 1);
    if(!result)
    {
        fprintf(stderr, "alloc failed\n");
        exit(1);
    }
}

struct AstElement* makeAssignment( char*name, struct AstElement* val)
{
    struct AstElement* result = checkAlloc(sizeof(*result));
    result->kind = ekAssignment;
    result->data.assignment.name = name;
    result->data.assignment.right = val;
    return result;
}

struct AstElement* makeExpByNum(int val)
{
    struct AstElement* result = checkAlloc(sizeof(*result));
    result->kind = ekNumber;
    result->data.val = val;
    return result;
}

struct AstElement* makeExpByName(char*name)
{
    struct AstElement* result = checkAlloc(sizeof(*result));
    result->kind = ekId;
    result->data.name = name;
    return result;
}

struct AstElement* makeExp(struct AstElement* left, struct AstElement* right, char op)
{
    struct AstElement* result = checkAlloc(sizeof(*result));
    result->kind = ekBinExpression;
    result->data.expression.left = left;
    result->data.expression.right = right;
    result->data.expression.op = op;
    return result;
}

struct AstElement* makeStatement(struct AstElement* result, struct AstElement* toAppend)
{
    if(!result)
    {
        result = checkAlloc(sizeof(*result));
        result->kind = ekStatements;
        result->data.statements.count = 0;
        result->data.statements.statements = 0;
    }
    assert(ekStatements == result->kind);
    result->data.statements.count++;
    result->data.statements.statements = realloc(result->data.statements.statements, result->data.statements.count*sizeof(*result->data.statements.statements));
    result->data.statements.statements[result->data.statements.count-1] = toAppend;
    return result;
}

struct AstElement* makeWhile(struct AstElement* cond, struct AstElement* exec)
{
    struct AstElement* result = checkAlloc(sizeof(*result));
    result->kind = ekWhile;
    result->data.whileStmt.cond = cond;
    result->data.whileStmt.statements = exec;
    return result;
}

struct AstElement* makeCall(char* name, struct AstElement* param)
{
    struct AstElement* result = checkAlloc(sizeof(*result));
    result->kind = ekCall;
    result->data.call.name = name;
    result->data.call.param = param;
    return result;
}

You can see here that the generating of the AST elements is a rather monotone job. After the step is done, the program still does nothing, but the AST can be viewed in a debugger.

The next step is to write the interpreter. This is astexec.h:

#ifndef ASTEXEC_H
#define ASTEXEC_H

struct AstElement;
struct ExecEnviron;

/* creates the execution engine */
struct ExecEnviron* createEnv();

/* removes the ExecEnviron */
void freeEnv(struct ExecEnviron* e);

/* executes an AST */
void execAst(struct ExecEnviron* e, struct AstElement* a);

#endif

Well, this looks friendly. The Interpreter itself is simple, despite it's length. The most functions deals with only a particular kind of AstElement. The correct function is selected by the dispatchExpression and dispatchStatement functions. The dispatch functions looks for the target function in the valExecs and runExecs arrays.

astexec.c:

#include "astexec.h"
#include "astgen.h"
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>

struct ExecEnviron
{
    int x; /* The value of the x variable, a real language would have some name->value lookup table instead */
};

static int execTermExpression(struct ExecEnviron* e, struct AstElement* a);
static int execBinExp(struct ExecEnviron* e, struct AstElement* a);
static void execAssign(struct ExecEnviron* e, struct AstElement* a);
static void execWhile(struct ExecEnviron* e, struct AstElement* a);
static void execCall(struct ExecEnviron* e, struct AstElement* a);
static void execStmt(struct ExecEnviron* e, struct AstElement* a);

/* Lookup Array for AST elements which yields values */
static int(*valExecs[])(struct ExecEnviron* e, struct AstElement* a) =
{
    execTermExpression,
    execTermExpression,
    execBinExp,
    NULL,
    NULL,
    NULL,
    NULL
};

/* lookup array for non-value AST elements */
static void(*runExecs[])(struct ExecEnviron* e, struct AstElement* a) =
{
    NULL, /* ID and numbers are canonical and */
    NULL, /* don't need to be executed */
    NULL, /* a binary expression is not executed */
    execAssign,
    execWhile,
    execCall,
    execStmt,
};

/* Dispatches any value expression */
static int dispatchExpression(struct ExecEnviron* e, struct AstElement* a)
{
    assert(a);
    assert(valExecs[a->kind]);
    return valExecs[a->kind](e, a);
}

static void dispatchStatement(struct ExecEnviron* e, struct AstElement* a)
{
    assert(a);
    assert(runExecs[a->kind]);
    runExecs[a->kind](e, a);
}

static void onlyName(const char* name, const char* reference, const char* kind)
{
    if(strcmp(reference, name))
    {
        fprintf(stderr,
            "This language knows only the %s '%s', not '%s'\n",
            kind, reference, name);
        exit(1);
    }
}

static void onlyX(const char* name)
{
    onlyName(name, "x", "variable");
}

static void onlyPrint(const char* name)
{
    onlyName(name, "print", "function");
}

static int execTermExpression(struct ExecEnviron* e, struct AstElement* a)
{
    /* This function looks ugly because it handles two different kinds of
     * AstElement. I would refactor it to an execNameExp and execVal
     * function to get rid of this two if statements. */
    assert(a);
    if(ekNumber == a->kind)
    {
        return a->data.val;
    }
    else
    {
        if(ekId == a->kind)
        {
            onlyX(a->data.name);
            assert(e);
            return e->x;
        }
    }
    fprintf(stderr, "OOPS: tried to get the value of a non-expression(%d)\n", a->kind);
    exit(1);
}

static int execBinExp(struct ExecEnviron* e, struct AstElement* a)
{
    assert(ekBinExpression == a->kind);
    const int left = dispatchExpression(e, a->data.expression.left);
    const int right = dispatchExpression(e, a->data.expression.right);
    switch(a->data.expression.op)
    {
        case '+':
            return left + right;
        case '-':
            return left - right;
        case '*':
            return left * right;
        case '<':
            return left < right;
        case '>':
            return left > right;
        default:
            fprintf(stderr,  "OOPS: Unknown operator:%c\n", a->data.expression.op);
            exit(1);
    }
    /* no return here, since every switch case returns some value (or bails out) */
}

static void execAssign(struct ExecEnviron* e, struct AstElement* a)
{
    assert(a);
    assert(ekAssignment == a->kind);
    onlyX(a->data.assignment.name);
    assert(e);
    struct AstElement* r = a->data.assignment.right;
    e->x = dispatchExpression(e, r);
}

static void execWhile(struct ExecEnviron* e, struct AstElement* a)
{
    assert(a);
    assert(ekWhile == a->kind);
    struct AstElement* const c = a->data.whileStmt.cond;
    struct AstElement* const s = a->data.whileStmt.statements;
    assert(c);
    assert(s);
    while(dispatchExpression(e, c))
    {
        dispatchStatement(e, s);
    }
}

static void execCall(struct ExecEnviron* e, struct AstElement* a)
{
    assert(a);
    assert(ekCall == a->kind);
    onlyPrint(a->data.call.name);
    printf("%d\n", dispatchExpression(e, a->data.call.param));
}

static void execStmt(struct ExecEnviron* e, struct AstElement* a)
{
    assert(a);
    assert(ekStatements == a->kind);
    int i;
    for(i=0; i<a->data.statements.count; i++)
    {
        dispatchStatement(e, a->data.statements.statements[i]);
    }
}

void execAst(struct ExecEnviron* e, struct AstElement* a)
{
    dispatchStatement(e, a);
}

struct ExecEnviron* createEnv()
{
    assert(ekLastElement == (sizeof(valExecs)/sizeof(*valExecs)));
    assert(ekLastElement == (sizeof(runExecs)/sizeof(*runExecs)));
    return calloc(1, sizeof(struct ExecEnviron));
}

void freeEnv(struct ExecEnviron* e)
{
    free(e);
}

Now the interpreter is complete, and the example can be run, after the main function is updated:

#include <assert.h>

int main()
{
    yydebug = 0;
    struct AstElement *a = 0;
    yyparse(&a);
    /* Q&D WARNING: in production code this assert must be replaced by
     * real error handling. */
    assert(a);
    struct ExecEnviron* e = createEnv();
    execAst(e, a);
    freeEnv(e);
    /* TODO: destroy the AST */
}

Now the interpreter for this language works. Note that there are some limitations within this interpreter:

  • it has only one variable and one function
    • and only one parameter to a function
  • only the type int for values
  • it is difficult to add goto support, since for each AST element the interpreter calls an interpreting function. Goto can be implemented within one block by hacking something into the execStmt function, but to jump between different blocks or levels the execution machinery must be changed dramatically (this is because one can't jump between different stack frames in the interpreter). For example the AST can be transformed into byte code and this byte code is interpreted by a vm.
  • some other which I would need to lookup :)

You need to define the grammar for your language. Some thing like this (both lexer and parser are incomplete):

/* foo.y */
%token ID IF ELSE OR AND /* First list all terminal symbols of the language */
%%

statements: /* allow empty statements */ | stm | statements ';' stm;

stm: ifStatement
   | NAME
   | NAME expList
   | label;

expList: expression | expList expression;

label: ':' NAME { /* code to store the label */ };

ifStatement: IF expression statements
           | IF expression statements ELSE statements;

expression: ID                          { /* Code to handle the found ID */ }
          | expression AND expression   { /* Code to con cat two expression with and */ }
          | expression OR expression
          | '(' expression ')';

Then you compile this file with bison -d foo.y -o foo.c. The -d switch instruct bison to generate a header with all the tokens the parser uses. Now you create your lexer

/* bar.l */
%{
#include "foo.h"
%}

%%

IF   return IF;
ELSE return ELSE;
OR   return OR;
AND  return AND;
[A-Z]+  { /*store yylval somewhere to access it in the parser*/ return ID; }

After this you have your lexer and parser done, and "only" need to write the semantic actions for your language.

Rudi
hallo thanks for your answer,its really helpfulbut i dont have an idea how i write the code to store the label, can you give me any idea, an you explain a little bit the code you wrote for example "statements or "explist". thanks to you
Imran
I try to build a better example this weekend, expect it on monday or tuesday
Rudi
"grammar", not "grammer"
Viet
Viet
thanks for your help, i will try my best.
Imran
@Viet thanks for pointing the typo (I'm not a native english speaker). Also I don't see the classical if/else conflict in Imram's grammar, since Imram uses `END IF` tokens so the end of each block is easy to distinguish. Or am I missing something?@Imram I'm sorry, but i forgot the flashdrive with the example at home, so you have to wait a day.
Rudi
no problem, i can wait, im doing my best to learn this language, its a little bit difficult for me, but thanks to you.
Imran
Hallo Rudi,i've found a flex/bison program, but it has a lot of errors, for all tokens it says undeclared(first use in function)the tokens are alreadd declared, then why this error appears, can you show throw the program, ill put it in the Question, "Flex/bison, error: undeclared", thanks.
Imran
It seems like you compiled the lexer sources before you transformed the parser into the c source. Also it might be necessary to add the `-d` switch to transform the parser.
Rudi
Thank you so much for you help and effort, many informations i need time to understand your code, but another point, i think i misunderstood my work, my work wasnt to generate a if statement, i have to write an interpreter that analysis strings, like if for exam there is a if statement, the interpreter has to show, that if was used, that a variable was used, an operator so on, i've got a solution but it shows me an error undefined reference to yylex, can you help me i show you the .l and .y file, please can you check it out, i dont know what to do
Imran
yylex is the function which flex generates. If this function is undefined, either the .c/.o file isn't given to the compiler, or the function got renamed (either by #define yylex something other /-Dyylex=somethingOther compiler switch, or by the flex prefix option)
Rudi
Hallo Rudii dont get the error with the yylex, i added the .l and .y files at the top of the site, can you please look out why it doesnt work. Thanks a lot.
Imran
Hallo Rudi,i've achieved, that the files are compiled without any error, but the command file doesnt work dont know why if i start the exe file in dos, nothing happens, i show you the files if written. Thanks for your Help.
Imran
You can add `#define YYDEBUG 1` at the start of the parser, and a `yydebug=1;`line in the main() function to bring the parser to emit lots of internal debug messages. I found they VERY helpful to get a grip if somethings went wrong.
Rudi
Hi Rudi i wanted to ask is there an option to generate a JMP instruction like in assembler to go to another label, for example::L1IF FLAG AND X"0001" EVT 23;ELSE WAIT 500 ms; JMP L1;END IF;
Imran
Hallo Rudi, now i achieved that the interpreter analysis the input and gives out what is used, ok i had it, thanks for your help, your ideas and comments helped me a lot, here is the flex and bison file, at the top.
Imran
Now i found something to implement the JMP Instructer, in flex/bison its called go to, so here the code can yout throw an eye on this code, i ant compile it, error comes, "ysmbol Name is used, but not defined as token and has no rules ", error message for OUT to, but Out is defined as a token, thanks.
Imran
A: 

Please look a eye on these files, why the command dont start.Here the files:

lex:

#include "iftest.tab.h"
int yylval;
%}
%option noyywrap
%%
"="      { return EQ; }
"!="     { return NE; }
"<"      { return LT; }
"<="     { return LE; }
">"      { return GT; }
">="     { return GE; }
"+"      { return PLUS; }
"-"      { return MINUS; }
"*"      { return MULT; }
"/"      { return DIVIDE; }
")"      { return RPAREN; }
"("      { return LPAREN; }
":="     { return ASSIGN; }
";"      { return SEMICOLON; }
"IF"     { return IF; }
"THEN"   { return THEN; }
"ELSE"   { return ELSE; }
"FI"     { return FI; }
"WHILE"  { return WHILE; }
"DO"     { return DO; }
"OD"     { return OD; }
"PRINT"  { return PRINT; }
[0-9]+   { yylval = atoi(yytext); return NUMBER; }
[a-z]    { yylval = yytext[0] - 'a'; return NAME; }   
\        { ; }

%%

YACC file:

%{
#include stdio.h>
#include string.h>
#define YYDEBUG 1
extern int yylval;
%}
%start ROOT

%token EQ
%token NE
%token LT
%token LE
%token GT
%token GE
%token PLUS
%token MINUS
%token MULT
%token DIVIDE
%token RPAREN
%token LPAREN
%token ASSIGN
%token SEMICOLON
%token IF
%token THEN
%token ELSE
%token FI
%token WHILE
%token DO
%token OD
%token PRINT
%token NUMBER
%token NAME

%%

ROOT:
   stmtseq { execute($1); } 
   ;

statement:
     designator ASSIGN expression { $$ = assignment($1, $3); } 
   | PRINT expression { $$ = print($2); } 
   | IF expression THEN stmtseq ELSE stmtseq FI
    { $$ = ifstmt($2, $4, $6); }
   | IF expression THEN stmtseq FI
    { $$ = ifstmt($2, $4, empty()); }
   | WHILE expression DO stmtseq OD { $$ = whilestmt($2, $4); }   
   ;

stmtseq:
     stmtseq SEMICOLON statement { $$ = seq($1, $3); }
   | statement { $$ = $1; }
   ;

expression:
     expr2 { $$ = $1; } 
   | expr2 EQ expr2 { $$ = eq($1, $3); }
   | expr2 NE expr2 { $$ = ne($1, $3); }
   | expr2 LT expr2 { $$ = le($1, $3); }
   | expr2 LE expr2 { $$ = le($1, $3); }
   | expr2 GT expr2 { $$ = gt($1, $3); }
   | expr2 GE expr2 { $$ = gt($1, $3); }
   ;

expr2:
     expr3 { $$ == $1; }
   | expr2 PLUS expr3 { $$ = plus($1, $3); }
   | expr2 MINUS expr3 { $$ = minus($1, $3); }
   ;

expr3:
     expr4 { $$ = $1; }
   | expr3 MULT expr4 { $$ = mult($1, $3); }
   | expr3 DIVIDE expr4 { $$ = divide ($1, $3); }
   ;

expr4:
     PLUS expr4 { $$ = $2; }
   | MINUS expr4 { $$ = neg($2); }
   | LPAREN expression RPAREN { $$ = $2; }
   | NUMBER { $$ = number($1); }
   | designator { $$ = $1; }
   ;

designator:
     NAME { $$ = name($1); }
   ;
%%

int main()
{
yyparse();
yydebug=1;}
Imran
Please add 4 spaces at the beginning of source parts, else the generated html looks --erm-- intrerresting.
Rudi
It the main function the lexer is your real main function, then nothing happes because the main function calls nothing. You might transfer the main function at the end of the parser and call yyparse() from there.
Rudi
Hallo Rudi,thanks a lot for your help, i dont know what to do withot such forums, Rudi i've added the #include yydebug... at the start of the parser and in the main function of the parser i added yydebug=1; yyparse(); , but know there appears lots of errors, like undefined reference to all my tokens, why, is it right when i compile with gcc-3 iftest.tab.c lex.yy.c -lfl, bt when i compile with gcc-3 -o iftest.tab.c lex.yy.c -lfl then no error appears but co command fil would be created i dont know why, what can i do to solve this, thanks.
Imran
A: 

I have another question, is there a possibility to implement a JMP instruction with flex/bison like in Assembler to go to a label like my example, thanks for your help.

:L1
IF FLAG AND X"0001"
    EVT 23;
ELSE
    WAIT 500 ms;
    JMP L1;
END IF; 
Imran
A: 

Hallo Rudi, now i achieved that the interpreter analysis the input and gives out what is used, ok i had it, thanks for your help, your ideas and comments helped me a lot, here is the flex and bison file:

Here the link of bitbucket.org:

https://bitbucket.org/matrix/tiny/downloads

Lex:

%{
#include "syntaxtree.h"
#include "tiny.tab.h"
#include "string.h"
extern int lineno;

%}

%x COMMENT

%%
"/*" {BEGIN COMMENT;}
<COMMENT>(.|\n) ;
<COMMENT>"*/" BEGIN INITIAL; 
"//".* ; 
"if" {return IF;}
"then" {return THEN;}
"else" {return ELSE;}
"end" {return END;}
"repeat" {return REPEAT;}
"until" {return UNTIL;}
"scan" {return SCAN;}
"print" {return PRINT;}
"for" {return FOR;}
"while" {return WHILE;}
"return" {return RETURN;}
*"jmp" {return JMP;}*
*"goto" {return GOTO;}*
== {return EQUAL;}
[0-9]+ {yylval.val = atoi(yytext); return NUM;}
[a-zA-Z]+ {yylval.name = strdup(yytext); 
       return  ID;}
"\n" {lineno++;}
[-=<>+*/();,] {return yytext[0];}
[ \t]+ ;
. {printf("Symbol %c not recognized in line: %d\n",yytext[0],lineno);}

Yacc:

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "syntaxtree.h"
#define YYDEBUG 1

extern FILE* yyin;
//The syntaxtree that the parser builds
TreeNode* tree=NULL; 
//lineno in source 
int lineno;

TreeNode* newStmtNode(StmtKind kind);
TreeNode* newExpNode(ExpKind kind);
void printTree(TreeNode * tree);
void yyerror(char * message);

%}
%union{
int val;
char* name;
TreeNode* tree;
char s[17];
}

%token IF THEN ELSE END REPEAT UNTIL SCAN PRINT ID NUM EQUAL FOR WHILE RETURN JMP GOTO
%token <s> NAME
%type <val> NUM
%type <name> ID
%type <tree> stmt_seq stmt if_stmt repeat_stmt for_stmt while_stmt assign_stmt
  read_stmt write_stmt exp simple_exp term factor label_stmt goto_stmt


%%

program : stmt_seq {tree = $1;}


stmt_seq: stmt_seq stmt ';'  
      { TreeNode * t = $1;
        if (t != NULL){ 
          while (t->sibling != NULL){
            t=(t->sibling);
          }
          t->sibling=$2;
          $$ = $1;
        }
        else $$=$2;  
      }
    | stmt ';'

stmt: if_stmt { $$=$1;}
| repeat_stmt { $$=$1;}
| assign_stmt { $$=$1;}
| read_stmt { $$=$1;}
| write_stmt { $$=$1;}
| for_stmt { $$=$1;}
| while_stmt { $$=$1;}
| label_stmt {$$=$1;}
| goto_stmt {$$=$1;}

if_stmt: IF exp stmt_seq END 
      { $$ = newStmtNode(IfK);
        $$->child[0] = $2;
        $$->child[1] = $3;
      }
    | IF exp stmt_seq ELSE stmt_seq END 
      { $$ = newStmtNode(IfK);
        $$->child[0] = $2;
        $$->child[1] = $3;
        $$->child[2] = $5;
      }

repeat_stmt: REPEAT stmt_seq UNTIL '(' exp ')'
       { $$ = newStmtNode(RepeatK);
         $$->child[0] = $2;
         $$->child[1] = $5;
        }

for_stmt: FOR '(' assign_stmt ',' exp ',' assign_stmt ')' stmt_seq END
    { $$ =  newStmtNode(ForK);
      $$->child[0] = $3;
      $$->child[1] = $5;
      $$->child[2] = $7;
      $$->child[3] = $9;
    }

while_stmt: WHILE '(' exp ')' stmt_seq END
    { $$ =  newStmtNode(WhileK);
      $$->child[0] = $3;
      $$->child[1] = $5;
    }

assign_stmt: ID '=' exp
            { $$ = newStmtNode(AssignK);
              $$->child[0] = $3;
              $$->attr.name = $1;
            }


read_stmt: SCAN '('ID ')'
          { $$ = newStmtNode(ReadK);
            $$->attr.name = strdup($3);
          }

write_stmt: PRINT '(' exp ')'
          { $$ = newStmtNode(WriteK);
            $$->child[0] = $3;
          }

**label_stmt: ':' NAME 
    { $$ = newStmtNode(LabelK);
      $$->child[0] = $2;
    }
jmp_stmt: JMP NAME
        {$$ = newStmtNode(JmpK);
        $$->child[0] = $2;
        /*$$->child[1] = $2;*/  
    }**


/*label_stmt: Name ':' {printf("%s:\n",$1);}*/
/*jmp_stmt:  GOTO NAME ';' {printf("\tJNZ %s\n",$2);}*/
/*goto_stmt: GOTO NAME ';' {printf("\tJNZ %s\n",$2);}*/


exp:  simple_exp '<' simple_exp 
  { $$=newExpNode(OpK);
    $$->child[0] = $1;
    $$->child[1] = $3;
    $$->attr.op = "<\0";
  }
  | simple_exp '>' simple_exp 
  { $$=newExpNode(OpK);
    $$->child[0] = $1;
    $$->child[1] = $3;
    $$->attr.op = ">\0";
  }
|  simple_exp EQUAL simple_exp 
  { $$=newExpNode(OpK);
    $$->child[0] = $1;
    $$->child[1] = $3;
    $$->attr.op = "==\0";
  }
| simple_exp { $$ = $1; }
;

simple_exp : simple_exp '+' term
  { $$=newExpNode(OpK);
    $$->child[0] = $1;
    $$->child[1] = $3;
    $$->attr.op = "+\0";
  }
| simple_exp '-' term
  { $$=newExpNode(OpK);
    $$->child[0] = $1;
    $$->child[1] = $3;
    $$->attr.op = "-\0";
  }
| term { $$ = $1; }
;


term: term '*' factor
    { $$=newExpNode(OpK);
      $$->child[0] = $1;
      $$->child[1] = $3;
      $$->attr.op = "*\0";
    }
| term '/' factor
    { $$=newExpNode(OpK);
      $$->child[0] = $1;
      $$->child[1] = $3;
      $$->attr.op = "/\0";
    }
| factor {$$=$1;}
;



factor: '(' exp ')' { $$=$2;}
  | NUM 
    { $$ = newExpNode(ConstK);
      $$->attr.val = $1;
    }
  | ID { $$ = newExpNode(IdK);
         $$->attr.name = $1; }
  ;


%%

int main(int argc,char ** argv){
 yydebug = 0;
 yyin = fopen(argv[1],"r");
yyparse();
printf("\n");
printTree(tree);
printf("\n");
}


void yyerror(char * message){
  printf("%s: line %d\n",message,lineno);
}


void printTree(TreeNode * tree ){
  static int indentno=0;
  int i;
 indentno+=2;  
  while (tree != NULL){
    for(i=0;i<indentno;i++)
      printf(" ");
    if (tree->nodekind==StmtK){
      switch (tree->kind.stmt){
        case IfK:
          printf("If\n");
          break;
        case RepeatK:
          printf("Repeat\n");
          break;
        case ForK:
          printf("For\n");
          break;
        case WhileK:
          printf("While\n");
          break;
        case AssignK:
          printf("Assign to: %s\n",tree->attr.name);
          break;
        case ReadK:
          printf("scan: %s\n",tree->attr.name);
          break;
        case WriteK:
          printf("print\n");
          break;
        **case LabelK:
          printf("Label\n");
          break;
    case JmpK:
          printf("JMP\n");
          break;**
      }
    }
    else if (tree->nodekind==ExpK){
      switch(tree->kind.exp){
        case OpK:
          printf("Op: %s\n",tree->attr.op);
          break;
        case ConstK:
          printf("const: %d\n",tree->attr.val);
          break;
        case IdK:
          printf("Id: %s\n",tree->attr.name);
          break;
      }
    }
    for(i=0;i<4;i++){
      if ((tree->child[1] != NULL) && (tree->child[2] != NULL) && tree->child[i] !=    
        NULL))
        printf("\n"); 
      printTree(tree->child[i]);
    }
    tree = tree->sibling;
  }
  indentno-=2;;
}

TreeNode * newStmtNode(StmtKind kind){
  TreeNode * t = malloc(sizeof(TreeNode));
  int i;
  for(i=0;i<4;i++)
    t->child[i]=NULL;    
  t->sibling=NULL;
  t->nodekind=StmtK;
  t->kind.stmt = kind;
  return t;
}


TreeNode * newExpNode(ExpKind kind){
  TreeNode * t = malloc(sizeof(TreeNode));
  int i;
  for(i=0;i<4;i++)
    t->child[i] = NULL;    
  t->sibling = NULL;
  t->nodekind = ExpK;
  t->kind.exp = kind;
  return t; 
}
Imran
I can't compile it, because syntaxtree.h (and .c) are missing. Do you have a webside where you can put this stuff. Or can you make it a project on bitbucket.org?
Rudi
here the syntaxtree.htypedef enum {StmtK, ExpK} NodeKind;typedef enum {IfK, RepeatK, AssignK, ReadK, WriteK, ForK, WhileK} StmtKind;typedef enum {OpK, ConstK, IdK} ExpKind;typedef struct treeNode{ struct treeNode * child[4]; struct treeNode * sibling; NodeKind nodekind; union {StmtKind stmt; ExpKind exp;} kind; union { char * op; int val; char * name;} attr;} TreeNode;Here is the link where all the files are, the programs name is tiny, you find all files there.
Imran
what i ment was, its allround a syntactical analysis and i want that the interpreter i build should recognise the JMP Instructor, when i give an Input like at the top, and gives me back, that JMP was used und it jumped to the label L1, so i implementet with goto if this runs then i will change it to JMP, i have hanged the code and added some things, you will see what i added i have marked these lines, noe there are two errors, i dint know why. Thanks
Imran
@Imran I had one error in the `label_stmt: Name ':' {printf("%s:\n",$1);}` line, the token is `NAME`, not `Name`. But the lexer does not return NAME, only ID. Also in your example the colon is before the label, so I suggest `label_stmt: ':' ID {printf("%s:\n",$2);}`. Also the lexer does not return the colon, so the rule `[-=<>+*/();,] {return yytext[0];}` must be augmented to `[-=<>+*/();,:] {return yytext[0];}`
Rudi
@Imram the next thing is that there is a statement rule needed for statements starting with an id, also there is no rule in the lexer for the `X"0001"` representation. You might also want numbers in Identifiers, so the ID rule can be read as `[_a-zA-Z][_a-zA-Z0-9]* {yylval.name = ...`
Rudi
Thanks so much for your help, i found out, that ihave to make a table where the commands like JMP are and when a input comes with JMP it has to search after JMP and give it out. JMP must be funktion that jumps to a label, which is defined., have you any idea how i make this table, or how to search after commands given in the input thanks.
Imran
I've found out for the JMP instruction i need a table where i put it in, and if there is a input line, so it must be compared with the table and give it out. You have to search in the tree afte the aim-label of the JMP instruction, or you put a table where you assign a label to a knot of a tree. The main work of this compiler is to analys the input and give out which keywords are used and operators and so on. and JMP is the difficult one because its jump to a certain label. Thanks.
Imran
@Imran I would put the labels both into the AST and into a name->node lookup list, because a) the information where the label is with respect to the other language elements remains but b) the code which handles the jump instructions don't need to scan the entire AST to get the jump target. Also I really reccomend to use a service like bitbucket.org or a private homepage to store the project code, since it is very cumbersome to work with larger code files in SO (especially I can't create patches with bugfixes I found).
Rudi
Hallo Rudi,I have added a code for the label and for the jmp statement, i think so it must go.thanks
Imran
I've added a label_stmt and a jmp_stmtand please show in the case switch-statement if this is right.Rudi i ahve an other question, can you explain me in short words what these lines means.Thanks.
Imran
A: 

Now i found something to implement the JMP Instructer, in flex/bison its called goto, so here the code can yout throw an eye on this code, i cant compile it, error comes, "symbol Name is used, but not defined as token and has no rules ", error message for OUT too, but Out is defined as a token, thanks.

Lex:

%{
#include "goto.tab.h"
%}
%%
[\t\n\r]
=={return(EQ);}
goto{return(GOTO);}
halt{return(HALT);}
if{return(IF);}
in{return(IN);}
int{return(INT);}
out{return(OUT);}
unless{return(UNLESS);}
[0-9]+{strncpy(yylval.s,yytext,16);return(NUMBER);}
[a-zA-Z][a-zA-Z0-9]*{strncpy(yylval.s,yytext,16);return(NAME);}
. {return(yytext[0]);}
%%
int yywrap(){return(1);}

Yacc:

%{
#include <stdlib.h>
%}

%union{char s[17];}
%token <s> NUMBER NAME
%token EQ GOTO IF UNLESS INT IN OUT HALT
%left EQ
%left '+''-'
%left '*'
%%
input: statement | input statement
;
statement: label|intdef|goto|if|unless|halt|out|assign  
;
label: Name ':' {printf("%s:\n",$1);}
;
intdef: INT intlist ';'
;
intlist: integer
|intlist ';'integer
;
integer: NAME {printf("%s:0\n",$1);}
|NAME '=' NUMBER{printf("%s:%\n",$1,$3);}
|NAME '=' '-' NUMBER{printf("%s:-%s\n",$1,$4);}
;
goto: GOTO NAME ';' {printf("\tJNZ %s\n",$2);}
;
if: IF '(' expr ')' GOTO NAME ';' {printf("\tJZ %s\n",$6);}
;
unless: UNLESS '(' expr ')' GOTO NAME ';' {printf("\tJZ %s\n",$6);}
;
halt: HALT ';' {printf("\tHALT\n");}
;
assign: NAME '=' expr ';' {printf("\tPOP %s\n",$1);}
;
expr: NAME {printf("\tPUSH %s\n",$1);}
| NUMBER {printf("\tPUSHI %s\n",$1);}
| IN {printf("\tIN\n");}
| expr '+' expr {printf("\tADD\n");}
| expr '-' expr {printf("\tSUB\n");}
| expr '*' expr {printf("\tMUL\n");}
| expr EQ expr {printf("\tEQ\n");}
;
%%
int yyerror(char *s){print("%s\n",s);}
int main()
{
yyparse();
}
Imran
A: 

I've added a label_stmt and a jmp_stmt

and please show in the case switch-statement if this is right. Rudi i ahve an other question, can you explain me in short words what these lines means.Thanks.

for_stmt: FOR '(' assign_stmt ',' exp ',' assign_stmt ')' stmt_seq END
  { $$ =  newStmtNode(ForK);
    $$->child[0] = $3;
    $$->child[1] = $5;
    $$->child[2] = $7;
    $$->child[3] = $9;
  }

i know what the first line means but after that its alittle bit difficult to understand for me thanks.

Imran
The first line is the syntax to match, the rest are the action. The `$$` variable is the result of the rule, equal to `for_stmt` in the grammar. There is also a %type definition above the grammar rules, which link this rule to a special type in the parser stack union. `$1` to `$9` are right-side the elements of the rule, `FOR`=$1, `assign_stmt`=$3, exp=$5 and so on.
Rudi
A: 

Hallo, So i made all the changes and the program is running, it do all the things it should do except the JMP instructions, i did everything, i defined JMP and notified overall, but it doesnt work, can you help me out there, i have uploaded the new changed files at http://bitbucket.org/matrix/changed-tiny my label_stmt and jmp_stmt (in the yacc file) i think is not right defined, there is something missing, i think im at the end of my mind.

Imran
A: 

Hallo Rudi,

I think i have it, here is my code:

Lex:

"jmp" {return JMP;}
":" {return yytext[0];}

in Yacc:

label_stmt: ':' ID
   { $$ = newStmtNode(LabelK);
     $$->attr.name = $2;
   }

jmp_stmt: JMP ID
   { $$ = newStmtNode(JmpK);
 $$->attr.name = $2;
   }

and the output looks like:

case LabelK:
      printf("Label: %s\n",tree->attr.name);
      break;
case JmpK:
      printf("JMP\n","to Label: %s\n",tree->attr.name);
      break;

what do you say abot that, the whole code is at http://bitbucket.org/matrix/changed-tiny, but it doesnt work like i want, he accept the colon and the JMP instruction, but that was, can you look out why it doesnt work, i mean i have defineed everything and it doesnt work.thanks

Imran