One obvious solution is to define a type hierarchy:
// Base class of the hierarchy; every data type derives from it.
class Type
{
};
// Integer data type.
class Int : public Type
{
};
// String data type (presumably for text values; only sketched here).
class String : public Type
{
};
and so on. As a complete example, let us write an interpreter for a tiny language. The language allows declaring variables like this:
var a 10
That will create an Int object, assign it the value 10, and store it in the variables table under the name a. Operations can be invoked on variables. For instance, the addition operation on two Int values looks like:
+ a b
Here is the complete code for the interpreter:
#include <iostream>
#include <string>
#include <vector>
#include <sstream>
#include <cstdlib>
#include <map>
// Abstract root of the interpreter's data types. Every concrete type
// derives from Type and implements the pure virtual operations below.
class Type
{
public:
  // Virtual so that derived objects can be destroyed through a Type*.
  virtual ~Type () {}

  // Argument list passed to invoke (): a vector of Type pointers.
  typedef std::vector<Type*> TypeVector;

  // Operations every data type must provide:

  // Produces the textual form of this object.
  virtual const std::string toString () const = 0;

  // Reports whether other_obj is the same as this object.
  virtual bool equals (const Type &other_obj) = 0;

  // Performs the operation named by opr on this object, using the
  // objects in args as its arguments, and returns the result.
  virtual Type* invoke (const std::string &opr, const TypeVector &args) = 0;
};
// An implementation of Type to represent an integer. The C++ int is
// used to actually store the value. As a consequence this type is
// machine dependent, which might not be what you want for a real
// high-level language.
class Int : public Type
{
public:
Int () : value_ (0), ret_ (NULL) { }
Int (int v) : value_ (v), ret_ (NULL) { }
Int (const std::string &v) : value_ (atoi (v.c_str ())), ret_ (NULL) { }
virtual ~Int ()
{
delete ret_;
}
virtual const std::string toString () const
{
std::ostringstream out;
out << value_;
return out.str ();
}
virtual bool equals (const Type &other_obj)
{
if (&other_obj == this)
return true;
try
{
const Int &i = dynamic_cast<const Int&> (other_obj);
return value_ == i.value_;
}
catch (std::bad_cast ex)
{
return false;
}
}
// As of now, Int supports only addition, represented by '+'.
virtual Type* invoke (const std::string &opr, const TypeVector &args)
{
if (opr == "+")
{
return add (args);
}
return NULL;
}
private:
Type* add (const TypeVector &args)
{
if (ret_ == NULL) ret_ = new Int;
Int *i = dynamic_cast<Int*> (ret_);
Int *arg = dynamic_cast<Int*> (args[0]);
i->value_ = value_ + arg->value_;
return ret_;
}
int value_;
Type *ret_;
};
// We use std::map as a symbol (or variable) table: it maps a variable
// name to the Type object bound to that name. The mapped values are raw
// pointers, which std::map does not delete when an entry is replaced
// or when the table itself is destroyed.
typedef std::map<std::string, Type*> VarsTable;
// The whitespace-separated words of one input line.
typedef std::vector<std::string> Tokens;

// A simple tokenizer for our language. Splits line on whitespace and
// appends each word to tokens; existing contents of tokens are kept.
// A blank line, or leading/trailing whitespace, contributes no tokens.
static void
tokenize (const std::string &line, Tokens &tokens)
{
  std::istringstream in (line);
  std::string token;
  // Test the extraction itself rather than eof () beforehand: eof () is
  // only set after a read hits the end, so checking it first would push
  // one empty token for blank input or trailing whitespace.
  while (in >> token)
    tokens.push_back (token);
}
// Maps varName to an Int object in the symbol table. To support
// other Types, we need a more complex interpreter that actually infers
// the type of object by looking at the format of value.
static void
setVar (const std::string &varName, const std::string &value,
VarsTable &vars)
{
Type *t = new Int (value);
vars[varName] = t;
}
// Looks up varName in the symbol table. Returns the object bound to
// it, or NULL after reporting on standard output that the variable
// was not found.
static Type *
getVar (const std::string &varName, const VarsTable &vars)
{
  const VarsTable::const_iterator pos = vars.find (varName);
  if (pos != vars.end ())
    return pos->second;

  std::cout << "Variable " << varName << " not found." << std::endl;
  return NULL;
}
// Invokes opr on the object mapped to the name var01.
// opr should represent a binary operation. var02 will
// be pushed to the args vector. The string represenation of
// the result is printed to the console.
static void
invoke (const std::string &opr, const std::string &var01,
const std::string &var02, const VarsTable &vars)
{
Type::TypeVector args;
Type *arg01 = getVar (var01, vars);
if (arg01 == NULL) return;
Type *arg02 = getVar (var02, vars);
if (arg02 == NULL) return;
args.push_back (arg02);
Type *ret = NULL;
if ((ret = arg01->invoke (opr, args)) != NULL)
std::cout << "=> " << ret->toString () << std::endl;
else
std::cout << "Failed to invoke " << opr << " on "
<< var01 << std::endl;
}
// A simple REPL for our language. Type 'quit' to exit
// the loop.
int
main (int argc, char **argv)
{
VarsTable vars;
std::string line;
while (std::getline (std::cin, line))
{
if (line == "quit")
break;
else
{
Tokens tokens;
tokenize (line, tokens);
if (tokens.size () != 3)
{
std::cout << "Invalid expression." << std::endl;
continue;
}
if (tokens[0] == "var")
setVar (tokens[1], tokens[2], vars);
else
invoke (tokens[0], tokens[1], tokens[2], vars);
}
}
return 0;
}
A sample interaction with the interpreter:
/home/me $ ./mylang
var a 10
var b 20
+ a b
=> 30
+ a c
Variable c not found.
quit