I have been playing around with Scala parser combinators for some time now, and have learned some of the ways to make them behave nicely and do most of the things I want using the built-in functions.
But how do you make an embedded language (like PHP or Ruby's ERB)? It requires that whitespace is not ignored outside the embedded blocks of real code.
I managed to make a simple parser that matches all text up to a given regex match, but I am looking for a better, prettier way of doing this. There is probably some already-defined function that does what is needed.
The test language parses text like:
now: [[ millis; ]]
and now: [[; millis; ]]
and is implemented by the following code:
package test
import scala.util.parsing.combinator.RegexParsers
import scala.util.matching.Regex
/** A single node of a parsed template; the hierarchy is sealed so matches on it are checked for exhaustiveness. */
sealed abstract class Statement

/** Emits the literal text `s` unchanged. */
final case class Print(s: String) extends Statement

/** Emits the current time in milliseconds when interpreted. */
final case class Millis() extends Statement
/**
 * Parser and interpreter for a tiny template language: free text is kept
 * verbatim (including whitespace) and code is embedded between an opening and
 * a closing pair of square brackets, with statements separated by semicolons.
 */
object SimpleLang extends RegexParsers {

  /**
   * Builds a parser that consumes the input from the current position up to,
   * but not including, the first match of `r`, and yields the consumed text.
   *
   * The parser does not skip leading whitespace, so the text is returned
   * exactly as written. It fails without consuming anything when `r` does not
   * match anywhere in the remaining input.
   *
   * @param r regular expression marking where the captured text ends
   * @return a parser producing the text that precedes the first match of `r`
   */
  def until(r: Regex): Parser[String] = new Parser[String] {
    def apply(in: Input): ParseResult[String] = {
      val source = in.source
      val offset = in.offset
      r.findFirstMatchIn(source.subSequence(offset, source.length())) match {
        case Some(matched) =>
          // matched.start is relative to the sub-sequence, i.e. to `offset`.
          val text = source.subSequence(offset, offset + matched.start).toString
          Success(text, in.drop(matched.start))
        case None =>
          // At end of input `in.first` is only an EOF marker character, so
          // report the situation in words instead of embedding it.
          val found = if (in.atEnd) "end of source" else "`" + in.first + "'"
          Failure("string matching regex `" + r + "' expected but " + found + " found", in)
      }
    }
  }

  /**
   * Same as the regex variant, but stops at the first literal occurrence of `s`.
   *
   * @param s literal text marking where the captured text ends (quoted, so
   *          regex metacharacters in it have no special meaning)
   */
  def until(s: String): Parser[String] = until(java.util.regex.Pattern.quote(s).r)

  /**
   * Executes the statements in order, writing their output to standard output.
   *
   * @param stats statements to run, typically the result of `apply`
   */
  def interpret(stats: List[Statement]): Unit = stats.foreach {
    case Print(s) => print(s)
    case Millis() => print(System.currentTimeMillis())
  }

  /**
   * Parses a complete template.
   *
   * @param input template source text
   * @return the statements making up the template, in source order
   * @throws RuntimeException if `input` is not a valid template
   */
  def apply(input: String): List[Statement] = parseAll(beginning, input) match {
    case Success(tree, _) => tree
    case e: NoSuccess     => throw new RuntimeException("Syntax error: " + e)
  }

  /** GRAMMAR **/

  // A template either starts directly with the opening delimiter "[[", or with
  // literal text that is printed before the first code section.
  def beginning: Parser[List[Statement]] = (
    "[[" ~> stats
      | until("[[") ~ "[[" ~ stats ^^ { case text ~ _ ~ rest => Print(text) :: rest }
  )

  // One or more statements separated by semicolons.
  def stats: Parser[List[Statement]] = rep1sep(stat, ";")

  // A statement is either the `millis` keyword or a "]] text [[" section: the
  // closing delimiter, followed by literal text that runs up to the next
  // opening delimiter or, failing that, to the end of the input.
  def stat: Parser[Statement] = (
    "millis" ^^^ { Millis() }
      | "]]" ~> ((until("[[") <~ "[[") | until("\\z".r)) ^^ { text => Print(text) }
  )

  /** Demo entry point: parses a sample template, prints its tree and runs it. */
  def main(args: Array[String]): Unit = {
    val tree = SimpleLang("now: [[ millis; ]]\nand now: [[; millis; ]]")
    println(tree)
    interpret(tree)
  }
}