4. Intro: combinators, parsers
Scala Parser Combinators from the Ground Up
How to write typical parser
33/35 11/14 Pros, cons 8/9 4/13
Advanced techniques
4
5. Parser?
● Трансформирует текст в структуру
+
2*3 + 4 * 3
2 3
5
6. Hello, parser
import scala.util.parsing.combinator._
import syntactical.StandardTokenParsers
/** Abstract syntax tree for the arithmetic expressions built by `ArithParsers`. */
sealed trait Expr

/** Integer literal. */
final case class Num(i: Int) extends Expr

/** Variable reference, identified by its name. */
final case class Var(n: String) extends Expr

/** Sum of two sub-expressions. */
final case class Plus(e1: Expr, e2: Expr) extends Expr

/** Product of two sub-expressions. */
final case class Mult(e1: Expr, e2: Expr) extends Expr
/** Token-based parser for arithmetic expressions over integer literals and identifiers.
  *
  * Grammar (each rule recurses on its right-hand side):
  * {{{
  * expr   ::= term "+" expr | term
  * term   ::= factor "*" term | factor
  * factor ::= numericLit | ident | "(" expr ")"
  * }}}
  */
object ArithParsers extends StandardTokenParsers with ImplicitConversions {
  lexical.delimiters += ("(", ")", "+", "*")

  /** A term followed by `+` and another expression, or a single term. */
  def expr: Parser[Expr] =
    term ~ ("+" ~> expr) ^^ Plus | term

  /** A factor followed by `*` and another term, or a single factor. */
  def term: Parser[Expr] =
    factor ~ ("*" ~> term) ^^ Mult | factor

  /** An integer literal, an identifier, or a parenthesised expression. */
  def factor: Parser[Expr] =
    numericLit ^? (
      // A literal outside the Int range becomes a parse failure instead of a
      // NumberFormatException thrown by `toInt` from inside the parser.
      { case s if BigInt(s).isValidInt => Num(s.toInt) },
      s => "integer literal out of range: " + s
    ) | ident ^^ Var | "(" ~> expr <~ ")"

  /** Parses `s` as one complete expression; `phrase` requires all input to be consumed. */
  def parseExpr(s: String) = phrase(expr)(new lexical.Scanner(s))
}
scala> ArithParsers.parseExpr("1")
res1: ArithParsers.ParseResult[parsers2.Expr] = [1.2] parsed: Num(1)
scala> ArithParsers.parseExpr("1 + 1 * 2")
res2: ArithParsers.ParseResult[parsers2.Expr] = [1.10] parsed: Plus(Num(1),Mult(Num(1),Num(2)))
scala> ArithParsers.parseExpr("a * (a * a)")
res3: ArithParsers.ParseResult[parsers2.Expr] = [1.12] parsed: Mult(Var(a),Mult(Var(a),Var(a)))
6
7. Example 2: Lambda calculus
t ::= terms:
x variable
λx.t abstraction
tt application
x y z = ((x y) z)
λx.λy.y = λx.(λy.y)
7
8. Example 2
/** Abstract syntax tree of lambda-calculus terms. */
sealed trait Term

/** Variable, identified by its name. */
final case class Var(n: String) extends Term

/** Abstraction binding variable `v` in `body`. */
final case class Lam(v: Var, body: Term) extends Term

/** Application of `t1` to `t2`. */
final case class App(t1: Term, t2: Term) extends Term
/** Packrat parser for untyped lambda-calculus terms.
  *
  * Grammar (a backslash is the ASCII spelling of lambda):
  * {{{
  * term    ::= appTerm | lam
  * lam     ::= "\" ident "." term
  * appTerm ::= appTerm aTerm | aTerm
  * aTerm   ::= ident | "(" term ")"
  * }}}
  * `appTerm` is left-recursive, which is why the rules are `PackratParser`s.
  */
object LamParsers extends StandardTokenParsers with ImplicitConversions with PackratParsers {
  // The lambda delimiter is a single backslash; an empty-string delimiter can never mark an abstraction.
  lexical.delimiters += ("(", ")", ".", "\\")

  /** Any term: an application chain or an abstraction. */
  lazy val term: PackratParser[Term] =
    appTerm | lam

  /** A variable, built from an identifier token. */
  lazy val vrb: PackratParser[Var] =
    ident ^^ Var

  /** An abstraction: backslash, bound variable, dot, body. */
  lazy val lam: PackratParser[Term] =
    ("\\" ~> vrb) ~ ("." ~> term) ^^ Lam

  /** One or more atomic terms; the left recursion nests applications to the left. */
  lazy val appTerm: PackratParser[Term] =
    appTerm ~ aTerm ^^ App | aTerm

  /** An atomic term: a variable or a parenthesised term. */
  lazy val aTerm: PackratParser[Term] =
    vrb | "(" ~> term <~ ")"

  /** Parses `s` as one complete term; `phrase` requires all input to be consumed. */
  def parseTerm(s: String) =
    phrase(term)(new lexical.Scanner(s))
}
scala> LamParsers.parseTerm("x y z")
res1: LamParsers.ParseResult[parsers.Term] = [1.6] parsed: App(App(Var(x),Var(y)),Var(z))
scala> LamParsers.parseTerm("""\x.\y.x y""")
res2: LamParsers.ParseResult[parsers.Term] = [1.10] parsed:
Lam(Var(x),Lam(Var(y),App(Var(x),Var(y))))
scala> LamParsers.parseTerm("""(\x.x x) (\x. x x)""")
res3: LamParsers.ParseResult[parsers.Term] = [1.19] parsed:
App(Lam(Var(x),App(Var(x),Var(x))),Lam(Var(x),App(Var(x),Var(x))))
8
11. Принципы комбинаторных библиотек
● Соответствие терминологии библиотеки и
терминологии предметной области.
● Состав
● типы,
● примитивы,
● комбинаторы первого порядка,
● комбинаторы высшего порядка.
● Свойство замыкания (композиционность).
● Возможность эффективной реализации.
E. Кирпичев. Элементы функциональных языков. Практика функционального
программирования №3.
11
16. Parsers in Scala
C9 Lectures: Dr. Erik Meijer - Functional Programming Fundamentals Chapter 8 of 13
A. Moors, F. Piessens, M. Odersky. Parser Combinators in Scala. Report CW 49 // Feb 2008
16
18. Parsers in Scala are functional
Background:
● W. Burge. Recursive Programming Techniques.
Addison-Wesley, 1975.
● Ph. Wadler. How to Replace Failure by a List of
Successes. A method for exception handling,
backtracking, and pattern matching in lazy
functional languages // 1985
● G. Hutton. Higher-order functions for parsing //
Journal of functional programming. 1992/2
● J. Fokker. Functional Parsers // 1995
18
19. Parser?
● Трансформирует текст в структуру
+
2*3 + 4 * 3
2 3
19
20. Парсер – это функция
type Parser[A] = String => A
Нет композиции функций, не обязательно парсить всю строку
type Parser[A] = String => (A, String)
Может закончиться неудачей
type Parser[A] = String => Option[(A, String)]
20
28. Where is a problem?
/** Parser combinators written by matching directly on `Success` and `Failure`. */
trait SimpleParsers extends SimpleResults {
  /** A parser is a function from `Input` to `Result[T]`. */
  trait Parser[+T] extends (Input => Result[T]) { self =>
    def apply(in: Input): Result[T]

    /** Alternation: this parser's success, otherwise the result of `p` on the same input. */
    def |[U >: T](p: => Parser[U]): Parser[U] = new Parser[U] {
      def apply(in: Input) = self(in) match {
        case Success(value, rest) => Success(value, rest)
        case Failure(_, _)        => p(in)
      }
    }

    /** Sequencing: runs this parser, then `p` on the remaining input, pairing both results. */
    def ~[U](p: => Parser[U]): Parser[(T, U)] = new Parser[(T, U)] {
      def apply(in: Input) = self(in) match {
        case Failure(msg, at) => Failure(msg, at)
        case Success(left, afterLeft) =>
          p(afterLeft) match {
            case Failure(msg, at)           => Failure(msg, at)
            case Success(right, afterRight) => Success((left, right), afterRight)
          }
      }
    }
  }
}
/** Example grammar: space-separated repetitions of "oxo".
  * `accept` and `Parser` come from `StringParsers`, which is not shown here.
  */
object OXOParser extends StringParsers {
  /** The three elements 'o', 'x', 'o' in sequence. */
  def oxo = accept('o') ~ accept('x') ~ accept('o')

  /** One `oxo`, optionally followed by a space and further `oxos`. */
  def oxos: Parser[Any] = {
    // Both right-hand operands are used exactly as in the single-expression form.
    val oxoThenMore = oxo ~ accept(' ') ~ oxos
    oxoThenMore | oxo
  }
}
28
29. Too much “threading”
/** Parser combinators that thread results and remaining input by hand. */
trait SimpleParsers extends SimpleResults {
  /** A parser is a function from `Input` to `Result[T]`. */
  trait Parser[+T] extends (Input => Result[T]) { self =>
    def apply(in: Input): Result[T]

    /** Alternation: this parser's success, otherwise the result of `p` on the same input. */
    def |[U >: T](p: => Parser[U]): Parser[U] = new Parser[U] {
      def apply(in: Input) = self(in) match {
        case Success(value, rest) => Success(value, rest)
        case Failure(_, _)        => p(in)
      }
    }

    /** Sequencing: runs this parser, then `p` on the remaining input, pairing both results. */
    def ~[U](p: => Parser[U]): Parser[(T, U)] = new Parser[(T, U)] {
      def apply(in: Input) = self(in) match {
        case Failure(msg, at) => Failure(msg, at)
        case Success(left, afterLeft) =>
          p(afterLeft) match {
            case Failure(msg, at)           => Failure(msg, at)
            case Success(right, afterRight) => Success((left, right), afterRight)
          }
      }
    }
  }
}
/** Example grammar: space-separated repetitions of "oxo".
  * `accept` and `Parser` come from `StringParsers`, which is not shown here.
  */
object OXOParser extends StringParsers {
  /** The three elements 'o', 'x', 'o' in sequence. */
  def oxo = accept('o') ~ accept('x') ~ accept('o')

  /** One `oxo`, optionally followed by a space and further `oxos`. */
  def oxos: Parser[Any] = {
    // Both right-hand operands are used exactly as in the single-expression form.
    val oxoThenMore = oxo ~ accept(' ') ~ oxos
    oxoThenMore | oxo
  }
}
29
30. Improved Results
/** Parse results that carry their own combinators, so parsers need not pattern match on them. */
trait SimpleResults {
  /** The kind of input being parsed; fixed by the concrete subclass. */
  type Input

  /** Outcome of running a parser: either a `Success` or a `Failure`. */
  trait Result[+T] {
    /** The input position attached to this result. */
    def next: Input

    /** Transforms a successful value; a failure is passed through. */
    def map[U](f: T => U): Result[U]

    /** Continues with `f` applied to the value and then to `next`; a failure is passed through. */
    def flatMapWithNext[U](f: T => Input => Result[U]): Result[U]

    /** Keeps a success; replaces a failure with `alt` (evaluated only in that case). */
    def append[U >: T](alt: => Result[U]): Result[U]
  }

  /** Successful parse: the produced value and the input that remains. */
  case class Success[+T](result: T, next: Input) extends Result[T] {
    def map[U](f: T => U): Success[U] =
      Success(f(result), next)

    def flatMapWithNext[U](f: T => Input => Result[U]): Result[U] =
      f(result)(next)

    def append[U >: T](alt: => Result[U]): Success[T] =
      this
  }

  /** Failed parse: an error message and the input at which it was reported. */
  case class Failure(msg: String, next: Input) extends Result[Nothing] {
    def map[U](f: Nothing => U): Failure =
      this

    def flatMapWithNext[U](f: Nothing => Input => Result[U]): Failure =
      this

    def append[U](alt: => Result[U]): Result[U] =
      alt
  }
}
●map -...
●flatMapWithNext - ...
●append – for multiple results (we do not consider it here)
30
31. Parser is a function with many results
type Parser[A] = String => A
type Parser[A] = String => (A, String)
type Parser[A] = String => Option[(A, String)]
type Parser[A] = String => List[(A, String)]
31
32. After improving
/** Parse results that carry their own combinators, so parsers need not pattern match on them. */
trait SimpleResults {
  /** The kind of input being parsed; fixed by the concrete subclass. */
  type Input

  /** Outcome of running a parser: either a `Success` or a `Failure`. */
  trait Result[+T] {
    /** The input position attached to this result. */
    def next: Input

    /** Transforms a successful value; a failure is passed through. */
    def map[U](f: T => U): Result[U]

    /** Continues with `f` applied to the value and then to `next`; a failure is passed through. */
    def flatMapWithNext[U](f: T => Input => Result[U]): Result[U]

    /** Keeps a success; replaces a failure with `alt` (evaluated only in that case). */
    def append[U >: T](alt: => Result[U]): Result[U]
  }

  /** Successful parse: the produced value and the input that remains. */
  case class Success[+T](result: T, next: Input) extends Result[T] {
    def map[U](f: T => U): Success[U] =
      Success(f(result), next)

    def flatMapWithNext[U](f: T => Input => Result[U]): Result[U] =
      f(result)(next)

    def append[U >: T](alt: => Result[U]): Success[T] =
      this
  }

  /** Failed parse: an error message and the input at which it was reported. */
  case class Failure(msg: String, next: Input) extends Result[Nothing] {
    def map[U](f: Nothing => U): Failure =
      this

    def flatMapWithNext[U](f: Nothing => Input => Result[U]): Failure =
      this

    def append[U](alt: => Result[U]): Result[U] =
      alt
  }
}
/** Parser combinators that delegate to the combinators on `Result` instead of pattern matching. */
trait SimpleParsers extends SimpleResults {
  /** A parser is a function from `Input` to `Result[T]`. */
  abstract class Parser[+T] extends (Input => Result[T]) {
    def apply(in: Input): Result[T]

    /** Runs this parser and hands its result to `f` through `Result.flatMapWithNext`. */
    def flatMap[U](f: T => Parser[U]): Parser[U] = new Parser[U] {
      def apply(in: Input) = Parser.this(in) flatMapWithNext (f)
    }

    /** Runs this parser and transforms its result with `f` through `Result.map`. */
    def map[U](f: T => U): Parser[U] = new Parser[U] {
      def apply(in: Input) = Parser.this(in) map (f)
    }

    /** Alternation: combines this parser's result with that of `p` on the same input via `append`. */
    def |[U >: T](p: => Parser[U]): Parser[U] = new Parser[U] {
      def apply(in: Input) = Parser.this(in) append p(in)
    }

    /** Sequencing: runs this parser, then `p`, and pairs the two results. */
    // Slide callout "Hey!": with `flatMap` and `map` defined, sequencing is a plain for-comprehension.
    def ~[U](p: => Parser[U]): Parser[(T, U)] =
      for (a <- this; b <- p) yield (a, b)
  }
}
37. Real Parsers
package scala.util.parsing.combinator
/** Abridged excerpt of the library's `Parsers` trait as shown on the slide.
  * Elided members are marked with `// ...`; the slide callouts are kept as comments.
  */
trait Parsers {
  type Elem
  // Slide callout: stream annotated with coordinates.
  type Input = Reader[Elem]

  sealed abstract class ParseResult[+T]
  case class Success[+T](result: T, override val next: Input) extends ParseResult[T]
  sealed abstract class NoSuccess(val msg: String, override val next: Input)
    extends ParseResult[Nothing]
  case class Failure(override val msg: String, override val next: Input)
    extends NoSuccess(msg, next)
  case class Error(override val msg: String, override val next: Input)
    extends NoSuccess(msg, next)
  // ...

  // Slide callout: controlling backtracking.
  // NOTE(review): the callout sits next to `Parser` in the extracted text but may point at
  // the `Failure` / `Error` pair above. Confirm against the original slide.
  abstract class Parser[+T] extends (Input => ParseResult[T]) {
    // ...
  }

  // Slide callout: deconstructing sequencing.
  case class ~[+a, +b](_1: a, _2: b) {
    override def toString = "("+ _1 +"~"+ _2 +")"
  }
}
package scala.util.parsing.input
/** Abridged excerpt of the input abstraction: the first element and the rest of the input. */
abstract class Reader[+T] {
  /** The element at the current position. */
  def first: T

  /** A reader over the input that follows `first`. */
  def rest: Reader[T]
}
53. PROS
● Тот же язык (Scala) – не нужно учить новый
инструмент.
● Исполняемая грамматика - всегда
актуальный код.
● Краткость + богатая выразительность: LL(*) и
больше (в том числе, контекстные
грамматики).
● Можно делать fusion синтаксического
разбора и чего-нибудь еще.
● Модульность
53
54. CONS
● Некоторые простые вещи могут
кодироваться очень непросто.
● Performance.
54
59. + Left Recursion
/** Abstract syntax tree of lambda-calculus terms. */
sealed trait Term

/** Variable, identified by its name. */
final case class Var(n: String) extends Term

/** Abstraction binding variable `v` in `body`. */
final case class Lam(v: Var, body: Term) extends Term

/** Application of `t1` to `t2`. */
final case class App(t1: Term, t2: Term) extends Term
/** Packrat parser for untyped lambda-calculus terms, using a left-recursive application rule.
  *
  * Grammar (a backslash is the ASCII spelling of lambda):
  * {{{
  * term    ::= appTerm | lam
  * lam     ::= "\" ident "." term
  * appTerm ::= appTerm aTerm | aTerm
  * aTerm   ::= ident | "(" term ")"
  * }}}
  */
object LamParsers extends StandardTokenParsers with ImplicitConversions with PackratParsers {
  // The lambda delimiter is a single backslash; an empty-string delimiter can never mark an abstraction.
  lexical.delimiters += ("(", ")", ".", "\\")

  /** Any term: an application chain or an abstraction. */
  lazy val term: PackratParser[Term] =
    appTerm | lam

  /** A variable, built from an identifier token. */
  lazy val vrb: PackratParser[Var] =
    ident ^^ Var

  /** An abstraction: backslash, bound variable, dot, body. */
  lazy val lam: PackratParser[Term] =
    ("\\" ~> vrb) ~ ("." ~> term) ^^ Lam

  /** Left-recursive rule: an application chain followed by one more atomic term. */
  lazy val appTerm: PackratParser[Term] =
    appTerm ~ aTerm ^^ App | aTerm

  /** An atomic term: a variable or a parenthesised term. */
  lazy val aTerm: PackratParser[Term] =
    vrb | "(" ~> term <~ ")"

  /** Parses `s` as one complete term; `phrase` requires all input to be consumed. */
  def parseTerm(s: String) =
    phrase(term)(new lexical.Scanner(s))
}
59
60. + Left Recursion
/** Abstract syntax tree of lambda-calculus terms. */
sealed trait Term

/** Variable, identified by its name. */
final case class Var(n: String) extends Term

/** Abstraction binding variable `v` in `body`. */
final case class Lam(v: Var, body: Term) extends Term

/** Application of `t1` to `t2`. */
final case class App(t1: Term, t2: Term) extends Term
/** Packrat parser for untyped lambda-calculus terms, using a left-recursive application rule.
  *
  * Grammar (a backslash is the ASCII spelling of lambda):
  * {{{
  * term    ::= appTerm | lam
  * lam     ::= "\" ident "." term
  * appTerm ::= appTerm aTerm | aTerm
  * aTerm   ::= ident | "(" term ")"
  * }}}
  */
object LamParsers extends StandardTokenParsers with ImplicitConversions with PackratParsers {
  // The lambda delimiter is a single backslash; an empty-string delimiter can never mark an abstraction.
  lexical.delimiters += ("(", ")", ".", "\\")

  /** Any term: an application chain or an abstraction. */
  lazy val term: PackratParser[Term] =
    appTerm | lam

  /** A variable, built from an identifier token. */
  lazy val vrb: PackratParser[Var] =
    ident ^^ Var

  /** An abstraction: backslash, bound variable, dot, body. */
  lazy val lam: PackratParser[Term] =
    ("\\" ~> vrb) ~ ("." ~> term) ^^ Lam

  /** Left-recursive rule: an application chain followed by one more atomic term. */
  // Slide callout "lazy val": every production here is a `lazy val` of type `PackratParser`.
  lazy val appTerm: PackratParser[Term] =
    appTerm ~ aTerm ^^ App | aTerm

  /** An atomic term: a variable or a parenthesised term. */
  lazy val aTerm: PackratParser[Term] =
    vrb | "(" ~> term <~ ")"

  /** Parses `s` as one complete term; `phrase` requires all input to be consumed. */
  def parseTerm(s: String) =
    phrase(term)(new lexical.Scanner(s))
}
60
61. Without Left Recursion
/** Abstract syntax tree of lambda-calculus terms. */
sealed trait Term

/** Variable, identified by its name. */
final case class Var(n: String) extends Term

/** Abstraction binding variable `v` in `body`. */
final case class Lam(v: Var, body: Term) extends Term

/** Application of `t1` to `t2`. */
final case class App(t1: Term, t2: Term) extends Term
/** Parser for untyped lambda-calculus terms that avoids left recursion.
  *
  * Grammar (a backslash is the ASCII spelling of lambda):
  * {{{
  * term    ::= appTerm | lam
  * lam     ::= "\" ident "." term
  * appTerm ::= aTerm+
  * aTerm   ::= ident | "(" term ")"
  * }}}
  */
object LamParsers extends StandardTokenParsers with ImplicitConversions {
  // The lambda delimiter is a single backslash; an empty-string delimiter can never mark an abstraction.
  lexical.delimiters += ("(", ")", ".", "\\")

  /** Any term: an application chain or an abstraction. */
  lazy val term: Parser[Term] =
    appTerm | lam

  /** A variable, built from an identifier token. */
  lazy val vrb: Parser[Var] =
    ident ^^ Var

  /** An abstraction: backslash, bound variable, dot, body. */
  lazy val lam: Parser[Term] =
    ("\\" ~> vrb) ~ ("." ~> term) ^^ Lam

  /** One or more atomic terms, folded from the left so that `x y z` becomes `App(App(x, y), z)`. */
  lazy val appTerm: Parser[Term] =
    // `rep1` is the named form of the postfix `+` combinator and needs no postfix-operator syntax.
    // It yields a non-empty list, so `reduceLeft` is safe.
    rep1(aTerm) ^^ { _.reduceLeft(App) }

  /** An atomic term: a variable or a parenthesised term. */
  lazy val aTerm: Parser[Term] =
    vrb | "(" ~> term <~ ")"

  /** Parses `s` as one complete term; `phrase` requires all input to be consumed. */
  def parseTerm(s: String) =
    phrase(term)(new lexical.Scanner(s))
}
61