# HG changeset patch
# User wenzelm
# Date 1392388947 -3600
# Node ID 28d4db6c6e79215e6e5687a9f75454b5b02ae0d5
# Parent 74db756853d4fae362427b95ffccdb15cf5952cb
tuned signature -- separate Lexicon from Parsers (in accordance with ML version);

diff -r 74db756853d4 -r 28d4db6c6e79 src/Pure/General/scan.scala
--- a/src/Pure/General/scan.scala Fri Feb 14 14:52:50 2014 +0100
+++ b/src/Pure/General/scan.scala Fri Feb 14 15:42:27 2014 +0100
@@ -28,99 +28,12 @@
 
 
 
-  /** Lexicon -- position tree **/
-
-  object Lexicon
-  {
-    /* representation */
-
-    sealed case class Tree(val branches: Map[Char, (String, Tree)])
-    private val empty_tree = Tree(Map())
-
-    val empty: Lexicon = new Lexicon(empty_tree)
-    def apply(elems: String*): Lexicon = empty ++ elems
-  }
-
-  final class Lexicon private(main_tree: Lexicon.Tree) extends RegexParsers
-  {
-    import Lexicon.Tree
-
-
-    /* auxiliary operations */
-
-    private def content(tree: Tree, result: List[String]): List[String] =
-      (result /: tree.branches.toList) ((res, entry) =>
-        entry match { case (_, (s, tr)) =>
-          if (s.isEmpty) content(tr, res) else content(tr, s :: res) })
-
-    private def lookup(str: CharSequence): Option[(Boolean, Tree)] =
-    {
-      val len = str.length
-      def look(tree: Tree, tip: Boolean, i: Int): Option[(Boolean, Tree)] =
-      {
-        if (i < len) {
-          tree.branches.get(str.charAt(i)) match {
-            case Some((s, tr)) => look(tr, !s.isEmpty, i + 1)
-            case None => None
-          }
-        } else Some(tip, tree)
-      }
-      look(main_tree, false, 0)
-    }
-
-    def completions(str: CharSequence): List[String] =
-      lookup(str) match {
-        case Some((true, tree)) => content(tree, List(str.toString))
-        case Some((false, tree)) => content(tree, Nil)
-        case None => Nil
-      }
+  /** parser combinators **/
 
-
-    /* pseudo Set methods */
-
-    def iterator: Iterator[String] = content(main_tree, Nil).sorted.iterator
-
-    override def toString: String = iterator.mkString("Lexicon(", ", ", ")")
-
-    def empty: Lexicon = Lexicon.empty
-    def isEmpty: Boolean = main_tree.branches.isEmpty
-
-    def contains(elem: String): Boolean =
-      lookup(elem) match {
-        case Some((tip, _)) => tip
-        case _ => false
-      }
-
-
-    /* add elements */
+  object Parsers extends Parsers
 
-    def + (elem: String): Lexicon =
-      if (contains(elem)) this
-      else {
-        val len = elem.length
-        def extend(tree: Tree, i: Int): Tree =
-          if (i < len) {
-            val c = elem.charAt(i)
-            val end = (i + 1 == len)
-            tree.branches.get(c) match {
-              case Some((s, tr)) =>
-                Tree(tree.branches +
-                  (c -> (if (end) elem else s, extend(tr, i + 1))))
-              case None =>
-                Tree(tree.branches +
-                  (c -> (if (end) elem else "", extend(Lexicon.empty_tree, i + 1))))
-            }
-          }
-          else tree
-        new Lexicon(extend(main_tree, 0))
-      }
-
-    def ++ (elems: TraversableOnce[String]): Lexicon = (this /: elems)(_ + _)
-
-
-
-    /** RegexParsers methods **/
-
+  trait Parsers extends RegexParsers
+  {
     override val whiteSpace = "".r
 
 
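The hunk above inverts the old structure: Lexicon no longer extends RegexParsers, and the combinators move into a reusable Parsers trait with a single stateless Parsers object as its default instance. A minimal, self-contained sketch of the resulting shape (MiniLexicon, MiniParsers and Demo are illustrative names, not part of this patch; it assumes the scala.util.parsing API, which shipped with the Scala standard library at the time):

    import scala.util.parsing.combinator.RegexParsers

    object Demo {
      // plain data: no parsing superclass, just a membership test
      final case class MiniLexicon(words: Set[String]) {
        def contains(s: String): Boolean = words(s)
      }

      // combinators live in a trait; one shared, stateless instance
      trait MiniParsers extends RegexParsers {
        override val whiteSpace = "".r  // no implicit whitespace skipping, as above
        def keyword(lexicon: MiniLexicon): Parser[String] =
          """[A-Za-z_]+""".r ^? (
            { case s if lexicon.contains(s) => s },
            s => "keyword expected, found: " + s)
      }
      object MiniParsers extends MiniParsers

      def main(args: Array[String]): Unit = {
        val lex = MiniLexicon(Set("theory", "begin"))
        println(MiniParsers.parse(MiniParsers.keyword(lex), "begin"))  // [1.6] parsed: begin
      }
    }

Separating the data from the parser object is presumably the point of the commit message's reference to the ML version, where scanning combinators are likewise kept apart from the lexicon representation.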
@@ -130,33 +43,6 @@
       p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None)
 
 
-    /* keywords from lexicon */
-
-    def keyword: Parser[String] = new Parser[String]
-    {
-      def apply(in: Input) =
-      {
-        val source = in.source
-        val offset = in.offset
-        val len = source.length - offset
-
-        def scan(tree: Tree, result: String, i: Int): String =
-        {
-          if (i < len) {
-            tree.branches.get(source.charAt(offset + i)) match {
-              case Some((s, tr)) => scan(tr, if (s.isEmpty) result else s, i + 1)
-              case None => result
-            }
-          }
-          else result
-        }
-        val result = scan(main_tree, "", 0)
-        if (result.isEmpty) Failure("keyword expected", in)
-        else Success(result, in.drop(result.length))
-      }
-    }.named("keyword")
-
-
     /* symbol range */
 
     def symbol_range(pred: Symbol.Symbol => Boolean, min_count: Int, max_count: Int): Parser[String] =
@@ -391,6 +277,19 @@
     }
 
 
+    /* keyword */
+
+    def keyword(lexicon: Lexicon): Parser[String] = new Parser[String]
+    {
+      def apply(in: Input) =
+      {
+        val result = lexicon.scan(in)
+        if (result.isEmpty) Failure("keyword expected", in)
+        else Success(result, in.drop(result.length))
+      }
+    }.named("keyword")
+
+
     /* outer syntax tokens */
 
     private def delimited_token: Parser[Token] =
@@ -404,7 +303,7 @@
       string | (alt_string | (verb | (cart | cmt)))
     }
 
-    private def other_token(is_command: String => Boolean)
+    private def other_token(lexicon: Lexicon, is_command: String => Boolean)
       : Parser[Token] =
     {
       val letdigs1 = many1(Symbol.is_letdig)
@@ -434,7 +333,8 @@
           (x => Token(Token.Kind.SYM_IDENT, x))
 
       val command_keyword =
-        keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
+        keyword(lexicon) ^^
+          (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
 
       val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))
 
@@ -451,10 +351,11 @@
           command_keyword) | bad))
     }
 
-    def token(is_command: String => Boolean): Parser[Token] =
-      delimited_token | other_token(is_command)
+    def token(lexicon: Lexicon, is_command: String => Boolean): Parser[Token] =
+      delimited_token | other_token(lexicon, is_command)
 
-    def token_context(is_command: String => Boolean, ctxt: Context): Parser[(Token, Context)] =
+    def token_context(lexicon: Lexicon, is_command: String => Boolean, ctxt: Context)
+      : Parser[(Token, Context)] =
     {
       val string = quoted_context("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) }
 
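The hunks above thread the lexicon through keyword, other_token, token and token_context as an explicit parameter; the final scan.scala hunk below adjusts the body of token_context accordingly. A usage sketch against the patched API (the lexicon contents are made up):

    val lexicon = Scan.Lexicon("lemma", "theorem")

    // before this patch: lexicon.parse(lexicon.keyword, "theorem foo")
    // after this patch:  the shared object takes the lexicon explicitly
    Scan.Parsers.parse(Scan.Parsers.keyword(lexicon), "theorem foo") match {
      case Scan.Parsers.Success(kw, _) => println("keyword: " + kw)  // keyword: theorem
      case result => println(result)
    }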
@@ -463,9 +364,120 @@
       val verb = verbatim_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) }
       val cart = cartouche_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.CARTOUCHE, x), c) }
       val cmt = comment_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.COMMENT, x), c) }
-      val other = other_token(is_command) ^^ { case x => (x, Finished) }
+      val other = other_token(lexicon, is_command) ^^ { case x => (x, Finished) }
 
       string | (alt_string | (verb | (cart | (cmt | other))))
     }
   }
+
+
+
+  /** Lexicon -- position tree **/
+
+  object Lexicon
+  {
+    /* representation */
+
+    private sealed case class Tree(val branches: Map[Char, (String, Tree)])
+    private val empty_tree = Tree(Map())
+
+    val empty: Lexicon = new Lexicon(empty_tree)
+    def apply(elems: String*): Lexicon = empty ++ elems
+  }
+
+  final class Lexicon private(rep: Lexicon.Tree)
+  {
+    /* auxiliary operations */
+
+    private def content(tree: Lexicon.Tree, result: List[String]): List[String] =
+      (result /: tree.branches.toList) ((res, entry) =>
+        entry match { case (_, (s, tr)) =>
+          if (s.isEmpty) content(tr, res) else content(tr, s :: res) })
+
+    private def lookup(str: CharSequence): Option[(Boolean, Lexicon.Tree)] =
+    {
+      val len = str.length
+      def look(tree: Lexicon.Tree, tip: Boolean, i: Int): Option[(Boolean, Lexicon.Tree)] =
+      {
+        if (i < len) {
+          tree.branches.get(str.charAt(i)) match {
+            case Some((s, tr)) => look(tr, !s.isEmpty, i + 1)
+            case None => None
+          }
+        } else Some(tip, tree)
+      }
+      look(rep, false, 0)
+    }
+
+    def completions(str: CharSequence): List[String] =
+      lookup(str) match {
+        case Some((true, tree)) => content(tree, List(str.toString))
+        case Some((false, tree)) => content(tree, Nil)
+        case None => Nil
+      }
+
+
+    /* pseudo Set methods */
+
+    def iterator: Iterator[String] = content(rep, Nil).sorted.iterator
+
+    override def toString: String = iterator.mkString("Lexicon(", ", ", ")")
+
+    def empty: Lexicon = Lexicon.empty
+    def isEmpty: Boolean = rep.branches.isEmpty
+
+    def contains(elem: String): Boolean =
+      lookup(elem) match {
+        case Some((tip, _)) => tip
+        case _ => false
+      }
+
+
+    /* add elements */
+
+    def + (elem: String): Lexicon =
+      if (contains(elem)) this
+      else {
+        val len = elem.length
+        def extend(tree: Lexicon.Tree, i: Int): Lexicon.Tree =
+          if (i < len) {
+            val c = elem.charAt(i)
+            val end = (i + 1 == len)
+            tree.branches.get(c) match {
+              case Some((s, tr)) =>
+                Lexicon.Tree(tree.branches +
+                  (c -> (if (end) elem else s, extend(tr, i + 1))))
+              case None =>
+                Lexicon.Tree(tree.branches +
+                  (c -> (if (end) elem else "", extend(Lexicon.empty_tree, i + 1))))
+            }
+          }
+          else tree
+        new Lexicon(extend(rep, 0))
+      }
+
+    def ++ (elems: TraversableOnce[String]): Lexicon = (this /: elems)(_ + _)
+
+
+    /* scan */
+
+    def scan(in: Reader[Char]): String =
+    {
+      val source = in.source
+      val offset = in.offset
+      val len = source.length - offset
+
+      def scan_tree(tree: Lexicon.Tree, result: String, i: Int): String =
+      {
+        if (i < len) {
+          tree.branches.get(source.charAt(offset + i)) match {
+            case Some((s, tr)) => scan_tree(tr, if (s.isEmpty) result else s, i + 1)
+            case None => result
+          }
+        }
+        else result
+      }
+      scan_tree(rep, "", 0)
+    }
+  }
 }
diff -r 74db756853d4 -r 28d4db6c6e79 src/Pure/Isar/completion.scala
--- a/src/Pure/Isar/completion.scala Fri Feb 14 14:52:50 2014 +0100
+++ b/src/Pure/Isar/completion.scala Fri Feb 14 15:42:27 2014 +0100
@@ -185,8 +185,8 @@
     line: CharSequence): Option[Completion.Result] =
   {
     val raw_result =
-      abbrevs_lex.parse(abbrevs_lex.keyword, new Library.Reverse(line)) match {
-        case abbrevs_lex.Success(reverse_a, _) =>
+      Scan.Parsers.parse(Scan.Parsers.keyword(abbrevs_lex), new Library.Reverse(line)) match {
+        case Scan.Parsers.Success(reverse_a, _) =>
           val abbrevs = abbrevs_map.get_list(reverse_a)
           abbrevs match {
             case Nil => None
diff -r 74db756853d4 -r 28d4db6c6e79 src/Pure/Isar/outer_syntax.scala
--- a/src/Pure/Isar/outer_syntax.scala Fri Feb 14 14:52:50 2014 +0100
+++ b/src/Pure/Isar/outer_syntax.scala Fri Feb 14 15:42:27 2014 +0100
@@ -128,10 +128,8 @@
 
   def scan(input: Reader[Char]): List[Token] =
   {
-    import lexicon._
-
-    parseAll(rep(token(is_command)), input) match {
-      case Success(tokens, _) => tokens
+    Scan.Parsers.parseAll(Scan.Parsers.rep(Scan.Parsers.token(lexicon, is_command)), input) match {
+      case Scan.Parsers.Success(tokens, _) => tokens
       case _ => error("Unexpected failure of tokenizing input:\n" + input.source.toString)
     }
   }
@@ -141,15 +139,13 @@
 
   def scan_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) =
   {
-    import lexicon._
-
     var in: Reader[Char] = new CharSequenceReader(input)
     val toks = new mutable.ListBuffer[Token]
     var ctxt = context
     while (!in.atEnd) {
-      parse(token_context(is_command, ctxt), in) match {
-        case Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
-        case NoSuccess(_, rest) =>
+      Scan.Parsers.parse(Scan.Parsers.token_context(lexicon, is_command, ctxt), in) match {
+        case Scan.Parsers.Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
+        case Scan.Parsers.NoSuccess(_, rest) =>
           error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
       }
     }
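The outer_syntax.scala changes above also show why the import lexicon._ idiom had to go: result constructors such as Success were path-dependent inner types of a particular Lexicon instance, whereas Scan.Parsers is a single object whose result types can be named directly at any call site. A hedged sketch of that pattern (tokenize_line is a hypothetical helper, not in this patch; Token and Scan come from the patched Isabelle/Scala sources):

    import scala.util.parsing.input.CharSequenceReader

    // no import of a parser instance needed: Scan.Parsers.Success is a stable path
    def tokenize_line(lexicon: Scan.Lexicon, is_command: String => Boolean,
        input: CharSequence): List[Token] =
      Scan.Parsers.parseAll(
        Scan.Parsers.rep(Scan.Parsers.token(lexicon, is_command)),
        new CharSequenceReader(input)) match {
          case Scan.Parsers.Success(tokens, _) => tokens
          case bad => sys.error("tokenization failed: " + bad)
        }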
diff -r 74db756853d4 -r 28d4db6c6e79 src/Pure/Isar/token.scala
--- a/src/Pure/Isar/token.scala Fri Feb 14 14:52:50 2014 +0100
+++ b/src/Pure/Isar/token.scala Fri Feb 14 15:42:27 2014 +0100
@@ -100,11 +100,11 @@
   def is_end: Boolean = is_keyword && source == "end"
 
   def content: String =
-    if (kind == Token.Kind.STRING) Scan.Lexicon.empty.quoted_content("\"", source)
-    else if (kind == Token.Kind.ALT_STRING) Scan.Lexicon.empty.quoted_content("`", source)
-    else if (kind == Token.Kind.VERBATIM) Scan.Lexicon.empty.verbatim_content(source)
-    else if (kind == Token.Kind.CARTOUCHE) Scan.Lexicon.empty.cartouche_content(source)
-    else if (kind == Token.Kind.COMMENT) Scan.Lexicon.empty.comment_content(source)
+    if (kind == Token.Kind.STRING) Scan.Parsers.quoted_content("\"", source)
+    else if (kind == Token.Kind.ALT_STRING) Scan.Parsers.quoted_content("`", source)
+    else if (kind == Token.Kind.VERBATIM) Scan.Parsers.verbatim_content(source)
+    else if (kind == Token.Kind.CARTOUCHE) Scan.Parsers.cartouche_content(source)
+    else if (kind == Token.Kind.COMMENT) Scan.Parsers.comment_content(source)
     else source
 
   def text: (String, String) =
diff -r 74db756853d4 -r 28d4db6c6e79 src/Pure/Thy/thy_header.scala
--- a/src/Pure/Thy/thy_header.scala Fri Feb 14 14:52:50 2014 +0100
+++ b/src/Pure/Thy/thy_header.scala Fri Feb 14 15:42:27 2014 +0100
@@ -82,13 +82,13 @@
 
   def read(reader: Reader[Char]): Thy_Header =
   {
-    val token = lexicon.token(_ => false)
+    val token = Scan.Parsers.token(lexicon, _ => false)
     val toks = new mutable.ListBuffer[Token]
 
     @tailrec def scan_to_begin(in: Reader[Char])
     {
      token(in) match {
-        case lexicon.Success(tok, rest) =>
+        case Scan.Parsers.Success(tok, rest) =>
          toks += tok
          if (!tok.is_begin) scan_to_begin(rest)
        case _ =>
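Taken together, Lexicon is now pure data with set-like operations plus the new scan method that Parsers.keyword builds on. An illustrative sketch of the relocated API (the lexicon contents are made up; the trailing comments show the expected results):

    import scala.util.parsing.input.CharSequenceReader

    val lexicon = Scan.Lexicon("theory", "imports", "begin")

    lexicon.contains("imports")  // true
    lexicon.completions("th")    // List("theory")

    // longest-match scan from a reader position, as used by Scan.Parsers.keyword
    lexicon.scan(new CharSequenceReader("begin_of_text"))  // "begin"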