# HG changeset patch # User wenzelm # Date 1308237916 -7200 # Node ID 0206466ee473cdefe312ef3eb504775b361abe04 # Parent 957617fe07650e1879a05e9d6fa96580c027a58f some support for partial scans with explicit context; clarified junk vs. junk1; diff -r 957617fe0765 -r 0206466ee473 src/Pure/General/scan.scala --- a/src/Pure/General/scan.scala Thu Jun 16 11:59:29 2011 +0200 +++ b/src/Pure/General/scan.scala Thu Jun 16 17:25:16 2011 +0200 @@ -18,6 +18,16 @@ object Scan { + /** context of partial scans **/ + + sealed abstract class Context + case object Finished extends Context + case class Quoted(quote: String) extends Context + case object Verbatim extends Context + case class Comment(depth: Int) extends Context + + + /** Lexicon -- position tree **/ object Lexicon @@ -116,6 +126,12 @@ override val whiteSpace = "".r + /* optional termination */ + + def opt_term[T](p: => Parser[T]): Parser[Option[T]] = + p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None) + + /* keywords from lexicon */ def keyword: Parser[String] = new Parser[String] @@ -178,12 +194,15 @@ /* quoted strings */ + private def quoted_body(quote: String): Parser[String] = + { + rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" | + (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ^^ (_.mkString) + } + private def quoted(quote: String): Parser[String] = { - quote ~ - rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" | - (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~ - quote ^^ { case x ~ ys ~ z => x + ys.mkString + z } + quote ~ quoted_body(quote) ~ quote ^^ { case x ~ y ~ z => x + y + z } }.named("quoted") def quoted_content(quote: String, source: String): String = @@ -199,13 +218,30 @@ else body } + def quoted_context(quote: String, ctxt: Context): Parser[(String, Context)] = + { + ctxt match { + case Finished => + quote ~ quoted_body(quote) ~ opt_term(quote) ^^ + { case x ~ y ~ Some(z) => (x + y + z, Finished) + case x ~ y ~ None => (x + y, Quoted(quote)) } + case Quoted(q) if q == quote => + quoted_body(quote) ~ opt_term(quote) ^^ + { case x ~ Some(y) => (x + y, Finished) + case x ~ None => (x, ctxt) } + case _ => failure("") + } + }.named("quoted_context") + /* verbatim text */ + private def verbatim_body: Parser[String] = + rep(many1(sym => sym != "*") | """\*(?!\})""".r) ^^ (_.mkString) + private def verbatim: Parser[String] = { - "{*" ~ rep(many1(sym => sym != "*") | """\*(?!\})""".r) ~ "*}" ^^ - { case x ~ ys ~ z => x + ys.mkString + z } + "{*" ~ verbatim_body ~ "*}" ^^ { case x ~ y ~ z => x + y + z } }.named("verbatim") def verbatim_content(source: String): String = @@ -214,6 +250,21 @@ source.substring(2, source.length - 2) } + def verbatim_context(ctxt: Context): Parser[(String, Context)] = + { + ctxt match { + case Finished => + "{*" ~ verbatim_body ~ opt_term("*}") ^^ + { case x ~ y ~ Some(z) => (x + y + z, Finished) + case x ~ y ~ None => (x + y, Verbatim) } + case Verbatim => + verbatim_body ~ opt_term("*}") ^^ + { case x ~ Some(y) => (x + y, Finished) + case x ~ None => (x, Verbatim) } + case _ => failure("") + } + }.named("verbatim_context") + /* nested comments */ @@ -254,7 +305,18 @@ /* outer syntax tokens */ - def token(symbols: Symbol.Interpretation, is_command: String => Boolean): Parser[Token] = + private def delimited_token: Parser[Token] = + { + val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x)) + val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x)) + val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x)) + val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x)) + + string | (alt_string | (verb | cmt)) + } + + private def other_token(symbols: Symbol.Interpretation, is_command: String => Boolean) + : Parser[Token] = { val id = one(symbols.is_letter) ~ many(symbols.is_letdig) ^^ { case x ~ y => x + y } val nat = many1(symbols.is_digit) @@ -278,23 +340,43 @@ val space = many1(symbols.is_blank) ^^ (x => Token(Token.Kind.SPACE, x)) - val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x)) - val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x)) + // FIXME check + val junk = many(sym => !(symbols.is_blank(sym))) + val junk1 = many1(sym => !(symbols.is_blank(sym))) - val junk = many1(sym => !(symbols.is_blank(sym))) val bad_delimiter = ("\"" | "`" | "{*" | "(*") ~ junk ^^ { case x ~ y => Token(Token.Kind.UNPARSED, x + y) } - val bad = junk ^^ (x => Token(Token.Kind.UNPARSED, x)) + val bad = junk1 ^^ (x => Token(Token.Kind.UNPARSED, x)) + + val command_keyword = + keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x)) + space | (bad_delimiter | + (((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) ||| + command_keyword) | bad)) + } - /* tokens */ + def token(symbols: Symbol.Interpretation, is_command: String => Boolean): Parser[Token] = + delimited_token | other_token(symbols, is_command) - (space | (string | (alt_string | (verbatim ^^ (x => Token(Token.Kind.VERBATIM, x)) | - comment ^^ (x => Token(Token.Kind.COMMENT, x)))))) | - bad_delimiter | - ((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) ||| - keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))) | - bad + def token_context(symbols: Symbol.Interpretation, is_command: String => Boolean, ctxt: Context) + : Parser[(Token, Context)] = + { + val string = + quoted_context("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) } + val alt_string = + quoted_context("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) } + val verb = + verbatim_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) } + // FIXME comment_context + val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x)) + val other: Parser[(Token, Context)] = + ctxt match { + case Finished => (cmt | other_token(symbols, is_command)) ^^ { case x => (x, Finished) } + case _ => failure("") + } + + string | (alt_string | (verb | other)) } } diff -r 957617fe0765 -r 0206466ee473 src/Pure/Isar/outer_syntax.scala --- a/src/Pure/Isar/outer_syntax.scala Thu Jun 16 11:59:29 2011 +0200 +++ b/src/Pure/Isar/outer_syntax.scala Thu Jun 16 17:25:16 2011 +0200 @@ -8,6 +8,7 @@ import scala.util.parsing.input.{Reader, CharSequenceReader} +import scala.collection.mutable class Outer_Syntax(symbols: Symbol.Interpretation) @@ -73,4 +74,21 @@ def scan(input: CharSequence): List[Token] = scan(new CharSequenceReader(input)) + + def scan_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) = + { + import lexicon._ + + var in: Reader[Char] = new CharSequenceReader(input) + val toks = new mutable.ListBuffer[Token] + var ctxt = context + while (!in.atEnd) { + parse(token_context(symbols, is_command, ctxt), in) match { + case Success((x, c), rest) => { toks += x; ctxt = c; in = rest } + case NoSuccess(_, rest) => + error("Unexpected failure of tokenizing input:\n" + rest.source.toString) + } + } + (toks.toList, ctxt) + } }