# HG changeset patch
# User wenzelm
# Date 1392472371 -3600
# Node ID 2581fbee5b9565f9764fb84deb09a7b99d413e6d
# Parent cf829d10d1d41ece4310a57cc678adc272c77ec3
partial scans via ML_Lex.tokenize_context;
simplified ML_char: no gaps (hardly relevant in practice);
tuned;

diff -r cf829d10d1d4 -r 2581fbee5b95 src/Pure/General/scan.scala
--- a/src/Pure/General/scan.scala	Fri Feb 14 22:03:48 2014 +0100
+++ b/src/Pure/General/scan.scala	Sat Feb 15 14:52:51 2014 +0100
@@ -17,9 +17,9 @@
 
 object Scan
 {
-  /** context of partial scans **/
+  /** context of partial scans (line boundary) **/
 
-  sealed abstract class Context
+  abstract class Context
   case object Finished extends Context
   case class Quoted(quote: String) extends Context
   case object Verbatim extends Context
diff -r cf829d10d1d4 -r 2581fbee5b95 src/Pure/ML/ml_lex.scala
--- a/src/Pure/ML/ml_lex.scala	Fri Feb 14 22:03:48 2014 +0100
+++ b/src/Pure/ML/ml_lex.scala	Sat Feb 15 14:52:51 2014 +0100
@@ -6,6 +6,8 @@
 
 package isabelle
 
+
+import scala.collection.mutable
 import scala.util.parsing.input.{Reader, CharSequenceReader}
 
 
@@ -35,6 +37,8 @@
 
   /** parsers **/
 
+  case object ML_String extends Scan.Context
+
   private val lexicon =
     Scan.Lexicon("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=", "=>",
       "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as",
@@ -50,6 +54,9 @@
 
     private val blanks1 = many1(character(Symbol.is_ascii_blank))
 
+    private val gap = "\\" ~ blanks1 ~ "\\" ^^ { case x ~ y ~ z => x + y + z }
+    private val gap_start = "\\" ~ blanks1 ~ """\z""".r ^^ { case x ~ y ~ _ => x + y }
+
     private val escape =
       one(character("\"\\abtnvfr".contains(_))) |
       "^" ~ one(character(c => '@' <= c && c <= '_')) ^^ { case x ~ y => x + y } |
@@ -59,26 +66,62 @@
       one(character(c => c != '"' && c != '\\' && ' ' <= c && c <= '~')) |
       "\\" ~ escape ^^ { case x ~ y => x + y }
 
-    private val gap = "\\" ~ blanks1 ~ "\\" ^^ { case x ~ y ~ z => x + y + z }
-    private val gaps = rep(gap) ^^ (_.mkString)
+
+    /* ML char -- without gaps */
+
+    private val ml_char: Parser[Token] =
+      "#\"" ~ str ~ "\"" ^^ { case x ~ y ~ z => Token(Kind.CHAR, x + y + z) }
+
+    private val recover_ml_char: Parser[String] =
+      "#\"" ~ opt(str) ^^ { case x ~ Some(y) => x + y case x ~ None => x }
+
+
+    /* ML string */
+
+    private val ml_string_body: Parser[String] =
+      rep(gap | str) ^^ (_.mkString)
+
+    private val recover_ml_string: Parser[String] =
+      "\"" ~ ml_string_body ^^ { case x ~ y => x + y }
+
+    private val ml_string: Parser[Token] =
+      "\"" ~ ml_string_body ~ "\"" ^^ { case x ~ y ~ z => Token(Kind.STRING, x + y + z) }
+
+    private def ml_string_context(ctxt: Scan.Context): Parser[(Token, Scan.Context)] =
+    {
+      def result(x: String, c: Scan.Context) = (Token(Kind.STRING, x), c)
+
+      ctxt match {
+        case Scan.Finished =>
+          "\"" ~ ml_string_body ~ ("\"" | gap_start) ^^
+            { case x ~ y ~ z => result(x + y + z, if (z == "\"") Scan.Finished else ML_String) }
+        case ML_String =>
+          blanks1 ~ opt_term("\\" ~ ml_string_body ~ ("\"" | gap_start)) ^^
+            { case x ~ Some(y ~ z ~ w) =>
+                result(x + y + z + w, if (w == "\"") Scan.Finished else ML_String)
+              case x ~ None => result(x, ML_String) }
+        case _ => failure("")
+      }
+    }
+
+
+    /* ML comment */
+
+    private val ml_comment: Parser[Token] =
+      comment ^^ (x => Token(Kind.COMMENT, x))
+
+    private def ml_comment_context(ctxt: Scan.Context): Parser[(Token, Scan.Context)] =
+      comment_context(ctxt) ^^ { case (x, c) => (Token(Kind.COMMENT, x), c) }
 
 
     /* delimited token */
 
     private def delimited_token: Parser[Token] =
-    {
-      val char =
-        "#\"" ~ gaps ~ str ~ gaps ~ "\"" ^^
-          { case u ~ v ~ x ~ y ~ z => Token(Kind.CHAR, u + v + x + y + z) }
+      ml_char | (ml_string | ml_comment)
 
-      val string =
-        "\"" ~ (rep(gap | str) ^^ (_.mkString)) ~ "\"" ^^
-          { case x ~ y ~ z => Token(Kind.STRING, x + y + z) }
+    private val recover_delimited: Parser[Token] =
+      (recover_ml_char | (recover_ml_string | recover_comment)) ^^ (x => Token(Kind.ERROR, x))
 
-      val cmt = comment ^^ (x => Token(Kind.COMMENT, x))
-
-      char | (string | cmt)
-    }
 
     private def other_token: Parser[Token] =
     {
@@ -123,20 +166,7 @@
         decint ~ exp ^^ { case x ~ y => x + y }) ^^ (x => Token(Kind.REAL, x))
 
 
-      /* recover delimited */
-
-      val recover_char =
-        "#\"" ~ gaps ~ (opt(str ~ gaps) ^^ { case Some(x ~ y) => x + y case None => "" }) ^^
-          { case x ~ y ~ z => x + y + z }
-
-      val recover_string =
-        "\"" ~ (rep(gap | str) ^^ (_.mkString)) ^^ { case x ~ y => x + y }
-
-      val recover_delimited =
-        (recover_char | (recover_string | recover_comment)) ^^ (x => Token(Kind.ERROR, x))
-
-
-      /* token */
+      /* main */
 
       val space = blanks1 ^^ (x => Token(Kind.SPACE, x))
 
@@ -148,9 +178,22 @@
         (((word | (real | (int | (long_ident | (ident | type_var))))) ||| keyword) | bad))
     }
 
+
+    /* token */
+
     def token: Parser[Token] = delimited_token | other_token
+
+    def token_context(ctxt: Scan.Context): Parser[(Token, Scan.Context)] =
+    {
+      val other = (ml_char | other_token) ^^ (x => (x, Scan.Finished))
+
+      ml_string_context(ctxt) | (ml_comment_context(ctxt) | other)
+    }
   }
 
+
+  /* tokenize */
+
   def tokenize(input: CharSequence): List[Token] =
   {
     Parsers.parseAll(Parsers.rep(Parsers.token), new CharSequenceReader(input)) match {
@@ -158,5 +201,20 @@
       case Parsers.Success(tokens, _) => tokens
       case _ => error("Unexpected failure of tokenizing input:\n" + input.toString)
     }
   }
+
+  def tokenize_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) =
+  {
+    var in: Reader[Char] = new CharSequenceReader(input)
+    val toks = new mutable.ListBuffer[Token]
+    var ctxt = context
+    while (!in.atEnd) {
+      Parsers.parse(Parsers.token_context(ctxt), in) match {
+        case Parsers.Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
+        case Parsers.NoSuccess(_, rest) =>
+          error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
+      }
+    }
+    (toks.toList, ctxt)
+  }
 }
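
Usage sketch (not part of the changeset): the point of tokenize_context is that a caller can scan ML source one line at a time and thread the Scan.Context across line boundaries, apparently in support of line-oriented scanning (compare the new "(line boundary)" comment on Scan.Context). The hypothetical driver below assumes only the public names introduced above (ML_Lex.tokenize_context, ML_Lex.ML_String, Scan.Finished, ML_Lex.Token). An ML string left open by a gap start -- a "\" followed by blanks at the end of the line -- is resumed on the following line, where plain ML_Lex.tokenize applied to either fragment alone would fall back on recovery and yield ERROR tokens.

  // hypothetical driver: line-wise scanning with explicit context threading
  // line 1 opens an ML string and ends in a gap start ("\" + blank + end of input);
  // line 2 closes the gap with "\" and then terminates the string
  val lines = List("val s = \"abc\\ ", "  \\def\";")

  var ctxt: Scan.Context = Scan.Finished
  for (line <- lines) {
    val (toks, ctxt1) = ML_Lex.tokenize_context(line, ctxt)
    ctxt = ctxt1  // ML_Lex.ML_String after line 1, Scan.Finished after line 2
    for (tok <- toks) println(tok.kind.toString + ": " + tok.source)
  }

After line 1 the resulting context is ML_String, so on line 2 the leading blanks and the "\" that closes the gap are still emitted as a Kind.STRING token before scanning returns to the Finished context; only then is the trailing ";" scanned as an ordinary keyword token.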