--- a/src/Pure/ML/ml_lex.scala Fri Feb 14 22:03:48 2014 +0100
+++ b/src/Pure/ML/ml_lex.scala Sat Feb 15 14:52:51 2014 +0100
@@ -6,6 +6,8 @@
package isabelle
+
+import scala.collection.mutable
import scala.util.parsing.input.{Reader, CharSequenceReader}
@@ -35,6 +37,8 @@
/** parsers **/
+ case object ML_String extends Scan.Context
+
private val lexicon =
Scan.Lexicon("#", "(", ")", ",", "->", "...", ":", ":>", ";", "=",
"=>", "[", "]", "_", "{", "|", "}", "abstype", "and", "andalso", "as",
@@ -50,6 +54,9 @@
private val blanks1 = many1(character(Symbol.is_ascii_blank))
+ private val gap = "\\" ~ blanks1 ~ "\\" ^^ { case x ~ y ~ z => x + y + z }
+ private val gap_start = "\\" ~ blanks1 ~ """\z""".r ^^ { case x ~ y ~ _ => x + y }
+
private val escape =
one(character("\"\\abtnvfr".contains(_))) |
"^" ~ one(character(c => '@' <= c && c <= '_')) ^^ { case x ~ y => x + y } |
@@ -59,26 +66,62 @@
one(character(c => c != '"' && c != '\\' && ' ' <= c && c <= '~')) |
"\\" ~ escape ^^ { case x ~ y => x + y }
- private val gap = "\\" ~ blanks1 ~ "\\" ^^ { case x ~ y ~ z => x + y + z }
- private val gaps = rep(gap) ^^ (_.mkString)
+
+ /* ML char -- without gaps */
+
+ private val ml_char: Parser[Token] =
+ "#\"" ~ str ~ "\"" ^^ { case x ~ y ~ z => Token(Kind.CHAR, x + y + z) }
+
+ private val recover_ml_char: Parser[String] =
+ "#\"" ~ opt(str) ^^ { case x ~ Some(y) => x + y case x ~ None => x }
+
+
+ /* ML string */
+
+ private val ml_string_body: Parser[String] =
+ rep(gap | str) ^^ (_.mkString)
+
+ private val recover_ml_string: Parser[String] =
+ "\"" ~ ml_string_body ^^ { case x ~ y => x + y }
+
+ private val ml_string: Parser[Token] =
+ "\"" ~ ml_string_body ~ "\"" ^^ { case x ~ y ~ z => Token(Kind.STRING, x + y + z) }
+
+ private def ml_string_context(ctxt: Scan.Context): Parser[(Token, Scan.Context)] =
+ {
+ def result(x: String, c: Scan.Context) = (Token(Kind.STRING, x), c)
+
+ ctxt match {
+ case Scan.Finished =>
+ "\"" ~ ml_string_body ~ ("\"" | gap_start) ^^
+ { case x ~ y ~ z => result(x + y + z, if (z == "\"") Scan.Finished else ML_String) }
+ case ML_String =>
+ blanks1 ~ opt_term("\\" ~ ml_string_body ~ ("\"" | gap_start)) ^^
+ { case x ~ Some(y ~ z ~ w) =>
+ result(x + y + z + w, if (w == "\"") Scan.Finished else ML_String)
+ case x ~ None => result(x, ML_String) }
+ case _ => failure("")
+ }
+ }
+
+
+ /* ML comment */
+
+ private val ml_comment: Parser[Token] =
+ comment ^^ (x => Token(Kind.COMMENT, x))
+
+ private def ml_comment_context(ctxt: Scan.Context): Parser[(Token, Scan.Context)] =
+ comment_context(ctxt) ^^ { case (x, c) => (Token(Kind.COMMENT, x), c) }
/* delimited token */
private def delimited_token: Parser[Token] =
- {
- val char =
- "#\"" ~ gaps ~ str ~ gaps ~ "\"" ^^
- { case u ~ v ~ x ~ y ~ z => Token(Kind.CHAR, u + v + x + y + z) }
+ ml_char | (ml_string | ml_comment)
- val string =
- "\"" ~ (rep(gap | str) ^^ (_.mkString)) ~ "\"" ^^
- { case x ~ y ~ z => Token(Kind.STRING, x + y + z) }
+ private val recover_delimited: Parser[Token] =
+ (recover_ml_char | (recover_ml_string | recover_comment)) ^^ (x => Token(Kind.ERROR, x))
- val cmt = comment ^^ (x => Token(Kind.COMMENT, x))
-
- char | (string | cmt)
- }
private def other_token: Parser[Token] =
{
@@ -123,20 +166,7 @@
decint ~ exp ^^ { case x ~ y => x + y }) ^^ (x => Token(Kind.REAL, x))
- /* recover delimited */
-
- val recover_char =
- "#\"" ~ gaps ~ (opt(str ~ gaps) ^^ { case Some(x ~ y) => x + y case None => "" }) ^^
- { case x ~ y ~ z => x + y + z }
-
- val recover_string =
- "\"" ~ (rep(gap | str) ^^ (_.mkString)) ^^ { case x ~ y => x + y }
-
- val recover_delimited =
- (recover_char | (recover_string | recover_comment)) ^^ (x => Token(Kind.ERROR, x))
-
-
- /* token */
+ /* main */
val space = blanks1 ^^ (x => Token(Kind.SPACE, x))
@@ -148,9 +178,22 @@
(((word | (real | (int | (long_ident | (ident | type_var))))) ||| keyword) | bad))
}
+
+ /* token */
+
def token: Parser[Token] = delimited_token | other_token
+
+ def token_context(ctxt: Scan.Context): Parser[(Token, Scan.Context)] =
+ {
+ val other = (ml_char | other_token) ^^ (x => (x, Scan.Finished))
+
+ ml_string_context(ctxt) | (ml_comment_context(ctxt) | other)
+ }
}
+
+ /* tokenize */
+
def tokenize(input: CharSequence): List[Token] =
{
Parsers.parseAll(Parsers.rep(Parsers.token), new CharSequenceReader(input)) match {
@@ -158,5 +201,20 @@
case _ => error("Unexpected failure of tokenizing input:\n" + input.toString)
}
}
+
+ def tokenize_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) =
+ {
+ var in: Reader[Char] = new CharSequenceReader(input)
+ val toks = new mutable.ListBuffer[Token]
+ var ctxt = context
+ while (!in.atEnd) {
+ Parsers.parse(Parsers.token_context(ctxt), in) match {
+ case Parsers.Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
+ case Parsers.NoSuccess(_, rest) =>
+ error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
+ }
+ }
+ (toks.toList, ctxt)
+ }
}
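
Usage sketch (not part of the changeset): the new ML_Lex.tokenize_context is meant to be
driven line by line, threading the returned Scan.Context so that ML strings and comments
may span line boundaries. An SML string "gap" is a backslash, whitespace, and a closing
backslash; gap_start above recognizes the open half at the end of the input, which is why
the ML_String context carries over into the next chunk. The snippet below assumes the
Isabelle/Pure Scala sources on the classpath and that Token exposes kind and source fields
as in ml_lex.scala; the input lines are made up for illustration.

  import isabelle._

  // feed successive source lines to the lexer, carrying the scan context forward
  val lines = List("fun f x = x  (* identity", "   function *) val s = \"ok\"")
  var ctxt: Scan.Context = Scan.Finished
  for (line <- lines) {
    val (toks, ctxt1) = ML_Lex.tokenize_context(line, ctxt)
    toks.foreach(tok => println(tok.kind.toString + ": " + tok.source))
    ctxt = ctxt1  // a pending comment/string context survives into the next line
  }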