changeset:   43411:0206466ee473
parent:      43410:957617fe0765
child:       43412:81517eed8a78
author:      wenzelm
date:        Thu, 16 Jun 2011 17:25:16 +0200
summary:     some support for partial scans with explicit context; clarified junk vs. junk1;
files:       src/Pure/General/scan.scala
             src/Pure/Isar/outer_syntax.scala
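
The core device behind these partial scans is an explicit Scan.Context that records whether the previous chunk stopped inside a quoted string, verbatim text, or (eventually) a comment, together with an opt_term combinator that accepts end of input as an alternative to the closing delimiter. The following standalone Scala sketch is not Isabelle code: apart from the Context constructors and opt_term, every name is made up for illustration, and the escape handling of the real quoted_body is omitted.

  import scala.util.parsing.combinator.RegexParsers

  // Standalone illustration of context-threaded partial scanning (not Isabelle code).
  object PartialScanDemo extends RegexParsers {
    override val whiteSpace = "".r   // no implicit whitespace skipping, as in Scan.Lexicon

    sealed abstract class Context
    case object Finished extends Context
    case object Quoted extends Context

    // either the terminator itself, or None when the input ends first
    def opt_term[T](p: => Parser[T]): Parser[Option[T]] =
      p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None)

    private def body: Parser[String] = """[^"]*""".r

    // scan one double-quoted string relative to the given context
    def quoted(ctxt: Context): Parser[(String, Context)] =
      ctxt match {
        case Finished =>
          "\"" ~ body ~ opt_term("\"") ^^ {
            case x ~ y ~ Some(z) => (x + y + z, Finished)
            case x ~ y ~ None => (x + y, Quoted)
          }
        case Quoted =>
          body ~ opt_term("\"") ^^ {
            case x ~ Some(y) => (x + y, Finished)
            case x ~ None => (x, Quoted)
          }
      }

    def scan_chunk(chunk: String, ctxt: Context): (String, Context) =
      parseAll(quoted(ctxt), chunk) match {
        case Success(result, _) => result
        case failure => sys.error(failure.toString)
      }

    def main(args: Array[String]): Unit = {
      val (s1, c1) = scan_chunk("\"partial te", Finished)  // chunk ends inside the string
      val (s2, c2) = scan_chunk("xt\"", c1)                // resume with the Quoted context
      println((s1, c1))  // opening quote and partial body, context Quoted
      println((s2, c2))  // rest of the body and closing quote, back to Finished
    }
  }

Threading the returned context into the next call is what the new Outer_Syntax.scan_context below does for full token streams.
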
--- a/src/Pure/General/scan.scala	Thu Jun 16 11:59:29 2011 +0200
+++ b/src/Pure/General/scan.scala	Thu Jun 16 17:25:16 2011 +0200
@@ -18,6 +18,16 @@
 
 object Scan
 {
+  /** context of partial scans **/
+
+  sealed abstract class Context
+  case object Finished extends Context
+  case class Quoted(quote: String) extends Context
+  case object Verbatim extends Context
+  case class Comment(depth: Int) extends Context
+
+
+
   /** Lexicon -- position tree **/
 
   object Lexicon
@@ -116,6 +126,12 @@
     override val whiteSpace = "".r
 
 
+    /* optional termination */
+
+    def opt_term[T](p: => Parser[T]): Parser[Option[T]] =
+      p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None)
+
+
     /* keywords from lexicon */
 
     def keyword: Parser[String] = new Parser[String]
@@ -178,12 +194,15 @@
 
     /* quoted strings */
 
+    private def quoted_body(quote: String): Parser[String] =
+    {
+      rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" |
+        (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ^^ (_.mkString)
+    }
+
     private def quoted(quote: String): Parser[String] =
     {
-      quote ~
-        rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" |
-          (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~
-      quote ^^ { case x ~ ys ~ z => x + ys.mkString + z }
+      quote ~ quoted_body(quote) ~ quote ^^ { case x ~ y ~ z => x + y + z }
     }.named("quoted")
 
     def quoted_content(quote: String, source: String): String =
@@ -199,13 +218,30 @@
       else body
     }
 
+    def quoted_context(quote: String, ctxt: Context): Parser[(String, Context)] =
+    {
+      ctxt match {
+        case Finished =>
+          quote ~ quoted_body(quote) ~ opt_term(quote) ^^
+            { case x ~ y ~ Some(z) => (x + y + z, Finished)
+              case x ~ y ~ None => (x + y, Quoted(quote)) }
+        case Quoted(q) if q == quote =>
+          quoted_body(quote) ~ opt_term(quote) ^^
+            { case x ~ Some(y) => (x + y, Finished)
+              case x ~ None => (x, ctxt) }
+        case _ => failure("")
+      }
+    }.named("quoted_context")
+
 
     /* verbatim text */
 
+    private def verbatim_body: Parser[String] =
+      rep(many1(sym => sym != "*") | """\*(?!\})""".r) ^^ (_.mkString)
+
     private def verbatim: Parser[String] =
     {
-      "{*" ~ rep(many1(sym => sym != "*") | """\*(?!\})""".r) ~ "*}" ^^
-        { case x ~ ys ~ z => x + ys.mkString + z }
+      "{*" ~ verbatim_body ~ "*}" ^^ { case x ~ y ~ z => x + y + z }
     }.named("verbatim")
 
     def verbatim_content(source: String): String =
@@ -214,6 +250,21 @@
       source.substring(2, source.length - 2)
     }
 
+    def verbatim_context(ctxt: Context): Parser[(String, Context)] =
+    {
+      ctxt match {
+        case Finished =>
+          "{*" ~ verbatim_body ~ opt_term("*}") ^^
+            { case x ~ y ~ Some(z) => (x + y + z, Finished)
+              case x ~ y ~ None => (x + y, Verbatim) }
+        case Verbatim =>
+          verbatim_body ~ opt_term("*}") ^^
+            { case x ~ Some(y) => (x + y, Finished)
+              case x ~ None => (x, Verbatim) }
+        case _ => failure("")
+      }
+    }.named("verbatim_context")
+
 
     /* nested comments */
 
@@ -254,7 +305,18 @@
 
     /* outer syntax tokens */
 
-    def token(symbols: Symbol.Interpretation, is_command: String => Boolean): Parser[Token] =
+    private def delimited_token: Parser[Token] =
+    {
+      val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
+      val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
+      val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x))
+      val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))
+
+      string | (alt_string | (verb | cmt))
+    }
+
+    private def other_token(symbols: Symbol.Interpretation, is_command: String => Boolean)
+      : Parser[Token] =
     {
       val id = one(symbols.is_letter) ~ many(symbols.is_letdig) ^^ { case x ~ y => x + y }
       val nat = many1(symbols.is_digit)
@@ -278,23 +340,43 @@
 
       val space = many1(symbols.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))
 
-      val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
-      val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
+      // FIXME check
+      val junk = many(sym => !(symbols.is_blank(sym)))
+      val junk1 = many1(sym => !(symbols.is_blank(sym)))
 
-      val junk = many1(sym => !(symbols.is_blank(sym)))
       val bad_delimiter =
         ("\"" | "`" | "{*" | "(*") ~ junk ^^ { case x ~ y => Token(Token.Kind.UNPARSED, x + y) }
-      val bad = junk ^^ (x => Token(Token.Kind.UNPARSED, x))
+      val bad = junk1 ^^ (x => Token(Token.Kind.UNPARSED, x))
+
+      val command_keyword =
+        keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
 
+      space | (bad_delimiter |
+        (((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
+          command_keyword) | bad))
+    }
 
-      /* tokens */
+    def token(symbols: Symbol.Interpretation, is_command: String => Boolean): Parser[Token] =
+      delimited_token | other_token(symbols, is_command)
 
-      (space | (string | (alt_string | (verbatim ^^ (x => Token(Token.Kind.VERBATIM, x)) |
-        comment ^^ (x => Token(Token.Kind.COMMENT, x)))))) |
-      bad_delimiter |
-      ((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
-        keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))) |
-      bad
+    def token_context(symbols: Symbol.Interpretation, is_command: String => Boolean, ctxt: Context)
+      : Parser[(Token, Context)] =
+    {
+      val string =
+        quoted_context("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) }
+      val alt_string =
+        quoted_context("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) }
+      val verb =
+        verbatim_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) }
+      // FIXME comment_context
+      val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))
+      val other: Parser[(Token, Context)] =
+        ctxt match {
+          case Finished => (cmt | other_token(symbols, is_command)) ^^ { case x => (x, Finished) }
+          case _ => failure("")
+        }
+
+      string | (alt_string | (verb | other))
     }
   }
 
--- a/src/Pure/Isar/outer_syntax.scala	Thu Jun 16 11:59:29 2011 +0200
+++ b/src/Pure/Isar/outer_syntax.scala	Thu Jun 16 17:25:16 2011 +0200
@@ -8,6 +8,7 @@
 
 
 import scala.util.parsing.input.{Reader, CharSequenceReader}
+import scala.collection.mutable
 
 
 class Outer_Syntax(symbols: Symbol.Interpretation)
@@ -73,4 +74,21 @@
 
   def scan(input: CharSequence): List[Token] =
     scan(new CharSequenceReader(input))
+
+  def scan_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) =
+  {
+    import lexicon._
+
+    var in: Reader[Char] = new CharSequenceReader(input)
+    val toks = new mutable.ListBuffer[Token]
+    var ctxt = context
+    while (!in.atEnd) {
+      parse(token_context(symbols, is_command, ctxt), in) match {
+        case Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
+        case NoSuccess(_, rest) =>
+          error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
+      }
+    }
+    (toks.toList, ctxt)
+  }
 }
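
For orientation, a hedged usage sketch of the new entry point (assumes the Isabelle/Scala environment; "symbols" stands for some available Symbol.Interpretation, and the input strings are made up):

  val syntax = new Outer_Syntax(symbols)

  // first chunk stops inside a quoted string, so the returned context
  // should be Scan.Quoted("\"")
  val (toks1, ctxt1) = syntax.scan_context("theory A \"some quo", Scan.Finished)

  // the next chunk is scanned relative to that context and closes the string
  val (toks2, ctxt2) = syntax.scan_context("te\" end", ctxt1)
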