--- a/src/Pure/General/scan.scala Thu Jun 16 11:59:29 2011 +0200
+++ b/src/Pure/General/scan.scala Thu Jun 16 17:25:16 2011 +0200
@@ -18,6 +18,16 @@
object Scan
{
+ /** context of partial scans **/
+
+ sealed abstract class Context
+ case object Finished extends Context
+ case class Quoted(quote: String) extends Context
+ case object Verbatim extends Context
+ case class Comment(depth: Int) extends Context
+
+
+
/** Lexicon -- position tree **/
object Lexicon
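The hunk above introduces Scan.Context, which records where a partial scan stopped so that tokenization can resume mid-token on the next chunk of input: Finished means the scan ended at a token boundary, Quoted(quote) inside an unterminated quoted string, Verbatim inside an unterminated {* ... *} block, and Comment(depth) inside nested (* ... *) comments (declared here, though token_context below still handles comments only in the Finished case; see its FIXME). A minimal sketch, not part of the patch, of client code dispatching on the new type:

  def describe(ctxt: Scan.Context): String =
    ctxt match {
      case Scan.Finished => "at a token boundary"
      case Scan.Quoted(q) => "inside an unterminated " + q + "-quoted string"
      case Scan.Verbatim => "inside an unterminated {* ... *} block"
      case Scan.Comment(d) => "inside (* ... *) comments, nesting depth " + d
    }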
@@ -116,6 +126,12 @@
override val whiteSpace = "".r
+    /* optional termination */
+
+    def opt_term[T](p: => Parser[T]): Parser[Option[T]] =
+      p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None)
+
+
/* keywords from lexicon */
def keyword: Parser[String] = new Parser[String]
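The new opt_term combinator makes a terminator optional at end of input: it first tries p and wraps success in Some; failing that, it accepts the regex \z (the end-of-input anchor) and yields None. The *_context parsers added below use this to report an unterminated scan instead of failing outright. A standalone sketch of the same combinator, assuming only scala-parser-combinators (the library Pure already uses):

  import scala.util.parsing.combinator.RegexParsers

  object OptTermDemo extends RegexParsers {
    override val whiteSpace = "".r

    def opt_term[T](p: => Parser[T]): Parser[Option[T]] =
      p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None)

    def main(args: Array[String]) {
      println(parse(opt_term("*}"), "*}"))  // [1.3] parsed: Some(*})
      println(parse(opt_term("*}"), ""))    // [1.1] parsed: None
    }
  }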
@@ -178,12 +194,15 @@
/* quoted strings */
+    private def quoted_body(quote: String): Parser[String] =
+    {
+      rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" |
+        (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ^^ (_.mkString)
+    }
+
private def quoted(quote: String): Parser[String] =
{
- quote ~
- rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" |
- (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~
- quote ^^ { case x ~ ys ~ z => x + ys.mkString + z }
+      quote ~ quoted_body(quote) ~ quote ^^ { case x ~ y ~ z => x + y + z }
}.named("quoted")
def quoted_content(quote: String, source: String): String =
@@ -199,13 +218,30 @@
else body
}
+    def quoted_context(quote: String, ctxt: Context): Parser[(String, Context)] =
+    {
+      ctxt match {
+        case Finished =>
+          quote ~ quoted_body(quote) ~ opt_term(quote) ^^
+            { case x ~ y ~ Some(z) => (x + y + z, Finished)
+              case x ~ y ~ None => (x + y, Quoted(quote)) }
+        case Quoted(q) if q == quote =>
+          quoted_body(quote) ~ opt_term(quote) ^^
+            { case x ~ Some(y) => (x + y, Finished)
+              case x ~ None => (x, ctxt) }
+        case _ => failure("")
+      }
+    }.named("quoted_context")
+
/* verbatim text */
+    private def verbatim_body: Parser[String] =
+      rep(many1(sym => sym != "*") | """\*(?!\})""".r) ^^ (_.mkString)
+
private def verbatim: Parser[String] =
{
- "{*" ~ rep(many1(sym => sym != "*") | """\*(?!\})""".r) ~ "*}" ^^
- { case x ~ ys ~ z => x + ys.mkString + z }
+ "{*" ~ verbatim_body ~ "*}" ^^ { case x ~ y ~ z => x + y + z }
}.named("verbatim")
def verbatim_content(source: String): String =
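quoted_context turns quoted into a resumable scan: from Finished it consumes the opening quote and body and, if opt_term finds no closing quote before end of input, returns the partial text with continuation context Quoted(quote); from Quoted(q) it resumes inside the string and looks only for body and terminator. A self-contained sketch of that state machine; the body parser here is a plain regex specialised to double quotes, without the escape handling of quoted_body above:

  import scala.util.parsing.combinator.RegexParsers

  object QuotedDemo extends RegexParsers {
    override val whiteSpace = "".r

    sealed abstract class Context
    case object Finished extends Context
    case class Quoted(quote: String) extends Context

    def opt_term[T](p: => Parser[T]): Parser[Option[T]] =
      p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None)

    def body: Parser[String] = rep("""[^"\\]+""".r) ^^ (_.mkString)

    def quoted_context(ctxt: Context): Parser[(String, Context)] =
      ctxt match {
        case Finished =>
          "\"" ~ body ~ opt_term("\"") ^^
            { case x ~ y ~ Some(z) => (x + y + z, Finished)
              case x ~ y ~ None => (x + y, Quoted("\"")) }
        case Quoted(_) =>
          body ~ opt_term("\"") ^^
            { case x ~ Some(y) => (x + y, Finished)
              case x ~ None => (x, ctxt) }
      }

    def main(args: Array[String]) {
      println(parse(quoted_context(Finished), "\"ab"))     // ("ab, Quoted("))
      println(parse(quoted_context(Quoted("\"")), "c\""))  // (c", Finished)
    }
  }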
@@ -214,6 +250,21 @@
source.substring(2, source.length - 2)
}
+    def verbatim_context(ctxt: Context): Parser[(String, Context)] =
+    {
+      ctxt match {
+        case Finished =>
+          "{*" ~ verbatim_body ~ opt_term("*}") ^^
+            { case x ~ y ~ Some(z) => (x + y + z, Finished)
+              case x ~ y ~ None => (x + y, Verbatim) }
+        case Verbatim =>
+          verbatim_body ~ opt_term("*}") ^^
+            { case x ~ Some(y) => (x + y, Finished)
+              case x ~ None => (x, Verbatim) }
+        case _ => failure("")
+      }
+    }.named("verbatim_context")
+
/* nested comments */
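verbatim_context works the same way for {* ... *} blocks, with Verbatim as the intermediate state. A condensed sketch of the two resumption steps, with the body again simplified to regexes; a block opened in one chunk is finished in the next:

  import scala.util.parsing.combinator.RegexParsers

  object VerbatimDemo extends RegexParsers {
    override val whiteSpace = "".r

    def opt_term[T](p: => Parser[T]): Parser[Option[T]] =
      p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None)

    // Like verbatim_body: any text, where "*" is consumed only if not "*}".
    def body: Parser[String] =
      rep("""[^*]+""".r | """\*(?!\})""".r) ^^ (_.mkString)

    def main(args: Array[String]) {
      // Chunk 1: block opened but not closed => continuation context Verbatim.
      println(parse("{*" ~ body ~ opt_term("*}"), "{* some text"))
      // Chunk 2: resume inside the block and find the terminator.
      println(parse(body ~ opt_term("*}"), " more text *}"))
    }
  }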
@@ -254,7 +305,18 @@
/* outer syntax tokens */
- def token(symbols: Symbol.Interpretation, is_command: String => Boolean): Parser[Token] =
+    private def delimited_token: Parser[Token] =
+    {
+      val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
+      val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
+      val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x))
+      val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))
+
+      string | (alt_string | (verb | cmt))
+    }
+
+    private def other_token(symbols: Symbol.Interpretation, is_command: String => Boolean)
+      : Parser[Token] =
{
val id = one(symbols.is_letter) ~ many(symbols.is_letdig) ^^ { case x ~ y => x + y }
val nat = many1(symbols.is_digit)
@@ -278,23 +340,43 @@
val space = many1(symbols.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))
- val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
- val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
+      // FIXME check
+      val junk = many(sym => !(symbols.is_blank(sym)))
+      val junk1 = many1(sym => !(symbols.is_blank(sym)))
- val junk = many1(sym => !(symbols.is_blank(sym)))
val bad_delimiter =
("\"" | "`" | "{*" | "(*") ~ junk ^^ { case x ~ y => Token(Token.Kind.UNPARSED, x + y) }
- val bad = junk ^^ (x => Token(Token.Kind.UNPARSED, x))
+      val bad = junk1 ^^ (x => Token(Token.Kind.UNPARSED, x))
+
+      val command_keyword =
+        keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))
+
+      space | (bad_delimiter |
+        (((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
+          command_keyword) | bad))
+    }
- /* tokens */
+    def token(symbols: Symbol.Interpretation, is_command: String => Boolean): Parser[Token] =
+      delimited_token | other_token(symbols, is_command)
- (space | (string | (alt_string | (verbatim ^^ (x => Token(Token.Kind.VERBATIM, x)) |
- comment ^^ (x => Token(Token.Kind.COMMENT, x)))))) |
- bad_delimiter |
- ((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||
- keyword ^^ (x => Token(if (is_command(x)) Token.Kind.COMMAND else Token.Kind.KEYWORD, x))) |
- bad
+    def token_context(symbols: Symbol.Interpretation, is_command: String => Boolean, ctxt: Context)
+      : Parser[(Token, Context)] =
+    {
+      val string =
+        quoted_context("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) }
+      val alt_string =
+        quoted_context("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) }
+      val verb =
+        verbatim_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) }
+      // FIXME comment_context
+      val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))
+      val other: Parser[(Token, Context)] =
+        ctxt match {
+          case Finished => (cmt | other_token(symbols, is_command)) ^^ { case x => (x, Finished) }
+          case _ => failure("")
+        }
+
+      string | (alt_string | (verb | other))
+    }
}
--- a/src/Pure/Isar/outer_syntax.scala Thu Jun 16 11:59:29 2011 +0200
+++ b/src/Pure/Isar/outer_syntax.scala Thu Jun 16 17:25:16 2011 +0200
@@ -8,6 +8,7 @@
import scala.util.parsing.input.{Reader, CharSequenceReader}
+import scala.collection.mutable
class Outer_Syntax(symbols: Symbol.Interpretation)
@@ -73,4 +74,21 @@
def scan(input: CharSequence): List[Token] =
scan(new CharSequenceReader(input))
+
+  def scan_context(input: CharSequence, context: Scan.Context): (List[Token], Scan.Context) =
+  {
+    import lexicon._
+
+    var in: Reader[Char] = new CharSequenceReader(input)
+    val toks = new mutable.ListBuffer[Token]
+    var ctxt = context
+    while (!in.atEnd) {
+      parse(token_context(symbols, is_command, ctxt), in) match {
+        case Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
+        case NoSuccess(_, rest) =>
+          error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
+      }
+    }
+    (toks.toList, ctxt)
+  }
}
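With scan_context, a caller can tokenize input chunk by chunk (e.g. line by line in an editor buffer), threading the context through: a string opened in one chunk and closed in the next comes back as two STRING tokens linked by an intermediate Quoted context. A usage sketch, assuming the Pure classes above are in scope; scan_lines is a hypothetical helper, not part of the patch:

  import scala.collection.mutable

  // Tokenize a sequence of chunks (e.g. lines), starting from Finished.
  def scan_lines(syntax: Outer_Syntax, chunks: List[CharSequence])
    : (List[Token], Scan.Context) =
  {
    val toks = new mutable.ListBuffer[Token]
    var ctxt: Scan.Context = Scan.Finished
    for (chunk <- chunks) {
      val (ts, c) = syntax.scan_context(chunk, ctxt)
      toks ++= ts
      ctxt = c
    }
    // E.g. chunks List("foo \"bar", "baz\" qux") yield a STRING token
    // "\"bar" with intermediate context Quoted("\""), then "baz\"".
    (toks.toList, ctxt)
  }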