simplified result of keyword and symbols parser;
authorwenzelm
Sun, 20 Dec 2009 15:42:40 +0100
changeset 34140 31be1235d0fb
parent 34139 d1ded303fe0e
child 34141 297b2149077d
simplified result of keyword and symbols parser; some support for parsing outer syntax tokens;
src/Pure/General/scan.scala
--- a/src/Pure/General/scan.scala	Sun Dec 20 15:42:12 2009 +0100
+++ b/src/Pure/General/scan.scala	Sun Dec 20 15:42:40 2009 +0100
@@ -107,12 +107,10 @@
 
     override val whiteSpace = "".r
 
-    type Result = (String, Boolean)
-
 
     /* keywords from lexicon */
 
-    def keyword: Parser[Result] = new Parser[Result]
+    def keyword: Parser[String] = new Parser[String]
     {
       def apply(in: Input) =
       {
@@ -120,28 +118,27 @@
         val offset = in.offset
         val len = source.length - offset
 
-        def scan(tree: Tree, result: Result, i: Int): Result =
+        def scan(tree: Tree, result: String, i: Int): String =
         {
           if (i < len) {
             tree.branches.get(source.charAt(offset + i)) match {
-              case Some((s, tr)) =>
-                scan(tr, if (s.isEmpty) result else (s, tr.branches.isEmpty), i + 1)
+              case Some((s, tr)) => scan(tr, if (s.isEmpty) result else s, i + 1)
               case None => result
             }
           }
           else result
         }
-        val result @ (text, terminated) = scan(main_tree, ("", false), 0)
-        if (text.isEmpty) Failure("keyword expected", in)
-        else Success(result, in.drop(text.length))
+        val result = scan(main_tree, "", 0)
+        if (result.isEmpty) Failure("keyword expected", in)
+        else Success(result, in.drop(result.length))
       }
     }.named("keyword")
 
 
     /* symbols */
 
-    def symbols(pred: String => Boolean, min_count: Int, max_count: Int): Parser[CharSequence] =
-      new Parser[CharSequence]
+    def symbols(pred: String => Boolean, min_count: Int, max_count: Int): Parser[String] =
+      new Parser[String]
       {
         def apply(in: Input) =
         {
@@ -161,14 +158,86 @@
             }
             else finished = true
           }
-          if (count < min_count) Failure("bad symbols", in)
-          else Success(in.source.subSequence(start, i), in.drop(i - start))
+          if (count < min_count) Failure("bad input", in)
+          else Success(in.source.subSequence(start, i).toString, in.drop(i - start))
         }
       }.named("symbols")
 
-    def one(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 1, 1)
-    def many(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 0, Integer.MAX_VALUE)
-    def many1(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 1, Integer.MAX_VALUE)
+    def one(pred: String => Boolean): Parser[String] = symbols(pred, 1, 1)
+    def many(pred: String => Boolean): Parser[String] = symbols(pred, 0, Integer.MAX_VALUE)
+    def many1(pred: String => Boolean): Parser[String] = symbols(pred, 1, Integer.MAX_VALUE)
+
+
+    /* delimited text */
+
+    def quoted(quote: String) =
+      quote ~
+        rep(many1(sym => sym != quote && sym != "\\" && Symbol.is_closed(sym)) |
+          "\\" + quote | "\\\\" |
+          (("""\\\d\d\d""".r) ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ~
+      quote ^^ { case x ~ ys ~ z => x + ys.mkString + z }
+
+    def quoted_content(quote: String, source: String): String =
+    {
+      require(parseAll(quoted(quote), source).successful)
+      source.substring(1, source.length - 1)  // FIXME proper escapes, recode utf8 (!?)
+    }
+
+
+    def verbatim =
+      "{*" ~ rep(many1(sym => sym != "*" && Symbol.is_closed(sym)) | """\*(?!\})""".r) ~ "*}" ^^
+        { case x ~ ys ~ z => x + ys.mkString + z }
+
+    def verbatim_content(source: String): String =
+    {
+      require(parseAll(verbatim, source).successful)
+      source.substring(2, source.length - 2)
+    }
+
+
+    /* outer syntax tokens */
+
+    def token(symbols: Symbol.Interpretation, is_command: String => Boolean):
+      Parser[Outer_Lex.Token] =
+    {
+      import Outer_Lex._
+
+      val id = one(symbols.is_letter) ~ many(symbols.is_letdig) ^^ { case x ~ y => x + y }
+      val nat = many1(symbols.is_digit)
+      val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x }
+
+      val ident = id ~ rep("." ~> id) ^^
+        { case x ~ Nil => Ident(x)
+          case x ~ ys => Long_Ident((x :: ys).mkString(".")) }
+
+      val var_ = "?" ~ id_nat ^^ { case x ~ y => Var(x + y) }
+      val type_ident = "'" ~ id ^^ { case x ~ y => Type_Ident(x + y) }
+      val type_var = "?'" ~ id_nat ^^ { case x ~ y => Type_Var(x + y) }
+      val nat_ = nat ^^ Nat
+
+      val sym_ident =
+        (many1(symbols.is_symbolic_char) |
+          one(sym => symbols.is_symbolic(sym) & Symbol.is_closed(sym))) ^^ Sym_Ident
+
+      val space = many1(symbols.is_blank) ^^ Space
+
+      val string = quoted("\"") ^^ String_Token
+      val alt_string = quoted("`") ^^ Alt_String_Token
+
+      val comment: Parser[Token] = failure("FIXME")
+
+      val bad_input = many1(sym => !(symbols.is_blank(sym))) ^^ Bad_Input
+
+
+      /* tokens */
+
+      // FIXME right-assoc !?
+
+      (string | alt_string | verbatim ^^ Verbatim | comment | space) |
+      ((ident | var_ | type_ident | type_var | nat_ | sym_ident) |||
+        keyword ^^ (x => if (is_command(x)) Command(x) else Keyword(x))) |
+      bad_input
+    }
   }
 }