author wenzelm
date Tue, 22 Dec 2009 14:58:13 +0100
changeset 34157 0a0a19153626
parent 34156 3a7937841585
child 34158 8b66bd211dcf
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure; added Token_Reader; tuned;
src/Pure/General/scan.scala
src/Pure/Isar/outer_lex.scala
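
Note: the outer_lex.scala diff below replaces the sealed Token class hierarchy (one case class per token kind) with a single Token case class carrying an explicit Token_Kind.Value, so a token's kind is ordinary data rather than a runtime class. A few REPL-style lines sketching how the new representation reads (assuming the updated Outer_Lex is on the classpath; the comments follow the code shown in the diff, not verified output):

    import Outer_Lex.{Token, Token_Kind}

    val tok = Token(Token_Kind.STRING, "\"hello\"")
    tok.kind       // Token_Kind.STRING -- the kind is explicit, no runtime type test needed
    tok.is_name    // true: STRING is among the kinds counted as names
    tok.is_proper  // true: neither SPACE nor COMMENT
    tok.content    // delegates to Scan.Lexicon.empty.quoted_content("\"", source)
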
--- a/src/Pure/General/scan.scala	Mon Dec 21 16:50:28 2009 +0000
+++ b/src/Pure/General/scan.scala	Tue Dec 22 14:58:13 2009 +0100
@@ -135,9 +135,9 @@
     }.named("keyword")
 
 
-    /* symbols */
+    /* symbol range */
 
-    def symbols(pred: String => Boolean, min_count: Int, max_count: Int): Parser[String] =
+    def symbol_range(pred: String => Boolean, min_count: Int, max_count: Int): Parser[String] =
       new Parser[String]
       {
         def apply(in: Input) =
@@ -161,11 +161,11 @@
           if (count < min_count) Failure("bad input", in)
           else Success(in.source.subSequence(start, i).toString, in.drop(i - start))
         }
-      }.named("symbols")
+      }.named("symbol_range")
 
-    def one(pred: String => Boolean): Parser[String] = symbols(pred, 1, 1)
-    def many(pred: String => Boolean): Parser[String] = symbols(pred, 0, Integer.MAX_VALUE)
-    def many1(pred: String => Boolean): Parser[String] = symbols(pred, 1, Integer.MAX_VALUE)
+    def one(pred: String => Boolean): Parser[String] = symbol_range(pred, 1, 1)
+    def many(pred: String => Boolean): Parser[String] = symbol_range(pred, 0, Integer.MAX_VALUE)
+    def many1(pred: String => Boolean): Parser[String] = symbol_range(pred, 1, Integer.MAX_VALUE)
 
 
     /* quoted strings */
@@ -246,38 +246,41 @@
     def token(symbols: Symbol.Interpretation, is_command: String => Boolean):
       Parser[Outer_Lex.Token] =
     {
-      import Outer_Lex._
+      import Outer_Lex.Token_Kind, Outer_Lex.Token
 
       val id = one(symbols.is_letter) ~ many(symbols.is_letdig) ^^ { case x ~ y => x + y }
       val nat = many1(symbols.is_digit)
       val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x }
 
       val ident = id ~ rep("." ~> id) ^^
-        { case x ~ Nil => Ident(x)
-          case x ~ ys => Long_Ident((x :: ys).mkString(".")) }
+        { case x ~ Nil => Token(Token_Kind.IDENT, x)
+          case x ~ ys => Token(Token_Kind.LONG_IDENT, (x :: ys).mkString(".")) }
 
-      val var_ = "?" ~ id_nat ^^ { case x ~ y => Var(x + y) }
-      val type_ident = "'" ~ id ^^ { case x ~ y => Type_Ident(x + y) }
-      val type_var = "?'" ~ id_nat ^^ { case x ~ y => Type_Var(x + y) }
-      val nat_ = nat ^^ Nat
+      val var_ = "?" ~ id_nat ^^ { case x ~ y => Token(Token_Kind.VAR, x + y) }
+      val type_ident = "'" ~ id ^^ { case x ~ y => Token(Token_Kind.TYPE_IDENT, x + y) }
+      val type_var = "?'" ~ id_nat ^^ { case x ~ y => Token(Token_Kind.TYPE_VAR, x + y) }
+      val nat_ = nat ^^ (x => Token(Token_Kind.NAT, x))
 
       val sym_ident =
         (many1(symbols.is_symbolic_char) |
-          one(sym => symbols.is_symbolic(sym) & Symbol.is_closed(sym))) ^^ Sym_Ident
+          one(sym => symbols.is_symbolic(sym) & Symbol.is_closed(sym))) ^^
+        (x => Token(Token_Kind.SYM_IDENT, x))
 
-      val space = many1(symbols.is_blank) ^^ Space
+      val space = many1(symbols.is_blank) ^^ (x => Token(Token_Kind.SPACE, x))
 
-      val string = quoted("\"") ^^ String_Token
-      val alt_string = quoted("`") ^^ Alt_String_Token
+      val string = quoted("\"") ^^ (x => Token(Token_Kind.STRING, x))
+      val alt_string = quoted("`") ^^ (x => Token(Token_Kind.ALT_STRING, x))
 
-      val bad_input = many1(sym => !(symbols.is_blank(sym))) ^^ Bad_Input
+      val bad_input = many1(sym => !(symbols.is_blank(sym))) ^^
+        (x => Token(Token_Kind.BAD_INPUT, x))
 
 
       /* tokens */
 
-      (space | (string | (alt_string | (verbatim ^^ Verbatim | comment ^^ Comment)))) |
+      (space | (string | (alt_string | (verbatim ^^ (x => Token(Token_Kind.VERBATIM, x)) |
+        comment ^^ (x => Token(Token_Kind.COMMENT, x)))))) |
       ((ident | (var_ | (type_ident | (type_var | (nat_ | sym_ident))))) |||
-        keyword ^^ (x => if (is_command(x)) Command(x) else Keyword(x))) |
+        keyword ^^ (x => Token(if (is_command(x)) Token_Kind.COMMAND else Token_Kind.KEYWORD, x))) |
       bad_input
     }
   }
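
Note on the scan.scala hunk above: each sub-parser now maps its result into Token(kind, source) via ^^ instead of applying a per-kind case class constructor. The following standalone sketch (hypothetical names, plain RegexParsers rather than the Isabelle Scan.Lexicon) illustrates the same pattern:

    import scala.util.parsing.combinator.RegexParsers

    object Token_Demo extends RegexParsers {
      object Token_Kind extends Enumeration {
        val IDENT = Value("identifier")
        val NAT = Value("number")
      }
      case class Token(kind: Token_Kind.Value, source: String)

      // map each recognized source string into a Token with an explicit kind
      val ident: Parser[Token] = """[A-Za-z][A-Za-z0-9_']*""".r ^^ (x => Token(Token_Kind.IDENT, x))
      val nat: Parser[Token] = """[0-9]+""".r ^^ (x => Token(Token_Kind.NAT, x))
      val token: Parser[Token] = ident | nat

      def main(args: Array[String]): Unit = {
        println(parseAll(rep(token), "foo 42 bar"))
        // prints something like:
        // [1.11] parsed: List(Token(identifier,foo), Token(number,42), Token(identifier,bar))
      }
    }
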
--- a/src/Pure/Isar/outer_lex.scala	Mon Dec 21 16:50:28 2009 +0000
+++ b/src/Pure/Isar/outer_lex.scala	Tue Dec 22 14:58:13 2009 +0100
@@ -9,92 +9,84 @@
 
 object Outer_Lex
 {
-  sealed abstract class Token
-  {
-    def source: String
-    def content: String = source
-
-    def is_delimited: Boolean = false
-    def is_name: Boolean = false
-    def is_xname: Boolean = false
-    def is_text: Boolean = false
-    def is_space: Boolean = false
-    def is_comment: Boolean = false
-    def is_proper: Boolean = !(is_space || is_comment)
-  }
-
-  case class Command(val source: String) extends Token
-
-  case class Keyword(val source: String) extends Token
+  /* tokens */
 
-  case class Ident(val source: String) extends Token
-  {
-    override def is_name = true
-    override def is_xname = true
-    override def is_text = true
-  }
-
-  case class Long_Ident(val source: String) extends Token
+  object Token_Kind extends Enumeration
   {
-    override def is_xname = true
-    override def is_text = true
-  }
-
-  case class Sym_Ident(val source: String) extends Token
-  {
-    override def is_name = true
-    override def is_xname = true
-    override def is_text = true
+    val COMMAND = Value("command")
+    val KEYWORD = Value("keyword")
+    val IDENT = Value("identifier")
+    val LONG_IDENT = Value("long identifier")
+    val SYM_IDENT = Value("symbolic identifier")
+    val VAR = Value("schematic variable")
+    val TYPE_IDENT = Value("type variable")
+    val TYPE_VAR = Value("schematic type variable")
+    val NAT = Value("number")
+    val STRING = Value("string")
+    val ALT_STRING = Value("back-quoted string")
+    val VERBATIM = Value("verbatim text")
+    val SPACE = Value("white space")
+    val COMMENT = Value("comment text")
+    val BAD_INPUT = Value("bad input")
+    val UNPARSED = Value("unparsed input")
   }
 
-  case class Var(val source: String) extends Token
-
-  case class Type_Ident(val source: String) extends Token
-
-  case class Type_Var(val source: String) extends Token
-
-  case class Nat(val source: String) extends Token
+  sealed case class Token(val kind: Token_Kind.Value, val source: String)
   {
-    override def is_name = true
-    override def is_xname = true
-    override def is_text = true
-  }
+    def is_delimited: Boolean =
+      kind == Token_Kind.STRING ||
+      kind == Token_Kind.ALT_STRING ||
+      kind == Token_Kind.VERBATIM ||
+      kind == Token_Kind.COMMENT
+    def is_name: Boolean =
+      kind == Token_Kind.IDENT ||
+      kind == Token_Kind.SYM_IDENT ||
+      kind == Token_Kind.STRING ||
+      kind == Token_Kind.NAT
+    def is_xname: Boolean = is_name || kind == Token_Kind.LONG_IDENT
+    def is_text: Boolean = is_xname || kind == Token_Kind.VERBATIM
+    def is_space: Boolean = kind == Token_Kind.SPACE
+    def is_comment: Boolean = kind == Token_Kind.COMMENT
+    def is_proper: Boolean = !(is_space || is_comment)
 
-  case class String_Token(val source: String) extends Token
-  {
-    override def content = Scan.Lexicon.empty.quoted_content("\"", source)
-    override def is_delimited = true
-    override def is_name = true
-    override def is_xname = true
-    override def is_text = true
+    def content: String =
+      if (kind == Token_Kind.STRING) Scan.Lexicon.empty.quoted_content("\"", source)
+      else if (kind == Token_Kind.ALT_STRING) Scan.Lexicon.empty.quoted_content("`", source)
+      else if (kind == Token_Kind.VERBATIM) Scan.Lexicon.empty.verbatim_content(source)
+      else if (kind == Token_Kind.COMMENT) Scan.Lexicon.empty.comment_content(source)
+      else source
+
+    def text: (String, String) =
+      if (kind == Token_Kind.COMMAND && source == ";") ("terminator", "")
+      else (kind.toString, source)
   }
 
-  case class Alt_String_Token(val source: String) extends Token
-  {
-    override def content = Scan.Lexicon.empty.quoted_content("`", source)
-    override def is_delimited = true
-  }
+
+  /* token reader */
 
-  case class Verbatim(val source: String) extends Token
+  class Line_Position(val line: Int) extends scala.util.parsing.input.Position
   {
-    override def content = Scan.Lexicon.empty.verbatim_content(source)
-    override def is_delimited = true
-    override def is_text = true
+    def column = 0
+    def lineContents = ""
+    override def toString = line.toString
+
+    def advance(token: Token): Line_Position =
+    {
+      var n = 0
+      for (c <- token.content if c == '\n') n += 1
+      if (n == 0) this else new Line_Position(line + n)
+    }
   }
 
-  case class Space(val source: String) extends Token
+  abstract class Reader extends scala.util.parsing.input.Reader[Token]
+
+  private class Token_Reader(tokens: List[Token], val pos: Line_Position) extends Reader
   {
-    override def is_space = true
+    def first = tokens.head
+    def rest = new Token_Reader(tokens.tail, pos.advance(first))
+    def atEnd = tokens.isEmpty
   }
 
-  case class Comment(val source: String) extends Token
-  {
-    override def content = Scan.Lexicon.empty.comment_content(source)
-    override def is_delimited = true
-    override def is_comment = true
-  }
-
-  case class Bad_Input(val source: String) extends Token
-  case class Unparsed(val source: String) extends Token
+  def reader(tokens: List[Token]): Reader = new Token_Reader(tokens, new Line_Position(1))
 }
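
A rough sketch of driving the new Token_Reader, assuming the updated Outer_Lex above is compiled; the token list is written out by hand for illustration:

    object Reader_Demo {
      import Outer_Lex.{Token, Token_Kind}

      def main(args: Array[String]): Unit = {
        val tokens = List(
          Token(Token_Kind.COMMAND, "lemma"),
          Token(Token_Kind.SPACE, " \n"),
          Token(Token_Kind.IDENT, "foo"))

        // walk the reader, printing only proper tokens together with their line position
        var r: scala.util.parsing.input.Reader[Token] = Outer_Lex.reader(tokens)
        while (!r.atEnd) {
          if (r.first.is_proper) println(r.pos.toString + ": " + r.first.text)
          r = r.rest
        }
        // expected output (per the Line_Position logic above):
        //   1: (command,lemma)
        //   2: (identifier,foo)
      }
    }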