# HG changeset patch
# User wenzelm
# Date 1261490293 -3600
# Node ID 0a0a1915362655559d841e4502ec6ff5818f997a
# Parent  3a7937841585a2f2d98683f24ee5981356877cb3
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
added Token_Reader;
tuned;

diff -r 3a7937841585 -r 0a0a19153626 src/Pure/General/scan.scala
--- a/src/Pure/General/scan.scala	Mon Dec 21 16:50:28 2009 +0000
+++ b/src/Pure/General/scan.scala	Tue Dec 22 14:58:13 2009 +0100
@@ -135,9 +135,9 @@
     }.named("keyword")


-    /* symbols */
+    /* symbol range */

-    def symbols(pred: String => Boolean, min_count: Int, max_count: Int): Parser[String] =
+    def symbol_range(pred: String => Boolean, min_count: Int, max_count: Int): Parser[String] =
       new Parser[String]
       {
         def apply(in: Input) =
@@ -161,11 +161,11 @@
           if (count < min_count) Failure("bad input", in)
           else Success(in.source.subSequence(start, i).toString, in.drop(i - start))
         }
-      }.named("symbols")
+      }.named("symbol_range")

-    def one(pred: String => Boolean): Parser[String] = symbols(pred, 1, 1)
-    def many(pred: String => Boolean): Parser[String] = symbols(pred, 0, Integer.MAX_VALUE)
-    def many1(pred: String => Boolean): Parser[String] = symbols(pred, 1, Integer.MAX_VALUE)
+    def one(pred: String => Boolean): Parser[String] = symbol_range(pred, 1, 1)
+    def many(pred: String => Boolean): Parser[String] = symbol_range(pred, 0, Integer.MAX_VALUE)
+    def many1(pred: String => Boolean): Parser[String] = symbol_range(pred, 1, Integer.MAX_VALUE)


     /* quoted strings */
@@ -246,38 +246,41 @@
     def token(symbols: Symbol.Interpretation, is_command: String => Boolean):
       Parser[Outer_Lex.Token] =
     {
-      import Outer_Lex._
+      import Outer_Lex.Token_Kind, Outer_Lex.Token

       val id = one(symbols.is_letter) ~ many(symbols.is_letdig) ^^ { case x ~ y => x + y }
       val nat = many1(symbols.is_digit)
       val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x }

       val ident = id ~ rep("." ~> id) ^^
-        { case x ~ Nil => Ident(x)
-          case x ~ ys => Long_Ident((x :: ys).mkString(".")) }
+        { case x ~ Nil => Token(Token_Kind.IDENT, x)
+          case x ~ ys => Token(Token_Kind.LONG_IDENT, (x :: ys).mkString(".")) }

-      val var_ = "?" ~ id_nat ^^ { case x ~ y => Var(x + y) }
-      val type_ident = "'" ~ id ^^ { case x ~ y => Type_Ident(x + y) }
-      val type_var = "?'" ~ id_nat ^^ { case x ~ y => Type_Var(x + y) }
-      val nat_ = nat ^^ Nat
+      val var_ = "?" ~ id_nat ^^ { case x ~ y => Token(Token_Kind.VAR, x + y) }
+      val type_ident = "'" ~ id ^^ { case x ~ y => Token(Token_Kind.TYPE_IDENT, x + y) }
+      val type_var = "?'" ~ id_nat ^^ { case x ~ y => Token(Token_Kind.TYPE_VAR, x + y) }
+      val nat_ = nat ^^ (x => Token(Token_Kind.NAT, x))

       val sym_ident =
        (many1(symbols.is_symbolic_char) |
-          one(sym => symbols.is_symbolic(sym) & Symbol.is_closed(sym))) ^^ Sym_Ident
+          one(sym => symbols.is_symbolic(sym) & Symbol.is_closed(sym))) ^^
+            (x => Token(Token_Kind.SYM_IDENT, x))

-      val space = many1(symbols.is_blank) ^^ Space
+      val space = many1(symbols.is_blank) ^^ (x => Token(Token_Kind.SPACE, x))

-      val string = quoted("\"") ^^ String_Token
-      val alt_string = quoted("`") ^^ Alt_String_Token
+      val string = quoted("\"") ^^ (x => Token(Token_Kind.STRING, x))
+      val alt_string = quoted("`") ^^ (x => Token(Token_Kind.ALT_STRING, x))

-      val bad_input = many1(sym => !(symbols.is_blank(sym))) ^^ Bad_Input
+      val bad_input = many1(sym => !(symbols.is_blank(sym))) ^^
+        (x => Token(Token_Kind.BAD_INPUT, x))


       /* tokens */

-      (space | (string | (alt_string | (verbatim ^^ Verbatim | comment ^^ Comment)))) |
+      (space | (string | (alt_string | (verbatim ^^ (x => Token(Token_Kind.VERBATIM, x)) |
+        comment ^^ (x => Token(Token_Kind.COMMENT, x)))))) |
        ((ident | (var_ | (type_ident | (type_var | (nat_ | sym_ident))))) |||
-        keyword ^^ (x => if (is_command(x)) Command(x) else Keyword(x))) |
+        keyword ^^ (x => Token(if (is_command(x)) Token_Kind.COMMAND else Token_Kind.KEYWORD, x))) |
         bad_input
     }
   }
diff -r 3a7937841585 -r 0a0a19153626 src/Pure/Isar/outer_lex.scala
--- a/src/Pure/Isar/outer_lex.scala	Mon Dec 21 16:50:28 2009 +0000
+++ b/src/Pure/Isar/outer_lex.scala	Tue Dec 22 14:58:13 2009 +0100
@@ -9,92 +9,84 @@

 object Outer_Lex
 {
-  sealed abstract class Token
-  {
-    def source: String
-    def content: String = source
-
-    def is_delimited: Boolean = false
-    def is_name: Boolean = false
-    def is_xname: Boolean = false
-    def is_text: Boolean = false
-    def is_space: Boolean = false
-    def is_comment: Boolean = false
-    def is_proper: Boolean = !(is_space || is_comment)
-  }
-
-  case class Command(val source: String) extends Token
-
-  case class Keyword(val source: String) extends Token
+  /* tokens */

-  case class Ident(val source: String) extends Token
-  {
-    override def is_name = true
-    override def is_xname = true
-    override def is_text = true
-  }
-
-  case class Long_Ident(val source: String) extends Token
+  object Token_Kind extends Enumeration
   {
-    override def is_xname = true
-    override def is_text = true
-  }
-
-  case class Sym_Ident(val source: String) extends Token
-  {
-    override def is_name = true
-    override def is_xname = true
-    override def is_text = true
+    val COMMAND = Value("command")
+    val KEYWORD = Value("keyword")
+    val IDENT = Value("identifier")
+    val LONG_IDENT = Value("long identifier")
+    val SYM_IDENT = Value("symbolic identifier")
+    val VAR = Value("schematic variable")
+    val TYPE_IDENT = Value("type variable")
+    val TYPE_VAR = Value("schematic type variable")
+    val NAT = Value("number")
+    val STRING = Value("string")
+    val ALT_STRING = Value("back-quoted string")
+    val VERBATIM = Value("verbatim text")
+    val SPACE = Value("white space")
+    val COMMENT = Value("comment text")
+    val BAD_INPUT = Value("bad input")
+    val UNPARSED = Value("unparsed input")
   }

-  case class Var(val source: String) extends Token
-
-  case class Type_Ident(val source: String) extends Token
-
-  case class Type_Var(val source: String) extends Token
-
-  case class Nat(val source: String) extends Token
+  sealed case class Token(val kind: Token_Kind.Value, val source: String)
   {
-    override def is_name = true
-    override def is_xname = true
-    override def is_text = true
-  }
+    def is_delimited: Boolean =
+      kind == Token_Kind.STRING ||
+      kind == Token_Kind.ALT_STRING ||
+      kind == Token_Kind.VERBATIM ||
+      kind == Token_Kind.COMMENT
+    def is_name: Boolean =
+      kind == Token_Kind.IDENT ||
+      kind == Token_Kind.SYM_IDENT ||
+      kind == Token_Kind.STRING ||
+      kind == Token_Kind.NAT
+    def is_xname: Boolean = is_name || kind == Token_Kind.LONG_IDENT
+    def is_text: Boolean = is_xname || kind == Token_Kind.VERBATIM
+    def is_space: Boolean = kind == Token_Kind.SPACE
+    def is_comment: Boolean = kind == Token_Kind.COMMENT
+    def is_proper: Boolean = !(is_space || is_comment)

-  case class String_Token(val source: String) extends Token
-  {
-    override def content = Scan.Lexicon.empty.quoted_content("\"", source)
-    override def is_delimited = true
-    override def is_name = true
-    override def is_xname = true
-    override def is_text = true
+    def content: String =
+      if (kind == Token_Kind.STRING) Scan.Lexicon.empty.quoted_content("\"", source)
+      else if (kind == Token_Kind.ALT_STRING) Scan.Lexicon.empty.quoted_content("`", source)
+      else if (kind == Token_Kind.VERBATIM) Scan.Lexicon.empty.verbatim_content(source)
+      else if (kind == Token_Kind.COMMENT) Scan.Lexicon.empty.comment_content(source)
+      else source
+
+    def text: (String, String) =
+      if (kind == Token_Kind.COMMAND && source == ";") ("terminator", "")
+      else (kind.toString, source)
   }

-  case class Alt_String_Token(val source: String) extends Token
-  {
-    override def content = Scan.Lexicon.empty.quoted_content("`", source)
-    override def is_delimited = true
-  }
+
+  /* token reader */

-  case class Verbatim(val source: String) extends Token
+  class Line_Position(val line: Int) extends scala.util.parsing.input.Position
   {
-    override def content = Scan.Lexicon.empty.verbatim_content(source)
-    override def is_delimited = true
-    override def is_text = true
+    def column = 0
+    def lineContents = ""
+    override def toString = line.toString
+
+    def advance(token: Token): Line_Position =
+    {
+      var n = 0
+      for (c <- token.content if c == '\n') n += 1
+      if (n == 0) this else new Line_Position(line + n)
+    }
   }

-  case class Space(val source: String) extends Token
+  abstract class Reader extends scala.util.parsing.input.Reader[Token]
+
+  private class Token_Reader(tokens: List[Token], val pos: Line_Position) extends Reader
   {
-    override def is_space = true
+    def first = tokens.head
+    def rest = new Token_Reader(tokens.tail, pos.advance(first))
+    def atEnd = tokens.isEmpty
   }

-  case class Comment(val source: String) extends Token
-  {
-    override def content = Scan.Lexicon.empty.comment_content(source)
-    override def is_delimited = true
-    override def is_comment = true
-  }
-
-  case class Bad_Input(val source: String) extends Token
-  case class Unparsed(val source: String) extends Token
+  def reader(tokens: List[Token]): Reader = new Token_Reader(tokens, new Line_Position(1))
 }
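For orientation, a small usage sketch follows; it is not part of the changeset. It assumes the
patched isabelle classes above are on the classpath (together with the scala.util.parsing library
they build on); the Token_Demo object and its literal token list are invented for illustration.
It shows how the explicit Token_Kind value takes over the role of the former case-class hierarchy,
and how Outer_Lex.reader exposes a token list as a parser-combinator input.

import isabelle.Outer_Lex
import Outer_Lex.{Token, Token_Kind}

object Token_Demo
{
  def main(args: Array[String]): Unit =
  {
    // tokens are now plain (kind, source) records instead of a case-class hierarchy
    val tokens =
      List(
        Token(Token_Kind.COMMAND, "lemma"),
        Token(Token_Kind.SPACE, " "),
        Token(Token_Kind.IDENT, "foo"),
        Token(Token_Kind.KEYWORD, ":"),
        Token(Token_Kind.IDENT, "bar"))

    // the predicates of the old hierarchy are derived from the kind value
    val proper = tokens.filter(_.is_proper)

    // dispatching on the kind value needs no runtime class tests, so it also works in
    // generic contexts where type arguments have been erased (cf. the log message)
    def describe(tok: Token): String =
      tok.kind match {
        case Token_Kind.COMMAND => "command " + tok.content
        case Token_Kind.KEYWORD => "keyword " + tok.content
        case _ => tok.text._1 + " " + tok.text._2
      }
    proper.map(describe).foreach(println)

    // Outer_Lex.reader wraps the token list as a scala.util.parsing.input.Reader[Token],
    // so it can be traversed by hand or handed to Parsers-based combinators
    var input: scala.util.parsing.input.Reader[Token] = Outer_Lex.reader(proper)
    while (!input.atEnd) {
      println("line " + input.pos.toString + ": " + input.first.kind.toString)
      input = input.rest
    }
  }
}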