| author | Andreas Lochbihler | 
| Tue, 12 Jun 2012 15:32:14 +0200 | |
| changeset 48101 | 1b9796b7ab03 | 
| parent 47012 | 0e246130486b | 
| child 48335 | 2f923e994056 | 
| permissions | -rw-r--r-- | 
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 1 | /* Title: Pure/Isar/token.scala | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 2 | Author: Makarius | 
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 3 | |
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 4 | Outer token syntax for Isabelle/Isar. | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 5 | */ | 
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 6 | |
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 7 | package isabelle | 
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 8 | |
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 9 | |
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 10 | object Token | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 11 | {
 | 
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 12 | /* tokens */ | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 13 | |
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 14 | object Kind extends Enumeration | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 15 |   {
 | 
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 16 |     val COMMAND = Value("command")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 17 |     val KEYWORD = Value("keyword")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 18 |     val IDENT = Value("identifier")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 19 |     val LONG_IDENT = Value("long identifier")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 20 |     val SYM_IDENT = Value("symbolic identifier")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 21 |     val VAR = Value("schematic variable")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 22 |     val TYPE_IDENT = Value("type variable")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 23 |     val TYPE_VAR = Value("schematic type variable")
 | 
| 40290 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38367diff
changeset | 24 |     val NAT = Value("natural number")
 | 
| 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38367diff
changeset | 25 |     val FLOAT = Value("floating-point number")
 | 
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 26 |     val STRING = Value("string")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 27 |     val ALT_STRING = Value("back-quoted string")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 28 |     val VERBATIM = Value("verbatim text")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 29 |     val SPACE = Value("white space")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 30 |     val COMMENT = Value("comment text")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 31 |     val UNPARSED = Value("unparsed input")
 | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 32 | } | 
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 33 | |
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 34 | |
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 35 | /* token reader */ | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 36 | |
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 37 | class Line_Position(val line: Int) extends scala.util.parsing.input.Position | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 38 |   {
 | 
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 39 | def column = 0 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 40 | def lineContents = "" | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 41 | override def toString = line.toString | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 42 | |
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 43 | def advance(token: Token): Line_Position = | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 44 |     {
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 45 | var n = 0 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 46 | for (c <- token.content if c == '\n') n += 1 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 47 | if (n == 0) this else new Line_Position(line + n) | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 48 | } | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 49 | } | 
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 50 | |
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 51 | abstract class Reader extends scala.util.parsing.input.Reader[Token] | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 52 | |
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 53 | private class Token_Reader(tokens: List[Token], val pos: Line_Position) extends Reader | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 54 |   {
 | 
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 55 | def first = tokens.head | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 56 | def rest = new Token_Reader(tokens.tail, pos.advance(first)) | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 57 | def atEnd = tokens.isEmpty | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 58 | } | 
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 59 | |
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 60 | def reader(tokens: List[Token]): Reader = new Token_Reader(tokens, new Line_Position(1)) | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 61 | } | 
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 62 | |
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 63 | |
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 64 | sealed case class Token(val kind: Token.Kind.Value, val source: String) | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 65 | {
 | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 66 | def is_command: Boolean = kind == Token.Kind.COMMAND | 
| 43430 
1ed88ddf1268
more uniform treatment of "keyword" vs. "operator";
 wenzelm parents: 
43418diff
changeset | 67 | def is_operator: Boolean = kind == Token.Kind.KEYWORD && !Symbol.is_ascii_identifier(source) | 
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 68 | def is_delimited: Boolean = | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 69 | kind == Token.Kind.STRING || | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 70 | kind == Token.Kind.ALT_STRING || | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 71 | kind == Token.Kind.VERBATIM || | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 72 | kind == Token.Kind.COMMENT | 
| 46943 | 73 | def is_string: Boolean = kind == Token.Kind.STRING | 
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 74 | def is_name: Boolean = | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 75 | kind == Token.Kind.IDENT || | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 76 | kind == Token.Kind.SYM_IDENT || | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 77 | kind == Token.Kind.STRING || | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 78 | kind == Token.Kind.NAT | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 79 | def is_xname: Boolean = is_name || kind == Token.Kind.LONG_IDENT | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 80 | def is_text: Boolean = is_xname || kind == Token.Kind.VERBATIM | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 81 | def is_space: Boolean = kind == Token.Kind.SPACE | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 82 | def is_comment: Boolean = kind == Token.Kind.COMMENT | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 83 | def is_ignored: Boolean = is_space || is_comment | 
| 47012 
0e246130486b
clarified command span classification: strict Command.is_command, permissive Command.name;
 wenzelm parents: 
46943diff
changeset | 84 | def is_unparsed: Boolean = kind == Token.Kind.UNPARSED | 
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 85 | |
| 43611 | 86 | def is_begin: Boolean = kind == Token.Kind.KEYWORD && source == "begin" | 
| 87 | def is_end: Boolean = kind == Token.Kind.COMMAND && source == "end" | |
| 88 | ||
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 89 | def content: String = | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 90 |     if (kind == Token.Kind.STRING) Scan.Lexicon.empty.quoted_content("\"", source)
 | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 91 |     else if (kind == Token.Kind.ALT_STRING) Scan.Lexicon.empty.quoted_content("`", source)
 | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 92 | else if (kind == Token.Kind.VERBATIM) Scan.Lexicon.empty.verbatim_content(source) | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 93 | else if (kind == Token.Kind.COMMENT) Scan.Lexicon.empty.comment_content(source) | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 94 | else source | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 95 | |
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 96 | def text: (String, String) = | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 97 |     if (kind == Token.Kind.COMMAND && source == ";") ("terminator", "")
 | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 98 | else (kind.toString, source) | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 99 | } | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 100 |