| author | wenzelm | 
| Tue, 18 Jun 2013 15:31:52 +0200 | |
| changeset 52415 | d9fed6e99a57 | 
| parent 51048 | 123be08eed88 | 
| child 55033 | 8e8243975860 | 
| permissions | -rw-r--r-- | 
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 1 | /* Title: Pure/Isar/token.scala | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 2 | Author: Makarius | 
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 3 | |
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 4 | Outer token syntax for Isabelle/Isar. | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 5 | */ | 
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 6 | |
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 7 | package isabelle | 
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 8 | |
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 9 | |
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 10 | object Token | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 11 | {
 | 
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 12 | /* tokens */ | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 13 | |
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 14 | object Kind extends Enumeration | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 15 |   {
 | 
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 16 |     val COMMAND = Value("command")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 17 |     val KEYWORD = Value("keyword")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 18 |     val IDENT = Value("identifier")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 19 |     val LONG_IDENT = Value("long identifier")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 20 |     val SYM_IDENT = Value("symbolic identifier")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 21 |     val VAR = Value("schematic variable")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 22 |     val TYPE_IDENT = Value("type variable")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 23 |     val TYPE_VAR = Value("schematic type variable")
 | 
| 40290 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38367diff
changeset | 24 |     val NAT = Value("natural number")
 | 
| 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 wenzelm parents: 
38367diff
changeset | 25 |     val FLOAT = Value("floating-point number")
 | 
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 26 |     val STRING = Value("string")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 27 |     val ALT_STRING = Value("back-quoted string")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 28 |     val VERBATIM = Value("verbatim text")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 29 |     val SPACE = Value("white space")
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 30 |     val COMMENT = Value("comment text")
 | 
| 48754 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 wenzelm parents: 
48718diff
changeset | 31 |     val ERROR = Value("bad input")
 | 
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 32 |     val UNPARSED = Value("unparsed input")
 | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 33 | } | 
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 34 | |
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 35 | |
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 36 | /* token reader */ | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 37 | |
| 48335 | 38 | class Position(val line: Int, val file: String) extends scala.util.parsing.input.Position | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 39 |   {
 | 
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 40 | def column = 0 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 41 | def lineContents = "" | 
| 48335 | 42 | override def toString = | 
| 43 |       if (file == "") ("line " + line.toString)
 | |
| 44 |       else ("line " + line.toString + " of " + quote(file))
 | |
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 45 | |
| 48335 | 46 | def advance(token: Token): Position = | 
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 47 |     {
 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 48 | var n = 0 | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 49 | for (c <- token.content if c == '\n') n += 1 | 
| 48335 | 50 | if (n == 0) this else new Position(line + n, file) | 
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 51 | } | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 52 | } | 
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 53 | |
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 54 | abstract class Reader extends scala.util.parsing.input.Reader[Token] | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 55 | |
| 48335 | 56 | private class Token_Reader(tokens: List[Token], val pos: Position) extends Reader | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 57 |   {
 | 
| 34157 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 58 | def first = tokens.head | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 59 | def rest = new Token_Reader(tokens.tail, pos.advance(first)) | 
| 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 wenzelm parents: 
34143diff
changeset | 60 | def atEnd = tokens.isEmpty | 
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 61 | } | 
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 62 | |
| 48335 | 63 | def reader(tokens: List[Token], file: String = ""): Reader = | 
| 64 | new Token_Reader(tokens, new Position(1, file)) | |
| 34139 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 65 | } | 
| 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 wenzelm parents: diff
changeset | 66 | |
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 67 | |
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 68 | sealed case class Token(val kind: Token.Kind.Value, val source: String) | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 69 | {
 | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 70 | def is_command: Boolean = kind == Token.Kind.COMMAND | 
| 48718 | 71 | def is_keyword: Boolean = kind == Token.Kind.KEYWORD | 
| 72 | def is_operator: Boolean = is_keyword && !Symbol.is_ascii_identifier(source) | |
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 73 | def is_delimited: Boolean = | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 74 | kind == Token.Kind.STRING || | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 75 | kind == Token.Kind.ALT_STRING || | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 76 | kind == Token.Kind.VERBATIM || | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 77 | kind == Token.Kind.COMMENT | 
| 48365 
d88aefda01c4
basic support for stand-alone options with external string representation;
 wenzelm parents: 
48349diff
changeset | 78 | def is_ident: Boolean = kind == Token.Kind.IDENT | 
| 48605 
e777363440d6
allow negative int values as well, according to real = int | float;
 wenzelm parents: 
48599diff
changeset | 79 | def is_sym_ident: Boolean = kind == Token.Kind.SYM_IDENT | 
| 46943 | 80 | def is_string: Boolean = kind == Token.Kind.STRING | 
| 48349 
a78e5d399599
support Session.Queue with ordering and dependencies;
 wenzelm parents: 
48335diff
changeset | 81 | def is_nat: Boolean = kind == Token.Kind.NAT | 
| 48365 
d88aefda01c4
basic support for stand-alone options with external string representation;
 wenzelm parents: 
48349diff
changeset | 82 | def is_float: Boolean = kind == Token.Kind.FLOAT | 
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 83 | def is_name: Boolean = | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 84 | kind == Token.Kind.IDENT || | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 85 | kind == Token.Kind.SYM_IDENT || | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 86 | kind == Token.Kind.STRING || | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 87 | kind == Token.Kind.NAT | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 88 | def is_xname: Boolean = is_name || kind == Token.Kind.LONG_IDENT | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 89 | def is_text: Boolean = is_xname || kind == Token.Kind.VERBATIM | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 90 | def is_space: Boolean = kind == Token.Kind.SPACE | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 91 | def is_comment: Boolean = kind == Token.Kind.COMMENT | 
| 51048 
123be08eed88
clarified notion of Command.proper_range (according to Token.is_proper), especially relevant for Active.try_replace_command, to avoid losing subsequent comments accidentally;
 wenzelm parents: 
48754diff
changeset | 92 | def is_improper: Boolean = is_space || is_comment | 
| 48599 | 93 | def is_proper: Boolean = !is_space && !is_comment | 
| 48754 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 wenzelm parents: 
48718diff
changeset | 94 | def is_error: Boolean = kind == Token.Kind.ERROR | 
| 47012 
0e246130486b
clarified command span classification: strict Command.is_command, permissive Command.name;
 wenzelm parents: 
46943diff
changeset | 95 | def is_unparsed: Boolean = kind == Token.Kind.UNPARSED | 
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 96 | |
| 48754 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 wenzelm parents: 
48718diff
changeset | 97 | def is_unfinished: Boolean = is_error && | 
| 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 wenzelm parents: 
48718diff
changeset | 98 |    (source.startsWith("\"") ||
 | 
| 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 wenzelm parents: 
48718diff
changeset | 99 |     source.startsWith("`") ||
 | 
| 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 wenzelm parents: 
48718diff
changeset | 100 |     source.startsWith("{*") ||
 | 
| 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 wenzelm parents: 
48718diff
changeset | 101 |     source.startsWith("(*"))
 | 
| 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 wenzelm parents: 
48718diff
changeset | 102 | |
| 48718 | 103 | def is_begin: Boolean = is_keyword && source == "begin" | 
| 104 | def is_end: Boolean = is_keyword && source == "end" | |
| 43611 | 105 | |
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 106 | def content: String = | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 107 |     if (kind == Token.Kind.STRING) Scan.Lexicon.empty.quoted_content("\"", source)
 | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 108 |     else if (kind == Token.Kind.ALT_STRING) Scan.Lexicon.empty.quoted_content("`", source)
 | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 109 | else if (kind == Token.Kind.VERBATIM) Scan.Lexicon.empty.verbatim_content(source) | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 110 | else if (kind == Token.Kind.COMMENT) Scan.Lexicon.empty.comment_content(source) | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 111 | else source | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 112 | |
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 113 | def text: (String, String) = | 
| 48718 | 114 |     if (is_command && source == ";") ("terminator", "")
 | 
| 36956 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 115 | else (kind.toString, source) | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 116 | } | 
| 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 wenzelm parents: 
34311diff
changeset | 117 |