author | wenzelm |
Sat, 18 Jan 2014 19:15:12 +0100 | |
changeset 55033 | 8e8243975860 |
parent 51048 | 123be08eed88 |
child 55035 | 68afbb5ce4ff |
permissions | -rw-r--r-- |
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
1 |
/* Title: Pure/Isar/token.scala |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
2 |
Author: Makarius |
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
3 |
|
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
4 |
Outer token syntax for Isabelle/Isar. |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
5 |
*/ |
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
6 |
|
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
7 |
package isabelle |
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
8 |
|
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
9 |
|
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
10 |
object Token |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
11 |
{ |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
12 |
/* tokens */ |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
13 |
|
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
14 |
object Kind extends Enumeration |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
15 |
{ |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
16 |
val COMMAND = Value("command") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
17 |
val KEYWORD = Value("keyword") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
18 |
val IDENT = Value("identifier") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
19 |
val LONG_IDENT = Value("long identifier") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
20 |
val SYM_IDENT = Value("symbolic identifier") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
21 |
val VAR = Value("schematic variable") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
22 |
val TYPE_IDENT = Value("type variable") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
23 |
val TYPE_VAR = Value("schematic type variable") |
40290
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
wenzelm
parents:
38367
diff
changeset
|
24 |
val NAT = Value("natural number") |
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
wenzelm
parents:
38367
diff
changeset
|
25 |
val FLOAT = Value("floating-point number") |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
26 |
val STRING = Value("string") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
27 |
val ALT_STRING = Value("back-quoted string") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
28 |
val VERBATIM = Value("verbatim text") |
55033 | 29 |
val CARTOUCHE = Value("cartouche") |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
30 |
val SPACE = Value("white space") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
31 |
val COMMENT = Value("comment text") |
48754
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
32 |
val ERROR = Value("bad input") |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
33 |
val UNPARSED = Value("unparsed input") |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
34 |
} |
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
35 |
|
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
36 |
|
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
37 |
/* token reader */ |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
38 |
|
48335 | 39 |
class Position(val line: Int, val file: String) extends scala.util.parsing.input.Position |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
40 |
{ |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
41 |
def column = 0 |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
42 |
def lineContents = "" |
48335 | 43 |
override def toString = |
44 |
if (file == "") ("line " + line.toString) |
|
45 |
else ("line " + line.toString + " of " + quote(file)) |
|
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
46 |
|
48335 | 47 |
def advance(token: Token): Position = |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
48 |
{ |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
49 |
var n = 0 |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
50 |
for (c <- token.content if c == '\n') n += 1 |
48335 | 51 |
if (n == 0) this else new Position(line + n, file) |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
52 |
} |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
53 |
} |
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
54 |
|
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
55 |
abstract class Reader extends scala.util.parsing.input.Reader[Token] |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
56 |
|
48335 | 57 |
private class Token_Reader(tokens: List[Token], val pos: Position) extends Reader |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
58 |
{ |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
59 |
def first = tokens.head |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
60 |
def rest = new Token_Reader(tokens.tail, pos.advance(first)) |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
61 |
def atEnd = tokens.isEmpty |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
62 |
} |
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
63 |
|
48335 | 64 |
def reader(tokens: List[Token], file: String = ""): Reader = |
65 |
new Token_Reader(tokens, new Position(1, file)) |
|
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
66 |
} |
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
67 |
|
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
68 |
|
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
69 |
sealed case class Token(val kind: Token.Kind.Value, val source: String) |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
70 |
{ |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
71 |
def is_command: Boolean = kind == Token.Kind.COMMAND |
48718 | 72 |
def is_keyword: Boolean = kind == Token.Kind.KEYWORD |
73 |
def is_operator: Boolean = is_keyword && !Symbol.is_ascii_identifier(source) |
|
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
74 |
def is_delimited: Boolean = |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
75 |
kind == Token.Kind.STRING || |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
76 |
kind == Token.Kind.ALT_STRING || |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
77 |
kind == Token.Kind.VERBATIM || |
55033 | 78 |
kind == Token.Kind.CARTOUCHE || |
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
79 |
kind == Token.Kind.COMMENT |
48365
d88aefda01c4
basic support for stand-alone options with external string representation;
wenzelm
parents:
48349
diff
changeset
|
80 |
def is_ident: Boolean = kind == Token.Kind.IDENT |
48605
e777363440d6
allow negative int values as well, according to real = int | float;
wenzelm
parents:
48599
diff
changeset
|
81 |
def is_sym_ident: Boolean = kind == Token.Kind.SYM_IDENT |
46943 | 82 |
def is_string: Boolean = kind == Token.Kind.STRING |
48349
a78e5d399599
support Session.Queue with ordering and dependencies;
wenzelm
parents:
48335
diff
changeset
|
83 |
def is_nat: Boolean = kind == Token.Kind.NAT |
48365
d88aefda01c4
basic support for stand-alone options with external string representation;
wenzelm
parents:
48349
diff
changeset
|
84 |
def is_float: Boolean = kind == Token.Kind.FLOAT |
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
85 |
def is_name: Boolean = |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
86 |
kind == Token.Kind.IDENT || |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
87 |
kind == Token.Kind.SYM_IDENT || |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
88 |
kind == Token.Kind.STRING || |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
89 |
kind == Token.Kind.NAT |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
90 |
def is_xname: Boolean = is_name || kind == Token.Kind.LONG_IDENT |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
91 |
def is_text: Boolean = is_xname || kind == Token.Kind.VERBATIM |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
92 |
def is_space: Boolean = kind == Token.Kind.SPACE |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
93 |
def is_comment: Boolean = kind == Token.Kind.COMMENT |
51048
123be08eed88
clarified notion of Command.proper_range (according to Token.is_proper), especially relevant for Active.try_replace_command, to avoid loosing subsequent comments accidentally;
wenzelm
parents:
48754
diff
changeset
|
94 |
def is_improper: Boolean = is_space || is_comment |
48599 | 95 |
def is_proper: Boolean = !is_space && !is_comment |
48754
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
96 |
def is_error: Boolean = kind == Token.Kind.ERROR |
47012
0e246130486b
clarified command span classification: strict Command.is_command, permissive Command.name;
wenzelm
parents:
46943
diff
changeset
|
97 |
def is_unparsed: Boolean = kind == Token.Kind.UNPARSED |
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
98 |
|
48754
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
99 |
def is_unfinished: Boolean = is_error && |
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
100 |
(source.startsWith("\"") || |
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
101 |
source.startsWith("`") || |
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
102 |
source.startsWith("{*") || |
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
103 |
source.startsWith("(*")) |
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
104 |
|
48718 | 105 |
def is_begin: Boolean = is_keyword && source == "begin" |
106 |
def is_end: Boolean = is_keyword && source == "end" |
|
43611 | 107 |
|
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
108 |
def content: String = |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
109 |
if (kind == Token.Kind.STRING) Scan.Lexicon.empty.quoted_content("\"", source) |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
110 |
else if (kind == Token.Kind.ALT_STRING) Scan.Lexicon.empty.quoted_content("`", source) |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
111 |
else if (kind == Token.Kind.VERBATIM) Scan.Lexicon.empty.verbatim_content(source) |
55033 | 112 |
else if (kind == Token.Kind.CARTOUCHE) Scan.Lexicon.empty.cartouche_content(source) |
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
113 |
else if (kind == Token.Kind.COMMENT) Scan.Lexicon.empty.comment_content(source) |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
114 |
else source |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
115 |
|
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
116 |
def text: (String, String) = |
48718 | 117 |
if (is_command && source == ";") ("terminator", "") |
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
118 |
else (kind.toString, source) |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
119 |
} |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
120 |