author | wenzelm |
Fri, 20 Nov 2020 23:47:34 +0100 | |
changeset 72669 | 5e7916535860 |
parent 71601 | 97ccf48c2f0c |
child 72744 | 0017eb17ac1c |
permissions | -rw-r--r-- |
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
1 |
/* Title: Pure/Isar/token.scala |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
2 |
Author: Makarius |
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
3 |
|
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
4 |
Outer token syntax for Isabelle/Isar. |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
5 |
*/ |
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
6 |
|
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
7 |
package isabelle |
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
8 |
|
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
9 |
|
59083 | 10 |
import scala.collection.mutable |
11 |
import scala.util.parsing.input |
|
12 |
||
13 |
||
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
14 |
object Token |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
15 |
{ |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
16 |
/* tokens */ |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
17 |
|
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
18 |
object Kind extends Enumeration |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
19 |
{ |
59081 | 20 |
/*immediate source*/ |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
21 |
val COMMAND = Value("command") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
22 |
val KEYWORD = Value("keyword") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
23 |
val IDENT = Value("identifier") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
24 |
val LONG_IDENT = Value("long identifier") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
25 |
val SYM_IDENT = Value("symbolic identifier") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
26 |
val VAR = Value("schematic variable") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
27 |
val TYPE_IDENT = Value("type variable") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
28 |
val TYPE_VAR = Value("schematic type variable") |
40290
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
wenzelm
parents:
38367
diff
changeset
|
29 |
val NAT = Value("natural number") |
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
wenzelm
parents:
38367
diff
changeset
|
30 |
val FLOAT = Value("floating-point number") |
59081 | 31 |
val SPACE = Value("white space") |
32 |
/*delimited content*/ |
|
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
33 |
val STRING = Value("string") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
34 |
val ALT_STRING = Value("back-quoted string") |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
35 |
val VERBATIM = Value("verbatim text") |
55512 | 36 |
val CARTOUCHE = Value("text cartouche") |
67439
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
wenzelm
parents:
67432
diff
changeset
|
37 |
val INFORMAL_COMMENT = Value("informal comment") |
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
wenzelm
parents:
67432
diff
changeset
|
38 |
val FORMAL_COMMENT = Value("formal comment") |
59081 | 39 |
/*special content*/ |
48754
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
40 |
val ERROR = Value("bad input") |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
41 |
val UNPARSED = Value("unparsed input") |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
42 |
} |
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
43 |
|
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
44 |
|
55494 | 45 |
/* parsers */ |
46 |
||
47 |
object Parsers extends Parsers |
|
48 |
||
67439
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
wenzelm
parents:
67432
diff
changeset
|
49 |
trait Parsers extends Scan.Parsers with Comment.Parsers |
55494 | 50 |
{ |
51 |
private def delimited_token: Parser[Token] = |
|
52 |
{ |
|
53 |
val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x)) |
|
54 |
val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x)) |
|
55 |
val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x)) |
|
56 |
val cart = cartouche ^^ (x => Token(Token.Kind.CARTOUCHE, x)) |
|
67439
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
wenzelm
parents:
67432
diff
changeset
|
57 |
val cmt = comment ^^ (x => Token(Token.Kind.INFORMAL_COMMENT, x)) |
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
wenzelm
parents:
67432
diff
changeset
|
58 |
val formal_cmt = comment_cartouche ^^ (x => Token(Token.Kind.FORMAL_COMMENT, x)) |
55494 | 59 |
|
67439
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
wenzelm
parents:
67432
diff
changeset
|
60 |
string | (alt_string | (verb | (cart | (cmt | formal_cmt)))) |
55494 | 61 |
} |
62 |
||
58900 | 63 |
private def other_token(keywords: Keyword.Keywords): Parser[Token] = |
55494 | 64 |
{ |
65 |
val letdigs1 = many1(Symbol.is_letdig) |
|
62103 | 66 |
val sub = one(s => s == Symbol.sub_decoded || s == Symbol.sub) |
55494 | 67 |
val id = |
68 |
one(Symbol.is_letter) ~ |
|
69 |
(rep(letdigs1 | (sub ~ letdigs1 ^^ { case x ~ y => x + y })) ^^ (_.mkString)) ^^ |
|
70 |
{ case x ~ y => x + y } |
|
71 |
||
72 |
val nat = many1(Symbol.is_digit) |
|
73 |
val natdot = nat ~ "." ~ nat ^^ { case x ~ y ~ z => x + y + z } |
|
74 |
val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x } |
|
75 |
||
76 |
val ident = id ~ rep("." ~> id) ^^ |
|
77 |
{ case x ~ Nil => Token(Token.Kind.IDENT, x) |
|
78 |
case x ~ ys => Token(Token.Kind.LONG_IDENT, (x :: ys).mkString(".")) } |
|
79 |
||
80 |
val var_ = "?" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.VAR, x + y) } |
|
81 |
val type_ident = "'" ~ id ^^ { case x ~ y => Token(Token.Kind.TYPE_IDENT, x + y) } |
|
82 |
val type_var = "?'" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.TYPE_VAR, x + y) } |
|
83 |
val nat_ = nat ^^ (x => Token(Token.Kind.NAT, x)) |
|
84 |
val float = |
|
85 |
("-" ~ natdot ^^ { case x ~ y => x + y } | natdot) ^^ (x => Token(Token.Kind.FLOAT, x)) |
|
86 |
||
87 |
val sym_ident = |
|
88 |
(many1(Symbol.is_symbolic_char) | one(sym => Symbol.is_symbolic(sym))) ^^ |
|
89 |
(x => Token(Token.Kind.SYM_IDENT, x)) |
|
90 |
||
58899 | 91 |
val keyword = |
58900 | 92 |
literal(keywords.minor) ^^ (x => Token(Token.Kind.KEYWORD, x)) ||| |
93 |
literal(keywords.major) ^^ (x => Token(Token.Kind.COMMAND, x)) |
|
55494 | 94 |
|
95 |
val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x)) |
|
96 |
||
97 |
val recover_delimited = |
|
98 |
(recover_quoted("\"") | |
|
99 |
(recover_quoted("`") | |
|
100 |
(recover_verbatim | |
|
101 |
(recover_cartouche | recover_comment)))) ^^ (x => Token(Token.Kind.ERROR, x)) |
|
102 |
||
103 |
val bad = one(_ => true) ^^ (x => Token(Token.Kind.ERROR, x)) |
|
104 |
||
105 |
space | (recover_delimited | |
|
106 |
(((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) ||| |
|
58899 | 107 |
keyword) | bad)) |
55494 | 108 |
} |
109 |
||
58900 | 110 |
def token(keywords: Keyword.Keywords): Parser[Token] = |
67446 | 111 |
delimited_token | other_token(keywords) |
55494 | 112 |
|
58900 | 113 |
def token_line(keywords: Keyword.Keywords, ctxt: Scan.Line_Context) |
55510
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
wenzelm
parents:
55505
diff
changeset
|
114 |
: Parser[(Token, Scan.Line_Context)] = |
55494 | 115 |
{ |
116 |
val string = |
|
55510
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
wenzelm
parents:
55505
diff
changeset
|
117 |
quoted_line("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) } |
55494 | 118 |
val alt_string = |
55510
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
wenzelm
parents:
55505
diff
changeset
|
119 |
quoted_line("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) } |
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
wenzelm
parents:
55505
diff
changeset
|
120 |
val verb = verbatim_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) } |
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
wenzelm
parents:
55505
diff
changeset
|
121 |
val cart = cartouche_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.CARTOUCHE, x), c) } |
67439
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
wenzelm
parents:
67432
diff
changeset
|
122 |
val cmt = comment_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.INFORMAL_COMMENT, x), c) } |
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
wenzelm
parents:
67432
diff
changeset
|
123 |
val formal_cmt = |
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
wenzelm
parents:
67432
diff
changeset
|
124 |
comment_cartouche_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.FORMAL_COMMENT, x), c) } |
58900 | 125 |
val other = other_token(keywords) ^^ { case x => (x, Scan.Finished) } |
55494 | 126 |
|
67439
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
wenzelm
parents:
67432
diff
changeset
|
127 |
string | (alt_string | (verb | (cart | (cmt | (formal_cmt | other))))) |
55494 | 128 |
} |
129 |
} |
|
130 |
||
131 |
||
59083 | 132 |
/* explode */ |
133 |
||
134 |
def explode(keywords: Keyword.Keywords, inp: CharSequence): List[Token] = |
|
64824 | 135 |
Parsers.parseAll(Parsers.rep(Parsers.token(keywords)), Scan.char_reader(inp)) match { |
59083 | 136 |
case Parsers.Success(tokens, _) => tokens |
137 |
case _ => error("Unexpected failure of tokenizing input:\n" + inp.toString) |
|
138 |
} |
|
139 |
||
140 |
def explode_line(keywords: Keyword.Keywords, inp: CharSequence, context: Scan.Line_Context) |
|
141 |
: (List[Token], Scan.Line_Context) = |
|
142 |
{ |
|
64824 | 143 |
var in: input.Reader[Char] = Scan.char_reader(inp) |
59083 | 144 |
val toks = new mutable.ListBuffer[Token] |
145 |
var ctxt = context |
|
146 |
while (!in.atEnd) { |
|
147 |
Parsers.parse(Parsers.token_line(keywords, ctxt), in) match { |
|
60215 | 148 |
case Parsers.Success((x, c), rest) => toks += x; ctxt = c; in = rest |
59083 | 149 |
case Parsers.NoSuccess(_, rest) => |
150 |
error("Unexpected failure of tokenizing input:\n" + rest.source.toString) |
|
151 |
} |
|
152 |
} |
|
153 |
(toks.toList, ctxt) |
|
154 |
} |
|
155 |
||
64671 | 156 |
val newline: Token = explode(Keyword.Keywords.empty, "\n").head |
157 |
||
59083 | 158 |
|
69603 | 159 |
/* embedded */ |
160 |
||
161 |
def read_embedded(keywords: Keyword.Keywords, inp: CharSequence): Option[Token] = |
|
162 |
explode(keywords, inp) match { |
|
163 |
case List(tok) if tok.is_embedded => Some(tok) |
|
164 |
case _ => None |
|
165 |
} |
|
166 |
||
167 |
||
65523 | 168 |
/* names */ |
169 |
||
170 |
def read_name(keywords: Keyword.Keywords, inp: CharSequence): Option[Token] = |
|
171 |
explode(keywords, inp) match { |
|
172 |
case List(tok) if tok.is_name => Some(tok) |
|
173 |
case _ => None |
|
174 |
} |
|
175 |
||
176 |
def quote_name(keywords: Keyword.Keywords, name: String): String = |
|
177 |
if (read_name(keywords, name).isDefined) name |
|
178 |
else quote(name.replace("\"", "\\\"")) |
|
179 |
||
180 |
||
67132 | 181 |
/* plain antiquotation (0 or 1 args) */ |
182 |
||
183 |
def read_antiq_arg(keywords: Keyword.Keywords, inp: CharSequence): Option[(String, Option[String])] = |
|
184 |
explode(keywords, inp).filter(_.is_proper) match { |
|
185 |
case List(t) if t.is_name => Some(t.content, None) |
|
186 |
case List(t1, t2) if t1.is_name && t2.is_embedded => Some(t1.content, Some(t2.content)) |
|
187 |
case _ => None |
|
188 |
} |
|
189 |
||
190 |
||
59735 | 191 |
/* implode */ |
192 |
||
193 |
def implode(toks: List[Token]): String = |
|
194 |
toks match { |
|
195 |
case List(tok) => tok.source |
|
60215 | 196 |
case _ => toks.map(_.source).mkString |
59735 | 197 |
} |
198 |
||
199 |
||
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
200 |
/* token reader */ |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
201 |
|
56464 | 202 |
object Pos |
203 |
{ |
|
59706
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
wenzelm
parents:
59705
diff
changeset
|
204 |
val none: Pos = new Pos(0, 0, "", "") |
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
wenzelm
parents:
59705
diff
changeset
|
205 |
val start: Pos = new Pos(1, 1, "", "") |
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
wenzelm
parents:
59705
diff
changeset
|
206 |
def file(file: String): Pos = new Pos(1, 1, file, "") |
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
wenzelm
parents:
59705
diff
changeset
|
207 |
def id(id: String): Pos = new Pos(0, 1, "", id) |
59715
4f0d0e4ad68d
avoid duplicate header errors, more precise positions;
wenzelm
parents:
59706
diff
changeset
|
208 |
val command: Pos = id(Markup.COMMAND) |
56464 | 209 |
} |
210 |
||
59671
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents:
59122
diff
changeset
|
211 |
final class Pos private[Token]( |
59696 | 212 |
val line: Int, |
213 |
val offset: Symbol.Offset, |
|
59706
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
wenzelm
parents:
59705
diff
changeset
|
214 |
val file: String, |
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
wenzelm
parents:
59705
diff
changeset
|
215 |
val id: String) |
64824 | 216 |
extends input.Position |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
217 |
{ |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
218 |
def column = 0 |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
219 |
def lineContents = "" |
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
220 |
|
67895 | 221 |
def advance(token: Token): Pos = advance(token.source) |
222 |
def advance(source: String): Pos = |
|
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
223 |
{ |
59671
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents:
59122
diff
changeset
|
224 |
var line1 = line |
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents:
59122
diff
changeset
|
225 |
var offset1 = offset |
67895 | 226 |
for (s <- Symbol.iterator(source)) { |
59671
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents:
59122
diff
changeset
|
227 |
if (line1 > 0 && Symbol.is_newline(s)) line1 += 1 |
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents:
59122
diff
changeset
|
228 |
if (offset1 > 0) offset1 += 1 |
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents:
59122
diff
changeset
|
229 |
} |
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents:
59122
diff
changeset
|
230 |
if (line1 == line && offset1 == offset) this |
59706
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
wenzelm
parents:
59705
diff
changeset
|
231 |
else new Pos(line1, offset1, file, id) |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
232 |
} |
56464 | 233 |
|
59695 | 234 |
private def position(end_offset: Symbol.Offset): Position.T = |
59671
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents:
59122
diff
changeset
|
235 |
(if (line > 0) Position.Line(line) else Nil) ::: |
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents:
59122
diff
changeset
|
236 |
(if (offset > 0) Position.Offset(offset) else Nil) ::: |
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents:
59122
diff
changeset
|
237 |
(if (end_offset > 0) Position.End_Offset(end_offset) else Nil) ::: |
59706
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
wenzelm
parents:
59705
diff
changeset
|
238 |
(if (file != "") Position.File(file) else Nil) ::: |
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
wenzelm
parents:
59705
diff
changeset
|
239 |
(if (id != "") Position.Id_String(id) else Nil) |
59671
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents:
59122
diff
changeset
|
240 |
|
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents:
59122
diff
changeset
|
241 |
def position(): Position.T = position(0) |
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents:
59122
diff
changeset
|
242 |
def position(token: Token): Position.T = position(advance(token).offset) |
67895 | 243 |
def position(source: String): Position.T = position(advance(source).offset) |
59671
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
wenzelm
parents:
59122
diff
changeset
|
244 |
|
64728 | 245 |
override def toString: String = Position.here(position(), delimited = false) |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
246 |
} |
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
247 |
|
64824 | 248 |
abstract class Reader extends input.Reader[Token] |
34157
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
wenzelm
parents:
34143
diff
changeset
|
249 |
|
56464 | 250 |
private class Token_Reader(tokens: List[Token], val pos: Pos) extends Reader |
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
251 |
{ |
71601 | 252 |
def first: Token = tokens.head |
253 |
def rest: Token_Reader = new Token_Reader(tokens.tail, pos.advance(first)) |
|
254 |
def atEnd: Boolean = tokens.isEmpty |
|
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
255 |
} |
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
256 |
|
59705 | 257 |
def reader(tokens: List[Token], start: Token.Pos): Reader = |
258 |
new Token_Reader(tokens, start) |
|
65335 | 259 |
|
260 |
||
261 |
/* XML data representation */ |
|
262 |
||
263 |
val encode: XML.Encode.T[Token] = (tok: Token) => |
|
264 |
{ |
|
265 |
import XML.Encode._ |
|
266 |
pair(int, string)(tok.kind.id, tok.source) |
|
267 |
} |
|
268 |
||
269 |
val decode: XML.Decode.T[Token] = (body: XML.Body) => |
|
270 |
{ |
|
271 |
import XML.Decode._ |
|
272 |
val (k, s) = pair(int, string)(body) |
|
273 |
Token(Kind(k), s) |
|
274 |
} |
|
34139
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
275 |
} |
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
wenzelm
parents:
diff
changeset
|
276 |
|
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
277 |
|
60215 | 278 |
sealed case class Token(kind: Token.Kind.Value, source: String) |
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
279 |
{ |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
280 |
def is_command: Boolean = kind == Token.Kind.COMMAND |
63446 | 281 |
def is_command(name: String): Boolean = kind == Token.Kind.COMMAND && source == name |
48718 | 282 |
def is_keyword: Boolean = kind == Token.Kind.KEYWORD |
63446 | 283 |
def is_keyword(name: String): Boolean = kind == Token.Kind.KEYWORD && source == name |
63450 | 284 |
def is_keyword(name: Char): Boolean = |
285 |
kind == Token.Kind.KEYWORD && source.length == 1 && source(0) == name |
|
55505 | 286 |
def is_delimiter: Boolean = is_keyword && !Symbol.is_ascii_identifier(source) |
48365
d88aefda01c4
basic support for stand-alone options with external string representation;
wenzelm
parents:
48349
diff
changeset
|
287 |
def is_ident: Boolean = kind == Token.Kind.IDENT |
48605
e777363440d6
allow negative int values as well, according to real = int | float;
wenzelm
parents:
48599
diff
changeset
|
288 |
def is_sym_ident: Boolean = kind == Token.Kind.SYM_IDENT |
46943 | 289 |
def is_string: Boolean = kind == Token.Kind.STRING |
48349
a78e5d399599
support Session.Queue with ordering and dependencies;
wenzelm
parents:
48335
diff
changeset
|
290 |
def is_nat: Boolean = kind == Token.Kind.NAT |
48365
d88aefda01c4
basic support for stand-alone options with external string representation;
wenzelm
parents:
48349
diff
changeset
|
291 |
def is_float: Boolean = kind == Token.Kind.FLOAT |
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
292 |
def is_name: Boolean = |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
293 |
kind == Token.Kind.IDENT || |
62969 | 294 |
kind == Token.Kind.LONG_IDENT || |
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
295 |
kind == Token.Kind.SYM_IDENT || |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
296 |
kind == Token.Kind.STRING || |
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
297 |
kind == Token.Kind.NAT |
64471
c40c2975fb02
more uniform path syntax, as in ML (see 5a7c919a4ada);
wenzelm
parents:
63477
diff
changeset
|
298 |
def is_embedded: Boolean = is_name || |
c40c2975fb02
more uniform path syntax, as in ML (see 5a7c919a4ada);
wenzelm
parents:
63477
diff
changeset
|
299 |
kind == Token.Kind.CARTOUCHE || |
c40c2975fb02
more uniform path syntax, as in ML (see 5a7c919a4ada);
wenzelm
parents:
63477
diff
changeset
|
300 |
kind == Token.Kind.VAR || |
c40c2975fb02
more uniform path syntax, as in ML (see 5a7c919a4ada);
wenzelm
parents:
63477
diff
changeset
|
301 |
kind == Token.Kind.TYPE_IDENT || |
c40c2975fb02
more uniform path syntax, as in ML (see 5a7c919a4ada);
wenzelm
parents:
63477
diff
changeset
|
302 |
kind == Token.Kind.TYPE_VAR |
67432 | 303 |
def is_text: Boolean = is_embedded || kind == Token.Kind.VERBATIM |
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
304 |
def is_space: Boolean = kind == Token.Kind.SPACE |
67441 | 305 |
def is_informal_comment: Boolean = kind == Token.Kind.INFORMAL_COMMENT |
306 |
def is_formal_comment: Boolean = kind == Token.Kind.FORMAL_COMMENT |
|
69891
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
wenzelm
parents:
69603
diff
changeset
|
307 |
def is_marker: Boolean = |
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
wenzelm
parents:
69603
diff
changeset
|
308 |
kind == Token.Kind.FORMAL_COMMENT && |
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
wenzelm
parents:
69603
diff
changeset
|
309 |
(source.startsWith(Symbol.marker) || source.startsWith(Symbol.marker_decoded)) |
67441 | 310 |
def is_comment: Boolean = is_informal_comment || is_formal_comment |
68729
3a02b424d5fb
clarified ignored span / core range: include formal comments, e.g. relevant for error messages from antiquotations;
wenzelm
parents:
67895
diff
changeset
|
311 |
def is_ignored: Boolean = is_space || is_informal_comment |
48599 | 312 |
def is_proper: Boolean = !is_space && !is_comment |
48754
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
313 |
def is_error: Boolean = kind == Token.Kind.ERROR |
47012
0e246130486b
clarified command span classification: strict Command.is_command, permissive Command.name;
wenzelm
parents:
46943
diff
changeset
|
314 |
def is_unparsed: Boolean = kind == Token.Kind.UNPARSED |
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
315 |
|
48754
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
316 |
def is_unfinished: Boolean = is_error && |
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
317 |
(source.startsWith("\"") || |
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
318 |
source.startsWith("`") || |
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
319 |
source.startsWith("{*") || |
57021
6a8fd2ac6756
explicit treatment of unfinished cartouches, which is important for Thy_Syntax.consolidate_spans;
wenzelm
parents:
56998
diff
changeset
|
320 |
source.startsWith("(*") || |
6a8fd2ac6756
explicit treatment of unfinished cartouches, which is important for Thy_Syntax.consolidate_spans;
wenzelm
parents:
56998
diff
changeset
|
321 |
source.startsWith(Symbol.open) || |
6a8fd2ac6756
explicit treatment of unfinished cartouches, which is important for Thy_Syntax.consolidate_spans;
wenzelm
parents:
56998
diff
changeset
|
322 |
source.startsWith(Symbol.open_decoded)) |
48754
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
wenzelm
parents:
48718
diff
changeset
|
323 |
|
71601 | 324 |
def is_open_bracket: Boolean = is_keyword && Word.open_brackets.exists(is_keyword) |
325 |
def is_close_bracket: Boolean = is_keyword && Word.close_brackets.exists(is_keyword) |
|
63450 | 326 |
|
63446 | 327 |
def is_begin: Boolean = is_keyword("begin") |
328 |
def is_end: Boolean = is_command("end") |
|
63477
f5c81436b930
clarified indentation: 'begin' is treated like a separate command without indent;
wenzelm
parents:
63450
diff
changeset
|
329 |
def is_begin_or_command: Boolean = is_begin || is_command |
43611 | 330 |
|
72669 | 331 |
def symbol_length: Symbol.Offset = Symbol.iterator(source).length |
332 |
||
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
333 |
def content: String = |
55492
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
wenzelm
parents:
55137
diff
changeset
|
334 |
if (kind == Token.Kind.STRING) Scan.Parsers.quoted_content("\"", source) |
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
wenzelm
parents:
55137
diff
changeset
|
335 |
else if (kind == Token.Kind.ALT_STRING) Scan.Parsers.quoted_content("`", source) |
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
wenzelm
parents:
55137
diff
changeset
|
336 |
else if (kind == Token.Kind.VERBATIM) Scan.Parsers.verbatim_content(source) |
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
wenzelm
parents:
55137
diff
changeset
|
337 |
else if (kind == Token.Kind.CARTOUCHE) Scan.Parsers.cartouche_content(source) |
67439
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
wenzelm
parents:
67432
diff
changeset
|
338 |
else if (kind == Token.Kind.INFORMAL_COMMENT) Scan.Parsers.comment_content(source) |
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
wenzelm
parents:
67432
diff
changeset
|
339 |
else if (kind == Token.Kind.FORMAL_COMMENT) Comment.content(source) |
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
340 |
else source |
66914 | 341 |
|
66915
f4259adc928a
disallow blanks, relevant for session_name / theory_name e.g. in build log files;
wenzelm
parents:
66914
diff
changeset
|
342 |
def is_system_name: Boolean = |
f4259adc928a
disallow blanks, relevant for session_name / theory_name e.g. in build log files;
wenzelm
parents:
66914
diff
changeset
|
343 |
{ |
f4259adc928a
disallow blanks, relevant for session_name / theory_name e.g. in build log files;
wenzelm
parents:
66914
diff
changeset
|
344 |
val s = content |
69551
adb52af5ba55
exclude file name components that are special on Windows;
wenzelm
parents:
68730
diff
changeset
|
345 |
is_name && Path.is_wellformed(s) && |
71601 | 346 |
!s.exists(Symbol.is_ascii_blank) && |
69551
adb52af5ba55
exclude file name components that are special on Windows;
wenzelm
parents:
68730
diff
changeset
|
347 |
!Path.is_reserved(s) |
66915
f4259adc928a
disallow blanks, relevant for session_name / theory_name e.g. in build log files;
wenzelm
parents:
66914
diff
changeset
|
348 |
} |
36956
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
wenzelm
parents:
34311
diff
changeset
|
349 |
} |