| author | Fabian Huch <huch@in.tum.de> | 
| Tue, 11 Jun 2024 14:27:04 +0200 | |
| changeset 80347 | 613ac8c77a84 | 
| parent 77030 | d7dc5b1e4381 | 
| child 82120 | a4aa45999dd7 | 
| permissions | -rw-r--r-- | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
1  | 
/* Title: Pure/Isar/token.scala  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
2  | 
Author: Makarius  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
3  | 
|
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
4  | 
Outer token syntax for Isabelle/Isar.  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
5  | 
*/  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
6  | 
|
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
7  | 
package isabelle  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
8  | 
|
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
9  | 
|
| 59083 | 10  | 
import scala.collection.mutable  | 
11  | 
import scala.util.parsing.input  | 
|
12  | 
||
13  | 
||
| 75393 | 14  | 
object Token {
 | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
15  | 
/* tokens */  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
16  | 
|
| 75393 | 17  | 
  object Kind extends Enumeration {
 | 
| 59081 | 18  | 
/*immediate source*/  | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
19  | 
    val COMMAND = Value("command")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
20  | 
    val KEYWORD = Value("keyword")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
21  | 
    val IDENT = Value("identifier")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
22  | 
    val LONG_IDENT = Value("long identifier")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
23  | 
    val SYM_IDENT = Value("symbolic identifier")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
24  | 
    val VAR = Value("schematic variable")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
25  | 
    val TYPE_IDENT = Value("type variable")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
26  | 
    val TYPE_VAR = Value("schematic type variable")
 | 
| 
40290
 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 
wenzelm 
parents: 
38367 
diff
changeset
 | 
27  | 
    val NAT = Value("natural number")
 | 
| 
 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 
wenzelm 
parents: 
38367 
diff
changeset
 | 
28  | 
    val FLOAT = Value("floating-point number")
 | 
| 59081 | 29  | 
    val SPACE = Value("white space")
 | 
30  | 
/*delimited content*/  | 
|
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
31  | 
    val STRING = Value("string")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
32  | 
    val ALT_STRING = Value("back-quoted string")
 | 
| 55512 | 33  | 
    val CARTOUCHE = Value("text cartouche")
 | 
| 
74373
 
6e4093927dbb
outer syntax: support for control-cartouche tokens;
 
wenzelm 
parents: 
72744 
diff
changeset
 | 
34  | 
    val CONTROL = Value("control cartouche")
 | 
| 
67439
 
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
 
wenzelm 
parents: 
67432 
diff
changeset
 | 
35  | 
    val INFORMAL_COMMENT = Value("informal comment")
 | 
| 
 
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
 
wenzelm 
parents: 
67432 
diff
changeset
 | 
36  | 
    val FORMAL_COMMENT = Value("formal comment")
 | 
| 59081 | 37  | 
/*special content*/  | 
| 
48754
 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 
wenzelm 
parents: 
48718 
diff
changeset
 | 
38  | 
    val ERROR = Value("bad input")
 | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
39  | 
    val UNPARSED = Value("unparsed input")
 | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
40  | 
}  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
41  | 
|
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
42  | 
|
| 55494 | 43  | 
/* parsers */  | 
44  | 
||
45  | 
object Parsers extends Parsers  | 
|
46  | 
||
| 75393 | 47  | 
  trait Parsers extends Scan.Parsers with Comment.Parsers {
 | 
48  | 
    private def delimited_token: Parser[Token] = {
 | 
|
| 55494 | 49  | 
      val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
 | 
50  | 
      val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
 | 
|
| 
67439
 
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
 
wenzelm 
parents: 
67432 
diff
changeset
 | 
51  | 
val cmt = comment ^^ (x => Token(Token.Kind.INFORMAL_COMMENT, x))  | 
| 
 
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
 
wenzelm 
parents: 
67432 
diff
changeset
 | 
52  | 
val formal_cmt = comment_cartouche ^^ (x => Token(Token.Kind.FORMAL_COMMENT, x))  | 
| 
74373
 
6e4093927dbb
outer syntax: support for control-cartouche tokens;
 
wenzelm 
parents: 
72744 
diff
changeset
 | 
53  | 
val cart = cartouche ^^ (x => Token(Token.Kind.CARTOUCHE, x))  | 
| 
 
6e4093927dbb
outer syntax: support for control-cartouche tokens;
 
wenzelm 
parents: 
72744 
diff
changeset
 | 
54  | 
val ctrl = control_cartouche ^^ (x => Token(Token.Kind.CONTROL, x))  | 
| 55494 | 55  | 
|
| 74887 | 56  | 
string | (alt_string | (cmt | (formal_cmt | (cart | ctrl))))  | 
| 55494 | 57  | 
}  | 
58  | 
||
| 75393 | 59  | 
    private def other_token(keywords: Keyword.Keywords): Parser[Token] = {
 | 
| 55494 | 60  | 
val letdigs1 = many1(Symbol.is_letdig)  | 
| 62103 | 61  | 
val sub = one(s => s == Symbol.sub_decoded || s == Symbol.sub)  | 
| 55494 | 62  | 
val id =  | 
63  | 
one(Symbol.is_letter) ~  | 
|
64  | 
          (rep(letdigs1 | (sub ~ letdigs1 ^^ { case x ~ y => x + y })) ^^ (_.mkString)) ^^
 | 
|
65  | 
        { case x ~ y => x + y }
 | 
|
66  | 
||
67  | 
val nat = many1(Symbol.is_digit)  | 
|
68  | 
      val natdot = nat ~ "." ~ nat ^^ { case x ~ y ~ z => x + y + z }
 | 
|
69  | 
      val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x }
 | 
|
70  | 
||
71  | 
      val ident = id ~ rep("." ~> id) ^^
 | 
|
72  | 
        { case x ~ Nil => Token(Token.Kind.IDENT, x)
 | 
|
73  | 
          case x ~ ys => Token(Token.Kind.LONG_IDENT, (x :: ys).mkString(".")) }
 | 
|
74  | 
||
75  | 
      val var_ = "?" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.VAR, x + y) }
 | 
|
76  | 
      val type_ident = "'" ~ id ^^ { case x ~ y => Token(Token.Kind.TYPE_IDENT, x + y) }
 | 
|
77  | 
      val type_var = "?'" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.TYPE_VAR, x + y) }
 | 
|
78  | 
val nat_ = nat ^^ (x => Token(Token.Kind.NAT, x))  | 
|
79  | 
val float =  | 
|
80  | 
        ("-" ~ natdot ^^ { case x ~ y => x + y } | natdot) ^^ (x => Token(Token.Kind.FLOAT, x))
 | 
|
81  | 
||
82  | 
val sym_ident =  | 
|
83  | 
(many1(Symbol.is_symbolic_char) | one(sym => Symbol.is_symbolic(sym))) ^^  | 
|
84  | 
(x => Token(Token.Kind.SYM_IDENT, x))  | 
|
85  | 
||
| 58899 | 86  | 
val keyword =  | 
| 
75384
 
20093a63d03b
updated to scala-parser-combinators 2.1.0, which also fits to scala-3.0.2;
 
wenzelm 
parents: 
74887 
diff
changeset
 | 
87  | 
literal(keywords.major) ^^ (x => Token(Token.Kind.COMMAND, x)) |||  | 
| 
 
20093a63d03b
updated to scala-parser-combinators 2.1.0, which also fits to scala-3.0.2;
 
wenzelm 
parents: 
74887 
diff
changeset
 | 
88  | 
literal(keywords.minor) ^^ (x => Token(Token.Kind.KEYWORD, x))  | 
| 55494 | 89  | 
|
90  | 
val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))  | 
|
91  | 
||
92  | 
val recover_delimited =  | 
|
93  | 
        (recover_quoted("\"") |
 | 
|
94  | 
          (recover_quoted("`") |
 | 
|
| 74887 | 95  | 
(recover_cartouche | recover_comment))) ^^ (x => Token(Token.Kind.ERROR, x))  | 
| 55494 | 96  | 
|
97  | 
val bad = one(_ => true) ^^ (x => Token(Token.Kind.ERROR, x))  | 
|
98  | 
||
99  | 
space | (recover_delimited |  | 
|
| 
75384
 
20093a63d03b
updated to scala-parser-combinators 2.1.0, which also fits to scala-3.0.2;
 
wenzelm 
parents: 
74887 
diff
changeset
 | 
100  | 
((keyword |||  | 
| 
 
20093a63d03b
updated to scala-parser-combinators 2.1.0, which also fits to scala-3.0.2;
 
wenzelm 
parents: 
74887 
diff
changeset
 | 
101  | 
(ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident))))))) | bad))  | 
| 55494 | 102  | 
}  | 
103  | 
||
| 58900 | 104  | 
def token(keywords: Keyword.Keywords): Parser[Token] =  | 
| 67446 | 105  | 
delimited_token | other_token(keywords)  | 
| 55494 | 106  | 
|
| 75393 | 107  | 
def token_line(  | 
108  | 
keywords: Keyword.Keywords,  | 
|
109  | 
ctxt: Scan.Line_Context  | 
|
110  | 
    ) : Parser[(Token, Scan.Line_Context)] = {
 | 
|
| 55494 | 111  | 
val string =  | 
| 
55510
 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 
wenzelm 
parents: 
55505 
diff
changeset
 | 
112  | 
        quoted_line("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) }
 | 
| 55494 | 113  | 
val alt_string =  | 
| 
55510
 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 
wenzelm 
parents: 
55505 
diff
changeset
 | 
114  | 
        quoted_line("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) }
 | 
| 
 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 
wenzelm 
parents: 
55505 
diff
changeset
 | 
115  | 
      val cart = cartouche_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.CARTOUCHE, x), c) }
 | 
| 
67439
 
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
 
wenzelm 
parents: 
67432 
diff
changeset
 | 
116  | 
      val cmt = comment_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.INFORMAL_COMMENT, x), c) }
 | 
| 
 
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
 
wenzelm 
parents: 
67432 
diff
changeset
 | 
117  | 
val formal_cmt =  | 
| 
 
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
 
wenzelm 
parents: 
67432 
diff
changeset
 | 
118  | 
        comment_cartouche_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.FORMAL_COMMENT, x), c) }
 | 
| 58900 | 119  | 
      val other = other_token(keywords) ^^ { case x => (x, Scan.Finished) }
 | 
| 55494 | 120  | 
|
| 74887 | 121  | 
string | (alt_string | (cart | (cmt | (formal_cmt | other))))  | 
| 55494 | 122  | 
}  | 
123  | 
}  | 
|
124  | 
||
125  | 
||
| 59083 | 126  | 
/* explode */  | 
127  | 
||
128  | 
def explode(keywords: Keyword.Keywords, inp: CharSequence): List[Token] =  | 
|
| 64824 | 129  | 
    Parsers.parseAll(Parsers.rep(Parsers.token(keywords)), Scan.char_reader(inp)) match {
 | 
| 59083 | 130  | 
case Parsers.Success(tokens, _) => tokens  | 
131  | 
      case _ => error("Unexpected failure of tokenizing input:\n" + inp.toString)
 | 
|
132  | 
}  | 
|
133  | 
||
| 75393 | 134  | 
def explode_line(  | 
135  | 
keywords: Keyword.Keywords,  | 
|
136  | 
inp: CharSequence,  | 
|
137  | 
context: Scan.Line_Context  | 
|
138  | 
  ) : (List[Token], Scan.Line_Context) = {
 | 
|
| 64824 | 139  | 
var in: input.Reader[Char] = Scan.char_reader(inp)  | 
| 59083 | 140  | 
val toks = new mutable.ListBuffer[Token]  | 
141  | 
var ctxt = context  | 
|
142  | 
    while (!in.atEnd) {
 | 
|
| 75420 | 143  | 
val result = Parsers.parse(Parsers.token_line(keywords, ctxt), in)  | 
144  | 
      (result : @unchecked) match {
 | 
|
| 60215 | 145  | 
case Parsers.Success((x, c), rest) => toks += x; ctxt = c; in = rest  | 
| 59083 | 146  | 
case Parsers.NoSuccess(_, rest) =>  | 
147  | 
          error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
 | 
|
148  | 
}  | 
|
149  | 
}  | 
|
150  | 
(toks.toList, ctxt)  | 
|
151  | 
}  | 
|
152  | 
||
| 64671 | 153  | 
val newline: Token = explode(Keyword.Keywords.empty, "\n").head  | 
154  | 
||
| 59083 | 155  | 
|
| 69603 | 156  | 
/* embedded */  | 
157  | 
||
158  | 
def read_embedded(keywords: Keyword.Keywords, inp: CharSequence): Option[Token] =  | 
|
159  | 
    explode(keywords, inp) match {
 | 
|
160  | 
case List(tok) if tok.is_embedded => Some(tok)  | 
|
161  | 
case _ => None  | 
|
162  | 
}  | 
|
163  | 
||
164  | 
||
| 65523 | 165  | 
/* names */  | 
166  | 
||
167  | 
def read_name(keywords: Keyword.Keywords, inp: CharSequence): Option[Token] =  | 
|
168  | 
    explode(keywords, inp) match {
 | 
|
169  | 
case List(tok) if tok.is_name => Some(tok)  | 
|
170  | 
case _ => None  | 
|
171  | 
}  | 
|
172  | 
||
173  | 
def quote_name(keywords: Keyword.Keywords, name: String): String =  | 
|
174  | 
if (read_name(keywords, name).isDefined) name  | 
|
175  | 
    else quote(name.replace("\"", "\\\""))
 | 
|
176  | 
||
177  | 
||
| 67132 | 178  | 
/* plain antiquotation (0 or 1 args) */  | 
179  | 
||
180  | 
def read_antiq_arg(keywords: Keyword.Keywords, inp: CharSequence): Option[(String, Option[String])] =  | 
|
181  | 
    explode(keywords, inp).filter(_.is_proper) match {
 | 
|
182  | 
case List(t) if t.is_name => Some(t.content, None)  | 
|
183  | 
case List(t1, t2) if t1.is_name && t2.is_embedded => Some(t1.content, Some(t2.content))  | 
|
184  | 
case _ => None  | 
|
185  | 
}  | 
|
186  | 
||
187  | 
||
| 59735 | 188  | 
/* implode */  | 
189  | 
||
190  | 
def implode(toks: List[Token]): String =  | 
|
191  | 
    toks match {
 | 
|
192  | 
case List(tok) => tok.source  | 
|
| 60215 | 193  | 
case _ => toks.map(_.source).mkString  | 
| 59735 | 194  | 
}  | 
195  | 
||
196  | 
||
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
197  | 
/* token reader */  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
198  | 
|
| 75393 | 199  | 
  object Pos {
 | 
| 
59706
 
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
 
wenzelm 
parents: 
59705 
diff
changeset
 | 
200  | 
val none: Pos = new Pos(0, 0, "", "")  | 
| 
 
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
 
wenzelm 
parents: 
59705 
diff
changeset
 | 
201  | 
val start: Pos = new Pos(1, 1, "", "")  | 
| 
 
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
 
wenzelm 
parents: 
59705 
diff
changeset
 | 
202  | 
def file(file: String): Pos = new Pos(1, 1, file, "")  | 
| 
 
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
 
wenzelm 
parents: 
59705 
diff
changeset
 | 
203  | 
def id(id: String): Pos = new Pos(0, 1, "", id)  | 
| 
59715
 
4f0d0e4ad68d
avoid duplicate header errors, more precise positions;
 
wenzelm 
parents: 
59706 
diff
changeset
 | 
204  | 
val command: Pos = id(Markup.COMMAND)  | 
| 56464 | 205  | 
}  | 
206  | 
||
| 
59671
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
207  | 
final class Pos private[Token](  | 
| 59696 | 208  | 
val line: Int,  | 
209  | 
val offset: Symbol.Offset,  | 
|
| 
59706
 
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
 
wenzelm 
parents: 
59705 
diff
changeset
 | 
210  | 
val file: String,  | 
| 
 
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
 
wenzelm 
parents: 
59705 
diff
changeset
 | 
211  | 
val id: String)  | 
| 75393 | 212  | 
  extends input.Position {
 | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
213  | 
def column = 0  | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
214  | 
def lineContents = ""  | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
215  | 
|
| 67895 | 216  | 
def advance(token: Token): Pos = advance(token.source)  | 
| 75393 | 217  | 
    def advance(source: String): Pos = {
 | 
| 
59671
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
218  | 
var line1 = line  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
219  | 
var offset1 = offset  | 
| 67895 | 220  | 
      for (s <- Symbol.iterator(source)) {
 | 
| 
59671
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
221  | 
if (line1 > 0 && Symbol.is_newline(s)) line1 += 1  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
222  | 
if (offset1 > 0) offset1 += 1  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
223  | 
}  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
224  | 
if (line1 == line && offset1 == offset) this  | 
| 
59706
 
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
 
wenzelm 
parents: 
59705 
diff
changeset
 | 
225  | 
else new Pos(line1, offset1, file, id)  | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
226  | 
}  | 
| 56464 | 227  | 
|
| 
77030
 
d7dc5b1e4381
proper positions for Isabelle/ML, instead of Isabelle/Scala;
 
wenzelm 
parents: 
75420 
diff
changeset
 | 
228  | 
def position(end_offset: Symbol.Offset): Position.T =  | 
| 
59671
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
229  | 
(if (line > 0) Position.Line(line) else Nil) :::  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
230  | 
(if (offset > 0) Position.Offset(offset) else Nil) :::  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
231  | 
(if (end_offset > 0) Position.End_Offset(end_offset) else Nil) :::  | 
| 
59706
 
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
 
wenzelm 
parents: 
59705 
diff
changeset
 | 
232  | 
(if (file != "") Position.File(file) else Nil) :::  | 
| 
 
bf6ca55aae13
proper command id for inlined errors, which is important for Command.State.accumulate;
 
wenzelm 
parents: 
59705 
diff
changeset
 | 
233  | 
(if (id != "") Position.Id_String(id) else Nil)  | 
| 
59671
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
234  | 
|
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
235  | 
def position(): Position.T = position(0)  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
236  | 
def position(token: Token): Position.T = position(advance(token).offset)  | 
| 67895 | 237  | 
def position(source: String): Position.T = position(advance(source).offset)  | 
| 
59671
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
238  | 
|
| 64728 | 239  | 
override def toString: String = Position.here(position(), delimited = false)  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
240  | 
}  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
241  | 
|
| 64824 | 242  | 
abstract class Reader extends input.Reader[Token]  | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
243  | 
|
| 75393 | 244  | 
  private class Token_Reader(tokens: List[Token], val pos: Pos) extends Reader {
 | 
| 71601 | 245  | 
def first: Token = tokens.head  | 
246  | 
def rest: Token_Reader = new Token_Reader(tokens.tail, pos.advance(first))  | 
|
247  | 
def atEnd: Boolean = tokens.isEmpty  | 
|
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
248  | 
}  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
249  | 
|
| 59705 | 250  | 
def reader(tokens: List[Token], start: Token.Pos): Reader =  | 
251  | 
new Token_Reader(tokens, start)  | 
|
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
252  | 
}  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
253  | 
|
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
254  | 
|
| 75393 | 255  | 
sealed case class Token(kind: Token.Kind.Value, source: String) {
 | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
256  | 
def is_command: Boolean = kind == Token.Kind.COMMAND  | 
| 63446 | 257  | 
def is_command(name: String): Boolean = kind == Token.Kind.COMMAND && source == name  | 
| 48718 | 258  | 
def is_keyword: Boolean = kind == Token.Kind.KEYWORD  | 
| 63446 | 259  | 
def is_keyword(name: String): Boolean = kind == Token.Kind.KEYWORD && source == name  | 
| 63450 | 260  | 
def is_keyword(name: Char): Boolean =  | 
261  | 
kind == Token.Kind.KEYWORD && source.length == 1 && source(0) == name  | 
|
| 55505 | 262  | 
def is_delimiter: Boolean = is_keyword && !Symbol.is_ascii_identifier(source)  | 
| 
48365
 
d88aefda01c4
basic support for stand-alone options with external string representation;
 
wenzelm 
parents: 
48349 
diff
changeset
 | 
263  | 
def is_ident: Boolean = kind == Token.Kind.IDENT  | 
| 
48605
 
e777363440d6
allow negative int values as well, according to real = int | float;
 
wenzelm 
parents: 
48599 
diff
changeset
 | 
264  | 
def is_sym_ident: Boolean = kind == Token.Kind.SYM_IDENT  | 
| 46943 | 265  | 
def is_string: Boolean = kind == Token.Kind.STRING  | 
| 
48349
 
a78e5d399599
support Session.Queue with ordering and dependencies;
 
wenzelm 
parents: 
48335 
diff
changeset
 | 
266  | 
def is_nat: Boolean = kind == Token.Kind.NAT  | 
| 
48365
 
d88aefda01c4
basic support for stand-alone options with external string representation;
 
wenzelm 
parents: 
48349 
diff
changeset
 | 
267  | 
def is_float: Boolean = kind == Token.Kind.FLOAT  | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
268  | 
def is_name: Boolean =  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
269  | 
kind == Token.Kind.IDENT ||  | 
| 62969 | 270  | 
kind == Token.Kind.LONG_IDENT ||  | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
271  | 
kind == Token.Kind.SYM_IDENT ||  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
272  | 
kind == Token.Kind.STRING ||  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
273  | 
kind == Token.Kind.NAT  | 
| 
64471
 
c40c2975fb02
more uniform path syntax, as in ML (see 5a7c919a4ada);
 
wenzelm 
parents: 
63477 
diff
changeset
 | 
274  | 
def is_embedded: Boolean = is_name ||  | 
| 
 
c40c2975fb02
more uniform path syntax, as in ML (see 5a7c919a4ada);
 
wenzelm 
parents: 
63477 
diff
changeset
 | 
275  | 
kind == Token.Kind.CARTOUCHE ||  | 
| 
 
c40c2975fb02
more uniform path syntax, as in ML (see 5a7c919a4ada);
 
wenzelm 
parents: 
63477 
diff
changeset
 | 
276  | 
kind == Token.Kind.VAR ||  | 
| 
 
c40c2975fb02
more uniform path syntax, as in ML (see 5a7c919a4ada);
 
wenzelm 
parents: 
63477 
diff
changeset
 | 
277  | 
kind == Token.Kind.TYPE_IDENT ||  | 
| 
 
c40c2975fb02
more uniform path syntax, as in ML (see 5a7c919a4ada);
 
wenzelm 
parents: 
63477 
diff
changeset
 | 
278  | 
kind == Token.Kind.TYPE_VAR  | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
279  | 
def is_space: Boolean = kind == Token.Kind.SPACE  | 
| 67441 | 280  | 
def is_informal_comment: Boolean = kind == Token.Kind.INFORMAL_COMMENT  | 
281  | 
def is_formal_comment: Boolean = kind == Token.Kind.FORMAL_COMMENT  | 
|
| 
69891
 
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
 
wenzelm 
parents: 
69603 
diff
changeset
 | 
282  | 
def is_marker: Boolean =  | 
| 
 
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
 
wenzelm 
parents: 
69603 
diff
changeset
 | 
283  | 
kind == Token.Kind.FORMAL_COMMENT &&  | 
| 
 
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
 
wenzelm 
parents: 
69603 
diff
changeset
 | 
284  | 
(source.startsWith(Symbol.marker) || source.startsWith(Symbol.marker_decoded))  | 
| 67441 | 285  | 
def is_comment: Boolean = is_informal_comment || is_formal_comment  | 
| 
68729
 
3a02b424d5fb
clarified ignored span / core range: include formal comments, e.g. relevant for error messages from antiquotations;
 
wenzelm 
parents: 
67895 
diff
changeset
 | 
286  | 
def is_ignored: Boolean = is_space || is_informal_comment  | 
| 48599 | 287  | 
def is_proper: Boolean = !is_space && !is_comment  | 
| 
48754
 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 
wenzelm 
parents: 
48718 
diff
changeset
 | 
288  | 
def is_error: Boolean = kind == Token.Kind.ERROR  | 
| 
47012
 
0e246130486b
clarified command span classification: strict Command.is_command, permissive Command.name;
 
wenzelm 
parents: 
46943 
diff
changeset
 | 
289  | 
def is_unparsed: Boolean = kind == Token.Kind.UNPARSED  | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
290  | 
|
| 
48754
 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 
wenzelm 
parents: 
48718 
diff
changeset
 | 
291  | 
def is_unfinished: Boolean = is_error &&  | 
| 
 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 
wenzelm 
parents: 
48718 
diff
changeset
 | 
292  | 
   (source.startsWith("\"") ||
 | 
| 
 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 
wenzelm 
parents: 
48718 
diff
changeset
 | 
293  | 
    source.startsWith("`") ||
 | 
| 
57021
 
6a8fd2ac6756
explicit treatment of unfinished cartouches, which is important for Thy_Syntax.consolidate_spans;
 
wenzelm 
parents: 
56998 
diff
changeset
 | 
294  | 
    source.startsWith("(*") ||
 | 
| 
 
6a8fd2ac6756
explicit treatment of unfinished cartouches, which is important for Thy_Syntax.consolidate_spans;
 
wenzelm 
parents: 
56998 
diff
changeset
 | 
295  | 
source.startsWith(Symbol.open) ||  | 
| 
 
6a8fd2ac6756
explicit treatment of unfinished cartouches, which is important for Thy_Syntax.consolidate_spans;
 
wenzelm 
parents: 
56998 
diff
changeset
 | 
296  | 
source.startsWith(Symbol.open_decoded))  | 
| 
48754
 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 
wenzelm 
parents: 
48718 
diff
changeset
 | 
297  | 
|
| 71601 | 298  | 
def is_open_bracket: Boolean = is_keyword && Word.open_brackets.exists(is_keyword)  | 
299  | 
def is_close_bracket: Boolean = is_keyword && Word.close_brackets.exists(is_keyword)  | 
|
| 63450 | 300  | 
|
| 63446 | 301  | 
  def is_begin: Boolean = is_keyword("begin")
 | 
302  | 
  def is_end: Boolean = is_command("end")
 | 
|
| 
63477
 
f5c81436b930
clarified indentation: 'begin' is treated like a separate command without indent;
 
wenzelm 
parents: 
63450 
diff
changeset
 | 
303  | 
def is_begin_or_command: Boolean = is_begin || is_command  | 
| 43611 | 304  | 
|
| 72669 | 305  | 
def symbol_length: Symbol.Offset = Symbol.iterator(source).length  | 
306  | 
||
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
307  | 
def content: String =  | 
| 
55492
 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 
wenzelm 
parents: 
55137 
diff
changeset
 | 
308  | 
    if (kind == Token.Kind.STRING) Scan.Parsers.quoted_content("\"", source)
 | 
| 
 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 
wenzelm 
parents: 
55137 
diff
changeset
 | 
309  | 
    else if (kind == Token.Kind.ALT_STRING) Scan.Parsers.quoted_content("`", source)
 | 
| 
 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 
wenzelm 
parents: 
55137 
diff
changeset
 | 
310  | 
else if (kind == Token.Kind.CARTOUCHE) Scan.Parsers.cartouche_content(source)  | 
| 
67439
 
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
 
wenzelm 
parents: 
67432 
diff
changeset
 | 
311  | 
else if (kind == Token.Kind.INFORMAL_COMMENT) Scan.Parsers.comment_content(source)  | 
| 
 
78759a7bd874
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
 
wenzelm 
parents: 
67432 
diff
changeset
 | 
312  | 
else if (kind == Token.Kind.FORMAL_COMMENT) Comment.content(source)  | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
313  | 
else source  | 
| 66914 | 314  | 
|
| 75393 | 315  | 
  def is_system_name: Boolean = {
 | 
| 
66915
 
f4259adc928a
disallow blanks, relevant for session_name / theory_name e.g. in build log files;
 
wenzelm 
parents: 
66914 
diff
changeset
 | 
316  | 
val s = content  | 
| 
69551
 
adb52af5ba55
exclude file name components that are special on Windows;
 
wenzelm 
parents: 
68730 
diff
changeset
 | 
317  | 
is_name && Path.is_wellformed(s) &&  | 
| 71601 | 318  | 
!s.exists(Symbol.is_ascii_blank) &&  | 
| 
69551
 
adb52af5ba55
exclude file name components that are special on Windows;
 
wenzelm 
parents: 
68730 
diff
changeset
 | 
319  | 
!Path.is_reserved(s)  | 
| 
66915
 
f4259adc928a
disallow blanks, relevant for session_name / theory_name e.g. in build log files;
 
wenzelm 
parents: 
66914 
diff
changeset
 | 
320  | 
}  | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
321  | 
}  |