| author | wenzelm | 
| Sun, 15 Mar 2015 20:35:47 +0100 | |
| changeset 59705 | 740a0ca7e09b | 
| parent 59701 | 8ab877c91992 | 
| child 59706 | bf6ca55aae13 | 
| permissions | -rw-r--r-- | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
1  | 
/* Title: Pure/Isar/token.scala  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
2  | 
Author: Makarius  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
3  | 
|
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
4  | 
Outer token syntax for Isabelle/Isar.  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
5  | 
*/  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
6  | 
|
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
7  | 
package isabelle  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
8  | 
|
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
9  | 
|
| 59083 | 10  | 
import scala.collection.mutable  | 
11  | 
import scala.util.parsing.input  | 
|
12  | 
||
13  | 
||
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
14  | 
object Token  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
15  | 
{
 | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
16  | 
/* tokens */  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
17  | 
|
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
18  | 
object Kind extends Enumeration  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
19  | 
  {
 | 
| 59081 | 20  | 
/*immediate source*/  | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
21  | 
    val COMMAND = Value("command")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
22  | 
    val KEYWORD = Value("keyword")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
23  | 
    val IDENT = Value("identifier")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
24  | 
    val LONG_IDENT = Value("long identifier")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
25  | 
    val SYM_IDENT = Value("symbolic identifier")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
26  | 
    val VAR = Value("schematic variable")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
27  | 
    val TYPE_IDENT = Value("type variable")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
28  | 
    val TYPE_VAR = Value("schematic type variable")
 | 
| 
40290
 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 
wenzelm 
parents: 
38367 
diff
changeset
 | 
29  | 
    val NAT = Value("natural number")
 | 
| 
 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 
wenzelm 
parents: 
38367 
diff
changeset
 | 
30  | 
    val FLOAT = Value("floating-point number")
 | 
| 59081 | 31  | 
    val SPACE = Value("white space")
 | 
32  | 
/*delimited content*/  | 
|
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
33  | 
    val STRING = Value("string")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
34  | 
    val ALT_STRING = Value("back-quoted string")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
35  | 
    val VERBATIM = Value("verbatim text")
 | 
| 55512 | 36  | 
    val CARTOUCHE = Value("text cartouche")
 | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
37  | 
    val COMMENT = Value("comment text")
 | 
| 59081 | 38  | 
/*special content*/  | 
| 
48754
 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 
wenzelm 
parents: 
48718 
diff
changeset
 | 
39  | 
    val ERROR = Value("bad input")
 | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
40  | 
    val UNPARSED = Value("unparsed input")
 | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
41  | 
}  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
42  | 
|
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
43  | 
|
| 55494 | 44  | 
/* parsers */  | 
45  | 
||
46  | 
object Parsers extends Parsers  | 
|
47  | 
||
48  | 
trait Parsers extends Scan.Parsers  | 
|
49  | 
  {
 | 
|
50  | 
private def delimited_token: Parser[Token] =  | 
|
51  | 
    {
 | 
|
52  | 
      val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
 | 
|
53  | 
      val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
 | 
|
54  | 
val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x))  | 
|
55  | 
val cart = cartouche ^^ (x => Token(Token.Kind.CARTOUCHE, x))  | 
|
56  | 
val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))  | 
|
57  | 
||
58  | 
string | (alt_string | (verb | (cart | cmt)))  | 
|
59  | 
}  | 
|
60  | 
||
| 58900 | 61  | 
private def other_token(keywords: Keyword.Keywords): Parser[Token] =  | 
| 55494 | 62  | 
    {
 | 
63  | 
val letdigs1 = many1(Symbol.is_letdig)  | 
|
64  | 
val sub = one(s => s == Symbol.sub_decoded || s == "\\<^sub>")  | 
|
65  | 
val id =  | 
|
66  | 
one(Symbol.is_letter) ~  | 
|
67  | 
          (rep(letdigs1 | (sub ~ letdigs1 ^^ { case x ~ y => x + y })) ^^ (_.mkString)) ^^
 | 
|
68  | 
        { case x ~ y => x + y }
 | 
|
69  | 
||
70  | 
val nat = many1(Symbol.is_digit)  | 
|
71  | 
      val natdot = nat ~ "." ~ nat ^^ { case x ~ y ~ z => x + y + z }
 | 
|
72  | 
      val id_nat = id ~ opt("." ~ nat) ^^ { case x ~ Some(y ~ z) => x + y + z case x ~ None => x }
 | 
|
73  | 
||
74  | 
      val ident = id ~ rep("." ~> id) ^^
 | 
|
75  | 
        { case x ~ Nil => Token(Token.Kind.IDENT, x)
 | 
|
76  | 
          case x ~ ys => Token(Token.Kind.LONG_IDENT, (x :: ys).mkString(".")) }
 | 
|
77  | 
||
78  | 
      val var_ = "?" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.VAR, x + y) }
 | 
|
79  | 
      val type_ident = "'" ~ id ^^ { case x ~ y => Token(Token.Kind.TYPE_IDENT, x + y) }
 | 
|
80  | 
      val type_var = "?'" ~ id_nat ^^ { case x ~ y => Token(Token.Kind.TYPE_VAR, x + y) }
 | 
|
81  | 
val nat_ = nat ^^ (x => Token(Token.Kind.NAT, x))  | 
|
82  | 
val float =  | 
|
83  | 
        ("-" ~ natdot ^^ { case x ~ y => x + y } | natdot) ^^ (x => Token(Token.Kind.FLOAT, x))
 | 
|
84  | 
||
85  | 
val sym_ident =  | 
|
86  | 
(many1(Symbol.is_symbolic_char) | one(sym => Symbol.is_symbolic(sym))) ^^  | 
|
87  | 
(x => Token(Token.Kind.SYM_IDENT, x))  | 
|
88  | 
||
| 58899 | 89  | 
val keyword =  | 
| 58900 | 90  | 
literal(keywords.minor) ^^ (x => Token(Token.Kind.KEYWORD, x)) |||  | 
91  | 
literal(keywords.major) ^^ (x => Token(Token.Kind.COMMAND, x))  | 
|
| 55494 | 92  | 
|
93  | 
val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))  | 
|
94  | 
||
95  | 
val recover_delimited =  | 
|
96  | 
        (recover_quoted("\"") |
 | 
|
97  | 
          (recover_quoted("`") |
 | 
|
98  | 
(recover_verbatim |  | 
|
99  | 
(recover_cartouche | recover_comment)))) ^^ (x => Token(Token.Kind.ERROR, x))  | 
|
100  | 
||
101  | 
val bad = one(_ => true) ^^ (x => Token(Token.Kind.ERROR, x))  | 
|
102  | 
||
103  | 
space | (recover_delimited |  | 
|
104  | 
(((ident | (var_ | (type_ident | (type_var | (float | (nat_ | sym_ident)))))) |||  | 
|
| 58899 | 105  | 
keyword) | bad))  | 
| 55494 | 106  | 
}  | 
107  | 
||
| 58900 | 108  | 
def token(keywords: Keyword.Keywords): Parser[Token] =  | 
109  | 
delimited_token | other_token(keywords)  | 
|
| 55494 | 110  | 
|
| 58900 | 111  | 
def token_line(keywords: Keyword.Keywords, ctxt: Scan.Line_Context)  | 
| 
55510
 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 
wenzelm 
parents: 
55505 
diff
changeset
 | 
112  | 
: Parser[(Token, Scan.Line_Context)] =  | 
| 55494 | 113  | 
    {
 | 
114  | 
val string =  | 
|
| 
55510
 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 
wenzelm 
parents: 
55505 
diff
changeset
 | 
115  | 
        quoted_line("\"", ctxt) ^^ { case (x, c) => (Token(Token.Kind.STRING, x), c) }
 | 
| 55494 | 116  | 
val alt_string =  | 
| 
55510
 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 
wenzelm 
parents: 
55505 
diff
changeset
 | 
117  | 
        quoted_line("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) }
 | 
| 
 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 
wenzelm 
parents: 
55505 
diff
changeset
 | 
118  | 
      val verb = verbatim_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) }
 | 
| 
 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 
wenzelm 
parents: 
55505 
diff
changeset
 | 
119  | 
      val cart = cartouche_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.CARTOUCHE, x), c) }
 | 
| 
 
1585a65aad64
tuned signature -- emphasize line-oriented aspect;
 
wenzelm 
parents: 
55505 
diff
changeset
 | 
120  | 
      val cmt = comment_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.COMMENT, x), c) }
 | 
| 58900 | 121  | 
      val other = other_token(keywords) ^^ { case x => (x, Scan.Finished) }
 | 
| 55494 | 122  | 
|
123  | 
string | (alt_string | (verb | (cart | (cmt | other))))  | 
|
124  | 
}  | 
|
125  | 
}  | 
|
126  | 
||
127  | 
||
| 59083 | 128  | 
/* explode */  | 
129  | 
||
130  | 
def explode(keywords: Keyword.Keywords, inp: CharSequence): List[Token] =  | 
|
131  | 
  {
 | 
|
132  | 
val in: input.Reader[Char] = new input.CharSequenceReader(inp)  | 
|
133  | 
    Parsers.parseAll(Parsers.rep(Parsers.token(keywords)), in) match {
 | 
|
134  | 
case Parsers.Success(tokens, _) => tokens  | 
|
135  | 
      case _ => error("Unexpected failure of tokenizing input:\n" + inp.toString)
 | 
|
136  | 
}  | 
|
137  | 
}  | 
|
138  | 
||
139  | 
def explode_line(keywords: Keyword.Keywords, inp: CharSequence, context: Scan.Line_Context)  | 
|
140  | 
: (List[Token], Scan.Line_Context) =  | 
|
141  | 
  {
 | 
|
142  | 
var in: input.Reader[Char] = new input.CharSequenceReader(inp)  | 
|
143  | 
val toks = new mutable.ListBuffer[Token]  | 
|
144  | 
var ctxt = context  | 
|
145  | 
    while (!in.atEnd) {
 | 
|
146  | 
      Parsers.parse(Parsers.token_line(keywords, ctxt), in) match {
 | 
|
147  | 
        case Parsers.Success((x, c), rest) => { toks += x; ctxt = c; in = rest }
 | 
|
148  | 
case Parsers.NoSuccess(_, rest) =>  | 
|
149  | 
          error("Unexpected failure of tokenizing input:\n" + rest.source.toString)
 | 
|
150  | 
}  | 
|
151  | 
}  | 
|
152  | 
(toks.toList, ctxt)  | 
|
153  | 
}  | 
|
154  | 
||
155  | 
||
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
156  | 
/* token reader */  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
157  | 
|
| 56464 | 158  | 
object Pos  | 
159  | 
  {
 | 
|
| 59696 | 160  | 
val none: Pos = new Pos(0, 0, "")  | 
| 59705 | 161  | 
val start: Pos = new Pos(1, 1, "")  | 
162  | 
val offset: Pos = new Pos(0, 1, "")  | 
|
163  | 
def file(file: String): Pos = new Pos(1, 1, file)  | 
|
| 56464 | 164  | 
}  | 
165  | 
||
| 
59671
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
166  | 
final class Pos private[Token](  | 
| 59696 | 167  | 
val line: Int,  | 
168  | 
val offset: Symbol.Offset,  | 
|
169  | 
val file: String)  | 
|
| 56464 | 170  | 
extends scala.util.parsing.input.Position  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
171  | 
  {
 | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
172  | 
def column = 0  | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
173  | 
def lineContents = ""  | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
174  | 
|
| 56464 | 175  | 
def advance(token: Token): Pos =  | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
176  | 
    {
 | 
| 
59671
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
177  | 
var line1 = line  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
178  | 
var offset1 = offset  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
179  | 
      for (s <- Symbol.iterator(token.source)) {
 | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
180  | 
if (line1 > 0 && Symbol.is_newline(s)) line1 += 1  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
181  | 
if (offset1 > 0) offset1 += 1  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
182  | 
}  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
183  | 
if (line1 == line && offset1 == offset) this  | 
| 59696 | 184  | 
else new Pos(line1, offset1, file)  | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
185  | 
}  | 
| 56464 | 186  | 
|
| 59695 | 187  | 
private def position(end_offset: Symbol.Offset): Position.T =  | 
| 
59671
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
188  | 
(if (line > 0) Position.Line(line) else Nil) :::  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
189  | 
(if (offset > 0) Position.Offset(offset) else Nil) :::  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
190  | 
(if (end_offset > 0) Position.End_Offset(end_offset) else Nil) :::  | 
| 59696 | 191  | 
(if (file != "") Position.File(file) else Nil)  | 
| 
59671
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
192  | 
|
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
193  | 
def position(): Position.T = position(0)  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
194  | 
def position(token: Token): Position.T = position(advance(token).offset)  | 
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
195  | 
|
| 
 
9715eb8e9408
more precise position information in Isabelle/Scala, with YXML markup as in Isabelle/ML;
 
wenzelm 
parents: 
59122 
diff
changeset
 | 
196  | 
override def toString: String = Position.here_undelimited(position())  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
197  | 
}  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
198  | 
|
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
199  | 
abstract class Reader extends scala.util.parsing.input.Reader[Token]  | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
200  | 
|
| 56464 | 201  | 
private class Token_Reader(tokens: List[Token], val pos: Pos) extends Reader  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
202  | 
  {
 | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
203  | 
def first = tokens.head  | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
204  | 
def rest = new Token_Reader(tokens.tail, pos.advance(first))  | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
205  | 
def atEnd = tokens.isEmpty  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
206  | 
}  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
207  | 
|
| 59705 | 208  | 
def reader(tokens: List[Token], start: Token.Pos): Reader =  | 
209  | 
new Token_Reader(tokens, start)  | 
|
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
210  | 
}  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
211  | 
|
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
212  | 
|
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
213  | 
sealed case class Token(val kind: Token.Kind.Value, val source: String)  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
214  | 
{
 | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
215  | 
def is_command: Boolean = kind == Token.Kind.COMMAND  | 
| 59122 | 216  | 
def is_command_kind(keywords: Keyword.Keywords, pred: String => Boolean): Boolean =  | 
| 59701 | 217  | 
is_command && keywords.is_command_kind(source, pred)  | 
| 48718 | 218  | 
def is_keyword: Boolean = kind == Token.Kind.KEYWORD  | 
| 55505 | 219  | 
def is_delimiter: Boolean = is_keyword && !Symbol.is_ascii_identifier(source)  | 
| 
48365
 
d88aefda01c4
basic support for stand-alone options with external string representation;
 
wenzelm 
parents: 
48349 
diff
changeset
 | 
220  | 
def is_ident: Boolean = kind == Token.Kind.IDENT  | 
| 
48605
 
e777363440d6
allow negative int values as well, according to real = int | float;
 
wenzelm 
parents: 
48599 
diff
changeset
 | 
221  | 
def is_sym_ident: Boolean = kind == Token.Kind.SYM_IDENT  | 
| 46943 | 222  | 
def is_string: Boolean = kind == Token.Kind.STRING  | 
| 
48349
 
a78e5d399599
support Session.Queue with ordering and dependencies;
 
wenzelm 
parents: 
48335 
diff
changeset
 | 
223  | 
def is_nat: Boolean = kind == Token.Kind.NAT  | 
| 
48365
 
d88aefda01c4
basic support for stand-alone options with external string representation;
 
wenzelm 
parents: 
48349 
diff
changeset
 | 
224  | 
def is_float: Boolean = kind == Token.Kind.FLOAT  | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
225  | 
def is_name: Boolean =  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
226  | 
kind == Token.Kind.IDENT ||  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
227  | 
kind == Token.Kind.SYM_IDENT ||  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
228  | 
kind == Token.Kind.STRING ||  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
229  | 
kind == Token.Kind.NAT  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
230  | 
def is_xname: Boolean = is_name || kind == Token.Kind.LONG_IDENT  | 
| 
56998
 
ebf3c9681406
clarified is_text in accordance to ML version (7e0178c84994), e.g. relevant for 'header' syntax in PIDE front-end;
 
wenzelm 
parents: 
56532 
diff
changeset
 | 
231  | 
def is_text: Boolean = is_xname || kind == Token.Kind.VERBATIM || kind == Token.Kind.CARTOUCHE  | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
232  | 
def is_space: Boolean = kind == Token.Kind.SPACE  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
233  | 
def is_comment: Boolean = kind == Token.Kind.COMMENT  | 
| 
51048
 
123be08eed88
clarified notion of Command.proper_range (according to Token.is_proper), especially relevant for Active.try_replace_command, to avoid loosing subsequent comments accidentally;
 
wenzelm 
parents: 
48754 
diff
changeset
 | 
234  | 
def is_improper: Boolean = is_space || is_comment  | 
| 48599 | 235  | 
def is_proper: Boolean = !is_space && !is_comment  | 
| 
48754
 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 
wenzelm 
parents: 
48718 
diff
changeset
 | 
236  | 
def is_error: Boolean = kind == Token.Kind.ERROR  | 
| 
47012
 
0e246130486b
clarified command span classification: strict Command.is_command, permissive Command.name;
 
wenzelm 
parents: 
46943 
diff
changeset
 | 
237  | 
def is_unparsed: Boolean = kind == Token.Kind.UNPARSED  | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
238  | 
|
| 
48754
 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 
wenzelm 
parents: 
48718 
diff
changeset
 | 
239  | 
def is_unfinished: Boolean = is_error &&  | 
| 
 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 
wenzelm 
parents: 
48718 
diff
changeset
 | 
240  | 
   (source.startsWith("\"") ||
 | 
| 
 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 
wenzelm 
parents: 
48718 
diff
changeset
 | 
241  | 
    source.startsWith("`") ||
 | 
| 
 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 
wenzelm 
parents: 
48718 
diff
changeset
 | 
242  | 
    source.startsWith("{*") ||
 | 
| 
57021
 
6a8fd2ac6756
explicit treatment of unfinished cartouches, which is important for Thy_Syntax.consolidate_spans;
 
wenzelm 
parents: 
56998 
diff
changeset
 | 
243  | 
    source.startsWith("(*") ||
 | 
| 
 
6a8fd2ac6756
explicit treatment of unfinished cartouches, which is important for Thy_Syntax.consolidate_spans;
 
wenzelm 
parents: 
56998 
diff
changeset
 | 
244  | 
source.startsWith(Symbol.open) ||  | 
| 
 
6a8fd2ac6756
explicit treatment of unfinished cartouches, which is important for Thy_Syntax.consolidate_spans;
 
wenzelm 
parents: 
56998 
diff
changeset
 | 
245  | 
source.startsWith(Symbol.open_decoded))  | 
| 
48754
 
c2c1e5944536
clarified undefined, unparsed, unfinished command spans;
 
wenzelm 
parents: 
48718 
diff
changeset
 | 
246  | 
|
| 48718 | 247  | 
def is_begin: Boolean = is_keyword && source == "begin"  | 
| 58751 | 248  | 
def is_end: Boolean = is_command && source == "end"  | 
| 43611 | 249  | 
|
| 58753 | 250  | 
  def is_begin_block: Boolean = is_command && source == "{"
 | 
251  | 
def is_end_block: Boolean = is_command && source == "}"  | 
|
252  | 
||
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
253  | 
def content: String =  | 
| 
55492
 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 
wenzelm 
parents: 
55137 
diff
changeset
 | 
254  | 
    if (kind == Token.Kind.STRING) Scan.Parsers.quoted_content("\"", source)
 | 
| 
 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 
wenzelm 
parents: 
55137 
diff
changeset
 | 
255  | 
    else if (kind == Token.Kind.ALT_STRING) Scan.Parsers.quoted_content("`", source)
 | 
| 
 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 
wenzelm 
parents: 
55137 
diff
changeset
 | 
256  | 
else if (kind == Token.Kind.VERBATIM) Scan.Parsers.verbatim_content(source)  | 
| 
 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 
wenzelm 
parents: 
55137 
diff
changeset
 | 
257  | 
else if (kind == Token.Kind.CARTOUCHE) Scan.Parsers.cartouche_content(source)  | 
| 
 
28d4db6c6e79
tuned signature -- separate Lexicon from Parsers (in accordance to ML version);
 
wenzelm 
parents: 
55137 
diff
changeset
 | 
258  | 
else if (kind == Token.Kind.COMMENT) Scan.Parsers.comment_content(source)  | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
259  | 
else source  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
260  | 
}  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
261  |