| author | wenzelm | 
| Fri, 23 Aug 2013 20:35:50 +0200 | |
| changeset 53171 | a5e54d4d9081 | 
| parent 51048 | 123be08eed88 | 
| child 55033 | 8e8243975860 | 
| permissions | -rw-r--r-- | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
1  | 
/* Title: Pure/Isar/token.scala  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
2  | 
Author: Makarius  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
3  | 
|
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
4  | 
Outer token syntax for Isabelle/Isar.  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
5  | 
*/  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
6  | 
|
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
7  | 
package isabelle  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
8  | 
|
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
9  | 
|
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
10  | 
object Token  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
11  | 
{
 | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
12  | 
/* tokens */  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
13  | 
|
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
14  | 
// Enumeration of token kinds.  Each value carries a human-readable name,
// which is also what `toString` yields (used by `Token.text`).
// NOTE: declaration order determines the numeric ids -- do not reorder.
object Kind extends Enumeration
{
  // commands and keywords
  val COMMAND = Value("command")
  val KEYWORD = Value("keyword")

  // identifiers and variables
  val IDENT = Value("identifier")
  val LONG_IDENT = Value("long identifier")
  val SYM_IDENT = Value("symbolic identifier")
  val VAR = Value("schematic variable")
  val TYPE_IDENT = Value("type variable")
  val TYPE_VAR = Value("schematic type variable")

  // numerals
  val NAT = Value("natural number")
  val FLOAT = Value("floating-point number")

  // delimited text
  val STRING = Value("string")
  val ALT_STRING = Value("back-quoted string")
  val VERBATIM = Value("verbatim text")

  // white space and comments
  val SPACE = Value("white space")
  val COMMENT = Value("comment text")

  // malformed or not yet parsed input
  val ERROR = Value("bad input")
  val UNPARSED = Value("unparsed input")
}
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
34  | 
|
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
35  | 
|
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
36  | 
/* token reader */  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
37  | 
|
| 48335 | 38  | 
// Line-oriented position for the token reader: a line number together with
// a file name (the empty string means "no file").  Columns and line contents
// are not tracked, so `column` and `lineContents` are constant.
class Position(val line: Int, val file: String) extends scala.util.parsing.input.Position
{
  def column = 0
  def lineContents = ""

  // "line N" or "line N of FILE"; `quote` is defined elsewhere in the
  // project (presumably it wraps the name in quotation marks).
  override def toString =
    if (file == "") ("line " + line.toString)
    else ("line " + line.toString + " of " + quote(file))

  // Position after the given token: the line number is incremented once for
  // every newline character of the token.  Returns `this` unchanged if the
  // token contains no newline.
  def advance(token: Token): Position =
  {
    // Count over the original source text, not the decoded content: the
    // content of delimited tokens has delimiters removed (and presumably
    // escapes resolved -- see Scan.Lexicon), so it need not agree with
    // the physical line breaks of the file.
    val n = token.source.count(_ == '\n')
    if (n == 0) this else new Position(line + n, file)
  }
}
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
53  | 
|
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
54  | 
// Abstract reader type for token parsers: a parser-combinator input Reader whose elements are Token values.
abstract class Reader extends scala.util.parsing.input.Reader[Token]  | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
55  | 
|
| 48335 | 56  | 
// Reader over an immutable token list that tracks the position of the
// current head token.  As in the Reader contract, `first` and `rest` are
// only meaningful when `atEnd` is false.
private class Token_Reader(tokens: List[Token], val pos: Position) extends Reader
{
  def atEnd: Boolean = tokens.isEmpty

  def first: Token = tokens.head

  def rest: Token_Reader =
  {
    val remaining = tokens.tail
    // the next position is obtained by advancing over the current token
    val next_pos = pos.advance(first)
    new Token_Reader(remaining, next_pos)
  }
}
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
62  | 
|
| 48335 | 63  | 
// Creates a Reader over the given tokens, starting at line 1 of `file`
// (the default empty string stands for "no file").
def reader(tokens: List[Token], file: String = ""): Reader =
{
  val start = new Position(1, file)
  new Token_Reader(tokens, start)
}
|
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
65  | 
}  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
66  | 
|
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
67  | 
|
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
68  | 
// Outer syntax token: its kind together with the original source text.
sealed case class Token(val kind: Token.Kind.Value, val source: String)
{
  /* single-kind tests */

  def is_command: Boolean = kind == Token.Kind.COMMAND
  def is_keyword: Boolean = kind == Token.Kind.KEYWORD
  def is_ident: Boolean = kind == Token.Kind.IDENT
  def is_sym_ident: Boolean = kind == Token.Kind.SYM_IDENT
  def is_string: Boolean = kind == Token.Kind.STRING
  def is_nat: Boolean = kind == Token.Kind.NAT
  def is_float: Boolean = kind == Token.Kind.FLOAT
  def is_space: Boolean = kind == Token.Kind.SPACE
  def is_comment: Boolean = kind == Token.Kind.COMMENT
  def is_error: Boolean = kind == Token.Kind.ERROR
  def is_unparsed: Boolean = kind == Token.Kind.UNPARSED


  /* derived classifications */

  // keyword whose source is not an ASCII identifier
  def is_operator: Boolean = is_keyword && !Symbol.is_ascii_identifier(source)

  // kinds whose content is obtained by stripping delimiters (see `content`)
  def is_delimited: Boolean =
    kind match {
      case Token.Kind.STRING | Token.Kind.ALT_STRING |
        Token.Kind.VERBATIM | Token.Kind.COMMENT => true
      case _ => false
    }

  def is_name: Boolean =
    kind match {
      case Token.Kind.IDENT | Token.Kind.SYM_IDENT |
        Token.Kind.STRING | Token.Kind.NAT => true
      case _ => false
    }

  def is_xname: Boolean = is_name || kind == Token.Kind.LONG_IDENT
  def is_text: Boolean = is_xname || kind == Token.Kind.VERBATIM

  // improper tokens are white space and comments; everything else is proper
  def is_improper: Boolean = is_space || is_comment
  def is_proper: Boolean = !(is_space || is_comment)

  // error token that starts with an opening delimiter of a string,
  // back-quoted string, verbatim text or comment
  def is_unfinished: Boolean =
    is_error &&
      List("\"", "`", "{*", "(*").exists(prefix => source.startsWith(prefix))

  def is_begin: Boolean = is_keyword && source == "begin"
  def is_end: Boolean = is_keyword && source == "end"


  /* content and presentation */

  // Content of the token: for delimited kinds the result of the
  // corresponding Scan.Lexicon content function, otherwise the plain source.
  def content: String =
    kind match {
      case Token.Kind.STRING => Scan.Lexicon.empty.quoted_content("\"", source)
      case Token.Kind.ALT_STRING => Scan.Lexicon.empty.quoted_content("`", source)
      case Token.Kind.VERBATIM => Scan.Lexicon.empty.verbatim_content(source)
      case Token.Kind.COMMENT => Scan.Lexicon.empty.comment_content(source)
      case _ => source
    }

  // Pair of (kind description, source); the command ";" is presented
  // as "terminator" with empty text.
  def text: (String, String) =
  {
    val is_terminator = is_command && source == ";"
    if (is_terminator) ("terminator", "")
    else (kind.toString, source)
  }
}
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
117  |