| author | wenzelm | 
| Mon, 30 Jul 2012 13:42:45 +0200 | |
| changeset 48599 | 5e64b7770f35 | 
| parent 48365 | d88aefda01c4 | 
| child 48605 | e777363440d6 | 
| permissions | -rw-r--r-- | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
1  | 
/* Title: Pure/Isar/token.scala  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
2  | 
Author: Makarius  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
3  | 
|
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
4  | 
Outer token syntax for Isabelle/Isar.  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
5  | 
*/  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
6  | 
|
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
7  | 
package isabelle  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
8  | 
|
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
9  | 
|
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
10  | 
object Token  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
11  | 
{
 | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
12  | 
/* tokens */  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
13  | 
|
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
14  | 
object Kind extends Enumeration  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
15  | 
  {
 | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
16  | 
    val COMMAND = Value("command")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
17  | 
    val KEYWORD = Value("keyword")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
18  | 
    val IDENT = Value("identifier")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
19  | 
    val LONG_IDENT = Value("long identifier")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
20  | 
    val SYM_IDENT = Value("symbolic identifier")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
21  | 
    val VAR = Value("schematic variable")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
22  | 
    val TYPE_IDENT = Value("type variable")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
23  | 
    val TYPE_VAR = Value("schematic type variable")
 | 
| 
40290
 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 
wenzelm 
parents: 
38367 
diff
changeset
 | 
24  | 
    val NAT = Value("natural number")
 | 
| 
 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 
wenzelm 
parents: 
38367 
diff
changeset
 | 
25  | 
    val FLOAT = Value("floating-point number")
 | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
26  | 
    val STRING = Value("string")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
27  | 
    val ALT_STRING = Value("back-quoted string")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
28  | 
    val VERBATIM = Value("verbatim text")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
29  | 
    val SPACE = Value("white space")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
30  | 
    val COMMENT = Value("comment text")
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
31  | 
    val UNPARSED = Value("unparsed input")
 | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
32  | 
}  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
33  | 
|
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
34  | 
|
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
35  | 
/* token reader */  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
36  | 
|
| 48335 | 37  | 
class Position(val line: Int, val file: String) extends scala.util.parsing.input.Position  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
38  | 
  {
 | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
39  | 
def column = 0  | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
40  | 
def lineContents = ""  | 
| 48335 | 41  | 
override def toString =  | 
42  | 
      if (file == "") ("line " + line.toString)
 | 
|
43  | 
      else ("line " + line.toString + " of " + quote(file))
 | 
|
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
44  | 
|
| 48335 | 45  | 
def advance(token: Token): Position =  | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
46  | 
    {
 | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
47  | 
var n = 0  | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
48  | 
for (c <- token.content if c == '\n') n += 1  | 
| 48335 | 49  | 
if (n == 0) this else new Position(line + n, file)  | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
50  | 
}  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
51  | 
}  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
52  | 
|
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
53  | 
abstract class Reader extends scala.util.parsing.input.Reader[Token]  | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
54  | 
|
| 48335 | 55  | 
private class Token_Reader(tokens: List[Token], val pos: Position) extends Reader  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
56  | 
  {
 | 
| 
34157
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
57  | 
def first = tokens.head  | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
58  | 
def rest = new Token_Reader(tokens.tail, pos.advance(first))  | 
| 
 
0a0a19153626
explicit representation of Token_Kind -- cannot really depend on runtime types due to erasure;
 
wenzelm 
parents: 
34143 
diff
changeset
 | 
59  | 
def atEnd = tokens.isEmpty  | 
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
60  | 
}  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
61  | 
|
| 48335 | 62  | 
def reader(tokens: List[Token], file: String = ""): Reader =  | 
63  | 
new Token_Reader(tokens, new Position(1, file))  | 
|
| 
34139
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
64  | 
}  | 
| 
 
d1ded303fe0e
Outer lexical syntax for Isabelle/Isar -- Scala version.
 
wenzelm 
parents:  
diff
changeset
 | 
65  | 
|
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
66  | 
|
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
67  | 
sealed case class Token(val kind: Token.Kind.Value, val source: String)  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
68  | 
{
 | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
69  | 
def is_command: Boolean = kind == Token.Kind.COMMAND  | 
| 
43430
 
1ed88ddf1268
more uniform treatment of "keyword" vs. "operator";
 
wenzelm 
parents: 
43418 
diff
changeset
 | 
70  | 
def is_operator: Boolean = kind == Token.Kind.KEYWORD && !Symbol.is_ascii_identifier(source)  | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
71  | 
def is_delimited: Boolean =  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
72  | 
kind == Token.Kind.STRING ||  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
73  | 
kind == Token.Kind.ALT_STRING ||  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
74  | 
kind == Token.Kind.VERBATIM ||  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
75  | 
kind == Token.Kind.COMMENT  | 
| 
48365
 
d88aefda01c4
basic support for stand-alone options with external string representation;
 
wenzelm 
parents: 
48349 
diff
changeset
 | 
76  | 
def is_ident: Boolean = kind == Token.Kind.IDENT  | 
| 46943 | 77  | 
def is_string: Boolean = kind == Token.Kind.STRING  | 
| 
48349
 
a78e5d399599
support Session.Queue with ordering and dependencies;
 
wenzelm 
parents: 
48335 
diff
changeset
 | 
78  | 
def is_nat: Boolean = kind == Token.Kind.NAT  | 
| 
48365
 
d88aefda01c4
basic support for stand-alone options with external string representation;
 
wenzelm 
parents: 
48349 
diff
changeset
 | 
79  | 
def is_float: Boolean = kind == Token.Kind.FLOAT  | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
80  | 
def is_name: Boolean =  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
81  | 
kind == Token.Kind.IDENT ||  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
82  | 
kind == Token.Kind.SYM_IDENT ||  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
83  | 
kind == Token.Kind.STRING ||  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
84  | 
kind == Token.Kind.NAT  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
85  | 
def is_xname: Boolean = is_name || kind == Token.Kind.LONG_IDENT  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
86  | 
def is_text: Boolean = is_xname || kind == Token.Kind.VERBATIM  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
87  | 
def is_space: Boolean = kind == Token.Kind.SPACE  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
88  | 
def is_comment: Boolean = kind == Token.Kind.COMMENT  | 
| 48599 | 89  | 
def is_proper: Boolean = !is_space && !is_comment  | 
| 
47012
 
0e246130486b
clarified command span classification: strict Command.is_command, permissive Command.name;
 
wenzelm 
parents: 
46943 
diff
changeset
 | 
90  | 
def is_unparsed: Boolean = kind == Token.Kind.UNPARSED  | 
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
91  | 
|
| 43611 | 92  | 
def is_begin: Boolean = kind == Token.Kind.KEYWORD && source == "begin"  | 
93  | 
def is_end: Boolean = kind == Token.Kind.COMMAND && source == "end"  | 
|
94  | 
||
| 
36956
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
95  | 
def content: String =  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
96  | 
    if (kind == Token.Kind.STRING) Scan.Lexicon.empty.quoted_content("\"", source)
 | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
97  | 
    else if (kind == Token.Kind.ALT_STRING) Scan.Lexicon.empty.quoted_content("`", source)
 | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
98  | 
else if (kind == Token.Kind.VERBATIM) Scan.Lexicon.empty.verbatim_content(source)  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
99  | 
else if (kind == Token.Kind.COMMENT) Scan.Lexicon.empty.comment_content(source)  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
100  | 
else source  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
101  | 
|
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
102  | 
def text: (String, String) =  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
103  | 
    if (kind == Token.Kind.COMMAND && source == ";") ("terminator", "")
 | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
104  | 
else (kind.toString, source)  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
105  | 
}  | 
| 
 
21be4832c362
renamed class Outer_Lex to Token and Token_Kind to Token.Kind;
 
wenzelm 
parents: 
34311 
diff
changeset
 | 
106  |