| author | wenzelm | 
| Sat, 23 Jul 2011 16:37:17 +0200 | |
| changeset 43947 | 9b00f09f7721 | 
| parent 43432 | 224006e5ac46 | 
| child 44706 | fe319b45315c | 
| permissions | -rw-r--r-- | 
| 18 | 1  | 
(* Title: Pure/Syntax/lexicon.ML  | 
2  | 
Author: Tobias Nipkow and Markus Wenzel, TU Muenchen  | 
|
| 0 | 3  | 
|
| 4703 | 4  | 
Lexer for the inner Isabelle syntax (terms and types).  | 
| 18 | 5  | 
*)  | 
| 0 | 6  | 
|
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
7  | 
signature LEXICON =  | 
| 
4247
 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 
wenzelm 
parents: 
3828 
diff
changeset
 | 
8  | 
sig  | 
| 42476 | 9  | 
structure Syntax:  | 
10  | 
sig  | 
|
11  | 
val const: string -> term  | 
|
12  | 
val free: string -> term  | 
|
13  | 
val var: indexname -> term  | 
|
14  | 
end  | 
|
| 0 | 15  | 
val is_identifier: string -> bool  | 
| 14679 | 16  | 
val is_ascii_identifier: string -> bool  | 
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
17  | 
val is_xid: string -> bool  | 
| 20165 | 18  | 
val is_tid: string -> bool  | 
| 30573 | 19  | 
val scan_id: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
20  | 
val scan_longid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
|
21  | 
val scan_tid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
|
22  | 
val scan_nat: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
|
23  | 
val scan_int: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
|
| 
40290
 
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
 
wenzelm 
parents: 
39510 
diff
changeset
 | 
24  | 
val scan_float: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
| 30573 | 25  | 
val scan_hex: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
26  | 
val scan_bin: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
|
27  | 
val scan_var: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
|
28  | 
val scan_tvar: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
|
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
29  | 
datatype token_kind =  | 
| 27887 | 30  | 
Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |  | 
| 28904 | 31  | 
NumSy | FloatSy | XNumSy | StrSy | Space | Comment | EOF  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
32  | 
datatype token = Token of token_kind * string * Position.range  | 
| 27806 | 33  | 
val str_of_token: token -> string  | 
34  | 
val pos_of_token: token -> Position.T  | 
|
| 27887 | 35  | 
val is_proper: token -> bool  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
36  | 
val mk_eof: Position.T -> token  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
37  | 
val eof: token  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
38  | 
val is_eof: token -> bool  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
39  | 
val stopper: token Scan.stopper  | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
40  | 
val idT: typ  | 
| 3828 | 41  | 
val longidT: typ  | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
42  | 
val varT: typ  | 
| 
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
43  | 
val tidT: typ  | 
| 
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
44  | 
val tvarT: typ  | 
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
45  | 
val terminals: string list  | 
| 
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
46  | 
val is_terminal: string -> bool  | 
| 
38237
 
8b0383334031
prefer Context_Position.report where a proper context is available -- notably for "inner" entities;
 
wenzelm 
parents: 
35428 
diff
changeset
 | 
47  | 
val report_token: Proof.context -> token -> unit  | 
| 
39510
 
d9f5f01faa1b
Syntax.read_asts error: report token ranges within message -- no side-effect here;
 
wenzelm 
parents: 
39507 
diff
changeset
 | 
48  | 
val reported_token_range: Proof.context -> token -> string  | 
| 18 | 49  | 
val matching_tokens: token * token -> bool  | 
50  | 
val valued_token: token -> bool  | 
|
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
51  | 
val predef_term: string -> token option  | 
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
52  | 
val implode_xstr: string list -> string  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
53  | 
val explode_xstr: string -> string list  | 
| 30573 | 54  | 
val tokenize: Scan.lexicon -> bool -> Symbol_Pos.T list -> token list  | 
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
55  | 
val read_indexname: string -> indexname  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
56  | 
val read_var: string -> term  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
57  | 
val read_variable: string -> indexname option  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
58  | 
val read_nat: string -> int option  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
59  | 
val read_int: string -> int option  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
60  | 
  val read_xnum: string -> {radix: int, leading_zeros: int, value: int}
 | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
61  | 
  val read_float: string -> {mant: int, exp: int}
 | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
62  | 
val mark_class: string -> string val unmark_class: string -> string  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
63  | 
val mark_type: string -> string val unmark_type: string -> string  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
64  | 
val mark_const: string -> string val unmark_const: string -> string  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
65  | 
val mark_fixed: string -> string val unmark_fixed: string -> string  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
66  | 
val unmark:  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
67  | 
   {case_class: string -> 'a,
 | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
68  | 
case_type: string -> 'a,  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
69  | 
case_const: string -> 'a,  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
70  | 
case_fixed: string -> 'a,  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
71  | 
case_default: string -> 'a} -> string -> 'a  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
72  | 
val is_marked: string -> bool  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
73  | 
val dummy_type: term  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
74  | 
val fun_type: term  | 
| 
4247
 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 
wenzelm 
parents: 
3828 
diff
changeset
 | 
75  | 
end;  | 
| 0 | 76  | 
|
| 14679 | 77  | 
structure Lexicon: LEXICON =  | 
| 0 | 78  | 
struct  | 
79  | 
||
| 42476 | 80  | 
(** syntaxtic terms **)  | 
81  | 
||
82  | 
structure Syntax =  | 
|
83  | 
struct  | 
|
84  | 
||
85  | 
fun const c = Const (c, dummyT);  | 
|
86  | 
fun free x = Free (x, dummyT);  | 
|
87  | 
fun var xi = Var (xi, dummyT);  | 
|
88  | 
||
89  | 
end;  | 
|
90  | 
||
91  | 
||
92  | 
||
| 18 | 93  | 
(** is_identifier etc. **)  | 
94  | 
||
| 16150 | 95  | 
val is_identifier = Symbol.is_ident o Symbol.explode;  | 
| 18 | 96  | 
|
| 14679 | 97  | 
fun is_ascii_identifier s =  | 
98  | 
let val cs = Symbol.explode s  | 
|
| 16150 | 99  | 
in forall Symbol.is_ascii cs andalso Symbol.is_ident cs end;  | 
| 14679 | 100  | 
|
| 18 | 101  | 
fun is_xid s =  | 
| 4703 | 102  | 
(case Symbol.explode s of  | 
| 16150 | 103  | 
"_" :: cs => Symbol.is_ident cs  | 
104  | 
| cs => Symbol.is_ident cs);  | 
|
| 18 | 105  | 
|
| 
330
 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 
clasohm 
parents: 
237 
diff
changeset
 | 
106  | 
fun is_tid s =  | 
| 4703 | 107  | 
(case Symbol.explode s of  | 
| 16150 | 108  | 
"'" :: cs => Symbol.is_ident cs  | 
| 18 | 109  | 
| _ => false);  | 
110  | 
||
| 0 | 111  | 
|
112  | 
||
| 4703 | 113  | 
(** basic scanners **)  | 
114  | 
||
| 30573 | 115  | 
open Basic_Symbol_Pos;  | 
| 27773 | 116  | 
|
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43432 
diff
changeset
 | 
117  | 
fun !!! msg = Symbol_Pos.!!! (fn () => "Inner lexical error: " ^ msg);  | 
| 27773 | 118  | 
|
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
119  | 
val scan_id =  | 
| 
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
120  | 
Scan.one (Symbol.is_letter o Symbol_Pos.symbol) :::  | 
| 
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
121  | 
Scan.many (Symbol.is_letdig o Symbol_Pos.symbol);  | 
| 
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
122  | 
|
| 27773 | 123  | 
val scan_longid = scan_id @@@ (Scan.repeat1 ($$$ "." @@@ scan_id) >> flat);  | 
124  | 
val scan_tid = $$$ "'" @@@ scan_id;  | 
|
| 4703 | 125  | 
|
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
126  | 
val scan_nat = Scan.many1 (Symbol.is_digit o Symbol_Pos.symbol);  | 
| 27773 | 127  | 
val scan_int = $$$ "-" @@@ scan_nat || scan_nat;  | 
| 28904 | 128  | 
val scan_natdot = scan_nat @@@ $$$ "." @@@ scan_nat;  | 
129  | 
val scan_float = $$$ "-" @@@ scan_natdot || scan_natdot;  | 
|
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
130  | 
val scan_hex = $$$ "0" @@@ $$$ "x" @@@ Scan.many1 (Symbol.is_ascii_hex o Symbol_Pos.symbol);  | 
| 27773 | 131  | 
val scan_bin = $$$ "0" @@@ $$$ "b" @@@ Scan.many1 (fn (s, _) => s = "0" orelse s = "1");  | 
| 4703 | 132  | 
|
| 27773 | 133  | 
val scan_id_nat = scan_id @@@ Scan.optional ($$$ "." @@@ scan_nat) [];  | 
134  | 
val scan_var = $$$ "?" @@@ scan_id_nat;  | 
|
135  | 
val scan_tvar = $$$ "?" @@@ $$$ "'" @@@ scan_id_nat;  | 
|
| 4703 | 136  | 
|
137  | 
||
138  | 
||
| 18 | 139  | 
(** datatype token **)  | 
| 0 | 140  | 
|
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
141  | 
datatype token_kind =  | 
| 27887 | 142  | 
Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |  | 
| 28904 | 143  | 
NumSy | FloatSy | XNumSy | StrSy | Space | Comment | EOF;  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
144  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
145  | 
datatype token = Token of token_kind * string * Position.range;  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
146  | 
|
| 27806 | 147  | 
fun str_of_token (Token (_, s, _)) = s;  | 
148  | 
fun pos_of_token (Token (_, _, (pos, _))) = pos;  | 
|
149  | 
||
| 27887 | 150  | 
fun is_proper (Token (Space, _, _)) = false  | 
151  | 
| is_proper (Token (Comment, _, _)) = false  | 
|
152  | 
| is_proper _ = true;  | 
|
153  | 
||
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
154  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
155  | 
(* stopper *)  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
156  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
157  | 
fun mk_eof pos = Token (EOF, "", (pos, Position.none));  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
158  | 
val eof = mk_eof Position.none;  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
159  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
160  | 
fun is_eof (Token (EOF, _, _)) = true  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
161  | 
| is_eof _ = false;  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
162  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
163  | 
val stopper = Scan.stopper (K eof) is_eof;  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
164  | 
|
| 0 | 165  | 
|
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
166  | 
(* terminal arguments *)  | 
| 0 | 167  | 
|
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
168  | 
val idT = Type ("id", []);
 | 
| 3828 | 169  | 
val longidT = Type ("longid", []);
 | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
170  | 
val varT = Type ("var", []);
 | 
| 
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
171  | 
val tidT = Type ("tid", []);
 | 
| 
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
172  | 
val tvarT = Type ("tvar", []);
 | 
| 0 | 173  | 
|
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
174  | 
val terminal_kinds =  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
175  | 
 [("id", IdentSy),
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
176  | 
  ("longid", LongIdentSy),
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
177  | 
  ("var", VarSy),
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
178  | 
  ("tid", TFreeSy),
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
179  | 
  ("tvar", TVarSy),
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
180  | 
  ("num", NumSy),
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
181  | 
  ("float_token", FloatSy),
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
182  | 
  ("xnum", XNumSy),
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
183  | 
  ("xstr", StrSy)];
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
184  | 
|
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
185  | 
val terminals = map #1 terminal_kinds;  | 
| 20664 | 186  | 
val is_terminal = member (op =) terminals;  | 
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
187  | 
|
| 0 | 188  | 
|
| 27887 | 189  | 
(* markup *)  | 
190  | 
||
191  | 
val token_kind_markup =  | 
|
192  | 
fn Literal => Markup.literal  | 
|
193  | 
| IdentSy => Markup.ident  | 
|
194  | 
| LongIdentSy => Markup.ident  | 
|
195  | 
| VarSy => Markup.var  | 
|
196  | 
| TFreeSy => Markup.tfree  | 
|
197  | 
| TVarSy => Markup.tvar  | 
|
| 
29318
 
6337d1cb2ba0
added numeral, which supercedes num, xnum, float;
 
wenzelm 
parents: 
29156 
diff
changeset
 | 
198  | 
| NumSy => Markup.numeral  | 
| 
 
6337d1cb2ba0
added numeral, which supercedes num, xnum, float;
 
wenzelm 
parents: 
29156 
diff
changeset
 | 
199  | 
| FloatSy => Markup.numeral  | 
| 
 
6337d1cb2ba0
added numeral, which supercedes num, xnum, float;
 
wenzelm 
parents: 
29156 
diff
changeset
 | 
200  | 
| XNumSy => Markup.numeral  | 
| 
 
6337d1cb2ba0
added numeral, which supercedes num, xnum, float;
 
wenzelm 
parents: 
29156 
diff
changeset
 | 
201  | 
| StrSy => Markup.inner_string  | 
| 
38474
 
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
 
wenzelm 
parents: 
38237 
diff
changeset
 | 
202  | 
| Space => Markup.empty  | 
| 27887 | 203  | 
| Comment => Markup.inner_comment  | 
| 
38474
 
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
 
wenzelm 
parents: 
38237 
diff
changeset
 | 
204  | 
| EOF => Markup.empty;  | 
| 27887 | 205  | 
|
| 
43432
 
224006e5ac46
inner literal/delimiter corresponds to outer keyword/operator;
 
wenzelm 
parents: 
42476 
diff
changeset
 | 
206  | 
fun report_token ctxt (Token (kind, s, (pos, _))) =  | 
| 
 
224006e5ac46
inner literal/delimiter corresponds to outer keyword/operator;
 
wenzelm 
parents: 
42476 
diff
changeset
 | 
207  | 
let val markup =  | 
| 
 
224006e5ac46
inner literal/delimiter corresponds to outer keyword/operator;
 
wenzelm 
parents: 
42476 
diff
changeset
 | 
208  | 
if kind = Literal andalso not (is_ascii_identifier s) then Markup.delimiter  | 
| 
 
224006e5ac46
inner literal/delimiter corresponds to outer keyword/operator;
 
wenzelm 
parents: 
42476 
diff
changeset
 | 
209  | 
else token_kind_markup kind  | 
| 
 
224006e5ac46
inner literal/delimiter corresponds to outer keyword/operator;
 
wenzelm 
parents: 
42476 
diff
changeset
 | 
210  | 
in Context_Position.report ctxt pos markup end;  | 
| 27887 | 211  | 
|
| 
39510
 
d9f5f01faa1b
Syntax.read_asts error: report token ranges within message -- no side-effect here;
 
wenzelm 
parents: 
39507 
diff
changeset
 | 
212  | 
fun reported_token_range ctxt tok =  | 
| 
39168
 
e3ac771235f7
report token range after inner parse error -- often provides important clues about misunderstanding concerning lexical phase;
 
wenzelm 
parents: 
38474 
diff
changeset
 | 
213  | 
if is_proper tok  | 
| 
39510
 
d9f5f01faa1b
Syntax.read_asts error: report token ranges within message -- no side-effect here;
 
wenzelm 
parents: 
39507 
diff
changeset
 | 
214  | 
then Context_Position.reported_text ctxt (pos_of_token tok) Markup.token_range ""  | 
| 
 
d9f5f01faa1b
Syntax.read_asts error: report token ranges within message -- no side-effect here;
 
wenzelm 
parents: 
39507 
diff
changeset
 | 
215  | 
else "";  | 
| 
39168
 
e3ac771235f7
report token range after inner parse error -- often provides important clues about misunderstanding concerning lexical phase;
 
wenzelm 
parents: 
38474 
diff
changeset
 | 
216  | 
|
| 27887 | 217  | 
|
| 18 | 218  | 
(* matching_tokens *)  | 
| 0 | 219  | 
|
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
220  | 
fun matching_tokens (Token (Literal, x, _), Token (Literal, y, _)) = x = y  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
221  | 
| matching_tokens (Token (k, _, _), Token (k', _, _)) = k = k';  | 
| 0 | 222  | 
|
223  | 
||
| 18 | 224  | 
(* valued_token *)  | 
| 0 | 225  | 
|
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
226  | 
fun valued_token (Token (Literal, _, _)) = false  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
227  | 
| valued_token (Token (EOF, _, _)) = false  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
228  | 
| valued_token _ = true;  | 
| 0 | 229  | 
|
230  | 
||
| 18 | 231  | 
(* predef_term *)  | 
| 0 | 232  | 
|
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
233  | 
fun predef_term s =  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
234  | 
(case AList.lookup (op =) terminal_kinds s of  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
235  | 
SOME sy => SOME (Token (sy, s, Position.no_range))  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
236  | 
| NONE => NONE);  | 
| 0 | 237  | 
|
238  | 
||
| 4703 | 239  | 
(* xstr tokens *)  | 
| 18 | 240  | 
|
| 14730 | 241  | 
val scan_chr =  | 
| 27773 | 242  | 
$$$ "\\" |-- $$$ "'" ||  | 
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
243  | 
Scan.one  | 
| 
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
244  | 
((fn s => s <> "\\" andalso s <> "'" andalso Symbol.is_regular s) o  | 
| 
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
245  | 
Symbol_Pos.symbol) >> single ||  | 
| 27773 | 246  | 
$$$ "'" --| Scan.ahead (~$$$ "'");  | 
| 14730 | 247  | 
|
248  | 
val scan_str =  | 
|
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
249  | 
$$$ "'" @@@ $$$ "'" @@@ !!! "missing end of string"  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
250  | 
((Scan.repeat scan_chr >> flat) @@@ $$$ "'" @@@ $$$ "'");  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
251  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
252  | 
val scan_str_body =  | 
| 27773 | 253  | 
$$$ "'" |-- $$$ "'" |-- !!! "missing end of string"  | 
254  | 
((Scan.repeat scan_chr >> flat) --| $$$ "'" --| $$$ "'");  | 
|
| 14730 | 255  | 
|
| 0 | 256  | 
|
| 4703 | 257  | 
fun implode_xstr cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs));  | 
| 18 | 258  | 
|
| 4703 | 259  | 
fun explode_xstr str =  | 
| 30573 | 260  | 
(case Scan.read Symbol_Pos.stopper scan_str_body (Symbol_Pos.explode (str, Position.none)) of  | 
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
261  | 
SOME cs => map Symbol_Pos.symbol cs  | 
| 5868 | 262  | 
  | _ => error ("Inner lexical error: literal string expected at " ^ quote str));
 | 
| 18 | 263  | 
|
264  | 
||
| 27806 | 265  | 
|
| 18 | 266  | 
(** tokenize **)  | 
267  | 
||
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
268  | 
fun token_leq (Token (_, s1, _), Token (_, s2, _)) = s1 <= s2;  | 
| 30573 | 269  | 
fun token kind ss = Token (kind, Symbol_Pos.content ss, Symbol_Pos.range ss);  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
270  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
271  | 
fun tokenize lex xids syms =  | 
| 18 | 272  | 
let  | 
273  | 
val scan_xid =  | 
|
| 27773 | 274  | 
if xids then $$$ "_" @@@ scan_id || scan_id  | 
| 18 | 275  | 
else scan_id;  | 
276  | 
||
| 20096 | 277  | 
val scan_num = scan_hex || scan_bin || scan_int;  | 
278  | 
||
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
279  | 
val scan_val =  | 
| 
26007
 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 
wenzelm 
parents: 
24630 
diff
changeset
 | 
280  | 
scan_tvar >> token TVarSy ||  | 
| 
 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 
wenzelm 
parents: 
24630 
diff
changeset
 | 
281  | 
scan_var >> token VarSy ||  | 
| 
 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 
wenzelm 
parents: 
24630 
diff
changeset
 | 
282  | 
scan_tid >> token TFreeSy ||  | 
| 28904 | 283  | 
scan_float >> token FloatSy ||  | 
| 
26007
 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 
wenzelm 
parents: 
24630 
diff
changeset
 | 
284  | 
scan_num >> token NumSy ||  | 
| 27773 | 285  | 
$$$ "#" @@@ scan_num >> token XNumSy ||  | 
| 
26007
 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 
wenzelm 
parents: 
24630 
diff
changeset
 | 
286  | 
scan_longid >> token LongIdentSy ||  | 
| 
 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 
wenzelm 
parents: 
24630 
diff
changeset
 | 
287  | 
scan_xid >> token IdentSy;  | 
| 18 | 288  | 
|
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
289  | 
val scan_lit = Scan.literal lex >> token Literal;  | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
290  | 
|
| 4703 | 291  | 
val scan_token =  | 
| 30573 | 292  | 
Symbol_Pos.scan_comment !!! >> token Comment ||  | 
| 27887 | 293  | 
Scan.max token_leq scan_lit scan_val ||  | 
294  | 
scan_str >> token StrSy ||  | 
|
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
295  | 
Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol) >> token Space;  | 
| 18 | 296  | 
in  | 
| 27773 | 297  | 
(case Scan.error  | 
| 30573 | 298  | 
(Scan.finite Symbol_Pos.stopper (Scan.repeat scan_token)) syms of  | 
| 27887 | 299  | 
(toks, []) => toks  | 
| 30573 | 300  | 
    | (_, ss) => error ("Inner lexical error at: " ^ Symbol_Pos.content ss ^
 | 
301  | 
Position.str_of (#1 (Symbol_Pos.range ss))))  | 
|
| 18 | 302  | 
end;  | 
303  | 
||
304  | 
||
305  | 
||
306  | 
(** scan variables **)  | 
|
307  | 
||
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
308  | 
(* scan_indexname *)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
309  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
310  | 
local  | 
| 18 | 311  | 
|
| 27773 | 312  | 
val scan_vname =  | 
| 18 | 313  | 
let  | 
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
314  | 
fun nat n [] = n  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
315  | 
| nat n (c :: cs) = nat (n * 10 + (ord c - ord "0")) cs;  | 
| 18 | 316  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
317  | 
fun idxname cs ds = (implode (rev cs), nat 0 ds);  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
318  | 
fun chop_idx [] ds = idxname [] ds  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
319  | 
| chop_idx (cs as (_ :: "\\<^isub>" :: _)) ds = idxname cs ds  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
320  | 
| chop_idx (cs as (_ :: "\\<^isup>" :: _)) ds = idxname cs ds  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
321  | 
| chop_idx (c :: cs) ds =  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
322  | 
if Symbol.is_digit c then chop_idx cs (c :: ds)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
323  | 
else idxname (c :: cs) ds;  | 
| 18 | 324  | 
|
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
325  | 
val scan = (scan_id >> map Symbol_Pos.symbol) --  | 
| 
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
326  | 
Scan.optional ($$$ "." |-- scan_nat >> (nat 0 o map Symbol_Pos.symbol)) ~1;  | 
| 18 | 327  | 
in  | 
| 27773 | 328  | 
scan >>  | 
329  | 
(fn (cs, ~1) => chop_idx (rev cs) []  | 
|
330  | 
| (cs, i) => (implode cs, i))  | 
|
| 18 | 331  | 
end;  | 
332  | 
||
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
333  | 
in  | 
| 18 | 334  | 
|
| 27773 | 335  | 
val scan_indexname = $$$ "'" |-- scan_vname >> (fn (x, i) => ("'" ^ x, i)) || scan_vname;
 | 
| 
15443
 
07f78cc82a73
indexname function now parses type variables as well; changed input
 
berghofe 
parents: 
14981 
diff
changeset
 | 
336  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
337  | 
end;  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
338  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
339  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
340  | 
(* indexname *)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
341  | 
|
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
342  | 
fun read_indexname s =  | 
| 30573 | 343  | 
(case Scan.read Symbol_Pos.stopper scan_indexname (Symbol_Pos.explode (s, Position.none)) of  | 
| 15531 | 344  | 
SOME xi => xi  | 
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
345  | 
  | _ => error ("Lexical error in variable name: " ^ quote s));
 | 
| 18 | 346  | 
|
347  | 
||
| 4703 | 348  | 
(* read_var *)  | 
| 18 | 349  | 
|
| 4703 | 350  | 
fun read_var str =  | 
| 18 | 351  | 
let  | 
352  | 
val scan =  | 
|
| 42476 | 353  | 
$$$ "?" |-- scan_indexname --| Scan.ahead (Scan.one Symbol_Pos.is_eof)  | 
354  | 
>> Syntax.var ||  | 
|
355  | 
Scan.many (Symbol.is_regular o Symbol_Pos.symbol)  | 
|
356  | 
>> (Syntax.free o implode o map Symbol_Pos.symbol);  | 
|
| 30573 | 357  | 
in the (Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode (str, Position.none))) end;  | 
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
358  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
359  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
360  | 
(* read_variable *)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
361  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
362  | 
fun read_variable str =  | 
| 27773 | 363  | 
let val scan = $$$ "?" |-- scan_indexname || scan_indexname  | 
| 30573 | 364  | 
in Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode (str, Position.none)) end;  | 
| 4587 | 365  | 
|
366  | 
||
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
367  | 
(* read numbers *)  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
368  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
369  | 
local  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
370  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
371  | 
fun nat cs =  | 
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
372  | 
Option.map (#1 o Library.read_int o map Symbol_Pos.symbol)  | 
| 30573 | 373  | 
(Scan.read Symbol_Pos.stopper scan_nat cs);  | 
| 5860 | 374  | 
|
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
375  | 
in  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
376  | 
|
| 30573 | 377  | 
fun read_nat s = nat (Symbol_Pos.explode (s, Position.none));  | 
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
378  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
379  | 
fun read_int s =  | 
| 30573 | 380  | 
(case Symbol_Pos.explode (s, Position.none) of  | 
| 27773 | 381  | 
    ("-", _) :: cs => Option.map ~ (nat cs)
 | 
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
382  | 
| cs => nat cs);  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
383  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
384  | 
end;  | 
| 5860 | 385  | 
|
386  | 
||
| 20096 | 387  | 
(* read_xnum: hex/bin/decimal *)  | 
| 9326 | 388  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
389  | 
local  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
390  | 
|
| 20096 | 391  | 
val ten = ord "0" + 10;  | 
392  | 
val a = ord "a";  | 
|
393  | 
val A = ord "A";  | 
|
| 35428 | 394  | 
val _ = a > A orelse raise Fail "Bad ASCII";  | 
| 20096 | 395  | 
|
396  | 
fun remap_hex c =  | 
|
397  | 
let val x = ord c in  | 
|
398  | 
if x >= a then chr (x - a + ten)  | 
|
399  | 
else if x >= A then chr (x - A + ten)  | 
|
400  | 
else c  | 
|
401  | 
end;  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
402  | 
|
| 21781 | 403  | 
fun leading_zeros ["0"] = 0  | 
404  | 
  | leading_zeros ("0" :: cs) = 1 + leading_zeros cs
 | 
|
405  | 
| leading_zeros _ = 0;  | 
|
406  | 
||
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
407  | 
in  | 
| 
15965
 
f422f8283491
Use of IntInf.int instead of int in most numeric simprocs; avoids
 
paulson 
parents: 
15570 
diff
changeset
 | 
408  | 
|
| 9326 | 409  | 
fun read_xnum str =  | 
410  | 
let  | 
|
| 20096 | 411  | 
val (sign, radix, digs) =  | 
412  | 
(case Symbol.explode (perhaps (try (unprefix "#")) str) of  | 
|
413  | 
"0" :: "x" :: cs => (1, 16, map remap_hex cs)  | 
|
414  | 
| "0" :: "b" :: cs => (1, 2, cs)  | 
|
415  | 
| "-" :: cs => (~1, 10, cs)  | 
|
416  | 
| cs => (1, 10, cs));  | 
|
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
417  | 
in  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
418  | 
   {radix = radix,
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
419  | 
leading_zeros = leading_zeros digs,  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
420  | 
value = sign * #1 (Library.read_radix_int radix digs)}  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
421  | 
end;  | 
| 9326 | 422  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
423  | 
end;  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
424  | 
|
| 28904 | 425  | 
fun read_float str =  | 
426  | 
let  | 
|
427  | 
val (sign, cs) =  | 
|
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
428  | 
(case Symbol.explode str of  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
429  | 
"-" :: cs => (~1, cs)  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
430  | 
| cs => (1, cs));  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
431  | 
val (intpart, fracpart) =  | 
| 28904 | 432  | 
(case take_prefix Symbol.is_digit cs of  | 
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
433  | 
(intpart, "." :: fracpart) => (intpart, fracpart)  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
434  | 
| _ => raise Fail "read_float");  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
435  | 
in  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
436  | 
   {mant = sign * #1 (Library.read_int (intpart @ fracpart)),
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
437  | 
exp = length fracpart}  | 
| 
42046
 
6341c23baf10
added Lexicon.encode_position, Lexicon.decode_position;
 
wenzelm 
parents: 
40525 
diff
changeset
 | 
438  | 
end;  | 
| 
 
6341c23baf10
added Lexicon.encode_position, Lexicon.decode_position;
 
wenzelm 
parents: 
40525 
diff
changeset
 | 
439  | 
|
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
440  | 
|
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
441  | 
(* marked logical entities *)  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
442  | 
|
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
443  | 
fun marker s = (prefix s, unprefix s);  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
444  | 
|
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
445  | 
val (mark_class, unmark_class) = marker "\\<^class>";  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
446  | 
val (mark_type, unmark_type) = marker "\\<^type>";  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
447  | 
val (mark_const, unmark_const) = marker "\\<^const>";  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
448  | 
val (mark_fixed, unmark_fixed) = marker "\\<^fixed>";  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
449  | 
|
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
450  | 
fun unmark {case_class, case_type, case_const, case_fixed, case_default} s =
 | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
451  | 
(case try unmark_class s of  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
452  | 
SOME c => case_class c  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
453  | 
| NONE =>  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
454  | 
(case try unmark_type s of  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
455  | 
SOME c => case_type c  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
456  | 
| NONE =>  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
457  | 
(case try unmark_const s of  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
458  | 
SOME c => case_const c  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
459  | 
| NONE =>  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
460  | 
(case try unmark_fixed s of  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
461  | 
SOME c => case_fixed c  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
462  | 
| NONE => case_default s))));  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
463  | 
|
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
464  | 
val is_marked =  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
465  | 
  unmark {case_class = K true, case_type = K true, case_const = K true,
 | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
466  | 
case_fixed = K true, case_default = K false};  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
467  | 
|
| 42476 | 468  | 
val dummy_type = Syntax.const (mark_type "dummy");  | 
469  | 
val fun_type = Syntax.const (mark_type "fun");  | 
|
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
470  | 
|
| 0 | 471  | 
end;  |