| author | bulwahn | 
| Thu, 08 Dec 2016 15:21:18 +0100 | |
| changeset 64542 | c7d76708379f | 
| parent 64275 | ac2abc987cf9 | 
| child 67352 | 5f7f339f3d7e | 
| permissions | -rw-r--r-- | 
| 18 | 1  | 
(* Title: Pure/Syntax/lexicon.ML  | 
2  | 
Author: Tobias Nipkow and Markus Wenzel, TU Muenchen  | 
|
| 0 | 3  | 
|
| 4703 | 4  | 
Lexer for the inner Isabelle syntax (terms and types).  | 
| 18 | 5  | 
*)  | 
| 0 | 6  | 
|
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
7  | 
signature LEXICON =  | 
| 
4247
 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 
wenzelm 
parents: 
3828 
diff
changeset
 | 
8  | 
sig  | 
| 42476 | 9  | 
structure Syntax:  | 
10  | 
sig  | 
|
11  | 
val const: string -> term  | 
|
12  | 
val free: string -> term  | 
|
13  | 
val var: indexname -> term  | 
|
14  | 
end  | 
|
| 30573 | 15  | 
val scan_id: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
16  | 
val scan_longid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
|
17  | 
val scan_tid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
|
18  | 
val scan_hex: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
|
19  | 
val scan_bin: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
|
20  | 
val scan_var: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
|
21  | 
val scan_tvar: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list  | 
|
| 50239 | 22  | 
val is_tid: string -> bool  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
23  | 
datatype token_kind =  | 
| 
55108
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
24  | 
Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy |  | 
| 58421 | 25  | 
FloatSy | StrSy | StringSy | Cartouche | Space | Comment | EOF  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
26  | 
datatype token = Token of token_kind * string * Position.range  | 
| 27806 | 27  | 
val str_of_token: token -> string  | 
28  | 
val pos_of_token: token -> Position.T  | 
|
| 27887 | 29  | 
val is_proper: token -> bool  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
30  | 
val mk_eof: Position.T -> token  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
31  | 
val eof: token  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
32  | 
val is_eof: token -> bool  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
33  | 
val stopper: token Scan.stopper  | 
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
34  | 
val terminals: string list  | 
| 
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
35  | 
val is_terminal: string -> bool  | 
| 
49821
 
d15fe10593ff
clarified output token markup (see also bc22daeed49e);
 
wenzelm 
parents: 
48992 
diff
changeset
 | 
36  | 
val literal_markup: string -> Markup.T  | 
| 44736 | 37  | 
val report_of_token: token -> Position.report  | 
| 
39510
 
d9f5f01faa1b
Syntax.read_asts error: report token ranges within message -- no side-effect here;
 
wenzelm 
parents: 
39507 
diff
changeset
 | 
38  | 
val reported_token_range: Proof.context -> token -> string  | 
| 18 | 39  | 
val matching_tokens: token * token -> bool  | 
40  | 
val valued_token: token -> bool  | 
|
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
41  | 
val predef_term: string -> token option  | 
| 
55108
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
42  | 
val implode_string: Symbol.symbol list -> string  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
43  | 
val explode_string: string * Position.T -> Symbol_Pos.T list  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
44  | 
val implode_str: Symbol.symbol list -> string  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
45  | 
val explode_str: string * Position.T -> Symbol_Pos.T list  | 
| 30573 | 46  | 
val tokenize: Scan.lexicon -> bool -> Symbol_Pos.T list -> token list  | 
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
47  | 
val read_indexname: string -> indexname  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
48  | 
val read_var: string -> term  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
49  | 
val read_variable: string -> indexname option  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
50  | 
val read_nat: string -> int option  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
51  | 
val read_int: string -> int option  | 
| 58421 | 52  | 
  val read_num: string -> {radix: int, leading_zeros: int, value: int}
 | 
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
53  | 
  val read_float: string -> {mant: int, exp: int}
 | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
54  | 
val mark_class: string -> string val unmark_class: string -> string  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
55  | 
val mark_type: string -> string val unmark_type: string -> string  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
56  | 
val mark_const: string -> string val unmark_const: string -> string  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
57  | 
val mark_fixed: string -> string val unmark_fixed: string -> string  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
58  | 
val unmark:  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
59  | 
   {case_class: string -> 'a,
 | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
60  | 
case_type: string -> 'a,  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
61  | 
case_const: string -> 'a,  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
62  | 
case_fixed: string -> 'a,  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
63  | 
case_default: string -> 'a} -> string -> 'a  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
64  | 
val is_marked: string -> bool  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
65  | 
val dummy_type: term  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
66  | 
val fun_type: term  | 
| 
4247
 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 
wenzelm 
parents: 
3828 
diff
changeset
 | 
67  | 
end;  | 
| 0 | 68  | 
|
| 14679 | 69  | 
structure Lexicon: LEXICON =  | 
| 0 | 70  | 
struct  | 
71  | 
||
| 42476 | 72  | 
(** syntaxtic terms **)  | 
73  | 
||
74  | 
structure Syntax =  | 
|
75  | 
struct  | 
|
76  | 
||
77  | 
fun const c = Const (c, dummyT);  | 
|
78  | 
fun free x = Free (x, dummyT);  | 
|
79  | 
fun var xi = Var (xi, dummyT);  | 
|
80  | 
||
81  | 
end;  | 
|
82  | 
||
83  | 
||
84  | 
||
| 4703 | 85  | 
(** basic scanners **)  | 
86  | 
||
| 30573 | 87  | 
open Basic_Symbol_Pos;  | 
| 27773 | 88  | 
|
| 55105 | 89  | 
val err_prefix = "Inner lexical error: ";  | 
90  | 
||
91  | 
fun !!! msg = Symbol_Pos.!!! (fn () => err_prefix ^ msg);  | 
|
| 27773 | 92  | 
|
| 50239 | 93  | 
val scan_id = Symbol_Pos.scan_ident;  | 
| 61476 | 94  | 
val scan_longid = scan_id @@@ Scan.repeats1 ($$$ "." @@@ scan_id);  | 
| 27773 | 95  | 
val scan_tid = $$$ "'" @@@ scan_id;  | 
| 4703 | 96  | 
|
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
97  | 
val scan_hex = $$$ "0" @@@ $$$ "x" @@@ Scan.many1 (Symbol.is_ascii_hex o Symbol_Pos.symbol);  | 
| 27773 | 98  | 
val scan_bin = $$$ "0" @@@ $$$ "b" @@@ Scan.many1 (fn (s, _) => s = "0" orelse s = "1");  | 
| 62782 | 99  | 
val scan_num = scan_hex || scan_bin || Symbol_Pos.scan_nat;  | 
| 4703 | 100  | 
|
| 62782 | 101  | 
val scan_id_nat = scan_id @@@ Scan.optional ($$$ "." @@@ Symbol_Pos.scan_nat) [];  | 
| 27773 | 102  | 
val scan_var = $$$ "?" @@@ scan_id_nat;  | 
103  | 
val scan_tvar = $$$ "?" @@@ $$$ "'" @@@ scan_id_nat;  | 
|
| 4703 | 104  | 
|
| 50239 | 105  | 
fun is_tid s =  | 
106  | 
(case try (unprefix "'") s of  | 
|
107  | 
SOME s' => Symbol_Pos.is_identifier s'  | 
|
108  | 
| NONE => false);  | 
|
109  | 
||
| 4703 | 110  | 
|
111  | 
||
| 18 | 112  | 
(** datatype token **)  | 
| 0 | 113  | 
|
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
114  | 
datatype token_kind =  | 
| 
55108
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
115  | 
Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | NumSy |  | 
| 58421 | 116  | 
FloatSy | StrSy | StringSy | Cartouche | Space | Comment | EOF;  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
117  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
118  | 
datatype token = Token of token_kind * string * Position.range;  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
119  | 
|
| 27806 | 120  | 
fun str_of_token (Token (_, s, _)) = s;  | 
121  | 
fun pos_of_token (Token (_, _, (pos, _))) = pos;  | 
|
122  | 
||
| 27887 | 123  | 
fun is_proper (Token (Space, _, _)) = false  | 
124  | 
| is_proper (Token (Comment, _, _)) = false  | 
|
125  | 
| is_proper _ = true;  | 
|
126  | 
||
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
127  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
128  | 
(* stopper *)  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
129  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
130  | 
fun mk_eof pos = Token (EOF, "", (pos, Position.none));  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
131  | 
val eof = mk_eof Position.none;  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
132  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
133  | 
fun is_eof (Token (EOF, _, _)) = true  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
134  | 
| is_eof _ = false;  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
135  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
136  | 
val stopper = Scan.stopper (K eof) is_eof;  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
137  | 
|
| 0 | 138  | 
|
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
139  | 
(* terminal arguments *)  | 
| 0 | 140  | 
|
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
141  | 
val terminal_kinds =  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
142  | 
 [("id", IdentSy),
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
143  | 
  ("longid", LongIdentSy),
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
144  | 
  ("var", VarSy),
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
145  | 
  ("tid", TFreeSy),
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
146  | 
  ("tvar", TVarSy),
 | 
| 
45703
 
c7a13ce60161
renamed inner syntax categories "num" to "num_token" and "xnum" to "xnum_token";
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
147  | 
  ("num_token", NumSy),
 | 
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
148  | 
  ("float_token", FloatSy),
 | 
| 55033 | 149  | 
  ("str_token", StrSy),
 | 
| 
55108
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
150  | 
  ("string_token", StringSy),
 | 
| 55033 | 151  | 
  ("cartouche", Cartouche)];
 | 
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
152  | 
|
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
153  | 
val terminals = map #1 terminal_kinds;  | 
| 20664 | 154  | 
val is_terminal = member (op =) terminals;  | 
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
155  | 
|
| 0 | 156  | 
|
| 27887 | 157  | 
(* markup *)  | 
158  | 
||
| 
49821
 
d15fe10593ff
clarified output token markup (see also bc22daeed49e);
 
wenzelm 
parents: 
48992 
diff
changeset
 | 
159  | 
fun literal_markup s =  | 
| 52189 | 160  | 
if Symbol.is_ascii_identifier s orelse exists Symbol.is_letter (Symbol.explode s)  | 
161  | 
then Markup.literal  | 
|
162  | 
else Markup.delimiter;  | 
|
| 
49821
 
d15fe10593ff
clarified output token markup (see also bc22daeed49e);
 
wenzelm 
parents: 
48992 
diff
changeset
 | 
163  | 
|
| 27887 | 164  | 
val token_kind_markup =  | 
| 
51612
 
6a1e40f9dd55
added var_position in analogy to longid_position, for typing reports on input;
 
wenzelm 
parents: 
50242 
diff
changeset
 | 
165  | 
fn TFreeSy => Markup.tfree  | 
| 55033 | 166  | 
| TVarSy => Markup.tvar  | 
167  | 
| NumSy => Markup.numeral  | 
|
| 
50201
 
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
 
wenzelm 
parents: 
49821 
diff
changeset
 | 
168  | 
| FloatSy => Markup.numeral  | 
| 55033 | 169  | 
| StrSy => Markup.inner_string  | 
| 
55108
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
170  | 
| StringSy => Markup.inner_string  | 
| 55033 | 171  | 
| Cartouche => Markup.inner_cartouche  | 
| 
50201
 
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
 
wenzelm 
parents: 
49821 
diff
changeset
 | 
172  | 
| Comment => Markup.inner_comment  | 
| 55033 | 173  | 
| _ => Markup.empty;  | 
| 27887 | 174  | 
|
| 44736 | 175  | 
fun report_of_token (Token (kind, s, (pos, _))) =  | 
| 
49821
 
d15fe10593ff
clarified output token markup (see also bc22daeed49e);
 
wenzelm 
parents: 
48992 
diff
changeset
 | 
176  | 
let val markup = if kind = Literal then literal_markup s else token_kind_markup kind  | 
| 44736 | 177  | 
in (pos, markup) end;  | 
| 27887 | 178  | 
|
| 
39510
 
d9f5f01faa1b
Syntax.read_asts error: report token ranges within message -- no side-effect here;
 
wenzelm 
parents: 
39507 
diff
changeset
 | 
179  | 
fun reported_token_range ctxt tok =  | 
| 
39168
 
e3ac771235f7
report token range after inner parse error -- often provides important clues about misunderstanding concerning lexical phase;
 
wenzelm 
parents: 
38474 
diff
changeset
 | 
180  | 
if is_proper tok  | 
| 
50201
 
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
 
wenzelm 
parents: 
49821 
diff
changeset
 | 
181  | 
then Context_Position.reported_text ctxt (pos_of_token tok) Markup.token_range ""  | 
| 
39510
 
d9f5f01faa1b
Syntax.read_asts error: report token ranges within message -- no side-effect here;
 
wenzelm 
parents: 
39507 
diff
changeset
 | 
182  | 
else "";  | 
| 
39168
 
e3ac771235f7
report token range after inner parse error -- often provides important clues about misunderstanding concerning lexical phase;
 
wenzelm 
parents: 
38474 
diff
changeset
 | 
183  | 
|
| 27887 | 184  | 
|
| 18 | 185  | 
(* matching_tokens *)  | 
| 0 | 186  | 
|
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
187  | 
fun matching_tokens (Token (Literal, x, _), Token (Literal, y, _)) = x = y  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
188  | 
| matching_tokens (Token (k, _, _), Token (k', _, _)) = k = k';  | 
| 0 | 189  | 
|
190  | 
||
| 18 | 191  | 
(* valued_token *)  | 
| 0 | 192  | 
|
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
193  | 
fun valued_token (Token (Literal, _, _)) = false  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
194  | 
| valued_token (Token (EOF, _, _)) = false  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
195  | 
| valued_token _ = true;  | 
| 0 | 196  | 
|
197  | 
||
| 18 | 198  | 
(* predef_term *)  | 
| 0 | 199  | 
|
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
200  | 
fun predef_term s =  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
201  | 
(case AList.lookup (op =) terminal_kinds s of  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
202  | 
SOME sy => SOME (Token (sy, s, Position.no_range))  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
203  | 
| NONE => NONE);  | 
| 0 | 204  | 
|
205  | 
||
| 
55108
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
206  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
207  | 
(** string literals **)  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
208  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
209  | 
fun explode_literal scan_body (str, pos) =  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
210  | 
(case Scan.read Symbol_Pos.stopper scan_body (Symbol_Pos.explode (str, pos)) of  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
211  | 
SOME ss => ss  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
212  | 
| _ => error (err_prefix ^ "malformed string literal " ^ quote str ^ Position.here pos));  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
213  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
214  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
215  | 
(* string *)  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
216  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
217  | 
val scan_string = Scan.trace (Symbol_Pos.scan_string_qq err_prefix) >> #2;  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
218  | 
val scan_string_body = Symbol_Pos.scan_string_qq err_prefix >> (#1 o #2);  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
219  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
220  | 
fun implode_string ss = quote (implode (map (fn "\"" => "\\\"" | s => s) ss));  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
221  | 
val explode_string = explode_literal scan_string_body;  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
222  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
223  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
224  | 
(* str *)  | 
| 18 | 225  | 
|
| 14730 | 226  | 
val scan_chr =  | 
| 55107 | 227  | 
$$ "\\" |-- $$$ "'" ||  | 
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
228  | 
Scan.one  | 
| 58854 | 229  | 
((fn s => s <> "\\" andalso s <> "'" andalso Symbol.not_eof s) o  | 
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
230  | 
Symbol_Pos.symbol) >> single ||  | 
| 55107 | 231  | 
$$$ "'" --| Scan.ahead (~$$ "'");  | 
| 14730 | 232  | 
|
233  | 
val scan_str =  | 
|
| 55106 | 234  | 
Scan.ahead ($$ "'" -- $$ "'") |--  | 
235  | 
!!! "unclosed string literal"  | 
|
| 61476 | 236  | 
($$$ "'" @@@ $$$ "'" @@@ Scan.repeats scan_chr @@@ $$$ "'" @@@ $$$ "'");  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
237  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
238  | 
val scan_str_body =  | 
| 55107 | 239  | 
Scan.ahead ($$ "'" |-- $$ "'") |--  | 
| 55106 | 240  | 
!!! "unclosed string literal"  | 
| 61476 | 241  | 
($$ "'" |-- $$ "'" |-- Scan.repeats scan_chr --| $$ "'" --| $$ "'");  | 
| 14730 | 242  | 
|
| 
55108
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
243  | 
fun implode_str ss = enclose "''" "''" (implode (map (fn "'" => "\\'" | s => s) ss));  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
244  | 
val explode_str = explode_literal scan_str_body;  | 
| 18 | 245  | 
|
246  | 
||
| 27806 | 247  | 
|
| 18 | 248  | 
(** tokenize **)  | 
249  | 
||
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
250  | 
fun token_leq (Token (_, s1, _), Token (_, s2, _)) = s1 <= s2;  | 
| 30573 | 251  | 
fun token kind ss = Token (kind, Symbol_Pos.content ss, Symbol_Pos.range ss);  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
252  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
253  | 
fun tokenize lex xids syms =  | 
| 18 | 254  | 
let  | 
255  | 
val scan_xid =  | 
|
| 
55962
 
fbd0e768bc8f
special identifier "__" (i.e. empty name with internal suffix) serves as wild-card for completion;
 
wenzelm 
parents: 
55624 
diff
changeset
 | 
256  | 
(if xids then $$$ "_" @@@ scan_id || scan_id else scan_id) ||  | 
| 
 
fbd0e768bc8f
special identifier "__" (i.e. empty name with internal suffix) serves as wild-card for completion;
 
wenzelm 
parents: 
55624 
diff
changeset
 | 
257  | 
$$$ "_" @@@ $$$ "_";  | 
| 18 | 258  | 
|
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
259  | 
val scan_val =  | 
| 
26007
 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 
wenzelm 
parents: 
24630 
diff
changeset
 | 
260  | 
scan_tvar >> token TVarSy ||  | 
| 
 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 
wenzelm 
parents: 
24630 
diff
changeset
 | 
261  | 
scan_var >> token VarSy ||  | 
| 
 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 
wenzelm 
parents: 
24630 
diff
changeset
 | 
262  | 
scan_tid >> token TFreeSy ||  | 
| 62782 | 263  | 
Symbol_Pos.scan_float >> token FloatSy ||  | 
| 58421 | 264  | 
scan_num >> token NumSy ||  | 
| 
26007
 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 
wenzelm 
parents: 
24630 
diff
changeset
 | 
265  | 
scan_longid >> token LongIdentSy ||  | 
| 
 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 
wenzelm 
parents: 
24630 
diff
changeset
 | 
266  | 
scan_xid >> token IdentSy;  | 
| 18 | 267  | 
|
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
268  | 
val scan_lit = Scan.literal lex >> token Literal;  | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
269  | 
|
| 4703 | 270  | 
val scan_token =  | 
| 55105 | 271  | 
Symbol_Pos.scan_cartouche err_prefix >> token Cartouche ||  | 
272  | 
Symbol_Pos.scan_comment err_prefix >> token Comment ||  | 
|
| 27887 | 273  | 
Scan.max token_leq scan_lit scan_val ||  | 
| 
55108
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
274  | 
scan_string >> token StringSy ||  | 
| 27887 | 275  | 
scan_str >> token StrSy ||  | 
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
276  | 
Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol) >> token Space;  | 
| 18 | 277  | 
in  | 
| 27773 | 278  | 
(case Scan.error  | 
| 30573 | 279  | 
(Scan.finite Symbol_Pos.stopper (Scan.repeat scan_token)) syms of  | 
| 27887 | 280  | 
(toks, []) => toks  | 
| 
55108
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
281  | 
| (_, ss) =>  | 
| 55624 | 282  | 
        error ("Inner lexical error" ^ Position.here (#1 (Symbol_Pos.range ss)) ^
 | 
283  | 
          Markup.markup Markup.no_report ("\nat " ^ quote (Symbol_Pos.content ss))))
 | 
|
284  | 
||
| 18 | 285  | 
end;  | 
286  | 
||
287  | 
||
288  | 
||
289  | 
(** scan variables **)  | 
|
290  | 
||
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
291  | 
(* scan_indexname *)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
292  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
293  | 
local  | 
| 18 | 294  | 
|
| 27773 | 295  | 
val scan_vname =  | 
| 18 | 296  | 
let  | 
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
297  | 
fun nat n [] = n  | 
| 
64275
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
62819 
diff
changeset
 | 
298  | 
| nat n (c :: cs) = nat (n * 10 + (ord c - Char.ord #"0")) cs;  | 
| 18 | 299  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
300  | 
fun idxname cs ds = (implode (rev cs), nat 0 ds);  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
301  | 
fun chop_idx [] ds = idxname [] ds  | 
| 
62529
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
302  | 
| chop_idx (cs as (_ :: "\<^sub>" :: _)) ds = idxname cs ds  | 
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
303  | 
| chop_idx (c :: cs) ds =  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
304  | 
if Symbol.is_digit c then chop_idx cs (c :: ds)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
305  | 
else idxname (c :: cs) ds;  | 
| 18 | 306  | 
|
| 50239 | 307  | 
val scan =  | 
308  | 
(scan_id >> map Symbol_Pos.symbol) --  | 
|
| 62782 | 309  | 
Scan.optional ($$ "." |-- Symbol_Pos.scan_nat >> (nat 0 o map Symbol_Pos.symbol)) ~1;  | 
| 18 | 310  | 
in  | 
| 27773 | 311  | 
scan >>  | 
312  | 
(fn (cs, ~1) => chop_idx (rev cs) []  | 
|
313  | 
| (cs, i) => (implode cs, i))  | 
|
| 18 | 314  | 
end;  | 
315  | 
||
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
316  | 
in  | 
| 18 | 317  | 
|
| 55107 | 318  | 
val scan_indexname = $$ "'" |-- scan_vname >> (fn (x, i) => ("'" ^ x, i)) || scan_vname;
 | 
| 
15443
 
07f78cc82a73
indexname function now parses type variables as well; changed input
 
berghofe 
parents: 
14981 
diff
changeset
 | 
319  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
320  | 
end;  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
321  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
322  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
323  | 
(* indexname *)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
324  | 
|
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
325  | 
fun read_indexname s =  | 
| 62751 | 326  | 
(case Scan.read Symbol_Pos.stopper scan_indexname (Symbol_Pos.explode0 s) of  | 
| 15531 | 327  | 
SOME xi => xi  | 
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
328  | 
  | _ => error ("Lexical error in variable name: " ^ quote s));
 | 
| 18 | 329  | 
|
330  | 
||
| 4703 | 331  | 
(* read_var *)  | 
| 18 | 332  | 
|
| 4703 | 333  | 
fun read_var str =  | 
| 18 | 334  | 
let  | 
335  | 
val scan =  | 
|
| 55107 | 336  | 
$$ "?" |-- scan_indexname --| Scan.ahead (Scan.one Symbol_Pos.is_eof)  | 
| 42476 | 337  | 
>> Syntax.var ||  | 
| 58854 | 338  | 
Scan.many (Symbol.not_eof o Symbol_Pos.symbol)  | 
| 42476 | 339  | 
>> (Syntax.free o implode o map Symbol_Pos.symbol);  | 
| 62751 | 340  | 
in the (Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode0 str)) end;  | 
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
341  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
342  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
343  | 
(* read_variable *)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
344  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
345  | 
fun read_variable str =  | 
| 55107 | 346  | 
let val scan = $$ "?" |-- scan_indexname || scan_indexname  | 
| 62751 | 347  | 
in Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode0 str) end;  | 
| 4587 | 348  | 
|
349  | 
||
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
350  | 
(* read numbers *)  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
351  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
352  | 
local  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
353  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
354  | 
fun nat cs =  | 
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
355  | 
Option.map (#1 o Library.read_int o map Symbol_Pos.symbol)  | 
| 62782 | 356  | 
(Scan.read Symbol_Pos.stopper Symbol_Pos.scan_nat cs);  | 
| 5860 | 357  | 
|
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
358  | 
in  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
359  | 
|
| 62751 | 360  | 
fun read_nat s = nat (Symbol_Pos.explode0 s);  | 
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
361  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
362  | 
fun read_int s =  | 
| 62751 | 363  | 
(case Symbol_Pos.explode0 s of  | 
| 27773 | 364  | 
    ("-", _) :: cs => Option.map ~ (nat cs)
 | 
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
365  | 
| cs => nat cs);  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
366  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
367  | 
end;  | 
| 5860 | 368  | 
|
369  | 
||
| 58421 | 370  | 
(* read_num: hex/bin/decimal *)  | 
| 9326 | 371  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
372  | 
local  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
373  | 
|
| 
64275
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
62819 
diff
changeset
 | 
374  | 
val ten = Char.ord #"0" + 10;  | 
| 
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
62819 
diff
changeset
 | 
375  | 
val a = Char.ord #"a";  | 
| 
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
62819 
diff
changeset
 | 
376  | 
val A = Char.ord #"A";  | 
| 35428 | 377  | 
val _ = a > A orelse raise Fail "Bad ASCII";  | 
| 20096 | 378  | 
|
379  | 
fun remap_hex c =  | 
|
380  | 
let val x = ord c in  | 
|
381  | 
if x >= a then chr (x - a + ten)  | 
|
382  | 
else if x >= A then chr (x - A + ten)  | 
|
383  | 
else c  | 
|
384  | 
end;  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
385  | 
|
| 21781 | 386  | 
fun leading_zeros ["0"] = 0  | 
387  | 
  | leading_zeros ("0" :: cs) = 1 + leading_zeros cs
 | 
|
388  | 
| leading_zeros _ = 0;  | 
|
389  | 
||
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
390  | 
in  | 
| 
15965
 
f422f8283491
Use of IntInf.int instead of int in most numeric simprocs; avoids
 
paulson 
parents: 
15570 
diff
changeset
 | 
391  | 
|
| 58421 | 392  | 
fun read_num str =  | 
| 9326 | 393  | 
let  | 
| 58421 | 394  | 
val (radix, digs) =  | 
395  | 
(case Symbol.explode str of  | 
|
396  | 
"0" :: "x" :: cs => (16, map remap_hex cs)  | 
|
397  | 
| "0" :: "b" :: cs => (2, cs)  | 
|
398  | 
| cs => (10, cs));  | 
|
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
399  | 
in  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
400  | 
   {radix = radix,
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
401  | 
leading_zeros = leading_zeros digs,  | 
| 58421 | 402  | 
value = #1 (Library.read_radix_int radix digs)}  | 
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
403  | 
end;  | 
| 9326 | 404  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
405  | 
end;  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
406  | 
|
| 28904 | 407  | 
fun read_float str =  | 
408  | 
let  | 
|
| 58421 | 409  | 
val cs = Symbol.explode str;  | 
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
410  | 
val (intpart, fracpart) =  | 
| 28904 | 411  | 
(case take_prefix Symbol.is_digit cs of  | 
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
412  | 
(intpart, "." :: fracpart) => (intpart, fracpart)  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
413  | 
| _ => raise Fail "read_float");  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
414  | 
in  | 
| 58421 | 415  | 
   {mant = #1 (Library.read_int (intpart @ fracpart)),
 | 
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
416  | 
exp = length fracpart}  | 
| 
42046
 
6341c23baf10
added Lexicon.encode_position, Lexicon.decode_position;
 
wenzelm 
parents: 
40525 
diff
changeset
 | 
417  | 
end;  | 
| 
 
6341c23baf10
added Lexicon.encode_position, Lexicon.decode_position;
 
wenzelm 
parents: 
40525 
diff
changeset
 | 
418  | 
|
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
419  | 
|
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
420  | 
(* marked logical entities *)  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
421  | 
|
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
422  | 
fun marker s = (prefix s, unprefix s);  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
423  | 
|
| 
62529
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
424  | 
val (mark_class, unmark_class) = marker "\<^class>";  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
425  | 
val (mark_type, unmark_type) = marker "\<^type>";  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
426  | 
val (mark_const, unmark_const) = marker "\<^const>";  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
427  | 
val (mark_fixed, unmark_fixed) = marker "\<^fixed>";  | 
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
428  | 
|
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
429  | 
fun unmark {case_class, case_type, case_const, case_fixed, case_default} s =
 | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
430  | 
(case try unmark_class s of  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
431  | 
SOME c => case_class c  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
432  | 
| NONE =>  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
433  | 
(case try unmark_type s of  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
434  | 
SOME c => case_type c  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
435  | 
| NONE =>  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
436  | 
(case try unmark_const s of  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
437  | 
SOME c => case_const c  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
438  | 
| NONE =>  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
439  | 
(case try unmark_fixed s of  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
440  | 
SOME c => case_fixed c  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
441  | 
| NONE => case_default s))));  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
442  | 
|
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
443  | 
val is_marked =  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
444  | 
  unmark {case_class = K true, case_type = K true, case_const = K true,
 | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
445  | 
case_fixed = K true, case_default = K false};  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
446  | 
|
| 42476 | 447  | 
val dummy_type = Syntax.const (mark_type "dummy");  | 
448  | 
val fun_type = Syntax.const (mark_type "fun");  | 
|
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
449  | 
|
| 59196 | 450  | 
|
451  | 
(* toplevel pretty printing *)  | 
|
452  | 
||
| 62663 | 453  | 
val _ =  | 
| 
62819
 
d3ff367a16a0
careful export of type-dependent functions, without losing their special status;
 
wenzelm 
parents: 
62782 
diff
changeset
 | 
454  | 
ML_system_pp (fn _ => fn _ =>  | 
| 62663 | 455  | 
    Pretty.to_polyml o Pretty.str_list "{" "}" o map quote o Scan.dest_lexicon);
 | 
| 59196 | 456  | 
|
| 0 | 457  | 
end;  |