| author | wenzelm | 
| Wed, 28 Aug 2024 19:40:07 +0200 | |
| changeset 80784 | 3d9e7746d9db | 
| parent 77846 | 5ba68d3bd741 | 
| child 80809 | 4a64fc4d1cde | 
| permissions | -rw-r--r-- | 
| 18 | 1  | 
(* Title: Pure/Syntax/lexicon.ML  | 
2  | 
Author: Tobias Nipkow and Markus Wenzel, TU Muenchen  | 
|
| 0 | 3  | 
|
| 4703 | 4  | 
Lexer for the inner Isabelle syntax (terms and types).  | 
| 18 | 5  | 
*)  | 
| 0 | 6  | 
|
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
7  | 
signature LEXICON =  | 
| 
4247
 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 
wenzelm 
parents: 
3828 
diff
changeset
 | 
8  | 
sig  | 
| 42476 | 9  | 
structure Syntax:  | 
10  | 
sig  | 
|
11  | 
val const: string -> term  | 
|
12  | 
val free: string -> term  | 
|
13  | 
val var: indexname -> term  | 
|
14  | 
end  | 
|
| 67426 | 15  | 
val scan_id: Symbol_Pos.T list scanner  | 
16  | 
val scan_longid: Symbol_Pos.T list scanner  | 
|
17  | 
val scan_tid: Symbol_Pos.T list scanner  | 
|
18  | 
val scan_hex: Symbol_Pos.T list scanner  | 
|
19  | 
val scan_bin: Symbol_Pos.T list scanner  | 
|
20  | 
val scan_var: Symbol_Pos.T list scanner  | 
|
21  | 
val scan_tvar: Symbol_Pos.T list scanner  | 
|
| 50239 | 22  | 
val is_tid: string -> bool  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
23  | 
datatype token_kind =  | 
| 67548 | 24  | 
Literal | Ident | Long_Ident | Var | Type_Ident | Type_Var | Num | Float | Str |  | 
| 69042 | 25  | 
String | Cartouche | Space | Comment of Comment.kind | Dummy | EOF  | 
| 67554 | 26  | 
eqtype token  | 
| 67412 | 27  | 
val kind_of_token: token -> token_kind  | 
| 27806 | 28  | 
val str_of_token: token -> string  | 
29  | 
val pos_of_token: token -> Position.T  | 
|
| 67551 | 30  | 
val end_pos_of_token: token -> Position.T  | 
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
31  | 
val is_proper: token -> bool  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
32  | 
val dummy: token  | 
| 67552 | 33  | 
val literal: string -> token  | 
34  | 
val is_literal: token -> bool  | 
|
| 70586 | 35  | 
val tokens_match_ord: token ord  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
36  | 
val mk_eof: Position.T -> token  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
37  | 
val eof: token  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
38  | 
val is_eof: token -> bool  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
39  | 
val stopper: token Scan.stopper  | 
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
40  | 
val terminals: string list  | 
| 
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
41  | 
val is_terminal: string -> bool  | 
| 67556 | 42  | 
val get_terminal: string -> token option  | 
| 
73163
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
43  | 
val suppress_literal_markup: string -> bool  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
44  | 
val suppress_markup: token -> bool  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
45  | 
val literal_markup: string -> Markup.T list  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
46  | 
val reports_of_token: token -> Position.report list  | 
| 
39510
 
d9f5f01faa1b
Syntax.read_asts error: report token ranges within message -- no side-effect here;
 
wenzelm 
parents: 
39507 
diff
changeset
 | 
47  | 
val reported_token_range: Proof.context -> token -> string  | 
| 18 | 48  | 
val valued_token: token -> bool  | 
| 
55108
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
49  | 
val implode_string: Symbol.symbol list -> string  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
50  | 
val explode_string: string * Position.T -> Symbol_Pos.T list  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
51  | 
val implode_str: Symbol.symbol list -> string  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
52  | 
val explode_str: string * Position.T -> Symbol_Pos.T list  | 
| 30573 | 53  | 
val tokenize: Scan.lexicon -> bool -> Symbol_Pos.T list -> token list  | 
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
54  | 
val read_indexname: string -> indexname  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
55  | 
val read_var: string -> term  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
56  | 
val read_variable: string -> indexname option  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
57  | 
val read_nat: string -> int option  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
58  | 
val read_int: string -> int option  | 
| 58421 | 59  | 
  val read_num: string -> {radix: int, leading_zeros: int, value: int}
 | 
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
60  | 
  val read_float: string -> {mant: int, exp: int}
 | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
61  | 
val mark_class: string -> string val unmark_class: string -> string  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
62  | 
val mark_type: string -> string val unmark_type: string -> string  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
63  | 
val mark_const: string -> string val unmark_const: string -> string  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
64  | 
val mark_fixed: string -> string val unmark_fixed: string -> string  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
65  | 
val unmark:  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
66  | 
   {case_class: string -> 'a,
 | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
67  | 
case_type: string -> 'a,  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
68  | 
case_const: string -> 'a,  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
69  | 
case_fixed: string -> 'a,  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
70  | 
case_default: string -> 'a} -> string -> 'a  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
71  | 
val is_marked: string -> bool  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
72  | 
val dummy_type: term  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
73  | 
val fun_type: term  | 
| 
4247
 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 
wenzelm 
parents: 
3828 
diff
changeset
 | 
74  | 
end;  | 
| 0 | 75  | 
|
| 14679 | 76  | 
structure Lexicon: LEXICON =  | 
| 0 | 77  | 
struct  | 
78  | 
||
| 69344 | 79  | 
(** syntactic terms **)  | 
| 42476 | 80  | 
|
81  | 
structure Syntax =  | 
|
82  | 
struct  | 
|
83  | 
||
84  | 
fun const c = Const (c, dummyT);  | 
|
85  | 
fun free x = Free (x, dummyT);  | 
|
86  | 
fun var xi = Var (xi, dummyT);  | 
|
87  | 
||
88  | 
end;  | 
|
89  | 
||
90  | 
||
91  | 
||
| 4703 | 92  | 
(** basic scanners **)  | 
93  | 
||
| 30573 | 94  | 
open Basic_Symbol_Pos;  | 
| 27773 | 95  | 
|
| 55105 | 96  | 
val err_prefix = "Inner lexical error: ";  | 
97  | 
||
98  | 
fun !!! msg = Symbol_Pos.!!! (fn () => err_prefix ^ msg);  | 
|
| 27773 | 99  | 
|
| 50239 | 100  | 
val scan_id = Symbol_Pos.scan_ident;  | 
| 61476 | 101  | 
val scan_longid = scan_id @@@ Scan.repeats1 ($$$ "." @@@ scan_id);  | 
| 27773 | 102  | 
val scan_tid = $$$ "'" @@@ scan_id;  | 
| 4703 | 103  | 
|
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
104  | 
val scan_hex = $$$ "0" @@@ $$$ "x" @@@ Scan.many1 (Symbol.is_ascii_hex o Symbol_Pos.symbol);  | 
| 27773 | 105  | 
val scan_bin = $$$ "0" @@@ $$$ "b" @@@ Scan.many1 (fn (s, _) => s = "0" orelse s = "1");  | 
| 62782 | 106  | 
val scan_num = scan_hex || scan_bin || Symbol_Pos.scan_nat;  | 
| 4703 | 107  | 
|
| 62782 | 108  | 
val scan_id_nat = scan_id @@@ Scan.optional ($$$ "." @@@ Symbol_Pos.scan_nat) [];  | 
| 27773 | 109  | 
val scan_var = $$$ "?" @@@ scan_id_nat;  | 
110  | 
val scan_tvar = $$$ "?" @@@ $$$ "'" @@@ scan_id_nat;  | 
|
| 4703 | 111  | 
|
| 50239 | 112  | 
fun is_tid s =  | 
113  | 
(case try (unprefix "'") s of  | 
|
114  | 
SOME s' => Symbol_Pos.is_identifier s'  | 
|
115  | 
| NONE => false);  | 
|
116  | 
||
| 4703 | 117  | 
|
118  | 
||
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
119  | 
(** tokens **)  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
120  | 
|
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
121  | 
(* datatype token_kind *)  | 
| 0 | 122  | 
|
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
123  | 
datatype token_kind =  | 
| 67548 | 124  | 
Literal | Ident | Long_Ident | Var | Type_Ident | Type_Var | Num | Float | Str |  | 
| 69042 | 125  | 
String | Cartouche | Space | Comment of Comment.kind | Dummy | EOF;  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
126  | 
|
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
127  | 
val token_kinds =  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
128  | 
Vector.fromList  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
129  | 
[Literal, Ident, Long_Ident, Var, Type_Ident, Type_Var, Num, Float, Str,  | 
| 69042 | 130  | 
String, Cartouche, Space, Comment Comment.Comment, Comment Comment.Cancel,  | 
131  | 
Comment Comment.Latex, Dummy, EOF];  | 
|
| 67539 | 132  | 
|
| 
77846
 
5ba68d3bd741
more operations, following Isabelle/ML conventions;
 
wenzelm 
parents: 
73198 
diff
changeset
 | 
133  | 
val token_kind = Vector.nth token_kinds;  | 
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
134  | 
fun token_kind_index k = #1 (the (Vector.findi (fn (_, k') => k = k') token_kinds));  | 
| 67539 | 135  | 
|
136  | 
||
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
137  | 
(* datatype token *)  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
138  | 
|
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
139  | 
datatype token = Token of int * string * Position.range;  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
140  | 
|
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
141  | 
fun index_of_token (Token (i, _, _)) = i;  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
142  | 
val kind_of_token = index_of_token #> token_kind;  | 
| 27806 | 143  | 
fun str_of_token (Token (_, s, _)) = s;  | 
144  | 
fun pos_of_token (Token (_, _, (pos, _))) = pos;  | 
|
| 67551 | 145  | 
fun end_pos_of_token (Token (_, _, (_, end_pos))) = end_pos;  | 
| 27806 | 146  | 
|
| 
67425
 
7d4a088dbc0e
clarified modules: uniform notion of formal comments;
 
wenzelm 
parents: 
67413 
diff
changeset
 | 
147  | 
val is_proper = kind_of_token #> (fn Space => false | Comment _ => false | _ => true);  | 
| 27887 | 148  | 
|
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
149  | 
val dummy = Token (token_kind_index Dummy, "", Position.no_range);  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
150  | 
|
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
151  | 
|
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
152  | 
(* literals *)  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
153  | 
|
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
154  | 
val literal_index = token_kind_index Literal;  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
155  | 
fun literal s = Token (literal_index, s, Position.no_range);  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
156  | 
fun is_literal tok = index_of_token tok = literal_index;  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
157  | 
|
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
158  | 
fun tokens_match_ord toks =  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
159  | 
let val is = apply2 index_of_token toks in  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
160  | 
(case int_ord is of  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
161  | 
EQUAL => if #1 is = literal_index then fast_string_ord (apply2 str_of_token toks) else EQUAL  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
162  | 
| ord => ord)  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
163  | 
end;  | 
| 67550 | 164  | 
|
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
165  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
166  | 
(* stopper *)  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
167  | 
|
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
168  | 
val eof_index = token_kind_index EOF;  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
169  | 
fun mk_eof pos = Token (eof_index, "", (pos, Position.none));  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
170  | 
val eof = mk_eof Position.none;  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
171  | 
|
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
172  | 
fun is_eof tok = index_of_token tok = eof_index;  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
173  | 
val stopper = Scan.stopper (K eof) is_eof;  | 
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
174  | 
|
| 0 | 175  | 
|
| 67556 | 176  | 
(* terminal symbols *)  | 
| 0 | 177  | 
|
| 67556 | 178  | 
val terminal_symbols =  | 
179  | 
  [("id", Ident),
 | 
|
180  | 
   ("longid", Long_Ident),
 | 
|
181  | 
   ("var", Var),
 | 
|
182  | 
   ("tid", Type_Ident),
 | 
|
183  | 
   ("tvar", Type_Var),
 | 
|
184  | 
   ("num_token", Num),
 | 
|
185  | 
   ("float_token", Float),
 | 
|
186  | 
   ("str_token", Str),
 | 
|
187  | 
   ("string_token", String),
 | 
|
188  | 
   ("cartouche", Cartouche)]
 | 
|
189  | 
|> map (apsnd token_kind_index)  | 
|
190  | 
|> Symtab.make;  | 
|
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
191  | 
|
| 67556 | 192  | 
val terminals = Symtab.keys terminal_symbols;  | 
193  | 
val is_terminal = Symtab.defined terminal_symbols;  | 
|
194  | 
fun get_terminal s =  | 
|
195  | 
(case Symtab.lookup terminal_symbols s of  | 
|
196  | 
SOME i => SOME (Token (i, s, Position.no_range))  | 
|
197  | 
| NONE => NONE);  | 
|
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
198  | 
|
| 0 | 199  | 
|
| 27887 | 200  | 
(* markup *)  | 
201  | 
||
| 
73198
 
a9eaf8c3b728
tuned signature (e.g. see HTML.control_block in Isabelle/Scala);
 
wenzelm 
parents: 
73163 
diff
changeset
 | 
202  | 
val suppress_literal_markup = Symbol.has_control_block o Symbol.explode;  | 
| 
73163
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
203  | 
fun suppress_markup tok = is_literal tok andalso suppress_literal_markup (str_of_token tok);  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
204  | 
|
| 
49821
 
d15fe10593ff
clarified output token markup (see also bc22daeed49e);
 
wenzelm 
parents: 
48992 
diff
changeset
 | 
205  | 
fun literal_markup s =  | 
| 
73163
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
206  | 
let val syms = Symbol.explode s in  | 
| 
73198
 
a9eaf8c3b728
tuned signature (e.g. see HTML.control_block in Isabelle/Scala);
 
wenzelm 
parents: 
73163 
diff
changeset
 | 
207  | 
if Symbol.has_control_block syms then []  | 
| 
73163
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
208  | 
else if Symbol.is_ascii_identifier s orelse exists Symbol.is_letter syms  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
209  | 
then [Markup.literal]  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
210  | 
else [Markup.delimiter]  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
211  | 
end;  | 
| 
49821
 
d15fe10593ff
clarified output token markup (see also bc22daeed49e);
 
wenzelm 
parents: 
48992 
diff
changeset
 | 
212  | 
|
| 27887 | 213  | 
val token_kind_markup =  | 
| 
73163
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
214  | 
fn Type_Ident => [Markup.tfree]  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
215  | 
| Type_Var => [Markup.tvar]  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
216  | 
| Num => [Markup.numeral]  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
217  | 
| Float => [Markup.numeral]  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
218  | 
| Str => [Markup.inner_string]  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
219  | 
| String => [Markup.inner_string]  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
220  | 
| Cartouche => [Markup.inner_cartouche]  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
221  | 
| Comment _ => [Markup.comment1]  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
222  | 
| _ => [];  | 
| 27887 | 223  | 
|
| 
73163
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
224  | 
fun reports_of_token tok =  | 
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
225  | 
let  | 
| 
73163
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
226  | 
val pos = pos_of_token tok;  | 
| 
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
227  | 
val markups =  | 
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
228  | 
if is_literal tok then literal_markup (str_of_token tok)  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
229  | 
else token_kind_markup (kind_of_token tok);  | 
| 
73163
 
624c2b98860a
suppress markup for literal tokens with block control symbols, for better PIDE/HTML output (see also d15fe10593ff);
 
wenzelm 
parents: 
70586 
diff
changeset
 | 
230  | 
in map (pair pos) markups end;  | 
| 27887 | 231  | 
|
| 
39510
 
d9f5f01faa1b
Syntax.read_asts error: report token ranges within message -- no side-effect here;
 
wenzelm 
parents: 
39507 
diff
changeset
 | 
232  | 
fun reported_token_range ctxt tok =  | 
| 
39168
 
e3ac771235f7
report token range after inner parse error -- often provides important clues about misunderstanding concerning lexical phase;
 
wenzelm 
parents: 
38474 
diff
changeset
 | 
233  | 
if is_proper tok  | 
| 
50201
 
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
 
wenzelm 
parents: 
49821 
diff
changeset
 | 
234  | 
then Context_Position.reported_text ctxt (pos_of_token tok) Markup.token_range ""  | 
| 
39510
 
d9f5f01faa1b
Syntax.read_asts error: report token ranges within message -- no side-effect here;
 
wenzelm 
parents: 
39507 
diff
changeset
 | 
235  | 
else "";  | 
| 
39168
 
e3ac771235f7
report token range after inner parse error -- often provides important clues about misunderstanding concerning lexical phase;
 
wenzelm 
parents: 
38474 
diff
changeset
 | 
236  | 
|
| 27887 | 237  | 
|
| 18 | 238  | 
(* valued_token *)  | 
| 0 | 239  | 
|
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
240  | 
fun valued_token tok =  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
241  | 
not (is_literal tok orelse is_eof tok);  | 
| 0 | 242  | 
|
243  | 
||
| 
55108
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
244  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
245  | 
(** string literals **)  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
246  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
247  | 
fun explode_literal scan_body (str, pos) =  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
248  | 
(case Scan.read Symbol_Pos.stopper scan_body (Symbol_Pos.explode (str, pos)) of  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
249  | 
SOME ss => ss  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
250  | 
| _ => error (err_prefix ^ "malformed string literal " ^ quote str ^ Position.here pos));  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
251  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
252  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
253  | 
(* string *)  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
254  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
255  | 
val scan_string = Scan.trace (Symbol_Pos.scan_string_qq err_prefix) >> #2;  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
256  | 
val scan_string_body = Symbol_Pos.scan_string_qq err_prefix >> (#1 o #2);  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
257  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
258  | 
fun implode_string ss = quote (implode (map (fn "\"" => "\\\"" | s => s) ss));  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
259  | 
val explode_string = explode_literal scan_string_body;  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
260  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
261  | 
|
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
262  | 
(* str *)  | 
| 18 | 263  | 
|
| 14730 | 264  | 
val scan_chr =  | 
| 55107 | 265  | 
$$ "\\" |-- $$$ "'" ||  | 
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
266  | 
Scan.one  | 
| 58854 | 267  | 
((fn s => s <> "\\" andalso s <> "'" andalso Symbol.not_eof s) o  | 
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
268  | 
Symbol_Pos.symbol) >> single ||  | 
| 55107 | 269  | 
$$$ "'" --| Scan.ahead (~$$ "'");  | 
| 14730 | 270  | 
|
271  | 
val scan_str =  | 
|
| 55106 | 272  | 
Scan.ahead ($$ "'" -- $$ "'") |--  | 
273  | 
!!! "unclosed string literal"  | 
|
| 61476 | 274  | 
($$$ "'" @@@ $$$ "'" @@@ Scan.repeats scan_chr @@@ $$$ "'" @@@ $$$ "'");  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
275  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
276  | 
val scan_str_body =  | 
| 55107 | 277  | 
Scan.ahead ($$ "'" |-- $$ "'") |--  | 
| 55106 | 278  | 
!!! "unclosed string literal"  | 
| 61476 | 279  | 
($$ "'" |-- $$ "'" |-- Scan.repeats scan_chr --| $$ "'" --| $$ "'");  | 
| 14730 | 280  | 
|
| 
55108
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
281  | 
fun implode_str ss = enclose "''" "''" (implode (map (fn "'" => "\\'" | s => s) ss));  | 
| 
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
282  | 
val explode_str = explode_literal scan_str_body;  | 
| 18 | 283  | 
|
284  | 
||
| 27806 | 285  | 
|
| 18 | 286  | 
(** tokenize **)  | 
287  | 
||
| 67412 | 288  | 
val token_leq = op <= o apply2 str_of_token;  | 
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
289  | 
|
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
290  | 
fun token kind =  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
291  | 
let val i = token_kind_index kind  | 
| 
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
292  | 
in fn ss => Token (i, Symbol_Pos.content ss, Symbol_Pos.range ss) end;  | 
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
293  | 
|
| 
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
294  | 
fun tokenize lex xids syms =  | 
| 18 | 295  | 
let  | 
296  | 
val scan_xid =  | 
|
| 
55962
 
fbd0e768bc8f
special identifier "__" (i.e. empty name with internal suffix) serves as wild-card for completion;
 
wenzelm 
parents: 
55624 
diff
changeset
 | 
297  | 
(if xids then $$$ "_" @@@ scan_id || scan_id else scan_id) ||  | 
| 
 
fbd0e768bc8f
special identifier "__" (i.e. empty name with internal suffix) serves as wild-card for completion;
 
wenzelm 
parents: 
55624 
diff
changeset
 | 
298  | 
$$$ "_" @@@ $$$ "_";  | 
| 18 | 299  | 
|
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
300  | 
val scan_val =  | 
| 67548 | 301  | 
scan_tvar >> token Type_Var ||  | 
302  | 
scan_var >> token Var ||  | 
|
303  | 
scan_tid >> token Type_Ident ||  | 
|
304  | 
Symbol_Pos.scan_float >> token Float ||  | 
|
305  | 
scan_num >> token Num ||  | 
|
306  | 
scan_longid >> token Long_Ident ||  | 
|
307  | 
scan_xid >> token Ident;  | 
|
| 18 | 308  | 
|
| 
27800
 
df444ddeff56
datatype token: maintain range, tuned representation;
 
wenzelm 
parents: 
27773 
diff
changeset
 | 
309  | 
val scan_lit = Scan.literal lex >> token Literal;  | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
310  | 
|
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
311  | 
val scan =  | 
| 55105 | 312  | 
Symbol_Pos.scan_cartouche err_prefix >> token Cartouche ||  | 
| 
69891
 
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
 
wenzelm 
parents: 
69344 
diff
changeset
 | 
313  | 
Comment.scan_inner >> (fn (kind, ss) => token (Comment kind) ss) ||  | 
| 27887 | 314  | 
Scan.max token_leq scan_lit scan_val ||  | 
| 67548 | 315  | 
scan_string >> token String ||  | 
316  | 
scan_str >> token Str ||  | 
|
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
317  | 
Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol) >> token Space;  | 
| 18 | 318  | 
in  | 
| 
67555
 
c550e38dd131
more efficient tokens_match_ord based on token_kind_index;
 
wenzelm 
parents: 
67554 
diff
changeset
 | 
319  | 
(case Scan.error (Scan.finite Symbol_Pos.stopper (Scan.repeat scan)) syms of  | 
| 27887 | 320  | 
(toks, []) => toks  | 
| 
55108
 
0b7a0c1fdf7e
inner syntax token language allows regular quoted strings;
 
wenzelm 
parents: 
55107 
diff
changeset
 | 
321  | 
| (_, ss) =>  | 
| 55624 | 322  | 
        error ("Inner lexical error" ^ Position.here (#1 (Symbol_Pos.range ss)) ^
 | 
323  | 
          Markup.markup Markup.no_report ("\nat " ^ quote (Symbol_Pos.content ss))))
 | 
|
| 18 | 324  | 
end;  | 
325  | 
||
326  | 
||
327  | 
||
328  | 
(** scan variables **)  | 
|
329  | 
||
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
330  | 
(* scan_indexname *)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
331  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
332  | 
local  | 
| 18 | 333  | 
|
| 27773 | 334  | 
val scan_vname =  | 
| 18 | 335  | 
let  | 
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
336  | 
fun nat n [] = n  | 
| 
64275
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
62819 
diff
changeset
 | 
337  | 
| nat n (c :: cs) = nat (n * 10 + (ord c - Char.ord #"0")) cs;  | 
| 18 | 338  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
339  | 
fun idxname cs ds = (implode (rev cs), nat 0 ds);  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
340  | 
fun chop_idx [] ds = idxname [] ds  | 
| 
62529
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
341  | 
| chop_idx (cs as (_ :: "\<^sub>" :: _)) ds = idxname cs ds  | 
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
342  | 
| chop_idx (c :: cs) ds =  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
343  | 
if Symbol.is_digit c then chop_idx cs (c :: ds)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
344  | 
else idxname (c :: cs) ds;  | 
| 18 | 345  | 
|
| 50239 | 346  | 
val scan =  | 
347  | 
(scan_id >> map Symbol_Pos.symbol) --  | 
|
| 62782 | 348  | 
Scan.optional ($$ "." |-- Symbol_Pos.scan_nat >> (nat 0 o map Symbol_Pos.symbol)) ~1;  | 
| 18 | 349  | 
in  | 
| 27773 | 350  | 
scan >>  | 
351  | 
(fn (cs, ~1) => chop_idx (rev cs) []  | 
|
352  | 
| (cs, i) => (implode cs, i))  | 
|
| 18 | 353  | 
end;  | 
354  | 
||
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
355  | 
in  | 
| 18 | 356  | 
|
| 55107 | 357  | 
val scan_indexname = $$ "'" |-- scan_vname >> (fn (x, i) => ("'" ^ x, i)) || scan_vname;
 | 
| 
15443
 
07f78cc82a73
indexname function now parses type variables as well; changed input
 
berghofe 
parents: 
14981 
diff
changeset
 | 
358  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
359  | 
end;  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
360  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
361  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
362  | 
(* indexname *)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
363  | 
|
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
364  | 
fun read_indexname s =  | 
| 62751 | 365  | 
(case Scan.read Symbol_Pos.stopper scan_indexname (Symbol_Pos.explode0 s) of  | 
| 15531 | 366  | 
SOME xi => xi  | 
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
367  | 
  | _ => error ("Lexical error in variable name: " ^ quote s));
 | 
| 18 | 368  | 
|
369  | 
||
| 4703 | 370  | 
(* read_var *)  | 
| 18 | 371  | 
|
| 4703 | 372  | 
fun read_var str =  | 
| 18 | 373  | 
let  | 
374  | 
val scan =  | 
|
| 55107 | 375  | 
$$ "?" |-- scan_indexname --| Scan.ahead (Scan.one Symbol_Pos.is_eof)  | 
| 42476 | 376  | 
>> Syntax.var ||  | 
| 58854 | 377  | 
Scan.many (Symbol.not_eof o Symbol_Pos.symbol)  | 
| 42476 | 378  | 
>> (Syntax.free o implode o map Symbol_Pos.symbol);  | 
| 62751 | 379  | 
in the (Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode0 str)) end;  | 
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
380  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
381  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
382  | 
(* read_variable *)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
383  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
384  | 
fun read_variable str =  | 
| 55107 | 385  | 
let val scan = $$ "?" |-- scan_indexname || scan_indexname  | 
| 62751 | 386  | 
in Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode0 str) end;  | 
| 4587 | 387  | 
|
388  | 
||
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
389  | 
(* read numbers *)  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
390  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
391  | 
local  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
392  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
393  | 
fun nat cs =  | 
| 
40525
 
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
394  | 
Option.map (#1 o Library.read_int o map Symbol_Pos.symbol)  | 
| 62782 | 395  | 
(Scan.read Symbol_Pos.stopper Symbol_Pos.scan_nat cs);  | 
| 5860 | 396  | 
|
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
397  | 
in  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
398  | 
|
| 62751 | 399  | 
fun read_nat s = nat (Symbol_Pos.explode0 s);  | 
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
400  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
401  | 
fun read_int s =  | 
| 62751 | 402  | 
(case Symbol_Pos.explode0 s of  | 
| 27773 | 403  | 
    ("-", _) :: cs => Option.map ~ (nat cs)
 | 
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
404  | 
| cs => nat cs);  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
405  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
406  | 
end;  | 
| 5860 | 407  | 
|
408  | 
||
| 58421 | 409  | 
(* read_num: hex/bin/decimal *)  | 
| 9326 | 410  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
411  | 
local  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
412  | 
|
| 
64275
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
62819 
diff
changeset
 | 
413  | 
val ten = Char.ord #"0" + 10;  | 
| 
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
62819 
diff
changeset
 | 
414  | 
val a = Char.ord #"a";  | 
| 
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
62819 
diff
changeset
 | 
415  | 
val A = Char.ord #"A";  | 
| 35428 | 416  | 
val _ = a > A orelse raise Fail "Bad ASCII";  | 
| 20096 | 417  | 
|
418  | 
fun remap_hex c =  | 
|
419  | 
let val x = ord c in  | 
|
420  | 
if x >= a then chr (x - a + ten)  | 
|
421  | 
else if x >= A then chr (x - A + ten)  | 
|
422  | 
else c  | 
|
423  | 
end;  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
424  | 
|
| 21781 | 425  | 
fun leading_zeros ["0"] = 0  | 
426  | 
  | leading_zeros ("0" :: cs) = 1 + leading_zeros cs
 | 
|
427  | 
| leading_zeros _ = 0;  | 
|
428  | 
||
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
429  | 
in  | 
| 
15965
 
f422f8283491
Use of IntInf.int instead of int in most numeric simprocs; avoids
 
paulson 
parents: 
15570 
diff
changeset
 | 
430  | 
|
| 58421 | 431  | 
fun read_num str =  | 
| 9326 | 432  | 
let  | 
| 58421 | 433  | 
val (radix, digs) =  | 
434  | 
(case Symbol.explode str of  | 
|
435  | 
"0" :: "x" :: cs => (16, map remap_hex cs)  | 
|
436  | 
| "0" :: "b" :: cs => (2, cs)  | 
|
437  | 
| cs => (10, cs));  | 
|
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
438  | 
in  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
439  | 
   {radix = radix,
 | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
440  | 
leading_zeros = leading_zeros digs,  | 
| 58421 | 441  | 
value = #1 (Library.read_radix_int radix digs)}  | 
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
442  | 
end;  | 
| 9326 | 443  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
444  | 
end;  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
445  | 
|
| 28904 | 446  | 
fun read_float str =  | 
447  | 
let  | 
|
| 58421 | 448  | 
val cs = Symbol.explode str;  | 
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
449  | 
val (intpart, fracpart) =  | 
| 67522 | 450  | 
(case chop_prefix Symbol.is_digit cs of  | 
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
451  | 
(intpart, "." :: fracpart) => (intpart, fracpart)  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
452  | 
| _ => raise Fail "read_float");  | 
| 
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
453  | 
in  | 
| 58421 | 454  | 
   {mant = #1 (Library.read_int (intpart @ fracpart)),
 | 
| 
29156
 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 
wenzelm 
parents: 
28904 
diff
changeset
 | 
455  | 
exp = length fracpart}  | 
| 
42046
 
6341c23baf10
added Lexicon.encode_position, Lexicon.decode_position;
 
wenzelm 
parents: 
40525 
diff
changeset
 | 
456  | 
end;  | 
| 
 
6341c23baf10
added Lexicon.encode_position, Lexicon.decode_position;
 
wenzelm 
parents: 
40525 
diff
changeset
 | 
457  | 
|
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
458  | 
|
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
459  | 
(* marked logical entities *)  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
460  | 
|
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
461  | 
fun marker s = (prefix s, unprefix s);  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
462  | 
|
| 
62529
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
463  | 
val (mark_class, unmark_class) = marker "\<^class>";  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
464  | 
val (mark_type, unmark_type) = marker "\<^type>";  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
465  | 
val (mark_const, unmark_const) = marker "\<^const>";  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
61476 
diff
changeset
 | 
466  | 
val (mark_fixed, unmark_fixed) = marker "\<^fixed>";  | 
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
467  | 
|
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
468  | 
fun unmark {case_class, case_type, case_const, case_fixed, case_default} s =
 | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
469  | 
(case try unmark_class s of  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
470  | 
SOME c => case_class c  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
471  | 
| NONE =>  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
472  | 
(case try unmark_type s of  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
473  | 
SOME c => case_type c  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
474  | 
| NONE =>  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
475  | 
(case try unmark_const s of  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
476  | 
SOME c => case_const c  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
477  | 
| NONE =>  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
478  | 
(case try unmark_fixed s of  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
479  | 
SOME c => case_fixed c  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
480  | 
| NONE => case_default s))));  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
481  | 
|
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
482  | 
val is_marked =  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
483  | 
  unmark {case_class = K true, case_type = K true, case_const = K true,
 | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
484  | 
case_fixed = K true, case_default = K false};  | 
| 
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
485  | 
|
| 42476 | 486  | 
val dummy_type = Syntax.const (mark_type "dummy");  | 
487  | 
val fun_type = Syntax.const (mark_type "fun");  | 
|
| 
42290
 
b1f544c84040
discontinued special treatment of structure Lexicon;
 
wenzelm 
parents: 
42264 
diff
changeset
 | 
488  | 
|
| 59196 | 489  | 
|
490  | 
(* toplevel pretty printing *)  | 
|
491  | 
||
| 62663 | 492  | 
val _ =  | 
| 
62819
 
d3ff367a16a0
careful export of type-dependent functions, without losing their special status;
 
wenzelm 
parents: 
62782 
diff
changeset
 | 
493  | 
ML_system_pp (fn _ => fn _ =>  | 
| 62663 | 494  | 
    Pretty.to_polyml o Pretty.str_list "{" "}" o map quote o Scan.dest_lexicon);
 | 
| 59196 | 495  | 
|
| 0 | 496  | 
end;  |