| author | wenzelm | 
| Wed, 11 Jul 2007 17:47:45 +0200 | |
| changeset 23784 | 75e6b9dd5336 | 
| parent 22686 | 68496cc300a4 | 
| child 23802 | cd09234405b6 | 
| permissions | -rw-r--r-- | 
| 18 | 1  | 
(* Title: Pure/Syntax/lexicon.ML  | 
| 0 | 2  | 
ID: $Id$  | 
| 18 | 3  | 
Author: Tobias Nipkow and Markus Wenzel, TU Muenchen  | 
| 0 | 4  | 
|
| 4703 | 5  | 
Lexer for the inner Isabelle syntax (terms and types).  | 
| 18 | 6  | 
*)  | 
| 0 | 7  | 
|
8  | 
signature LEXICON0 =  | 
|
| 
4247
 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 
wenzelm 
parents: 
3828 
diff
changeset
 | 
9  | 
sig  | 
| 0 | 10  | 
val is_identifier: string -> bool  | 
| 14679 | 11  | 
val is_ascii_identifier: string -> bool  | 
| 20165 | 12  | 
val is_tid: string -> bool  | 
| 
4247
 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 
wenzelm 
parents: 
3828 
diff
changeset
 | 
13  | 
val implode_xstr: string list -> string  | 
| 
 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 
wenzelm 
parents: 
3828 
diff
changeset
 | 
14  | 
val explode_xstr: string -> string list  | 
| 4703 | 15  | 
val scan_id: string list -> string * string list  | 
16  | 
val scan_longid: string list -> string * string list  | 
|
17  | 
val scan_var: string list -> string * string list  | 
|
18  | 
val scan_tid: string list -> string * string list  | 
|
| 4902 | 19  | 
val scan_tvar: string list -> string * string list  | 
| 4703 | 20  | 
val scan_nat: string list -> string * string list  | 
21  | 
val scan_int: string list -> string * string list  | 
|
| 
20067
 
26bac504ef90
hex and binary numerals (contributed by Rafal Kolanski)
 
kleing 
parents: 
19918 
diff
changeset
 | 
22  | 
val scan_hex: string list -> string * string list  | 
| 
 
26bac504ef90
hex and binary numerals (contributed by Rafal Kolanski)
 
kleing 
parents: 
19918 
diff
changeset
 | 
23  | 
val scan_bin: string list -> string * string list  | 
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
24  | 
val read_indexname: string -> indexname  | 
| 4703 | 25  | 
val read_var: string -> term  | 
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
26  | 
val read_variable: string -> indexname option  | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
27  | 
val const: string -> term  | 
| 
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
28  | 
val free: string -> term  | 
| 
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
29  | 
val var: indexname -> term  | 
| 5860 | 30  | 
val read_nat: string -> int option  | 
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
31  | 
val read_int: string -> int option  | 
| 21781 | 32  | 
  val read_xnum: string -> {radix: int, leading_zeros: int, value: IntInf.int}
 | 
| 7784 | 33  | 
val read_idents: string -> string list  | 
| 19002 | 34  | 
val fixedN: string  | 
35  | 
val constN: string  | 
|
| 
4247
 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 
wenzelm 
parents: 
3828 
diff
changeset
 | 
36  | 
end;  | 
| 0 | 37  | 
|
38  | 
signature LEXICON =  | 
|
| 
4247
 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 
wenzelm 
parents: 
3828 
diff
changeset
 | 
39  | 
sig  | 
| 18 | 40  | 
include LEXICON0  | 
41  | 
val is_xid: string -> bool  | 
|
42  | 
datatype token =  | 
|
43  | 
Token of string |  | 
|
44  | 
IdentSy of string |  | 
|
| 3828 | 45  | 
LongIdentSy of string |  | 
| 18 | 46  | 
VarSy of string |  | 
47  | 
TFreeSy of string |  | 
|
48  | 
TVarSy of string |  | 
|
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
49  | 
NumSy of string |  | 
| 11697 | 50  | 
XNumSy of string |  | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
51  | 
StrSy of string |  | 
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
52  | 
EndToken  | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
53  | 
val idT: typ  | 
| 3828 | 54  | 
val longidT: typ  | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
55  | 
val varT: typ  | 
| 
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
56  | 
val tidT: typ  | 
| 
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
57  | 
val tvarT: typ  | 
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
58  | 
val terminals: string list  | 
| 
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
59  | 
val is_terminal: string -> bool  | 
| 18 | 60  | 
val str_of_token: token -> string  | 
61  | 
val display_token: token -> string  | 
|
62  | 
val matching_tokens: token * token -> bool  | 
|
63  | 
val valued_token: token -> bool  | 
|
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
64  | 
val predef_term: string -> token option  | 
| 4703 | 65  | 
val tokenize: Scan.lexicon -> bool -> string list -> token list  | 
| 
4247
 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 
wenzelm 
parents: 
3828 
diff
changeset
 | 
66  | 
end;  | 
| 0 | 67  | 
|
| 14679 | 68  | 
structure Lexicon: LEXICON =  | 
| 0 | 69  | 
struct  | 
70  | 
||
| 
4247
 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 
wenzelm 
parents: 
3828 
diff
changeset
 | 
71  | 
|
| 18 | 72  | 
(** is_identifier etc. **)  | 
73  | 
||
| 16150 | 74  | 
val is_identifier = Symbol.is_ident o Symbol.explode;  | 
| 18 | 75  | 
|
| 14679 | 76  | 
fun is_ascii_identifier s =  | 
77  | 
let val cs = Symbol.explode s  | 
|
| 16150 | 78  | 
in forall Symbol.is_ascii cs andalso Symbol.is_ident cs end;  | 
| 14679 | 79  | 
|
| 18 | 80  | 
fun is_xid s =  | 
| 4703 | 81  | 
(case Symbol.explode s of  | 
| 16150 | 82  | 
"_" :: cs => Symbol.is_ident cs  | 
83  | 
| cs => Symbol.is_ident cs);  | 
|
| 18 | 84  | 
|
| 
330
 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 
clasohm 
parents: 
237 
diff
changeset
 | 
85  | 
fun is_tid s =  | 
| 4703 | 86  | 
(case Symbol.explode s of  | 
| 16150 | 87  | 
"'" :: cs => Symbol.is_ident cs  | 
| 18 | 88  | 
| _ => false);  | 
89  | 
||
| 0 | 90  | 
|
91  | 
||
| 4703 | 92  | 
(** basic scanners **)  | 
93  | 
||
| 
21858
 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 
wenzelm 
parents: 
21781 
diff
changeset
 | 
94  | 
val scan_letter_letdigs = Scan.one Symbol.is_letter -- Scan.many Symbol.is_letdig >> op ::;  | 
| 
 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 
wenzelm 
parents: 
21781 
diff
changeset
 | 
95  | 
val scan_digits1 = Scan.many1 Symbol.is_digit;  | 
| 
 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 
wenzelm 
parents: 
21781 
diff
changeset
 | 
96  | 
val scan_hex1 = Scan.many1 (Symbol.is_digit orf Symbol.is_hex_letter);  | 
| 
 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 
wenzelm 
parents: 
21781 
diff
changeset
 | 
97  | 
val scan_bin1 = Scan.many1 (fn s => s = "0" orelse s = "1");  | 
| 4703 | 98  | 
|
99  | 
val scan_id = scan_letter_letdigs >> implode;  | 
|
100  | 
val scan_longid = scan_id ^^ (Scan.repeat1 ($$ "." ^^ scan_id) >> implode);  | 
|
101  | 
val scan_tid = $$ "'" ^^ scan_id;  | 
|
102  | 
||
103  | 
val scan_nat = scan_digits1 >> implode;  | 
|
| 5513 | 104  | 
val scan_int = $$ "-" ^^ scan_nat || scan_nat;  | 
| 4703 | 105  | 
|
| 20091 | 106  | 
val scan_hex = $$ "0" ^^ $$ "x" ^^ (scan_hex1 >> implode);  | 
107  | 
val scan_bin = $$ "0" ^^ $$ "b" ^^ (scan_bin1 >> implode);  | 
|
| 
20067
 
26bac504ef90
hex and binary numerals (contributed by Rafal Kolanski)
 
kleing 
parents: 
19918 
diff
changeset
 | 
108  | 
|
| 4703 | 109  | 
val scan_id_nat = scan_id ^^ Scan.optional ($$ "." ^^ scan_nat) "";  | 
110  | 
val scan_var = $$ "?" ^^ scan_id_nat;  | 
|
| 4902 | 111  | 
val scan_tvar = $$ "?" ^^ $$ "'" ^^ scan_id_nat;  | 
| 4703 | 112  | 
|
113  | 
||
114  | 
||
| 18 | 115  | 
(** datatype token **)  | 
| 0 | 116  | 
|
| 18 | 117  | 
datatype token =  | 
118  | 
Token of string |  | 
|
119  | 
IdentSy of string |  | 
|
| 3828 | 120  | 
LongIdentSy of string |  | 
| 18 | 121  | 
VarSy of string |  | 
122  | 
TFreeSy of string |  | 
|
123  | 
TVarSy of string |  | 
|
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
124  | 
NumSy of string |  | 
| 11697 | 125  | 
XNumSy of string |  | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
126  | 
StrSy of string |  | 
| 18 | 127  | 
EndToken;  | 
| 0 | 128  | 
|
129  | 
||
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
130  | 
(* terminal arguments *)  | 
| 0 | 131  | 
|
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
132  | 
val idT = Type ("id", []);
 | 
| 3828 | 133  | 
val longidT = Type ("longid", []);
 | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
134  | 
val varT = Type ("var", []);
 | 
| 
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
135  | 
val tidT = Type ("tid", []);
 | 
| 
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
136  | 
val tvarT = Type ("tvar", []);
 | 
| 0 | 137  | 
|
| 11697 | 138  | 
val terminals = ["id", "longid", "var", "tid", "tvar", "num", "xnum", "xstr"];  | 
| 20664 | 139  | 
val is_terminal = member (op =) terminals;  | 
| 
237
 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 
wenzelm 
parents: 
164 
diff
changeset
 | 
140  | 
|
| 0 | 141  | 
|
| 18 | 142  | 
(* str_of_token *)  | 
| 0 | 143  | 
|
| 18 | 144  | 
fun str_of_token (Token s) = s  | 
145  | 
| str_of_token (IdentSy s) = s  | 
|
| 3828 | 146  | 
| str_of_token (LongIdentSy s) = s  | 
| 18 | 147  | 
| str_of_token (VarSy s) = s  | 
148  | 
| str_of_token (TFreeSy s) = s  | 
|
149  | 
| str_of_token (TVarSy s) = s  | 
|
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
150  | 
| str_of_token (NumSy s) = s  | 
| 11697 | 151  | 
| str_of_token (XNumSy s) = s  | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
152  | 
| str_of_token (StrSy s) = s  | 
| 
376
 
d3d01131470f
extended signature SCANNER by some basic scanners and type lexicon;
 
wenzelm 
parents: 
330 
diff
changeset
 | 
153  | 
| str_of_token EndToken = "EOF";  | 
| 0 | 154  | 
|
| 18 | 155  | 
|
156  | 
(* display_token *)  | 
|
| 0 | 157  | 
|
| 18 | 158  | 
fun display_token (Token s) = quote s  | 
159  | 
  | display_token (IdentSy s) = "id(" ^ s ^ ")"
 | 
|
| 3828 | 160  | 
  | display_token (LongIdentSy s) = "longid(" ^ s ^ ")"
 | 
| 18 | 161  | 
  | display_token (VarSy s) = "var(" ^ s ^ ")"
 | 
| 
330
 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 
clasohm 
parents: 
237 
diff
changeset
 | 
162  | 
  | display_token (TFreeSy s) = "tid(" ^ s ^ ")"
 | 
| 18 | 163  | 
  | display_token (TVarSy s) = "tvar(" ^ s ^ ")"
 | 
| 11697 | 164  | 
  | display_token (NumSy s) = "num(" ^ s ^ ")"
 | 
165  | 
  | display_token (XNumSy s) = "xnum(" ^ s ^ ")"
 | 
|
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
166  | 
  | display_token (StrSy s) = "xstr(" ^ s ^ ")"
 | 
| 18 | 167  | 
| display_token EndToken = "";  | 
| 0 | 168  | 
|
| 18 | 169  | 
|
170  | 
(* matching_tokens *)  | 
|
| 0 | 171  | 
|
| 18 | 172  | 
fun matching_tokens (Token x, Token y) = (x = y)  | 
173  | 
| matching_tokens (IdentSy _, IdentSy _) = true  | 
|
| 3828 | 174  | 
| matching_tokens (LongIdentSy _, LongIdentSy _) = true  | 
| 18 | 175  | 
| matching_tokens (VarSy _, VarSy _) = true  | 
176  | 
| matching_tokens (TFreeSy _, TFreeSy _) = true  | 
|
177  | 
| matching_tokens (TVarSy _, TVarSy _) = true  | 
|
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
178  | 
| matching_tokens (NumSy _, NumSy _) = true  | 
| 11697 | 179  | 
| matching_tokens (XNumSy _, XNumSy _) = true  | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
180  | 
| matching_tokens (StrSy _, StrSy _) = true  | 
| 18 | 181  | 
| matching_tokens (EndToken, EndToken) = true  | 
182  | 
| matching_tokens _ = false;  | 
|
| 0 | 183  | 
|
184  | 
||
| 18 | 185  | 
(* valued_token *)  | 
| 0 | 186  | 
|
| 18 | 187  | 
fun valued_token (Token _) = false  | 
188  | 
| valued_token (IdentSy _) = true  | 
|
| 3828 | 189  | 
| valued_token (LongIdentSy _) = true  | 
| 18 | 190  | 
| valued_token (VarSy _) = true  | 
191  | 
| valued_token (TFreeSy _) = true  | 
|
192  | 
| valued_token (TVarSy _) = true  | 
|
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
193  | 
| valued_token (NumSy _) = true  | 
| 11697 | 194  | 
| valued_token (XNumSy _) = true  | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
195  | 
| valued_token (StrSy _) = true  | 
| 18 | 196  | 
| valued_token EndToken = false;  | 
| 0 | 197  | 
|
198  | 
||
| 18 | 199  | 
(* predef_term *)  | 
| 0 | 200  | 
|
| 15531 | 201  | 
fun predef_term "id" = SOME (IdentSy "id")  | 
202  | 
| predef_term "longid" = SOME (LongIdentSy "longid")  | 
|
203  | 
| predef_term "var" = SOME (VarSy "var")  | 
|
204  | 
| predef_term "tid" = SOME (TFreeSy "tid")  | 
|
205  | 
| predef_term "tvar" = SOME (TVarSy "tvar")  | 
|
206  | 
| predef_term "num" = SOME (NumSy "num")  | 
|
207  | 
| predef_term "xnum" = SOME (XNumSy "xnum")  | 
|
208  | 
| predef_term "xstr" = SOME (StrSy "xstr")  | 
|
209  | 
| predef_term _ = NONE;  | 
|
| 0 | 210  | 
|
211  | 
||
| 4703 | 212  | 
(* xstr tokens *)  | 
| 18 | 213  | 
|
| 14730 | 214  | 
fun lex_err msg prfx (cs, _) =  | 
215  | 
"Inner lexical error: " ^ msg ^ " at " ^ quote (prfx ^ Symbol.beginning 10 cs);  | 
|
216  | 
||
217  | 
val scan_chr =  | 
|
| 21774 | 218  | 
$$ "\\" |-- $$ "'" ||  | 
| 
23784
 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
22686 
diff
changeset
 | 
219  | 
Scan.one (fn s => s <> "\\" andalso s <> "'" andalso Symbol.is_regular s) ||  | 
| 19305 | 220  | 
$$ "'" --| Scan.ahead (~$$ "'");  | 
| 14730 | 221  | 
|
222  | 
val scan_str =  | 
|
223  | 
$$ "'" |-- $$ "'" |-- !! (lex_err "missing end of string" "''")  | 
|
224  | 
(Scan.repeat scan_chr --| $$ "'" --| $$ "'");  | 
|
225  | 
||
| 0 | 226  | 
|
| 4703 | 227  | 
fun implode_xstr cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs));  | 
| 18 | 228  | 
|
| 4703 | 229  | 
fun explode_xstr str =  | 
| 5868 | 230  | 
(case Scan.read Symbol.stopper scan_str (Symbol.explode str) of  | 
| 15531 | 231  | 
SOME cs => cs  | 
| 5868 | 232  | 
  | _ => error ("Inner lexical error: literal string expected at " ^ quote str));
 | 
| 18 | 233  | 
|
234  | 
||
| 14730 | 235  | 
(* nested comments *)  | 
236  | 
||
237  | 
val scan_cmt =  | 
|
238  | 
  Scan.depend (fn d => $$ "(" ^^ $$ "*" >> pair (d + 1)) ||
 | 
|
239  | 
Scan.depend (fn 0 => Scan.fail | d => $$ "*" ^^ $$ ")" >> pair (d - 1)) ||  | 
|
| 19305 | 240  | 
Scan.lift ($$ "*" --| Scan.ahead (~$$ ")")) ||  | 
| 
23784
 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
22686 
diff
changeset
 | 
241  | 
Scan.lift (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s));  | 
| 14730 | 242  | 
|
| 14783 | 243  | 
val scan_comment =  | 
244  | 
  $$ "(" -- $$ "*" -- !! (lex_err "missing end of comment" "(*")
 | 
|
| 14730 | 245  | 
(Scan.pass 0 (Scan.repeat scan_cmt) -- $$ "*" -- $$ ")")  | 
| 14783 | 246  | 
>> K ();  | 
| 14730 | 247  | 
|
248  | 
||
| 18 | 249  | 
|
250  | 
(** tokenize **)  | 
|
251  | 
||
| 2363 | 252  | 
fun tokenize lex xids chs =  | 
| 18 | 253  | 
let  | 
254  | 
val scan_xid =  | 
|
255  | 
if xids then $$ "_" ^^ scan_id || scan_id  | 
|
256  | 
else scan_id;  | 
|
257  | 
||
| 20096 | 258  | 
val scan_num = scan_hex || scan_bin || scan_int;  | 
259  | 
||
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
260  | 
val scan_val =  | 
| 4902 | 261  | 
scan_tvar >> pair TVarSy ||  | 
| 4703 | 262  | 
scan_var >> pair VarSy ||  | 
263  | 
scan_tid >> pair TFreeSy ||  | 
|
| 20096 | 264  | 
scan_num >> pair NumSy ||  | 
265  | 
$$ "#" ^^ scan_num >> pair XNumSy ||  | 
|
| 3828 | 266  | 
scan_longid >> pair LongIdentSy ||  | 
| 18 | 267  | 
scan_xid >> pair IdentSy;  | 
268  | 
||
| 4703 | 269  | 
val scan_lit = Scan.literal lex >> (pair Token o implode);  | 
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
270  | 
|
| 4703 | 271  | 
val scan_token =  | 
| 15531 | 272  | 
scan_comment >> K NONE ||  | 
273  | 
Scan.max (op <= o pairself snd) scan_lit scan_val >> (fn (tk, s) => SOME (tk s)) ||  | 
|
274  | 
scan_str >> (SOME o StrSy o implode_xstr) ||  | 
|
275  | 
Scan.one Symbol.is_blank >> K NONE;  | 
|
| 18 | 276  | 
in  | 
| 4938 | 277  | 
(case Scan.error (Scan.finite Symbol.stopper (Scan.repeat scan_token)) chs of  | 
| 
19482
 
9f11af8f7ef9
tuned basic list operators (flat, maps, map_filter);
 
wenzelm 
parents: 
19305 
diff
changeset
 | 
278  | 
(toks, []) => map_filter I toks @ [EndToken]  | 
| 4703 | 279  | 
    | (_, cs) => error ("Inner lexical error at: " ^ quote (implode cs)))
 | 
| 18 | 280  | 
end;  | 
281  | 
||
282  | 
||
283  | 
||
284  | 
(** scan variables **)  | 
|
285  | 
||
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
286  | 
(* scan_indexname *)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
287  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
288  | 
local  | 
| 18 | 289  | 
|
290  | 
fun scan_vname chrs =  | 
|
291  | 
let  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
292  | 
fun nat n [] = n  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
293  | 
| nat n (c :: cs) = nat (n * 10 + (ord c - ord "0")) cs;  | 
| 18 | 294  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
295  | 
fun idxname cs ds = (implode (rev cs), nat 0 ds);  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
296  | 
fun chop_idx [] ds = idxname [] ds  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
297  | 
| chop_idx (cs as (_ :: "\\<^isub>" :: _)) ds = idxname cs ds  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
298  | 
| chop_idx (cs as (_ :: "\\<^isup>" :: _)) ds = idxname cs ds  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
299  | 
| chop_idx (c :: cs) ds =  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
300  | 
if Symbol.is_digit c then chop_idx cs (c :: ds)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
301  | 
else idxname (c :: cs) ds;  | 
| 18 | 302  | 
|
303  | 
val scan =  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
304  | 
scan_letter_letdigs -- Scan.optional ($$ "." |-- scan_digits1 >> nat 0) ~1;  | 
| 18 | 305  | 
in  | 
306  | 
(case scan chrs of  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
307  | 
((cs, ~1), cs') => (chop_idx (rev cs) [], cs')  | 
| 18 | 308  | 
| ((cs, i), cs') => ((implode cs, i), cs'))  | 
309  | 
end;  | 
|
310  | 
||
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
311  | 
in  | 
| 18 | 312  | 
|
| 
15443
 
07f78cc82a73
indexname function now parses type variables as well; changed input
 
berghofe 
parents: 
14981 
diff
changeset
 | 
313  | 
val scan_indexname =  | 
| 
 
07f78cc82a73
indexname function now parses type variables as well; changed input
 
berghofe 
parents: 
14981 
diff
changeset
 | 
314  | 
     $$ "'" |-- scan_vname >> (fn (x, i) => ("'" ^ x, i))
 | 
| 
 
07f78cc82a73
indexname function now parses type variables as well; changed input
 
berghofe 
parents: 
14981 
diff
changeset
 | 
315  | 
|| scan_vname;  | 
| 
 
07f78cc82a73
indexname function now parses type variables as well; changed input
 
berghofe 
parents: 
14981 
diff
changeset
 | 
316  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
317  | 
end;  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
318  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
319  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
320  | 
(* indexname *)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
321  | 
|
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
322  | 
fun read_indexname s =  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
323  | 
(case Scan.read Symbol.stopper scan_indexname (Symbol.explode s) of  | 
| 15531 | 324  | 
SOME xi => xi  | 
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
325  | 
  | _ => error ("Lexical error in variable name: " ^ quote s));
 | 
| 18 | 326  | 
|
327  | 
||
| 4703 | 328  | 
(* read_var *)  | 
| 18 | 329  | 
|
| 
550
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
330  | 
fun const c = Const (c, dummyT);  | 
| 
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
331  | 
fun free x = Free (x, dummyT);  | 
| 
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
332  | 
fun var xi = Var (xi, dummyT);  | 
| 
 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 
wenzelm 
parents: 
376 
diff
changeset
 | 
333  | 
|
| 4703 | 334  | 
fun read_var str =  | 
| 18 | 335  | 
let  | 
336  | 
val scan =  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
337  | 
$$ "?" |-- scan_indexname --| Scan.ahead (Scan.one Symbol.is_eof) >> var ||  | 
| 
23784
 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
22686 
diff
changeset
 | 
338  | 
Scan.many Symbol.is_regular >> (free o implode);  | 
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
339  | 
in the (Scan.read Symbol.stopper scan (Symbol.explode str)) end;  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
340  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
341  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
342  | 
(* read_variable *)  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
343  | 
|
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
344  | 
fun read_variable str =  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
345  | 
let val scan = $$ "?" |-- scan_indexname || scan_indexname  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
346  | 
in Scan.read Symbol.stopper scan (Symbol.explode str) end;  | 
| 4587 | 347  | 
|
348  | 
||
| 19002 | 349  | 
(* specific identifiers *)  | 
| 5260 | 350  | 
|
| 19002 | 351  | 
val constN = "\\<^const>";  | 
352  | 
val fixedN = "\\<^fixed>";  | 
|
353  | 
||
| 5260 | 354  | 
|
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
355  | 
(* read numbers *)  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
356  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
357  | 
local  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
358  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
359  | 
fun nat cs =  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
360  | 
Option.map (#1 o Library.read_int)  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
361  | 
(Scan.read Symbol.stopper scan_digits1 cs);  | 
| 5860 | 362  | 
|
| 
20313
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
363  | 
in  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
364  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
365  | 
val read_nat = nat o Symbol.explode;  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
366  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
367  | 
fun read_int s =  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
368  | 
(case Symbol.explode s of  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
369  | 
"-" :: cs => Option.map ~ (nat cs)  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
370  | 
| cs => nat cs);  | 
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
371  | 
|
| 
 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 
wenzelm 
parents: 
20165 
diff
changeset
 | 
372  | 
end;  | 
| 5860 | 373  | 
|
374  | 
||
| 20096 | 375  | 
(* read_xnum: hex/bin/decimal *)  | 
| 9326 | 376  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
377  | 
local  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
378  | 
|
| 20096 | 379  | 
val ten = ord "0" + 10;  | 
380  | 
val a = ord "a";  | 
|
381  | 
val A = ord "A";  | 
|
| 
21858
 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 
wenzelm 
parents: 
21781 
diff
changeset
 | 
382  | 
val _ = a > A orelse error "Bad ASCII";  | 
| 20096 | 383  | 
|
384  | 
fun remap_hex c =  | 
|
385  | 
let val x = ord c in  | 
|
386  | 
if x >= a then chr (x - a + ten)  | 
|
387  | 
else if x >= A then chr (x - A + ten)  | 
|
388  | 
else c  | 
|
389  | 
end;  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
390  | 
|
| 21781 | 391  | 
fun leading_zeros ["0"] = 0  | 
392  | 
  | leading_zeros ("0" :: cs) = 1 + leading_zeros cs
 | 
|
393  | 
| leading_zeros _ = 0;  | 
|
394  | 
||
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
395  | 
in  | 
| 
15965
 
f422f8283491
Use of IntInf.int instead of int in most numeric simprocs; avoids
 
paulson 
parents: 
15570 
diff
changeset
 | 
396  | 
|
| 9326 | 397  | 
fun read_xnum str =  | 
398  | 
let  | 
|
| 20096 | 399  | 
val (sign, radix, digs) =  | 
400  | 
(case Symbol.explode (perhaps (try (unprefix "#")) str) of  | 
|
401  | 
"0" :: "x" :: cs => (1, 16, map remap_hex cs)  | 
|
402  | 
| "0" :: "b" :: cs => (1, 2, cs)  | 
|
403  | 
| "-" :: cs => (~1, 10, cs)  | 
|
404  | 
| cs => (1, 10, cs));  | 
|
| 
22574
 
e6c25fd3de2a
avoid overloaded integer constants (accomodate Alice);
 
wenzelm 
parents: 
21858 
diff
changeset
 | 
405  | 
val value = IntInf.fromInt sign * #1 (Library.read_intinf radix digs);  | 
| 21781 | 406  | 
  in {radix = radix, leading_zeros = leading_zeros digs, value = value} end;
 | 
| 9326 | 407  | 
|
| 
15991
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
408  | 
end;  | 
| 
 
670f8e4b5a98
added read_variable: optional question mark on input;
 
wenzelm 
parents: 
15965 
diff
changeset
 | 
409  | 
|
| 9326 | 410  | 
|
| 7784 | 411  | 
(* read_ident(s) *)  | 
412  | 
||
413  | 
fun read_idents str =  | 
|
414  | 
let  | 
|
| 
21858
 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 
wenzelm 
parents: 
21781 
diff
changeset
 | 
415  | 
val blanks = Scan.many Symbol.is_blank;  | 
| 
23784
 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
22686 
diff
changeset
 | 
416  | 
val junk = Scan.many Symbol.is_regular;  | 
| 7784 | 417  | 
val idents = Scan.repeat1 (blanks |-- scan_id --| blanks) -- junk;  | 
418  | 
in  | 
|
419  | 
(case Scan.read Symbol.stopper idents (Symbol.explode str) of  | 
|
| 15531 | 420  | 
SOME (ids, []) => ids  | 
421  | 
    | SOME (_, bad) => error ("Bad identifier: " ^ quote (implode bad))
 | 
|
422  | 
    | NONE => error ("No identifier found in: " ^ quote str))
 | 
|
| 7784 | 423  | 
end;  | 
424  | 
||
| 0 | 425  | 
end;  |