| author | wenzelm | 
| Sat, 05 Jan 2008 21:57:18 +0100 | |
| changeset 25842 | fdabeed7ccd9 | 
| parent 24630 | 351a308ab58d | 
| child 26007 | 3760d3ff4cce | 
| permissions | -rw-r--r-- | 
| 18 | 1 | (* Title: Pure/Syntax/lexicon.ML | 
| 0 | 2 | ID: $Id$ | 
| 18 | 3 | Author: Tobias Nipkow and Markus Wenzel, TU Muenchen | 
| 0 | 4 | |
| 4703 | 5 | Lexer for the inner Isabelle syntax (terms and types). | 
| 18 | 6 | *) | 
| 0 | 7 | |
| 8 | signature LEXICON0 = | |
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 9 | sig | 
| 0 | 10 | val is_identifier: string -> bool | 
| 14679 | 11 | val is_ascii_identifier: string -> bool | 
| 20165 | 12 | val is_tid: string -> bool | 
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 13 | val implode_xstr: string list -> string | 
| 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 14 | val explode_xstr: string -> string list | 
| 4703 | 15 | val scan_id: string list -> string * string list | 
| 16 | val scan_longid: string list -> string * string list | |
| 17 | val scan_var: string list -> string * string list | |
| 18 | val scan_tid: string list -> string * string list | |
| 4902 | 19 | val scan_tvar: string list -> string * string list | 
| 4703 | 20 | val scan_nat: string list -> string * string list | 
| 21 | val scan_int: string list -> string * string list | |
| 20067 
26bac504ef90
hex and binary numerals (contributed by Rafal Kolanski)
 kleing parents: 
19918diff
changeset | 22 | val scan_hex: string list -> string * string list | 
| 
26bac504ef90
hex and binary numerals (contributed by Rafal Kolanski)
 kleing parents: 
19918diff
changeset | 23 | val scan_bin: string list -> string * string list | 
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 24 | val read_indexname: string -> indexname | 
| 4703 | 25 | val read_var: string -> term | 
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 26 | val read_variable: string -> indexname option | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 27 | val const: string -> term | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 28 | val free: string -> term | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 29 | val var: indexname -> term | 
| 5860 | 30 | val read_nat: string -> int option | 
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 31 | val read_int: string -> int option | 
| 24630 
351a308ab58d
simplified type int (eliminated IntInf.int, integer);
 wenzelm parents: 
24583diff
changeset | 32 |   val read_xnum: string -> {radix: int, leading_zeros: int, value: int}
 | 
| 7784 | 33 | val read_idents: string -> string list | 
| 19002 | 34 | val fixedN: string | 
| 35 | val constN: string | |
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 36 | end; | 
| 0 | 37 | |
| 38 | signature LEXICON = | |
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 39 | sig | 
| 18 | 40 | include LEXICON0 | 
| 41 | val is_xid: string -> bool | |
| 42 | datatype token = | |
| 43 | Token of string | | |
| 44 | IdentSy of string | | |
| 3828 | 45 | LongIdentSy of string | | 
| 18 | 46 | VarSy of string | | 
| 47 | TFreeSy of string | | |
| 48 | TVarSy of string | | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 49 | NumSy of string | | 
| 11697 | 50 | XNumSy of string | | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 51 | StrSy of string | | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 52 | EndToken | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 53 | val idT: typ | 
| 3828 | 54 | val longidT: typ | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 55 | val varT: typ | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 56 | val tidT: typ | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 57 | val tvarT: typ | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 58 | val terminals: string list | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 59 | val is_terminal: string -> bool | 
| 18 | 60 | val str_of_token: token -> string | 
| 61 | val display_token: token -> string | |
| 62 | val matching_tokens: token * token -> bool | |
| 63 | val valued_token: token -> bool | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 64 | val predef_term: string -> token option | 
| 4703 | 65 | val tokenize: Scan.lexicon -> bool -> string list -> token list | 
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 66 | end; | 
| 0 | 67 | |
| 14679 | 68 | structure Lexicon: LEXICON = | 
| 0 | 69 | struct | 
| 70 | ||
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 71 | |
| 18 | 72 | (** is_identifier etc. **) | 
| 73 | ||
| 16150 | 74 | val is_identifier = Symbol.is_ident o Symbol.explode; | 
| 18 | 75 | |
| 14679 | 76 | fun is_ascii_identifier s = | 
| 77 | let val cs = Symbol.explode s | |
| 16150 | 78 | in forall Symbol.is_ascii cs andalso Symbol.is_ident cs end; | 
| 14679 | 79 | |
| 18 | 80 | fun is_xid s = | 
| 4703 | 81 | (case Symbol.explode s of | 
| 16150 | 82 | "_" :: cs => Symbol.is_ident cs | 
| 83 | | cs => Symbol.is_ident cs); | |
| 18 | 84 | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
237diff
changeset | 85 | fun is_tid s = | 
| 4703 | 86 | (case Symbol.explode s of | 
| 16150 | 87 | "'" :: cs => Symbol.is_ident cs | 
| 18 | 88 | | _ => false); | 
| 89 | ||
| 0 | 90 | |
| 91 | ||
| 4703 | 92 | (** basic scanners **) | 
| 93 | ||
| 21858 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 wenzelm parents: 
21781diff
changeset | 94 | val scan_letter_letdigs = Scan.one Symbol.is_letter -- Scan.many Symbol.is_letdig >> op ::; | 
| 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 wenzelm parents: 
21781diff
changeset | 95 | val scan_digits1 = Scan.many1 Symbol.is_digit; | 
| 24583 
d77e4d48e497
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 wenzelm parents: 
24245diff
changeset | 96 | val scan_hex1 = Scan.many1 Symbol.is_ascii_hex; | 
| 21858 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 wenzelm parents: 
21781diff
changeset | 97 | val scan_bin1 = Scan.many1 (fn s => s = "0" orelse s = "1"); | 
| 4703 | 98 | |
| 99 | val scan_id = scan_letter_letdigs >> implode; | |
| 100 | val scan_longid = scan_id ^^ (Scan.repeat1 ($$ "." ^^ scan_id) >> implode); | |
| 101 | val scan_tid = $$ "'" ^^ scan_id; | |
| 102 | ||
| 103 | val scan_nat = scan_digits1 >> implode; | |
| 5513 | 104 | val scan_int = $$ "-" ^^ scan_nat || scan_nat; | 
| 4703 | 105 | |
| 20091 | 106 | val scan_hex = $$ "0" ^^ $$ "x" ^^ (scan_hex1 >> implode); | 
| 107 | val scan_bin = $$ "0" ^^ $$ "b" ^^ (scan_bin1 >> implode); | |
| 20067 
26bac504ef90
hex and binary numerals (contributed by Rafal Kolanski)
 kleing parents: 
19918diff
changeset | 108 | |
| 4703 | 109 | val scan_id_nat = scan_id ^^ Scan.optional ($$ "." ^^ scan_nat) ""; | 
| 110 | val scan_var = $$ "?" ^^ scan_id_nat; | |
| 4902 | 111 | val scan_tvar = $$ "?" ^^ $$ "'" ^^ scan_id_nat; | 
| 4703 | 112 | |
| 113 | ||
| 114 | ||
| 18 | 115 | (** datatype token **) | 
| 0 | 116 | |
| 18 | 117 | datatype token = | 
| 118 | Token of string | | |
| 119 | IdentSy of string | | |
| 3828 | 120 | LongIdentSy of string | | 
| 18 | 121 | VarSy of string | | 
| 122 | TFreeSy of string | | |
| 123 | TVarSy of string | | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 124 | NumSy of string | | 
| 11697 | 125 | XNumSy of string | | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 126 | StrSy of string | | 
| 18 | 127 | EndToken; | 
| 0 | 128 | |
| 129 | ||
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 130 | (* terminal arguments *) | 
| 0 | 131 | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 132 | val idT = Type ("id", []);
 | 
| 3828 | 133 | val longidT = Type ("longid", []);
 | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 134 | val varT = Type ("var", []);
 | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 135 | val tidT = Type ("tid", []);
 | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 136 | val tvarT = Type ("tvar", []);
 | 
| 0 | 137 | |
| 11697 | 138 | val terminals = ["id", "longid", "var", "tid", "tvar", "num", "xnum", "xstr"]; | 
| 20664 | 139 | val is_terminal = member (op =) terminals; | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 140 | |
| 0 | 141 | |
| 18 | 142 | (* str_of_token *) | 
| 0 | 143 | |
| 18 | 144 | fun str_of_token (Token s) = s | 
| 145 | | str_of_token (IdentSy s) = s | |
| 3828 | 146 | | str_of_token (LongIdentSy s) = s | 
| 18 | 147 | | str_of_token (VarSy s) = s | 
| 148 | | str_of_token (TFreeSy s) = s | |
| 149 | | str_of_token (TVarSy s) = s | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 150 | | str_of_token (NumSy s) = s | 
| 11697 | 151 | | str_of_token (XNumSy s) = s | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 152 | | str_of_token (StrSy s) = s | 
| 376 
d3d01131470f
extended signature SCANNER by some basic scanners and type lexicon;
 wenzelm parents: 
330diff
changeset | 153 | | str_of_token EndToken = "EOF"; | 
| 0 | 154 | |
| 18 | 155 | |
| 156 | (* display_token *) | |
| 0 | 157 | |
| 18 | 158 | fun display_token (Token s) = quote s | 
| 159 |   | display_token (IdentSy s) = "id(" ^ s ^ ")"
 | |
| 3828 | 160 |   | display_token (LongIdentSy s) = "longid(" ^ s ^ ")"
 | 
| 18 | 161 |   | display_token (VarSy s) = "var(" ^ s ^ ")"
 | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
237diff
changeset | 162 |   | display_token (TFreeSy s) = "tid(" ^ s ^ ")"
 | 
| 18 | 163 |   | display_token (TVarSy s) = "tvar(" ^ s ^ ")"
 | 
| 11697 | 164 |   | display_token (NumSy s) = "num(" ^ s ^ ")"
 | 
| 165 |   | display_token (XNumSy s) = "xnum(" ^ s ^ ")"
 | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 166 |   | display_token (StrSy s) = "xstr(" ^ s ^ ")"
 | 
| 18 | 167 | | display_token EndToken = ""; | 
| 0 | 168 | |
| 18 | 169 | |
| 170 | (* matching_tokens *) | |
| 0 | 171 | |
| 18 | 172 | fun matching_tokens (Token x, Token y) = (x = y) | 
| 173 | | matching_tokens (IdentSy _, IdentSy _) = true | |
| 3828 | 174 | | matching_tokens (LongIdentSy _, LongIdentSy _) = true | 
| 18 | 175 | | matching_tokens (VarSy _, VarSy _) = true | 
| 176 | | matching_tokens (TFreeSy _, TFreeSy _) = true | |
| 177 | | matching_tokens (TVarSy _, TVarSy _) = true | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 178 | | matching_tokens (NumSy _, NumSy _) = true | 
| 11697 | 179 | | matching_tokens (XNumSy _, XNumSy _) = true | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 180 | | matching_tokens (StrSy _, StrSy _) = true | 
| 18 | 181 | | matching_tokens (EndToken, EndToken) = true | 
| 182 | | matching_tokens _ = false; | |
| 0 | 183 | |
| 184 | ||
| 18 | 185 | (* valued_token *) | 
| 0 | 186 | |
| 18 | 187 | fun valued_token (Token _) = false | 
| 188 | | valued_token (IdentSy _) = true | |
| 3828 | 189 | | valued_token (LongIdentSy _) = true | 
| 18 | 190 | | valued_token (VarSy _) = true | 
| 191 | | valued_token (TFreeSy _) = true | |
| 192 | | valued_token (TVarSy _) = true | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 193 | | valued_token (NumSy _) = true | 
| 11697 | 194 | | valued_token (XNumSy _) = true | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 195 | | valued_token (StrSy _) = true | 
| 18 | 196 | | valued_token EndToken = false; | 
| 0 | 197 | |
| 198 | ||
| 18 | 199 | (* predef_term *) | 
| 0 | 200 | |
| 15531 | 201 | fun predef_term "id" = SOME (IdentSy "id") | 
| 202 | | predef_term "longid" = SOME (LongIdentSy "longid") | |
| 203 | | predef_term "var" = SOME (VarSy "var") | |
| 204 | | predef_term "tid" = SOME (TFreeSy "tid") | |
| 205 | | predef_term "tvar" = SOME (TVarSy "tvar") | |
| 206 | | predef_term "num" = SOME (NumSy "num") | |
| 207 | | predef_term "xnum" = SOME (XNumSy "xnum") | |
| 208 | | predef_term "xstr" = SOME (StrSy "xstr") | |
| 209 | | predef_term _ = NONE; | |
| 0 | 210 | |
| 211 | ||
| 4703 | 212 | (* xstr tokens *) | 
| 18 | 213 | |
| 14730 | 214 | fun lex_err msg prfx (cs, _) = | 
| 215 | "Inner lexical error: " ^ msg ^ " at " ^ quote (prfx ^ Symbol.beginning 10 cs); | |
| 216 | ||
| 217 | val scan_chr = | |
| 21774 | 218 | $$ "\\" |-- $$ "'" || | 
| 23784 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
22686diff
changeset | 219 | Scan.one (fn s => s <> "\\" andalso s <> "'" andalso Symbol.is_regular s) || | 
| 19305 | 220 | $$ "'" --| Scan.ahead (~$$ "'"); | 
| 14730 | 221 | |
| 222 | val scan_str = | |
| 223 | $$ "'" |-- $$ "'" |-- !! (lex_err "missing end of string" "''") | |
| 224 | (Scan.repeat scan_chr --| $$ "'" --| $$ "'"); | |
| 225 | ||
| 0 | 226 | |
| 4703 | 227 | fun implode_xstr cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs)); | 
| 18 | 228 | |
| 4703 | 229 | fun explode_xstr str = | 
| 5868 | 230 | (case Scan.read Symbol.stopper scan_str (Symbol.explode str) of | 
| 15531 | 231 | SOME cs => cs | 
| 5868 | 232 |   | _ => error ("Inner lexical error: literal string expected at " ^ quote str));
 | 
| 18 | 233 | |
| 234 | ||
| 14730 | 235 | (* nested comments *) | 
| 236 | ||
| 237 | val scan_cmt = | |
| 238 |   Scan.depend (fn d => $$ "(" ^^ $$ "*" >> pair (d + 1)) ||
 | |
| 239 | Scan.depend (fn 0 => Scan.fail | d => $$ "*" ^^ $$ ")" >> pair (d - 1)) || | |
| 19305 | 240 | Scan.lift ($$ "*" --| Scan.ahead (~$$ ")")) || | 
| 23784 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
22686diff
changeset | 241 | Scan.lift (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)); | 
| 14730 | 242 | |
| 14783 | 243 | val scan_comment = | 
| 244 |   $$ "(" -- $$ "*" -- !! (lex_err "missing end of comment" "(*")
 | |
| 14730 | 245 | (Scan.pass 0 (Scan.repeat scan_cmt) -- $$ "*" -- $$ ")") | 
| 14783 | 246 | >> K (); | 
| 14730 | 247 | |
| 248 | ||
| 18 | 249 | |
| 250 | (** tokenize **) | |
| 251 | ||
| 2363 | 252 | fun tokenize lex xids chs = | 
| 18 | 253 | let | 
| 254 | val scan_xid = | |
| 255 | if xids then $$ "_" ^^ scan_id || scan_id | |
| 256 | else scan_id; | |
| 257 | ||
| 20096 | 258 | val scan_num = scan_hex || scan_bin || scan_int; | 
| 259 | ||
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 260 | val scan_val = | 
| 4902 | 261 | scan_tvar >> pair TVarSy || | 
| 4703 | 262 | scan_var >> pair VarSy || | 
| 263 | scan_tid >> pair TFreeSy || | |
| 20096 | 264 | scan_num >> pair NumSy || | 
| 265 | $$ "#" ^^ scan_num >> pair XNumSy || | |
| 3828 | 266 | scan_longid >> pair LongIdentSy || | 
| 18 | 267 | scan_xid >> pair IdentSy; | 
| 268 | ||
| 4703 | 269 | val scan_lit = Scan.literal lex >> (pair Token o implode); | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 270 | |
| 4703 | 271 | val scan_token = | 
| 15531 | 272 | scan_comment >> K NONE || | 
| 273 | Scan.max (op <= o pairself snd) scan_lit scan_val >> (fn (tk, s) => SOME (tk s)) || | |
| 274 | scan_str >> (SOME o StrSy o implode_xstr) || | |
| 275 | Scan.one Symbol.is_blank >> K NONE; | |
| 18 | 276 | in | 
| 4938 | 277 | (case Scan.error (Scan.finite Symbol.stopper (Scan.repeat scan_token)) chs of | 
| 24245 | 278 | (toks, []) => map_filter I toks | 
| 4703 | 279 |     | (_, cs) => error ("Inner lexical error at: " ^ quote (implode cs)))
 | 
| 18 | 280 | end; | 
| 281 | ||
| 282 | ||
| 283 | ||
| 284 | (** scan variables **) | |
| 285 | ||
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 286 | (* scan_indexname *) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 287 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 288 | local | 
| 18 | 289 | |
| 290 | fun scan_vname chrs = | |
| 291 | let | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 292 | fun nat n [] = n | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 293 | | nat n (c :: cs) = nat (n * 10 + (ord c - ord "0")) cs; | 
| 18 | 294 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 295 | fun idxname cs ds = (implode (rev cs), nat 0 ds); | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 296 | fun chop_idx [] ds = idxname [] ds | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 297 | | chop_idx (cs as (_ :: "\\<^isub>" :: _)) ds = idxname cs ds | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 298 | | chop_idx (cs as (_ :: "\\<^isup>" :: _)) ds = idxname cs ds | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 299 | | chop_idx (c :: cs) ds = | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 300 | if Symbol.is_digit c then chop_idx cs (c :: ds) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 301 | else idxname (c :: cs) ds; | 
| 18 | 302 | |
| 303 | val scan = | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 304 | scan_letter_letdigs -- Scan.optional ($$ "." |-- scan_digits1 >> nat 0) ~1; | 
| 18 | 305 | in | 
| 306 | (case scan chrs of | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 307 | ((cs, ~1), cs') => (chop_idx (rev cs) [], cs') | 
| 18 | 308 | | ((cs, i), cs') => ((implode cs, i), cs')) | 
| 309 | end; | |
| 310 | ||
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 311 | in | 
| 18 | 312 | |
| 15443 
07f78cc82a73
indexname function now parses type variables as well; changed input
 berghofe parents: 
14981diff
changeset | 313 | val scan_indexname = | 
| 
07f78cc82a73
indexname function now parses type variables as well; changed input
 berghofe parents: 
14981diff
changeset | 314 |      $$ "'" |-- scan_vname >> (fn (x, i) => ("'" ^ x, i))
 | 
| 
07f78cc82a73
indexname function now parses type variables as well; changed input
 berghofe parents: 
14981diff
changeset | 315 | || scan_vname; | 
| 
07f78cc82a73
indexname function now parses type variables as well; changed input
 berghofe parents: 
14981diff
changeset | 316 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 317 | end; | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 318 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 319 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 320 | (* indexname *) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 321 | |
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 322 | fun read_indexname s = | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 323 | (case Scan.read Symbol.stopper scan_indexname (Symbol.explode s) of | 
| 15531 | 324 | SOME xi => xi | 
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 325 |   | _ => error ("Lexical error in variable name: " ^ quote s));
 | 
| 18 | 326 | |
| 327 | ||
| 4703 | 328 | (* read_var *) | 
| 18 | 329 | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 330 | fun const c = Const (c, dummyT); | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 331 | fun free x = Free (x, dummyT); | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 332 | fun var xi = Var (xi, dummyT); | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 333 | |
| 4703 | 334 | fun read_var str = | 
| 18 | 335 | let | 
| 336 | val scan = | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 337 | $$ "?" |-- scan_indexname --| Scan.ahead (Scan.one Symbol.is_eof) >> var || | 
| 23784 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
22686diff
changeset | 338 | Scan.many Symbol.is_regular >> (free o implode); | 
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 339 | in the (Scan.read Symbol.stopper scan (Symbol.explode str)) end; | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 340 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 341 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 342 | (* read_variable *) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 343 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 344 | fun read_variable str = | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 345 | let val scan = $$ "?" |-- scan_indexname || scan_indexname | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 346 | in Scan.read Symbol.stopper scan (Symbol.explode str) end; | 
| 4587 | 347 | |
| 348 | ||
| 19002 | 349 | (* specific identifiers *) | 
| 5260 | 350 | |
| 19002 | 351 | val constN = "\\<^const>"; | 
| 352 | val fixedN = "\\<^fixed>"; | |
| 353 | ||
| 5260 | 354 | |
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 355 | (* read numbers *) | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 356 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 357 | local | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 358 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 359 | fun nat cs = | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 360 | Option.map (#1 o Library.read_int) | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 361 | (Scan.read Symbol.stopper scan_digits1 cs); | 
| 5860 | 362 | |
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 363 | in | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 364 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 365 | val read_nat = nat o Symbol.explode; | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 366 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 367 | fun read_int s = | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 368 | (case Symbol.explode s of | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 369 | "-" :: cs => Option.map ~ (nat cs) | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 370 | | cs => nat cs); | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 371 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 372 | end; | 
| 5860 | 373 | |
| 374 | ||
| 20096 | 375 | (* read_xnum: hex/bin/decimal *) | 
| 9326 | 376 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 377 | local | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 378 | |
| 20096 | 379 | val ten = ord "0" + 10; | 
| 380 | val a = ord "a"; | |
| 381 | val A = ord "A"; | |
| 23802 | 382 | val _ = a > A orelse sys_error "Bad ASCII"; | 
| 20096 | 383 | |
| 384 | fun remap_hex c = | |
| 385 | let val x = ord c in | |
| 386 | if x >= a then chr (x - a + ten) | |
| 387 | else if x >= A then chr (x - A + ten) | |
| 388 | else c | |
| 389 | end; | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 390 | |
| 21781 | 391 | fun leading_zeros ["0"] = 0 | 
| 392 |   | leading_zeros ("0" :: cs) = 1 + leading_zeros cs
 | |
| 393 | | leading_zeros _ = 0; | |
| 394 | ||
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 395 | in | 
| 15965 
f422f8283491
Use of IntInf.int instead of int in most numeric simprocs; avoids
 paulson parents: 
15570diff
changeset | 396 | |
| 9326 | 397 | fun read_xnum str = | 
| 398 | let | |
| 20096 | 399 | val (sign, radix, digs) = | 
| 400 | (case Symbol.explode (perhaps (try (unprefix "#")) str) of | |
| 401 | "0" :: "x" :: cs => (1, 16, map remap_hex cs) | |
| 402 | | "0" :: "b" :: cs => (1, 2, cs) | |
| 403 | | "-" :: cs => (~1, 10, cs) | |
| 404 | | cs => (1, 10, cs)); | |
| 24630 
351a308ab58d
simplified type int (eliminated IntInf.int, integer);
 wenzelm parents: 
24583diff
changeset | 405 | val value = sign * #1 (Library.read_radix_int radix digs); | 
| 21781 | 406 |   in {radix = radix, leading_zeros = leading_zeros digs, value = value} end;
 | 
| 9326 | 407 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 408 | end; | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 409 | |
| 9326 | 410 | |
| 7784 | 411 | (* read_ident(s) *) | 
| 412 | ||
| 413 | fun read_idents str = | |
| 414 | let | |
| 21858 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 wenzelm parents: 
21781diff
changeset | 415 | val blanks = Scan.many Symbol.is_blank; | 
| 23784 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
22686diff
changeset | 416 | val junk = Scan.many Symbol.is_regular; | 
| 7784 | 417 | val idents = Scan.repeat1 (blanks |-- scan_id --| blanks) -- junk; | 
| 418 | in | |
| 419 | (case Scan.read Symbol.stopper idents (Symbol.explode str) of | |
| 15531 | 420 | SOME (ids, []) => ids | 
| 421 |     | SOME (_, bad) => error ("Bad identifier: " ^ quote (implode bad))
 | |
| 422 |     | NONE => error ("No identifier found in: " ^ quote str))
 | |
| 7784 | 423 | end; | 
| 424 | ||
| 0 | 425 | end; |