| author | wenzelm | 
| Thu, 27 Mar 2008 15:32:19 +0100 | |
| changeset 26436 | dfd6947ab5c2 | 
| parent 26007 | 3760d3ff4cce | 
| child 27121 | 38367dbccae5 | 
| permissions | -rw-r--r-- | 
| 18 | 1 | (* Title: Pure/Syntax/lexicon.ML | 
| 0 | 2 | ID: $Id$ | 
| 18 | 3 | Author: Tobias Nipkow and Markus Wenzel, TU Muenchen | 
| 0 | 4 | |
| 4703 | 5 | Lexer for the inner Isabelle syntax (terms and types). | 
| 18 | 6 | *) | 
| 0 | 7 | |
| 8 | signature LEXICON0 = | |
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 9 | sig | 
| 0 | 10 | val is_identifier: string -> bool | 
| 14679 | 11 | val is_ascii_identifier: string -> bool | 
| 20165 | 12 | val is_tid: string -> bool | 
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 13 | val implode_xstr: string list -> string | 
| 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 14 | val explode_xstr: string -> string list | 
| 26007 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 15 | val scan_id: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 16 | val scan_longid: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 17 | val scan_tid: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 18 | val scan_nat: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 19 | val scan_int: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 20 | val scan_hex: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 21 | val scan_bin: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 22 | val scan_var: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 23 | val scan_tvar: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list | 
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 24 | val read_indexname: string -> indexname | 
| 4703 | 25 | val read_var: string -> term | 
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 26 | val read_variable: string -> indexname option | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 27 | val const: string -> term | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 28 | val free: string -> term | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 29 | val var: indexname -> term | 
| 5860 | 30 | val read_nat: string -> int option | 
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 31 | val read_int: string -> int option | 
| 24630 
351a308ab58d
simplified type int (eliminated IntInf.int, integer);
 wenzelm parents: 
24583diff
changeset | 32 |   val read_xnum: string -> {radix: int, leading_zeros: int, value: int}
 | 
| 7784 | 33 | val read_idents: string -> string list | 
| 19002 | 34 | val fixedN: string | 
| 35 | val constN: string | |
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 36 | end; | 
| 0 | 37 | |
| 38 | signature LEXICON = | |
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 39 | sig | 
| 18 | 40 | include LEXICON0 | 
| 41 | val is_xid: string -> bool | |
| 42 | datatype token = | |
| 43 | Token of string | | |
| 44 | IdentSy of string | | |
| 3828 | 45 | LongIdentSy of string | | 
| 18 | 46 | VarSy of string | | 
| 47 | TFreeSy of string | | |
| 48 | TVarSy of string | | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 49 | NumSy of string | | 
| 11697 | 50 | XNumSy of string | | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 51 | StrSy of string | | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 52 | EndToken | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 53 | val idT: typ | 
| 3828 | 54 | val longidT: typ | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 55 | val varT: typ | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 56 | val tidT: typ | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 57 | val tvarT: typ | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 58 | val terminals: string list | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 59 | val is_terminal: string -> bool | 
| 18 | 60 | val str_of_token: token -> string | 
| 61 | val display_token: token -> string | |
| 62 | val matching_tokens: token * token -> bool | |
| 63 | val valued_token: token -> bool | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 64 | val predef_term: string -> token option | 
| 4703 | 65 | val tokenize: Scan.lexicon -> bool -> string list -> token list | 
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 66 | end; | 
| 0 | 67 | |
| 14679 | 68 | structure Lexicon: LEXICON = | 
| 0 | 69 | struct | 
| 70 | ||
| 18 | 71 | (** is_identifier etc. **) | 
| 72 | ||
| 16150 | 73 | val is_identifier = Symbol.is_ident o Symbol.explode; | 
| 18 | 74 | |
| 14679 | 75 | fun is_ascii_identifier s = | 
| 76 | let val cs = Symbol.explode s | |
| 16150 | 77 | in forall Symbol.is_ascii cs andalso Symbol.is_ident cs end; | 
| 14679 | 78 | |
| 18 | 79 | fun is_xid s = | 
| 4703 | 80 | (case Symbol.explode s of | 
| 16150 | 81 | "_" :: cs => Symbol.is_ident cs | 
| 82 | | cs => Symbol.is_ident cs); | |
| 18 | 83 | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
237diff
changeset | 84 | fun is_tid s = | 
| 4703 | 85 | (case Symbol.explode s of | 
| 16150 | 86 | "'" :: cs => Symbol.is_ident cs | 
| 18 | 87 | | _ => false); | 
| 88 | ||
| 0 | 89 | |
| 90 | ||
| 4703 | 91 | (** basic scanners **) | 
| 92 | ||
| 26007 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 93 | val scan_id = Scan.one Symbol.is_letter ::: Scan.many Symbol.is_letdig; | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 94 | val scan_longid = scan_id @@@ (Scan.repeat1 ($$ "." ::: scan_id) >> flat); | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 95 | val scan_tid = $$ "'" ::: scan_id; | 
| 4703 | 96 | |
| 26007 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 97 | val scan_nat = Scan.many1 Symbol.is_digit; | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 98 | val scan_int = $$ "-" ::: scan_nat || scan_nat; | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 99 | val scan_hex = $$ "0" ::: $$ "x" ::: Scan.many1 Symbol.is_ascii_hex; | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 100 | val scan_bin = $$ "0" ::: $$ "b" ::: Scan.many1 (fn s => s = "0" orelse s = "1"); | 
| 4703 | 101 | |
| 26007 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 102 | val scan_id_nat = scan_id @@@ Scan.optional ($$ "." ::: scan_nat) []; | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 103 | val scan_var = $$ "?" ::: scan_id_nat; | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 104 | val scan_tvar = $$ "?" ::: $$ "'" ::: scan_id_nat; | 
| 4703 | 105 | |
| 106 | ||
| 107 | ||
| 18 | 108 | (** datatype token **) | 
| 0 | 109 | |
| 18 | 110 | datatype token = | 
| 111 | Token of string | | |
| 112 | IdentSy of string | | |
| 3828 | 113 | LongIdentSy of string | | 
| 18 | 114 | VarSy of string | | 
| 115 | TFreeSy of string | | |
| 116 | TVarSy of string | | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 117 | NumSy of string | | 
| 11697 | 118 | XNumSy of string | | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 119 | StrSy of string | | 
| 18 | 120 | EndToken; | 
| 0 | 121 | |
| 122 | ||
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 123 | (* terminal arguments *) | 
| 0 | 124 | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 125 | val idT = Type ("id", []);
 | 
| 3828 | 126 | val longidT = Type ("longid", []);
 | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 127 | val varT = Type ("var", []);
 | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 128 | val tidT = Type ("tid", []);
 | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 129 | val tvarT = Type ("tvar", []);
 | 
| 0 | 130 | |
| 11697 | 131 | val terminals = ["id", "longid", "var", "tid", "tvar", "num", "xnum", "xstr"]; | 
| 20664 | 132 | val is_terminal = member (op =) terminals; | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 133 | |
| 0 | 134 | |
| 18 | 135 | (* str_of_token *) | 
| 0 | 136 | |
| 18 | 137 | fun str_of_token (Token s) = s | 
| 138 | | str_of_token (IdentSy s) = s | |
| 3828 | 139 | | str_of_token (LongIdentSy s) = s | 
| 18 | 140 | | str_of_token (VarSy s) = s | 
| 141 | | str_of_token (TFreeSy s) = s | |
| 142 | | str_of_token (TVarSy s) = s | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 143 | | str_of_token (NumSy s) = s | 
| 11697 | 144 | | str_of_token (XNumSy s) = s | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 145 | | str_of_token (StrSy s) = s | 
| 376 
d3d01131470f
extended signature SCANNER by some basic scanners and type lexicon;
 wenzelm parents: 
330diff
changeset | 146 | | str_of_token EndToken = "EOF"; | 
| 0 | 147 | |
| 18 | 148 | |
| 149 | (* display_token *) | |
| 0 | 150 | |
| 18 | 151 | fun display_token (Token s) = quote s | 
| 152 |   | display_token (IdentSy s) = "id(" ^ s ^ ")"
 | |
| 3828 | 153 |   | display_token (LongIdentSy s) = "longid(" ^ s ^ ")"
 | 
| 18 | 154 |   | display_token (VarSy s) = "var(" ^ s ^ ")"
 | 
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
237diff
changeset | 155 |   | display_token (TFreeSy s) = "tid(" ^ s ^ ")"
 | 
| 18 | 156 |   | display_token (TVarSy s) = "tvar(" ^ s ^ ")"
 | 
| 11697 | 157 |   | display_token (NumSy s) = "num(" ^ s ^ ")"
 | 
| 158 |   | display_token (XNumSy s) = "xnum(" ^ s ^ ")"
 | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 159 |   | display_token (StrSy s) = "xstr(" ^ s ^ ")"
 | 
| 18 | 160 | | display_token EndToken = ""; | 
| 0 | 161 | |
| 18 | 162 | |
| 163 | (* matching_tokens *) | |
| 0 | 164 | |
| 18 | 165 | fun matching_tokens (Token x, Token y) = (x = y) | 
| 166 | | matching_tokens (IdentSy _, IdentSy _) = true | |
| 3828 | 167 | | matching_tokens (LongIdentSy _, LongIdentSy _) = true | 
| 18 | 168 | | matching_tokens (VarSy _, VarSy _) = true | 
| 169 | | matching_tokens (TFreeSy _, TFreeSy _) = true | |
| 170 | | matching_tokens (TVarSy _, TVarSy _) = true | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 171 | | matching_tokens (NumSy _, NumSy _) = true | 
| 11697 | 172 | | matching_tokens (XNumSy _, XNumSy _) = true | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 173 | | matching_tokens (StrSy _, StrSy _) = true | 
| 18 | 174 | | matching_tokens (EndToken, EndToken) = true | 
| 175 | | matching_tokens _ = false; | |
| 0 | 176 | |
| 177 | ||
| 18 | 178 | (* valued_token *) | 
| 0 | 179 | |
| 18 | 180 | fun valued_token (Token _) = false | 
| 181 | | valued_token (IdentSy _) = true | |
| 3828 | 182 | | valued_token (LongIdentSy _) = true | 
| 18 | 183 | | valued_token (VarSy _) = true | 
| 184 | | valued_token (TFreeSy _) = true | |
| 185 | | valued_token (TVarSy _) = true | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 186 | | valued_token (NumSy _) = true | 
| 11697 | 187 | | valued_token (XNumSy _) = true | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 188 | | valued_token (StrSy _) = true | 
| 18 | 189 | | valued_token EndToken = false; | 
| 0 | 190 | |
| 191 | ||
| 18 | 192 | (* predef_term *) | 
| 0 | 193 | |
| 15531 | 194 | fun predef_term "id" = SOME (IdentSy "id") | 
| 195 | | predef_term "longid" = SOME (LongIdentSy "longid") | |
| 196 | | predef_term "var" = SOME (VarSy "var") | |
| 197 | | predef_term "tid" = SOME (TFreeSy "tid") | |
| 198 | | predef_term "tvar" = SOME (TVarSy "tvar") | |
| 199 | | predef_term "num" = SOME (NumSy "num") | |
| 200 | | predef_term "xnum" = SOME (XNumSy "xnum") | |
| 201 | | predef_term "xstr" = SOME (StrSy "xstr") | |
| 202 | | predef_term _ = NONE; | |
| 0 | 203 | |
| 204 | ||
| 4703 | 205 | (* xstr tokens *) | 
| 18 | 206 | |
| 14730 | 207 | fun lex_err msg prfx (cs, _) = | 
| 208 | "Inner lexical error: " ^ msg ^ " at " ^ quote (prfx ^ Symbol.beginning 10 cs); | |
| 209 | ||
| 210 | val scan_chr = | |
| 21774 | 211 | $$ "\\" |-- $$ "'" || | 
| 23784 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
22686diff
changeset | 212 | Scan.one (fn s => s <> "\\" andalso s <> "'" andalso Symbol.is_regular s) || | 
| 19305 | 213 | $$ "'" --| Scan.ahead (~$$ "'"); | 
| 14730 | 214 | |
| 215 | val scan_str = | |
| 216 | $$ "'" |-- $$ "'" |-- !! (lex_err "missing end of string" "''") | |
| 217 | (Scan.repeat scan_chr --| $$ "'" --| $$ "'"); | |
| 218 | ||
| 0 | 219 | |
| 4703 | 220 | fun implode_xstr cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs)); | 
| 18 | 221 | |
| 4703 | 222 | fun explode_xstr str = | 
| 5868 | 223 | (case Scan.read Symbol.stopper scan_str (Symbol.explode str) of | 
| 15531 | 224 | SOME cs => cs | 
| 5868 | 225 |   | _ => error ("Inner lexical error: literal string expected at " ^ quote str));
 | 
| 18 | 226 | |
| 227 | ||
| 14730 | 228 | (* nested comments *) | 
| 229 | ||
| 230 | val scan_cmt = | |
| 231 |   Scan.depend (fn d => $$ "(" ^^ $$ "*" >> pair (d + 1)) ||
 | |
| 232 | Scan.depend (fn 0 => Scan.fail | d => $$ "*" ^^ $$ ")" >> pair (d - 1)) || | |
| 19305 | 233 | Scan.lift ($$ "*" --| Scan.ahead (~$$ ")")) || | 
| 23784 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
22686diff
changeset | 234 | Scan.lift (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)); | 
| 14730 | 235 | |
| 14783 | 236 | val scan_comment = | 
| 237 |   $$ "(" -- $$ "*" -- !! (lex_err "missing end of comment" "(*")
 | |
| 14730 | 238 | (Scan.pass 0 (Scan.repeat scan_cmt) -- $$ "*" -- $$ ")") | 
| 14783 | 239 | >> K (); | 
| 14730 | 240 | |
| 241 | ||
| 18 | 242 | |
| 243 | (** tokenize **) | |
| 244 | ||
| 2363 | 245 | fun tokenize lex xids chs = | 
| 18 | 246 | let | 
| 26007 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 247 | fun token kind cs = (kind, implode cs); | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 248 | |
| 18 | 249 | val scan_xid = | 
| 26007 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 250 | if xids then $$ "_" ::: scan_id || scan_id | 
| 18 | 251 | else scan_id; | 
| 252 | ||
| 20096 | 253 | val scan_num = scan_hex || scan_bin || scan_int; | 
| 254 | ||
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 255 | val scan_val = | 
| 26007 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 256 | scan_tvar >> token TVarSy || | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 257 | scan_var >> token VarSy || | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 258 | scan_tid >> token TFreeSy || | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 259 | scan_num >> token NumSy || | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 260 | $$ "#" ::: scan_num >> token XNumSy || | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 261 | scan_longid >> token LongIdentSy || | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 262 | scan_xid >> token IdentSy; | 
| 18 | 263 | |
| 26007 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 264 | val scan_lit = Scan.literal lex >> token Token; | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 265 | |
| 4703 | 266 | val scan_token = | 
| 15531 | 267 | scan_comment >> K NONE || | 
| 268 | Scan.max (op <= o pairself snd) scan_lit scan_val >> (fn (tk, s) => SOME (tk s)) || | |
| 269 | scan_str >> (SOME o StrSy o implode_xstr) || | |
| 270 | Scan.one Symbol.is_blank >> K NONE; | |
| 18 | 271 | in | 
| 4938 | 272 | (case Scan.error (Scan.finite Symbol.stopper (Scan.repeat scan_token)) chs of | 
| 24245 | 273 | (toks, []) => map_filter I toks | 
| 4703 | 274 |     | (_, cs) => error ("Inner lexical error at: " ^ quote (implode cs)))
 | 
| 18 | 275 | end; | 
| 276 | ||
| 277 | ||
| 278 | ||
| 279 | (** scan variables **) | |
| 280 | ||
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 281 | (* scan_indexname *) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 282 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 283 | local | 
| 18 | 284 | |
| 285 | fun scan_vname chrs = | |
| 286 | let | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 287 | fun nat n [] = n | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 288 | | nat n (c :: cs) = nat (n * 10 + (ord c - ord "0")) cs; | 
| 18 | 289 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 290 | fun idxname cs ds = (implode (rev cs), nat 0 ds); | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 291 | fun chop_idx [] ds = idxname [] ds | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 292 | | chop_idx (cs as (_ :: "\\<^isub>" :: _)) ds = idxname cs ds | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 293 | | chop_idx (cs as (_ :: "\\<^isup>" :: _)) ds = idxname cs ds | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 294 | | chop_idx (c :: cs) ds = | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 295 | if Symbol.is_digit c then chop_idx cs (c :: ds) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 296 | else idxname (c :: cs) ds; | 
| 18 | 297 | |
| 26007 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 298 | val scan = scan_id -- Scan.optional ($$ "." |-- scan_nat >> nat 0) ~1; | 
| 18 | 299 | in | 
| 300 | (case scan chrs of | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 301 | ((cs, ~1), cs') => (chop_idx (rev cs) [], cs') | 
| 18 | 302 | | ((cs, i), cs') => ((implode cs, i), cs')) | 
| 303 | end; | |
| 304 | ||
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 305 | in | 
| 18 | 306 | |
| 15443 
07f78cc82a73
indexname function now parses type variables as well; changed input
 berghofe parents: 
14981diff
changeset | 307 | val scan_indexname = | 
| 
07f78cc82a73
indexname function now parses type variables as well; changed input
 berghofe parents: 
14981diff
changeset | 308 |      $$ "'" |-- scan_vname >> (fn (x, i) => ("'" ^ x, i))
 | 
| 
07f78cc82a73
indexname function now parses type variables as well; changed input
 berghofe parents: 
14981diff
changeset | 309 | || scan_vname; | 
| 
07f78cc82a73
indexname function now parses type variables as well; changed input
 berghofe parents: 
14981diff
changeset | 310 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 311 | end; | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 312 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 313 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 314 | (* indexname *) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 315 | |
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 316 | fun read_indexname s = | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 317 | (case Scan.read Symbol.stopper scan_indexname (Symbol.explode s) of | 
| 15531 | 318 | SOME xi => xi | 
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 319 |   | _ => error ("Lexical error in variable name: " ^ quote s));
 | 
| 18 | 320 | |
| 321 | ||
| 4703 | 322 | (* read_var *) | 
| 18 | 323 | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 324 | fun const c = Const (c, dummyT); | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 325 | fun free x = Free (x, dummyT); | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 326 | fun var xi = Var (xi, dummyT); | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 327 | |
| 4703 | 328 | fun read_var str = | 
| 18 | 329 | let | 
| 330 | val scan = | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 331 | $$ "?" |-- scan_indexname --| Scan.ahead (Scan.one Symbol.is_eof) >> var || | 
| 23784 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
22686diff
changeset | 332 | Scan.many Symbol.is_regular >> (free o implode); | 
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 333 | in the (Scan.read Symbol.stopper scan (Symbol.explode str)) end; | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 334 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 335 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 336 | (* read_variable *) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 337 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 338 | fun read_variable str = | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 339 | let val scan = $$ "?" |-- scan_indexname || scan_indexname | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 340 | in Scan.read Symbol.stopper scan (Symbol.explode str) end; | 
| 4587 | 341 | |
| 342 | ||
| 19002 | 343 | (* specific identifiers *) | 
| 5260 | 344 | |
| 19002 | 345 | val constN = "\\<^const>"; | 
| 346 | val fixedN = "\\<^fixed>"; | |
| 347 | ||
| 5260 | 348 | |
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 349 | (* read numbers *) | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 350 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 351 | local | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 352 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 353 | fun nat cs = | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 354 | Option.map (#1 o Library.read_int) | 
| 26007 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 355 | (Scan.read Symbol.stopper scan_nat cs); | 
| 5860 | 356 | |
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 357 | in | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 358 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 359 | val read_nat = nat o Symbol.explode; | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 360 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 361 | fun read_int s = | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 362 | (case Symbol.explode s of | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 363 | "-" :: cs => Option.map ~ (nat cs) | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 364 | | cs => nat cs); | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 365 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 366 | end; | 
| 5860 | 367 | |
| 368 | ||
| 20096 | 369 | (* read_xnum: hex/bin/decimal *) | 
| 9326 | 370 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 371 | local | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 372 | |
| 20096 | 373 | val ten = ord "0" + 10; | 
| 374 | val a = ord "a"; | |
| 375 | val A = ord "A"; | |
| 23802 | 376 | val _ = a > A orelse sys_error "Bad ASCII"; | 
| 20096 | 377 | |
| 378 | fun remap_hex c = | |
| 379 | let val x = ord c in | |
| 380 | if x >= a then chr (x - a + ten) | |
| 381 | else if x >= A then chr (x - A + ten) | |
| 382 | else c | |
| 383 | end; | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 384 | |
| 21781 | 385 | fun leading_zeros ["0"] = 0 | 
| 386 |   | leading_zeros ("0" :: cs) = 1 + leading_zeros cs
 | |
| 387 | | leading_zeros _ = 0; | |
| 388 | ||
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 389 | in | 
| 15965 
f422f8283491
Use of IntInf.int instead of int in most numeric simprocs; avoids
 paulson parents: 
15570diff
changeset | 390 | |
| 9326 | 391 | fun read_xnum str = | 
| 392 | let | |
| 20096 | 393 | val (sign, radix, digs) = | 
| 394 | (case Symbol.explode (perhaps (try (unprefix "#")) str) of | |
| 395 | "0" :: "x" :: cs => (1, 16, map remap_hex cs) | |
| 396 | | "0" :: "b" :: cs => (1, 2, cs) | |
| 397 | | "-" :: cs => (~1, 10, cs) | |
| 398 | | cs => (1, 10, cs)); | |
| 24630 
351a308ab58d
simplified type int (eliminated IntInf.int, integer);
 wenzelm parents: 
24583diff
changeset | 399 | val value = sign * #1 (Library.read_radix_int radix digs); | 
| 21781 | 400 |   in {radix = radix, leading_zeros = leading_zeros digs, value = value} end;
 | 
| 9326 | 401 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 402 | end; | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 403 | |
| 9326 | 404 | |
| 7784 | 405 | (* read_ident(s) *) | 
| 406 | ||
| 407 | fun read_idents str = | |
| 408 | let | |
| 21858 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 wenzelm parents: 
21781diff
changeset | 409 | val blanks = Scan.many Symbol.is_blank; | 
| 23784 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 wenzelm parents: 
22686diff
changeset | 410 | val junk = Scan.many Symbol.is_regular; | 
| 26007 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 411 | val idents = Scan.repeat1 (blanks |-- (scan_id >> implode) --| blanks) -- junk; | 
| 7784 | 412 | in | 
| 413 | (case Scan.read Symbol.stopper idents (Symbol.explode str) of | |
| 15531 | 414 | SOME (ids, []) => ids | 
| 415 |     | SOME (_, bad) => error ("Bad identifier: " ^ quote (implode bad))
 | |
| 416 |     | NONE => error ("No identifier found in: " ^ quote str))
 | |
| 7784 | 417 | end; | 
| 418 | ||
| 0 | 419 | end; |