| author | wenzelm | 
| Tue, 03 Mar 2009 14:07:43 +0100 | |
| changeset 30211 | 556d1810cdad | 
| parent 29565 | 3f8b24fcfbd6 | 
| child 30573 | 49899f26fbd1 | 
| permissions | -rw-r--r-- | 
| 18 | 1 | (* Title: Pure/Syntax/lexicon.ML | 
| 2 | Author: Tobias Nipkow and Markus Wenzel, TU Muenchen | |
| 0 | 3 | |
| 4703 | 4 | Lexer for the inner Isabelle syntax (terms and types). | 
| 18 | 5 | *) | 
| 0 | 6 | |
| 7 | signature LEXICON0 = | |
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 8 | sig | 
| 0 | 9 | val is_identifier: string -> bool | 
| 14679 | 10 | val is_ascii_identifier: string -> bool | 
| 20165 | 11 | val is_tid: string -> bool | 
| 27773 | 12 | val scan_id: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list | 
| 13 | val scan_longid: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list | |
| 14 | val scan_tid: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list | |
| 15 | val scan_nat: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list | |
| 16 | val scan_int: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list | |
| 17 | val scan_hex: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list | |
| 18 | val scan_bin: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list | |
| 19 | val scan_var: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list | |
| 20 | val scan_tvar: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list | |
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 21 | val implode_xstr: string list -> string | 
| 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 22 | val explode_xstr: string -> string list | 
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 23 | val read_indexname: string -> indexname | 
| 4703 | 24 | val read_var: string -> term | 
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 25 | val read_variable: string -> indexname option | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 26 | val const: string -> term | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 27 | val free: string -> term | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 28 | val var: indexname -> term | 
| 5860 | 29 | val read_nat: string -> int option | 
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 30 | val read_int: string -> int option | 
| 24630 
351a308ab58d
simplified type int (eliminated IntInf.int, integer);
 wenzelm parents: 
24583diff
changeset | 31 |   val read_xnum: string -> {radix: int, leading_zeros: int, value: int}
 | 
| 28904 | 32 |   val read_float: string -> {mant: int, exp: int}
 | 
| 19002 | 33 | val fixedN: string | 
| 34 | val constN: string | |
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 35 | end; | 
| 0 | 36 | |
| 37 | signature LEXICON = | |
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 38 | sig | 
| 18 | 39 | include LEXICON0 | 
| 40 | val is_xid: string -> bool | |
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 41 | datatype token_kind = | 
| 27887 | 42 | Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | | 
| 28904 | 43 | NumSy | FloatSy | XNumSy | StrSy | Space | Comment | EOF | 
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 44 | datatype token = Token of token_kind * string * Position.range | 
| 27806 | 45 | val str_of_token: token -> string | 
| 46 | val pos_of_token: token -> Position.T | |
| 27887 | 47 | val is_proper: token -> bool | 
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 48 | val mk_eof: Position.T -> token | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 49 | val eof: token | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 50 | val is_eof: token -> bool | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 51 | val stopper: token Scan.stopper | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 52 | val idT: typ | 
| 3828 | 53 | val longidT: typ | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 54 | val varT: typ | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 55 | val tidT: typ | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 56 | val tvarT: typ | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 57 | val terminals: string list | 
| 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 58 | val is_terminal: string -> bool | 
| 28413 
ee73353fb87c
report_token/parse_token: back to context-less version;
 wenzelm parents: 
28407diff
changeset | 59 | val report_token: token -> unit | 
| 18 | 60 | val matching_tokens: token * token -> bool | 
| 61 | val valued_token: token -> bool | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 62 | val predef_term: string -> token option | 
| 27773 | 63 | val tokenize: Scan.lexicon -> bool -> SymbolPos.T list -> token list | 
| 4247 
9bba9251bb4d
added implode_xstr: string list -> string, explode_xstr: string -> string list;
 wenzelm parents: 
3828diff
changeset | 64 | end; | 
| 0 | 65 | |
| 14679 | 66 | structure Lexicon: LEXICON = | 
| 0 | 67 | struct | 
| 68 | ||
| 18 | 69 | (** is_identifier etc. **) | 
| 70 | ||
| 16150 | 71 | val is_identifier = Symbol.is_ident o Symbol.explode; | 
| 18 | 72 | |
| 14679 | 73 | fun is_ascii_identifier s = | 
| 74 | let val cs = Symbol.explode s | |
| 16150 | 75 | in forall Symbol.is_ascii cs andalso Symbol.is_ident cs end; | 
| 14679 | 76 | |
| 18 | 77 | fun is_xid s = | 
| 4703 | 78 | (case Symbol.explode s of | 
| 16150 | 79 | "_" :: cs => Symbol.is_ident cs | 
| 80 | | cs => Symbol.is_ident cs); | |
| 18 | 81 | |
| 330 
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
 clasohm parents: 
237diff
changeset | 82 | fun is_tid s = | 
| 4703 | 83 | (case Symbol.explode s of | 
| 16150 | 84 | "'" :: cs => Symbol.is_ident cs | 
| 18 | 85 | | _ => false); | 
| 86 | ||
| 0 | 87 | |
| 88 | ||
| 4703 | 89 | (** basic scanners **) | 
| 90 | ||
| 27773 | 91 | open BasicSymbolPos; | 
| 92 | ||
| 93 | fun !!! msg = SymbolPos.!!! ("Inner lexical error: " ^ msg);
 | |
| 94 | ||
| 95 | val scan_id = Scan.one (Symbol.is_letter o symbol) ::: Scan.many (Symbol.is_letdig o symbol); | |
| 96 | val scan_longid = scan_id @@@ (Scan.repeat1 ($$$ "." @@@ scan_id) >> flat); | |
| 97 | val scan_tid = $$$ "'" @@@ scan_id; | |
| 4703 | 98 | |
| 27773 | 99 | val scan_nat = Scan.many1 (Symbol.is_digit o symbol); | 
| 100 | val scan_int = $$$ "-" @@@ scan_nat || scan_nat; | |
| 28904 | 101 | val scan_natdot = scan_nat @@@ $$$ "." @@@ scan_nat; | 
| 102 | val scan_float = $$$ "-" @@@ scan_natdot || scan_natdot; | |
| 27773 | 103 | val scan_hex = $$$ "0" @@@ $$$ "x" @@@ Scan.many1 (Symbol.is_ascii_hex o symbol); | 
| 104 | val scan_bin = $$$ "0" @@@ $$$ "b" @@@ Scan.many1 (fn (s, _) => s = "0" orelse s = "1"); | |
| 4703 | 105 | |
| 27773 | 106 | val scan_id_nat = scan_id @@@ Scan.optional ($$$ "." @@@ scan_nat) []; | 
| 107 | val scan_var = $$$ "?" @@@ scan_id_nat; | |
| 108 | val scan_tvar = $$$ "?" @@@ $$$ "'" @@@ scan_id_nat; | |
| 4703 | 109 | |
| 110 | ||
| 111 | ||
| 18 | 112 | (** datatype token **) | 
| 0 | 113 | |
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 114 | datatype token_kind = | 
| 27887 | 115 | Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy | | 
| 28904 | 116 | NumSy | FloatSy | XNumSy | StrSy | Space | Comment | EOF; | 
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 117 | |
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 118 | datatype token = Token of token_kind * string * Position.range; | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 119 | |
| 27806 | 120 | fun str_of_token (Token (_, s, _)) = s; | 
| 121 | fun pos_of_token (Token (_, _, (pos, _))) = pos; | |
| 122 | ||
| 27887 | 123 | fun is_proper (Token (Space, _, _)) = false | 
| 124 | | is_proper (Token (Comment, _, _)) = false | |
| 125 | | is_proper _ = true; | |
| 126 | ||
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 127 | |
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 128 | (* stopper *) | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 129 | |
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 130 | fun mk_eof pos = Token (EOF, "", (pos, Position.none)); | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 131 | val eof = mk_eof Position.none; | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 132 | |
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 133 | fun is_eof (Token (EOF, _, _)) = true | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 134 | | is_eof _ = false; | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 135 | |
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 136 | val stopper = Scan.stopper (K eof) is_eof; | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 137 | |
| 0 | 138 | |
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 139 | (* terminal arguments *) | 
| 0 | 140 | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 141 | val idT = Type ("id", []);
 | 
| 3828 | 142 | val longidT = Type ("longid", []);
 | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 143 | val varT = Type ("var", []);
 | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 144 | val tidT = Type ("tid", []);
 | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 145 | val tvarT = Type ("tvar", []);
 | 
| 0 | 146 | |
| 29156 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 147 | val terminal_kinds = | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 148 |  [("id", IdentSy),
 | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 149 |   ("longid", LongIdentSy),
 | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 150 |   ("var", VarSy),
 | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 151 |   ("tid", TFreeSy),
 | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 152 |   ("tvar", TVarSy),
 | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 153 |   ("num", NumSy),
 | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 154 |   ("float_token", FloatSy),
 | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 155 |   ("xnum", XNumSy),
 | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 156 |   ("xstr", StrSy)];
 | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 157 | |
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 158 | val terminals = map #1 terminal_kinds; | 
| 20664 | 159 | val is_terminal = member (op =) terminals; | 
| 237 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
 wenzelm parents: 
164diff
changeset | 160 | |
| 0 | 161 | |
| 27887 | 162 | (* markup *) | 
| 163 | ||
| 164 | val token_kind_markup = | |
| 165 | fn Literal => Markup.literal | |
| 166 | | IdentSy => Markup.ident | |
| 167 | | LongIdentSy => Markup.ident | |
| 168 | | VarSy => Markup.var | |
| 169 | | TFreeSy => Markup.tfree | |
| 170 | | TVarSy => Markup.tvar | |
| 29318 
6337d1cb2ba0
added numeral, which supercedes num, xnum, float;
 wenzelm parents: 
29156diff
changeset | 171 | | NumSy => Markup.numeral | 
| 
6337d1cb2ba0
added numeral, which supercedes num, xnum, float;
 wenzelm parents: 
29156diff
changeset | 172 | | FloatSy => Markup.numeral | 
| 
6337d1cb2ba0
added numeral, which supercedes num, xnum, float;
 wenzelm parents: 
29156diff
changeset | 173 | | XNumSy => Markup.numeral | 
| 
6337d1cb2ba0
added numeral, which supercedes num, xnum, float;
 wenzelm parents: 
29156diff
changeset | 174 | | StrSy => Markup.inner_string | 
| 27887 | 175 | | Space => Markup.none | 
| 176 | | Comment => Markup.inner_comment | |
| 177 | | EOF => Markup.none; | |
| 178 | ||
| 28413 
ee73353fb87c
report_token/parse_token: back to context-less version;
 wenzelm parents: 
28407diff
changeset | 179 | fun report_token (Token (kind, _, (pos, _))) = | 
| 
ee73353fb87c
report_token/parse_token: back to context-less version;
 wenzelm parents: 
28407diff
changeset | 180 | Position.report (token_kind_markup kind) pos; | 
| 27887 | 181 | |
| 182 | ||
| 18 | 183 | (* matching_tokens *) | 
| 0 | 184 | |
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 185 | fun matching_tokens (Token (Literal, x, _), Token (Literal, y, _)) = x = y | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 186 | | matching_tokens (Token (k, _, _), Token (k', _, _)) = k = k'; | 
| 0 | 187 | |
| 188 | ||
| 18 | 189 | (* valued_token *) | 
| 0 | 190 | |
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 191 | fun valued_token (Token (Literal, _, _)) = false | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 192 | | valued_token (Token (EOF, _, _)) = false | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 193 | | valued_token _ = true; | 
| 0 | 194 | |
| 195 | ||
| 18 | 196 | (* predef_term *) | 
| 0 | 197 | |
| 29156 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 198 | fun predef_term s = | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 199 | (case AList.lookup (op =) terminal_kinds s of | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 200 | SOME sy => SOME (Token (sy, s, Position.no_range)) | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 201 | | NONE => NONE); | 
| 0 | 202 | |
| 203 | ||
| 4703 | 204 | (* xstr tokens *) | 
| 18 | 205 | |
| 14730 | 206 | val scan_chr = | 
| 27773 | 207 | $$$ "\\" |-- $$$ "'" || | 
| 208 | Scan.one ((fn s => s <> "\\" andalso s <> "'" andalso Symbol.is_regular s) o symbol) >> single || | |
| 209 | $$$ "'" --| Scan.ahead (~$$$ "'"); | |
| 14730 | 210 | |
| 211 | val scan_str = | |
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 212 | $$$ "'" @@@ $$$ "'" @@@ !!! "missing end of string" | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 213 | ((Scan.repeat scan_chr >> flat) @@@ $$$ "'" @@@ $$$ "'"); | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 214 | |
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 215 | val scan_str_body = | 
| 27773 | 216 | $$$ "'" |-- $$$ "'" |-- !!! "missing end of string" | 
| 217 | ((Scan.repeat scan_chr >> flat) --| $$$ "'" --| $$$ "'"); | |
| 14730 | 218 | |
| 0 | 219 | |
| 4703 | 220 | fun implode_xstr cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs)); | 
| 18 | 221 | |
| 4703 | 222 | fun explode_xstr str = | 
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 223 | (case Scan.read SymbolPos.stopper scan_str_body (SymbolPos.explode (str, Position.none)) of | 
| 27773 | 224 | SOME cs => map symbol cs | 
| 5868 | 225 |   | _ => error ("Inner lexical error: literal string expected at " ^ quote str));
 | 
| 18 | 226 | |
| 227 | ||
| 27806 | 228 | |
| 18 | 229 | (** tokenize **) | 
| 230 | ||
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 231 | fun token_leq (Token (_, s1, _), Token (_, s2, _)) = s1 <= s2; | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 232 | fun token kind ss = Token (kind, SymbolPos.content ss, SymbolPos.range ss); | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 233 | |
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 234 | fun tokenize lex xids syms = | 
| 18 | 235 | let | 
| 236 | val scan_xid = | |
| 27773 | 237 | if xids then $$$ "_" @@@ scan_id || scan_id | 
| 18 | 238 | else scan_id; | 
| 239 | ||
| 20096 | 240 | val scan_num = scan_hex || scan_bin || scan_int; | 
| 241 | ||
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 242 | val scan_val = | 
| 26007 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 243 | scan_tvar >> token TVarSy || | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 244 | scan_var >> token VarSy || | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 245 | scan_tid >> token TFreeSy || | 
| 28904 | 246 | scan_float >> token FloatSy || | 
| 26007 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 247 | scan_num >> token NumSy || | 
| 27773 | 248 | $$$ "#" @@@ scan_num >> token XNumSy || | 
| 26007 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 249 | scan_longid >> token LongIdentSy || | 
| 
3760d3ff4cce
basic scanners: produce symbol list instead of imploded string;
 wenzelm parents: 
24630diff
changeset | 250 | scan_xid >> token IdentSy; | 
| 18 | 251 | |
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 252 | val scan_lit = Scan.literal lex >> token Literal; | 
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 253 | |
| 4703 | 254 | val scan_token = | 
| 27887 | 255 | SymbolPos.scan_comment !!! >> token Comment || | 
| 256 | Scan.max token_leq scan_lit scan_val || | |
| 257 | scan_str >> token StrSy || | |
| 258 | Scan.many1 (Symbol.is_blank o symbol) >> token Space; | |
| 18 | 259 | in | 
| 27773 | 260 | (case Scan.error | 
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 261 | (Scan.finite SymbolPos.stopper (Scan.repeat scan_token)) syms of | 
| 27887 | 262 | (toks, []) => toks | 
| 27800 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 263 |     | (_, ss) => error ("Inner lexical error at: " ^ SymbolPos.content ss ^
 | 
| 
df444ddeff56
datatype token: maintain range, tuned representation;
 wenzelm parents: 
27773diff
changeset | 264 | Position.str_of (#1 (SymbolPos.range ss)))) | 
| 18 | 265 | end; | 
| 266 | ||
| 267 | ||
| 268 | ||
| 269 | (** scan variables **) | |
| 270 | ||
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 271 | (* scan_indexname *) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 272 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 273 | local | 
| 18 | 274 | |
| 27773 | 275 | val scan_vname = | 
| 18 | 276 | let | 
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 277 | fun nat n [] = n | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 278 | | nat n (c :: cs) = nat (n * 10 + (ord c - ord "0")) cs; | 
| 18 | 279 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 280 | fun idxname cs ds = (implode (rev cs), nat 0 ds); | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 281 | fun chop_idx [] ds = idxname [] ds | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 282 | | chop_idx (cs as (_ :: "\\<^isub>" :: _)) ds = idxname cs ds | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 283 | | chop_idx (cs as (_ :: "\\<^isup>" :: _)) ds = idxname cs ds | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 284 | | chop_idx (c :: cs) ds = | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 285 | if Symbol.is_digit c then chop_idx cs (c :: ds) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 286 | else idxname (c :: cs) ds; | 
| 18 | 287 | |
| 27773 | 288 | val scan = (scan_id >> map symbol) -- | 
| 289 | Scan.optional ($$$ "." |-- scan_nat >> (nat 0 o map symbol)) ~1; | |
| 18 | 290 | in | 
| 27773 | 291 | scan >> | 
| 292 | (fn (cs, ~1) => chop_idx (rev cs) [] | |
| 293 | | (cs, i) => (implode cs, i)) | |
| 18 | 294 | end; | 
| 295 | ||
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 296 | in | 
| 18 | 297 | |
| 27773 | 298 | val scan_indexname = $$$ "'" |-- scan_vname >> (fn (x, i) => ("'" ^ x, i)) || scan_vname;
 | 
| 15443 
07f78cc82a73
indexname function now parses type variables as well; changed input
 berghofe parents: 
14981diff
changeset | 299 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 300 | end; | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 301 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 302 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 303 | (* indexname *) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 304 | |
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 305 | fun read_indexname s = | 
| 27773 | 306 | (case Scan.read SymbolPos.stopper scan_indexname (SymbolPos.explode (s, Position.none)) of | 
| 15531 | 307 | SOME xi => xi | 
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 308 |   | _ => error ("Lexical error in variable name: " ^ quote s));
 | 
| 18 | 309 | |
| 310 | ||
| 4703 | 311 | (* read_var *) | 
| 18 | 312 | |
| 550 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 313 | fun const c = Const (c, dummyT); | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 314 | fun free x = Free (x, dummyT); | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 315 | fun var xi = Var (xi, dummyT); | 
| 
353eea6ec232
replaced id, var, tid, tvar by idT, varT, tidT, tvarT;
 wenzelm parents: 
376diff
changeset | 316 | |
| 4703 | 317 | fun read_var str = | 
| 18 | 318 | let | 
| 319 | val scan = | |
| 27773 | 320 | $$$ "?" |-- scan_indexname --| Scan.ahead (Scan.one SymbolPos.is_eof) >> var || | 
| 321 | Scan.many (Symbol.is_regular o symbol) >> (free o implode o map symbol); | |
| 322 | in the (Scan.read SymbolPos.stopper scan (SymbolPos.explode (str, Position.none))) end; | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 323 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 324 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 325 | (* read_variable *) | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 326 | |
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 327 | fun read_variable str = | 
| 27773 | 328 | let val scan = $$$ "?" |-- scan_indexname || scan_indexname | 
| 329 | in Scan.read SymbolPos.stopper scan (SymbolPos.explode (str, Position.none)) end; | |
| 4587 | 330 | |
| 331 | ||
| 19002 | 332 | (* specific identifiers *) | 
| 5260 | 333 | |
| 19002 | 334 | val constN = "\\<^const>"; | 
| 335 | val fixedN = "\\<^fixed>"; | |
| 336 | ||
| 5260 | 337 | |
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 338 | (* read numbers *) | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 339 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 340 | local | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 341 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 342 | fun nat cs = | 
| 27773 | 343 | Option.map (#1 o Library.read_int o map symbol) | 
| 344 | (Scan.read SymbolPos.stopper scan_nat cs); | |
| 5860 | 345 | |
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 346 | in | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 347 | |
| 27773 | 348 | fun read_nat s = nat (SymbolPos.explode (s, Position.none)); | 
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 349 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 350 | fun read_int s = | 
| 27773 | 351 | (case SymbolPos.explode (s, Position.none) of | 
| 352 |     ("-", _) :: cs => Option.map ~ (nat cs)
 | |
| 20313 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 353 | | cs => nat cs); | 
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 354 | |
| 
bf9101cc4385
renamed Syntax.indexname to Syntax.read_indexname;
 wenzelm parents: 
20165diff
changeset | 355 | end; | 
| 5860 | 356 | |
| 357 | ||
| 20096 | 358 | (* read_xnum: hex/bin/decimal *) | 
| 9326 | 359 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 360 | local | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 361 | |
| 20096 | 362 | val ten = ord "0" + 10; | 
| 363 | val a = ord "a"; | |
| 364 | val A = ord "A"; | |
| 23802 | 365 | val _ = a > A orelse sys_error "Bad ASCII"; | 
| 20096 | 366 | |
| 367 | fun remap_hex c = | |
| 368 | let val x = ord c in | |
| 369 | if x >= a then chr (x - a + ten) | |
| 370 | else if x >= A then chr (x - A + ten) | |
| 371 | else c | |
| 372 | end; | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 373 | |
| 21781 | 374 | fun leading_zeros ["0"] = 0 | 
| 375 |   | leading_zeros ("0" :: cs) = 1 + leading_zeros cs
 | |
| 376 | | leading_zeros _ = 0; | |
| 377 | ||
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 378 | in | 
| 15965 
f422f8283491
Use of IntInf.int instead of int in most numeric simprocs; avoids
 paulson parents: 
15570diff
changeset | 379 | |
| 9326 | 380 | fun read_xnum str = | 
| 381 | let | |
| 20096 | 382 | val (sign, radix, digs) = | 
| 383 | (case Symbol.explode (perhaps (try (unprefix "#")) str) of | |
| 384 | "0" :: "x" :: cs => (1, 16, map remap_hex cs) | |
| 385 | | "0" :: "b" :: cs => (1, 2, cs) | |
| 386 | | "-" :: cs => (~1, 10, cs) | |
| 387 | | cs => (1, 10, cs)); | |
| 29156 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 388 | in | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 389 |    {radix = radix,
 | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 390 | leading_zeros = leading_zeros digs, | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 391 | value = sign * #1 (Library.read_radix_int radix digs)} | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 392 | end; | 
| 9326 | 393 | |
| 15991 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 394 | end; | 
| 
670f8e4b5a98
added read_variable: optional question mark on input;
 wenzelm parents: 
15965diff
changeset | 395 | |
| 28904 | 396 | fun read_float str = | 
| 397 | let | |
| 398 | val (sign, cs) = | |
| 29156 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 399 | (case Symbol.explode str of | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 400 | "-" :: cs => (~1, cs) | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 401 | | cs => (1, cs)); | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 402 | val (intpart, fracpart) = | 
| 28904 | 403 | (case take_prefix Symbol.is_digit cs of | 
| 29156 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 404 | (intpart, "." :: fracpart) => (intpart, fracpart) | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 405 | | _ => raise Fail "read_float"); | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 406 | in | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 407 |    {mant = sign * #1 (Library.read_int (intpart @ fracpart)),
 | 
| 
89f76a58a378
renamed terminal category "float" to "float_token", to avoid name
 wenzelm parents: 
28904diff
changeset | 408 | exp = length fracpart} | 
| 28904 | 409 | end | 
| 410 | ||
| 0 | 411 | end; |