8 signature LEXICON0 = |
8 signature LEXICON0 = |
9 sig |
9 sig |
10 val is_identifier: string -> bool |
10 val is_identifier: string -> bool |
11 val is_ascii_identifier: string -> bool |
11 val is_ascii_identifier: string -> bool |
12 val is_tid: string -> bool |
12 val is_tid: string -> bool |
|
13 val scan_id: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
|
14 val scan_longid: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
|
15 val scan_tid: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
|
16 val scan_nat: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
|
17 val scan_int: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
|
18 val scan_hex: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
|
19 val scan_bin: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
|
20 val scan_var: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
|
21 val scan_tvar: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
13 val implode_xstr: string list -> string |
22 val implode_xstr: string list -> string |
14 val explode_xstr: string -> string list |
23 val explode_xstr: string -> string list |
15 val scan_id: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list |
|
16 val scan_longid: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list |
|
17 val scan_tid: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list |
|
18 val scan_nat: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list |
|
19 val scan_int: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list |
|
20 val scan_hex: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list |
|
21 val scan_bin: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list |
|
22 val scan_var: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list |
|
23 val scan_tvar: Symbol.symbol list -> Symbol.symbol list * Symbol.symbol list |
|
24 val read_indexname: string -> indexname |
24 val read_indexname: string -> indexname |
25 val read_var: string -> term |
25 val read_var: string -> term |
26 val read_variable: string -> indexname option |
26 val read_variable: string -> indexname option |
27 val const: string -> term |
27 val const: string -> term |
28 val free: string -> term |
28 val free: string -> term |
87 |
87 |
88 |
88 |
89 |
89 |
90 (** basic scanners **) |
90 (** basic scanners **) |
91 |
91 |
92 val scan_id = Scan.one Symbol.is_letter ::: Scan.many Symbol.is_letdig; |
92 open BasicSymbolPos; |
93 val scan_longid = scan_id @@@ (Scan.repeat1 ($$ "." ::: scan_id) >> flat); |
93 |
94 val scan_tid = $$ "'" ::: scan_id; |
94 fun !!! msg = SymbolPos.!!! ("Inner lexical error: " ^ msg); |
95 |
95 |
96 val scan_nat = Scan.many1 Symbol.is_digit; |
96 val scan_id = Scan.one (Symbol.is_letter o symbol) ::: Scan.many (Symbol.is_letdig o symbol); |
97 val scan_int = $$ "-" ::: scan_nat || scan_nat; |
97 val scan_longid = scan_id @@@ (Scan.repeat1 ($$$ "." @@@ scan_id) >> flat); |
98 val scan_hex = $$ "0" ::: $$ "x" ::: Scan.many1 Symbol.is_ascii_hex; |
98 val scan_tid = $$$ "'" @@@ scan_id; |
99 val scan_bin = $$ "0" ::: $$ "b" ::: Scan.many1 (fn s => s = "0" orelse s = "1"); |
99 |
100 |
100 val scan_nat = Scan.many1 (Symbol.is_digit o symbol); |
101 val scan_id_nat = scan_id @@@ Scan.optional ($$ "." ::: scan_nat) []; |
101 val scan_int = $$$ "-" @@@ scan_nat || scan_nat; |
102 val scan_var = $$ "?" ::: scan_id_nat; |
102 val scan_hex = $$$ "0" @@@ $$$ "x" @@@ Scan.many1 (Symbol.is_ascii_hex o symbol); |
103 val scan_tvar = $$ "?" ::: $$ "'" ::: scan_id_nat; |
103 val scan_bin = $$$ "0" @@@ $$$ "b" @@@ Scan.many1 (fn (s, _) => s = "0" orelse s = "1"); |
|
104 |
|
105 val scan_id_nat = scan_id @@@ Scan.optional ($$$ "." @@@ scan_nat) []; |
|
106 val scan_var = $$$ "?" @@@ scan_id_nat; |
|
107 val scan_tvar = $$$ "?" @@@ $$$ "'" @@@ scan_id_nat; |
104 |
108 |
105 |
109 |
106 |
110 |
107 (** datatype token **) |
111 (** datatype token **) |
108 |
112 |
201 | predef_term _ = NONE; |
205 | predef_term _ = NONE; |
202 |
206 |
203 |
207 |
204 (* xstr tokens *) |
208 (* xstr tokens *) |
205 |
209 |
206 fun lex_err msg prfx (cs, _) = |
|
207 "Inner lexical error: " ^ msg ^ " at " ^ quote (prfx ^ Symbol.beginning 10 cs); |
|
208 |
|
209 val scan_chr = |
210 val scan_chr = |
210 $$ "\\" |-- $$ "'" || |
211 $$$ "\\" |-- $$$ "'" || |
211 Scan.one (fn s => s <> "\\" andalso s <> "'" andalso Symbol.is_regular s) || |
212 Scan.one ((fn s => s <> "\\" andalso s <> "'" andalso Symbol.is_regular s) o symbol) >> single || |
212 $$ "'" --| Scan.ahead (~$$ "'"); |
213 $$$ "'" --| Scan.ahead (~$$$ "'"); |
213 |
214 |
214 val scan_str = |
215 val scan_str = |
215 $$ "'" |-- $$ "'" |-- !! (lex_err "missing end of string" "''") |
216 $$$ "'" |-- $$$ "'" |-- !!! "missing end of string" |
216 (Scan.repeat scan_chr --| $$ "'" --| $$ "'"); |
217 ((Scan.repeat scan_chr >> flat) --| $$$ "'" --| $$$ "'"); |
217 |
218 |
218 |
219 |
219 fun implode_xstr cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs)); |
220 fun implode_xstr cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs)); |
220 |
221 |
221 fun explode_xstr str = |
222 fun explode_xstr str = |
222 (case Scan.read Symbol.stopper scan_str (Symbol.explode str) of |
223 (case Scan.read SymbolPos.stopper scan_str (SymbolPos.explode (str, Position.none)) of |
223 SOME cs => cs |
224 SOME cs => map symbol cs |
224 | _ => error ("Inner lexical error: literal string expected at " ^ quote str)); |
225 | _ => error ("Inner lexical error: literal string expected at " ^ quote str)); |
225 |
226 |
226 |
227 |
227 (* nested comments *) |
|
228 |
|
229 val scan_cmt = |
|
230 Scan.depend (fn d => $$ "(" ^^ $$ "*" >> pair (d + 1)) || |
|
231 Scan.depend (fn 0 => Scan.fail | d => $$ "*" ^^ $$ ")" >> pair (d - 1)) || |
|
232 Scan.lift ($$ "*" --| Scan.ahead (~$$ ")")) || |
|
233 Scan.lift (Scan.one (fn s => s <> "*" andalso Symbol.is_regular s)); |
|
234 |
|
235 val scan_comment = |
|
236 $$ "(" -- $$ "*" -- !! (lex_err "missing end of comment" "(*") |
|
237 (Scan.pass 0 (Scan.repeat scan_cmt) -- $$ "*" -- $$ ")") |
|
238 >> K (); |
|
239 |
|
240 |
|
241 |
|
242 (** tokenize **) |
228 (** tokenize **) |
243 |
229 |
244 fun tokenize lex xids chs = |
230 fun tokenize lex xids inp = |
245 let |
231 let |
246 fun token kind cs = (kind, implode cs); |
232 fun token kind ss = (kind, #1 (SymbolPos.implode ss)); |
247 |
233 |
248 val scan_xid = |
234 val scan_xid = |
249 if xids then $$ "_" ::: scan_id || scan_id |
235 if xids then $$$ "_" @@@ scan_id || scan_id |
250 else scan_id; |
236 else scan_id; |
251 |
237 |
252 val scan_num = scan_hex || scan_bin || scan_int; |
238 val scan_num = scan_hex || scan_bin || scan_int; |
253 |
239 |
254 val scan_val = |
240 val scan_val = |
255 scan_tvar >> token TVarSy || |
241 scan_tvar >> token TVarSy || |
256 scan_var >> token VarSy || |
242 scan_var >> token VarSy || |
257 scan_tid >> token TFreeSy || |
243 scan_tid >> token TFreeSy || |
258 scan_num >> token NumSy || |
244 scan_num >> token NumSy || |
259 $$ "#" ::: scan_num >> token XNumSy || |
245 $$$ "#" @@@ scan_num >> token XNumSy || |
260 scan_longid >> token LongIdentSy || |
246 scan_longid >> token LongIdentSy || |
261 scan_xid >> token IdentSy; |
247 scan_xid >> token IdentSy; |
262 |
248 |
263 val scan_lit = Scan.literal lex >> token Token; |
249 val scan_lit = Scan.literal lex >> token Token; |
264 |
250 |
265 val scan_token = |
251 val scan_token = |
266 scan_comment >> K NONE || |
252 SymbolPos.scan_comment !!! >> K NONE || |
267 Scan.max (op <= o pairself snd) scan_lit scan_val >> (fn (tk, s) => SOME (tk s)) || |
253 Scan.max (op <= o pairself snd) scan_lit scan_val >> (fn (tk, s) => SOME (tk s)) || |
268 scan_str >> (SOME o StrSy o implode_xstr) || |
254 scan_str >> (SOME o StrSy o implode_xstr o map symbol) || |
269 Scan.one Symbol.is_blank >> K NONE; |
255 Scan.one (Symbol.is_blank o symbol) >> K NONE; |
270 in |
256 in |
271 (case Scan.error (Scan.finite Symbol.stopper (Scan.repeat scan_token)) chs of |
257 (case Scan.error |
|
258 (Scan.finite SymbolPos.stopper (Scan.repeat scan_token)) inp of |
272 (toks, []) => map_filter I toks |
259 (toks, []) => map_filter I toks |
273 | (_, cs) => error ("Inner lexical error at: " ^ quote (implode cs))) |
260 | (_, cs) => |
|
261 let val (s, (pos, _)) = SymbolPos.implode cs |
|
262 in error ("Inner lexical error at: " ^ quote s ^ Position.str_of pos) end) |
274 end; |
263 end; |
275 |
264 |
276 |
265 |
277 |
266 |
278 (** scan variables **) |
267 (** scan variables **) |
279 |
268 |
280 (* scan_indexname *) |
269 (* scan_indexname *) |
281 |
270 |
282 local |
271 local |
283 |
272 |
284 fun scan_vname chrs = |
273 val scan_vname = |
285 let |
274 let |
286 fun nat n [] = n |
275 fun nat n [] = n |
287 | nat n (c :: cs) = nat (n * 10 + (ord c - ord "0")) cs; |
276 | nat n (c :: cs) = nat (n * 10 + (ord c - ord "0")) cs; |
288 |
277 |
289 fun idxname cs ds = (implode (rev cs), nat 0 ds); |
278 fun idxname cs ds = (implode (rev cs), nat 0 ds); |
292 | chop_idx (cs as (_ :: "\\<^isup>" :: _)) ds = idxname cs ds |
281 | chop_idx (cs as (_ :: "\\<^isup>" :: _)) ds = idxname cs ds |
293 | chop_idx (c :: cs) ds = |
282 | chop_idx (c :: cs) ds = |
294 if Symbol.is_digit c then chop_idx cs (c :: ds) |
283 if Symbol.is_digit c then chop_idx cs (c :: ds) |
295 else idxname (c :: cs) ds; |
284 else idxname (c :: cs) ds; |
296 |
285 |
297 val scan = scan_id -- Scan.optional ($$ "." |-- scan_nat >> nat 0) ~1; |
286 val scan = (scan_id >> map symbol) -- |
|
287 Scan.optional ($$$ "." |-- scan_nat >> (nat 0 o map symbol)) ~1; |
298 in |
288 in |
299 (case scan chrs of |
289 scan >> |
300 ((cs, ~1), cs') => (chop_idx (rev cs) [], cs') |
290 (fn (cs, ~1) => chop_idx (rev cs) [] |
301 | ((cs, i), cs') => ((implode cs, i), cs')) |
291 | (cs, i) => (implode cs, i)) |
302 end; |
292 end; |
303 |
293 |
304 in |
294 in |
305 |
295 |
306 val scan_indexname = |
296 val scan_indexname = $$$ "'" |-- scan_vname >> (fn (x, i) => ("'" ^ x, i)) || scan_vname; |
307 $$ "'" |-- scan_vname >> (fn (x, i) => ("'" ^ x, i)) |
|
308 || scan_vname; |
|
309 |
297 |
310 end; |
298 end; |
311 |
299 |
312 |
300 |
313 (* indexname *) |
301 (* indexname *) |
314 |
302 |
315 fun read_indexname s = |
303 fun read_indexname s = |
316 (case Scan.read Symbol.stopper scan_indexname (Symbol.explode s) of |
304 (case Scan.read SymbolPos.stopper scan_indexname (SymbolPos.explode (s, Position.none)) of |
317 SOME xi => xi |
305 SOME xi => xi |
318 | _ => error ("Lexical error in variable name: " ^ quote s)); |
306 | _ => error ("Lexical error in variable name: " ^ quote s)); |
319 |
307 |
320 |
308 |
321 (* read_var *) |
309 (* read_var *) |
325 fun var xi = Var (xi, dummyT); |
313 fun var xi = Var (xi, dummyT); |
326 |
314 |
327 fun read_var str = |
315 fun read_var str = |
328 let |
316 let |
329 val scan = |
317 val scan = |
330 $$ "?" |-- scan_indexname --| Scan.ahead (Scan.one Symbol.is_eof) >> var || |
318 $$$ "?" |-- scan_indexname --| Scan.ahead (Scan.one SymbolPos.is_eof) >> var || |
331 Scan.many Symbol.is_regular >> (free o implode); |
319 Scan.many (Symbol.is_regular o symbol) >> (free o implode o map symbol); |
332 in the (Scan.read Symbol.stopper scan (Symbol.explode str)) end; |
320 in the (Scan.read SymbolPos.stopper scan (SymbolPos.explode (str, Position.none))) end; |
333 |
321 |
334 |
322 |
335 (* read_variable *) |
323 (* read_variable *) |
336 |
324 |
337 fun read_variable str = |
325 fun read_variable str = |
338 let val scan = $$ "?" |-- scan_indexname || scan_indexname |
326 let val scan = $$$ "?" |-- scan_indexname || scan_indexname |
339 in Scan.read Symbol.stopper scan (Symbol.explode str) end; |
327 in Scan.read SymbolPos.stopper scan (SymbolPos.explode (str, Position.none)) end; |
340 |
328 |
341 |
329 |
342 (* specific identifiers *) |
330 (* specific identifiers *) |
343 |
331 |
344 val constN = "\\<^const>"; |
332 val constN = "\\<^const>"; |