7 signature LEXICON0 = |
7 signature LEXICON0 = |
8 sig |
8 sig |
9 val is_identifier: string -> bool |
9 val is_identifier: string -> bool |
10 val is_ascii_identifier: string -> bool |
10 val is_ascii_identifier: string -> bool |
11 val is_tid: string -> bool |
11 val is_tid: string -> bool |
12 val scan_id: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
12 val scan_id: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list |
13 val scan_longid: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
13 val scan_longid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list |
14 val scan_tid: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
14 val scan_tid: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list |
15 val scan_nat: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
15 val scan_nat: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list |
16 val scan_int: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
16 val scan_int: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list |
17 val scan_hex: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
17 val scan_hex: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list |
18 val scan_bin: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
18 val scan_bin: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list |
19 val scan_var: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
19 val scan_var: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list |
20 val scan_tvar: SymbolPos.T list -> SymbolPos.T list * SymbolPos.T list |
20 val scan_tvar: Symbol_Pos.T list -> Symbol_Pos.T list * Symbol_Pos.T list |
21 val implode_xstr: string list -> string |
21 val implode_xstr: string list -> string |
22 val explode_xstr: string -> string list |
22 val explode_xstr: string -> string list |
23 val read_indexname: string -> indexname |
23 val read_indexname: string -> indexname |
24 val read_var: string -> term |
24 val read_var: string -> term |
25 val read_variable: string -> indexname option |
25 val read_variable: string -> indexname option |
58 val is_terminal: string -> bool |
58 val is_terminal: string -> bool |
59 val report_token: token -> unit |
59 val report_token: token -> unit |
60 val matching_tokens: token * token -> bool |
60 val matching_tokens: token * token -> bool |
61 val valued_token: token -> bool |
61 val valued_token: token -> bool |
62 val predef_term: string -> token option |
62 val predef_term: string -> token option |
63 val tokenize: Scan.lexicon -> bool -> SymbolPos.T list -> token list |
63 val tokenize: Scan.lexicon -> bool -> Symbol_Pos.T list -> token list |
64 end; |
64 end; |
65 |
65 |
66 structure Lexicon: LEXICON = |
66 structure Lexicon: LEXICON = |
67 struct |
67 struct |
68 |
68 |
86 |
86 |
87 |
87 |
88 |
88 |
89 (** basic scanners **) |
89 (** basic scanners **) |
90 |
90 |
91 open BasicSymbolPos; |
91 open Basic_Symbol_Pos; |
92 |
92 |
93 fun !!! msg = SymbolPos.!!! ("Inner lexical error: " ^ msg); |
93 fun !!! msg = Symbol_Pos.!!! ("Inner lexical error: " ^ msg); |
94 |
94 |
95 val scan_id = Scan.one (Symbol.is_letter o symbol) ::: Scan.many (Symbol.is_letdig o symbol); |
95 val scan_id = Scan.one (Symbol.is_letter o symbol) ::: Scan.many (Symbol.is_letdig o symbol); |
96 val scan_longid = scan_id @@@ (Scan.repeat1 ($$$ "." @@@ scan_id) >> flat); |
96 val scan_longid = scan_id @@@ (Scan.repeat1 ($$$ "." @@@ scan_id) >> flat); |
97 val scan_tid = $$$ "'" @@@ scan_id; |
97 val scan_tid = $$$ "'" @@@ scan_id; |
98 |
98 |
218 |
218 |
219 |
219 |
220 fun implode_xstr cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs)); |
220 fun implode_xstr cs = enclose "''" "''" (implode (map (fn "'" => "\\'" | c => c) cs)); |
221 |
221 |
222 fun explode_xstr str = |
222 fun explode_xstr str = |
223 (case Scan.read SymbolPos.stopper scan_str_body (SymbolPos.explode (str, Position.none)) of |
223 (case Scan.read Symbol_Pos.stopper scan_str_body (Symbol_Pos.explode (str, Position.none)) of |
224 SOME cs => map symbol cs |
224 SOME cs => map symbol cs |
225 | _ => error ("Inner lexical error: literal string expected at " ^ quote str)); |
225 | _ => error ("Inner lexical error: literal string expected at " ^ quote str)); |
226 |
226 |
227 |
227 |
228 |
228 |
229 (** tokenize **) |
229 (** tokenize **) |
230 |
230 |
231 fun token_leq (Token (_, s1, _), Token (_, s2, _)) = s1 <= s2; |
231 fun token_leq (Token (_, s1, _), Token (_, s2, _)) = s1 <= s2; |
232 fun token kind ss = Token (kind, SymbolPos.content ss, SymbolPos.range ss); |
232 fun token kind ss = Token (kind, Symbol_Pos.content ss, Symbol_Pos.range ss); |
233 |
233 |
234 fun tokenize lex xids syms = |
234 fun tokenize lex xids syms = |
235 let |
235 let |
236 val scan_xid = |
236 val scan_xid = |
237 if xids then $$$ "_" @@@ scan_id || scan_id |
237 if xids then $$$ "_" @@@ scan_id || scan_id |
250 scan_xid >> token IdentSy; |
250 scan_xid >> token IdentSy; |
251 |
251 |
252 val scan_lit = Scan.literal lex >> token Literal; |
252 val scan_lit = Scan.literal lex >> token Literal; |
253 |
253 |
254 val scan_token = |
254 val scan_token = |
255 SymbolPos.scan_comment !!! >> token Comment || |
255 Symbol_Pos.scan_comment !!! >> token Comment || |
256 Scan.max token_leq scan_lit scan_val || |
256 Scan.max token_leq scan_lit scan_val || |
257 scan_str >> token StrSy || |
257 scan_str >> token StrSy || |
258 Scan.many1 (Symbol.is_blank o symbol) >> token Space; |
258 Scan.many1 (Symbol.is_blank o symbol) >> token Space; |
259 in |
259 in |
260 (case Scan.error |
260 (case Scan.error |
261 (Scan.finite SymbolPos.stopper (Scan.repeat scan_token)) syms of |
261 (Scan.finite Symbol_Pos.stopper (Scan.repeat scan_token)) syms of |
262 (toks, []) => toks |
262 (toks, []) => toks |
263 | (_, ss) => error ("Inner lexical error at: " ^ SymbolPos.content ss ^ |
263 | (_, ss) => error ("Inner lexical error at: " ^ Symbol_Pos.content ss ^ |
264 Position.str_of (#1 (SymbolPos.range ss)))) |
264 Position.str_of (#1 (Symbol_Pos.range ss)))) |
265 end; |
265 end; |
266 |
266 |
267 |
267 |
268 |
268 |
269 (** scan variables **) |
269 (** scan variables **) |
315 fun var xi = Var (xi, dummyT); |
315 fun var xi = Var (xi, dummyT); |
316 |
316 |
317 fun read_var str = |
317 fun read_var str = |
318 let |
318 let |
319 val scan = |
319 val scan = |
320 $$$ "?" |-- scan_indexname --| Scan.ahead (Scan.one SymbolPos.is_eof) >> var || |
320 $$$ "?" |-- scan_indexname --| Scan.ahead (Scan.one Symbol_Pos.is_eof) >> var || |
321 Scan.many (Symbol.is_regular o symbol) >> (free o implode o map symbol); |
321 Scan.many (Symbol.is_regular o symbol) >> (free o implode o map symbol); |
322 in the (Scan.read SymbolPos.stopper scan (SymbolPos.explode (str, Position.none))) end; |
322 in the (Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode (str, Position.none))) end; |
323 |
323 |
324 |
324 |
325 (* read_variable *) |
325 (* read_variable *) |
326 |
326 |
327 fun read_variable str = |
327 fun read_variable str = |
328 let val scan = $$$ "?" |-- scan_indexname || scan_indexname |
328 let val scan = $$$ "?" |-- scan_indexname || scan_indexname |
329 in Scan.read SymbolPos.stopper scan (SymbolPos.explode (str, Position.none)) end; |
329 in Scan.read Symbol_Pos.stopper scan (Symbol_Pos.explode (str, Position.none)) end; |
330 |
330 |
331 |
331 |
332 (* specific identifiers *) |
332 (* specific identifiers *) |
333 |
333 |
334 val constN = "\\<^const>"; |
334 val constN = "\\<^const>"; |
339 |
339 |
340 local |
340 local |
341 |
341 |
342 fun nat cs = |
342 fun nat cs = |
343 Option.map (#1 o Library.read_int o map symbol) |
343 Option.map (#1 o Library.read_int o map symbol) |
344 (Scan.read SymbolPos.stopper scan_nat cs); |
344 (Scan.read Symbol_Pos.stopper scan_nat cs); |
345 |
345 |
346 in |
346 in |
347 |
347 |
348 fun read_nat s = nat (SymbolPos.explode (s, Position.none)); |
348 fun read_nat s = nat (Symbol_Pos.explode (s, Position.none)); |
349 |
349 |
350 fun read_int s = |
350 fun read_int s = |
351 (case SymbolPos.explode (s, Position.none) of |
351 (case Symbol_Pos.explode (s, Position.none) of |
352 ("-", _) :: cs => Option.map ~ (nat cs) |
352 ("-", _) :: cs => Option.map ~ (nat cs) |
353 | cs => nat cs); |
353 | cs => nat cs); |
354 |
354 |
355 end; |
355 end; |
356 |
356 |