43 sig |
43 sig |
44 val is_identifier: string -> bool |
44 val is_identifier: string -> bool |
45 val string_of_vname: indexname -> string |
45 val string_of_vname: indexname -> string |
46 val scan_varname: string list -> indexname * string list |
46 val scan_varname: string list -> indexname * string list |
47 val scan_var: string -> term |
47 val scan_var: string -> term |
|
48 val const: string -> term |
|
49 val free: string -> term |
|
50 val var: indexname -> term |
48 end; |
51 end; |
49 |
52 |
50 signature LEXICON = |
53 signature LEXICON = |
51 sig |
54 sig |
52 include SCANNER |
55 include SCANNER |
117 Token of string | |
122 Token of string | |
118 IdentSy of string | |
123 IdentSy of string | |
119 VarSy of string | |
124 VarSy of string | |
120 TFreeSy of string | |
125 TFreeSy of string | |
121 TVarSy of string | |
126 TVarSy of string | |
|
127 NumSy of string | |
|
128 StrSy of string | |
122 EndToken; |
129 EndToken; |
123 |
130 |
124 |
131 |
125 (* terminal arguments *) |
132 (* terminal arguments *) |
126 |
133 |
127 val id = "id"; |
134 val idT = Type ("id", []); |
128 val var = "var"; |
135 val varT = Type ("var", []); |
129 val tid = "tid"; |
136 val tidT = Type ("tid", []); |
130 val tvar = "tvar"; |
137 val tvarT = Type ("tvar", []); |
131 |
138 |
132 val terminals = [id, var, tid, tvar]; |
139 val terminals = ["id", "var", "tid", "tvar", "xnum", "xstr"]; |
133 |
140 |
134 fun is_terminal s = s mem terminals; |
141 fun is_terminal s = s mem terminals; |
135 |
142 |
136 |
143 |
137 (* str_of_token *) |
144 (* str_of_token *) |
139 fun str_of_token (Token s) = s |
146 fun str_of_token (Token s) = s |
140 | str_of_token (IdentSy s) = s |
147 | str_of_token (IdentSy s) = s |
141 | str_of_token (VarSy s) = s |
148 | str_of_token (VarSy s) = s |
142 | str_of_token (TFreeSy s) = s |
149 | str_of_token (TFreeSy s) = s |
143 | str_of_token (TVarSy s) = s |
150 | str_of_token (TVarSy s) = s |
|
151 | str_of_token (NumSy s) = s |
|
152 | str_of_token (StrSy s) = s |
144 | str_of_token EndToken = "EOF"; |
153 | str_of_token EndToken = "EOF"; |
145 |
154 |
146 |
155 |
147 (* display_token *) |
156 (* display_token *) |
148 |
157 |
149 fun display_token (Token s) = quote s |
158 fun display_token (Token s) = quote s |
150 | display_token (IdentSy s) = "id(" ^ s ^ ")" |
159 | display_token (IdentSy s) = "id(" ^ s ^ ")" |
151 | display_token (VarSy s) = "var(" ^ s ^ ")" |
160 | display_token (VarSy s) = "var(" ^ s ^ ")" |
152 | display_token (TFreeSy s) = "tid(" ^ s ^ ")" |
161 | display_token (TFreeSy s) = "tid(" ^ s ^ ")" |
153 | display_token (TVarSy s) = "tvar(" ^ s ^ ")" |
162 | display_token (TVarSy s) = "tvar(" ^ s ^ ")" |
|
163 | display_token (NumSy s) = "xnum(" ^ s ^ ")" |
|
164 | display_token (StrSy s) = "xstr(" ^ s ^ ")" |
154 | display_token EndToken = ""; |
165 | display_token EndToken = ""; |
155 |
166 |
156 |
167 |
157 (* matching_tokens *) |
168 (* matching_tokens *) |
158 |
169 |
159 fun matching_tokens (Token x, Token y) = (x = y) |
170 fun matching_tokens (Token x, Token y) = (x = y) |
160 | matching_tokens (IdentSy _, IdentSy _) = true |
171 | matching_tokens (IdentSy _, IdentSy _) = true |
161 | matching_tokens (VarSy _, VarSy _) = true |
172 | matching_tokens (VarSy _, VarSy _) = true |
162 | matching_tokens (TFreeSy _, TFreeSy _) = true |
173 | matching_tokens (TFreeSy _, TFreeSy _) = true |
163 | matching_tokens (TVarSy _, TVarSy _) = true |
174 | matching_tokens (TVarSy _, TVarSy _) = true |
|
175 | matching_tokens (NumSy _, NumSy _) = true |
|
176 | matching_tokens (StrSy _, StrSy _) = true |
164 | matching_tokens (EndToken, EndToken) = true |
177 | matching_tokens (EndToken, EndToken) = true |
165 | matching_tokens _ = false; |
178 | matching_tokens _ = false; |
166 |
179 |
167 |
180 |
168 (* token_assoc *) |
181 (* token_assoc *) |
182 fun valued_token (Token _) = false |
195 fun valued_token (Token _) = false |
183 | valued_token (IdentSy _) = true |
196 | valued_token (IdentSy _) = true |
184 | valued_token (VarSy _) = true |
197 | valued_token (VarSy _) = true |
185 | valued_token (TFreeSy _) = true |
198 | valued_token (TFreeSy _) = true |
186 | valued_token (TVarSy _) = true |
199 | valued_token (TVarSy _) = true |
|
200 | valued_token (NumSy _) = true |
|
201 | valued_token (StrSy _) = true |
187 | valued_token EndToken = false; |
202 | valued_token EndToken = false; |
188 |
203 |
189 |
204 |
190 (* predef_term *) |
205 (* predef_term *) |
191 |
206 |
192 fun predef_term name = |
207 fun predef_term "id" = Some (IdentSy "id") |
193 if name = id then Some (IdentSy name) |
208 | predef_term "var" = Some (VarSy "var") |
194 else if name = var then Some (VarSy name) |
209 | predef_term "tid" = Some (TFreeSy "tid") |
195 else if name = tid then Some (TFreeSy name) |
210 | predef_term "tvar" = Some (TVarSy "tvar") |
196 else if name = tvar then Some (TVarSy name) |
211 | predef_term "xnum" = Some (NumSy "xnum") |
197 else None; |
212 | predef_term "xstr" = Some (StrSy "xstr") |
|
213 | predef_term _ = None; |
198 |
214 |
199 |
215 |
200 |
216 |
201 (** datatype lexicon **) |
217 (** datatype lexicon **) |
202 |
218 |
360 if xids then $$ "_" ^^ scan_id || scan_id |
378 if xids then $$ "_" ^^ scan_id || scan_id |
361 else scan_id; |
379 else scan_id; |
362 |
380 |
363 val scan_lit = scan_literal lex >> pair Token; |
381 val scan_lit = scan_literal lex >> pair Token; |
364 |
382 |
365 val scan_ident = |
383 val scan_val = |
366 $$ "?" ^^ $$ "'" ^^ scan_id_nat >> pair TVarSy || |
384 $$ "?" ^^ $$ "'" ^^ scan_id_nat >> pair TVarSy || |
367 $$ "?" ^^ scan_id_nat >> pair VarSy || |
385 $$ "?" ^^ scan_id_nat >> pair VarSy || |
368 $$ "'" ^^ scan_id >> pair TFreeSy || |
386 $$ "'" ^^ scan_id >> pair TFreeSy || |
|
387 $$ "#" ^^ scan_int >> pair NumSy || |
369 scan_xid >> pair IdentSy; |
388 scan_xid >> pair IdentSy; |
370 |
389 |
371 fun scan_tokens [] rev_toks = rev (EndToken :: rev_toks) |
390 fun scan_str ("'" :: "'" :: cs) = ([], cs) |
372 | scan_tokens (chs as c :: cs) rev_toks = |
391 | scan_str ("\\" :: c :: cs) = apfst (cons c) (scan_str cs) |
373 if is_blank c then scan_tokens cs rev_toks |
392 | scan_str (c :: cs) = apfst (cons c) (scan_str cs) |
|
393 | scan_str [] = raise ERROR; |
|
394 |
|
395 |
|
396 fun scan (rev_toks, []) = rev (EndToken :: rev_toks) |
|
397 | scan (rev_toks, chs as "'" :: "'" :: cs) = |
|
398 let |
|
399 val (cs', cs'') = scan_str cs handle ERROR => |
|
400 error ("Lexical error: missing quotes at end of string " ^ |
|
401 quote (implode chs)); |
|
402 in |
|
403 scan (StrSy (implode cs') :: rev_toks, cs'') |
|
404 end |
|
405 | scan (rev_toks, chs as c :: cs) = |
|
406 if is_blank c then scan (rev_toks, cs) |
374 else |
407 else |
375 (case max_of scan_lit scan_ident chs of |
408 (case max_of scan_lit scan_val chs of |
376 (None, _) => error ("Lexical error at: " ^ quote (implode chs)) |
409 (None, _) => error ("Lexical error at: " ^ quote (implode chs)) |
377 | (Some (tk, s), chs') => scan_tokens chs' (tk s :: rev_toks)); |
410 | (Some (tk, s), chs') => scan (tk s :: rev_toks, chs')); |
378 in |
411 in |
379 scan_tokens (explode str) [] |
412 scan ([], explode str) |
380 end; |
413 end; |
381 |
414 |
382 |
415 |
383 |
416 |
384 (** scan variables **) |
417 (** scan variables **) |
413 => error ("scan_varname: bad varname " ^ quote (implode chs)); |
446 => error ("scan_varname: bad varname " ^ quote (implode chs)); |
414 |
447 |
415 |
448 |
416 (* scan_var *) |
449 (* scan_var *) |
417 |
450 |
|
451 fun const c = Const (c, dummyT); |
|
452 fun free x = Free (x, dummyT); |
|
453 fun var xi = Var (xi, dummyT); |
|
454 |
418 fun scan_var str = |
455 fun scan_var str = |
419 let |
456 let |
420 fun tvar (x, i) = Var (("'" ^ x, i), dummyT); |
457 fun tvar (x, i) = var ("'" ^ x, i); |
421 fun var x_i = Var (x_i, dummyT); |
|
422 fun free x = Free (x, dummyT); |
|
423 |
458 |
424 val scan = |
459 val scan = |
425 $$ "?" -- $$ "'" -- scan_vname -- scan_end >> (tvar o #2 o #1) || |
460 $$ "?" -- $$ "'" -- scan_vname -- scan_end >> (tvar o #2 o #1) || |
426 $$ "?" -- scan_vname -- scan_end >> (var o #2 o #1) || |
461 $$ "?" -- scan_vname -- scan_end >> (var o #2 o #1) || |
427 scan_rest >> (free o implode); |
462 scan_rest >> (free o implode); |