1 (* Title: Pure/Thy/thy_parse.ML
3 Author: Markus Wenzel, TU Muenchen
5 The parser for theory files.
(*Fixity: the four sequencing operators are left-associative infixes at
  precedence level 5.*)
8 infix 5 -- --$$ $$-- ^^;
(*error reporting and parser combinators*)
15 val !! : ('a -> 'b * 'c) -> 'a -> 'b * 'c
16 val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
17 val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
18 val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
19 val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
(*keywords, alone or next to another parser*)
20 val $$ : string -> token list -> string * token list
21 val $$-- : string * (token list -> 'b * 'c) -> token list -> 'b * 'c
22 val --$$ : ('a -> 'b * token list) * string -> 'a -> 'b * token list
(*basic token parsers*)
23 val ident: token list -> string * token list
24 val long_ident: token list -> string * token list
25 val long_id: token list -> string * token list
26 val type_var: token list -> string * token list
27 val type_args: token list -> string list * token list
28 val nat: token list -> string * token list
29 val string: token list -> string * token list
30 val verbatim: token list -> string * token list
(*optional items, repetition and separated lists*)
31 val empty: 'a -> 'b list * 'a
32 val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
33 val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
34 val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
35 val enum: string -> (token list -> 'a * token list)
36 -> token list -> 'a list * token list
37 val enum1: string -> (token list -> 'a * token list)
38 -> token list -> 'a list * token list
39 val list: (token list -> 'a * token list)
40 -> token list -> 'a list * token list
41 val list1: (token list -> 'a * token list)
42 -> token list -> 'a list * token list
(*parsers for items of theory files; each yields text*)
43 val name: token list -> string * token list
44 val sort: token list -> string * token list
45 val typ: token list -> string * token list
46 val opt_infix: token list -> string * token list
47 val opt_mixfix: token list -> string * token list
48 val opt_witness: token list -> string * token list
49 val const_decls: token list -> string * token list
(*theory syntax: a lexicon together with named section parsers*)
51 val get_lexicon: syntax -> Scan.lexicon;
52 val make_syntax: string list ->
53 (string * (token list -> (string * string) * token list)) list -> syntax
54 val parse_thy: syntax -> string list -> string
(*building section entries: a section name paired with its parser*)
55 val section: string -> string -> (token list -> string * token list)
56 -> (string * (token list -> (string * string) * token list))
57 val axm_section: string -> string
58 -> (token list -> (string * string list) * token list)
59 -> (string * (token list -> (string * string) * token list))
60 val pure_keywords: string list
62 (string * (token list -> (string * string) * token list)) list
63 (*items for building strings*)
64 val cat: string -> string -> string
65 val parens: string -> string
66 val brackets: string -> string
67 val mk_list: string list -> string
68 val mk_big_list: string list -> string
69 val mk_pair: string * string -> string
70 val mk_triple: string * string * string -> string
71 val mk_triple1: (string * string) * string -> string
72 val mk_triple2: string * (string * string) -> string
73 val strip_quotes: string -> string
77 structure ThyParse : THY_PARSE=
83 (** parser toolbox **)
(*A token: its kind, its text, and an int that the error messages in this
  file report as the line number.*)
85 type token = token_kind * string * int;
(*Raised when a parser does not accept the next token.  Going by the
  message built in !!, the fields are: what was expected, what was found,
  and the line number.*)
90 exception SYNTAX_ERROR of string * string * int;
(*Raise SYNTAX_ERROR: s1 = expected, s2 = found, n = line number.*)
92 fun syn_err s1 s2 n = raise SYNTAX_ERROR (s1, s2, n);
(*Running out of tokens is reported through error (defined elsewhere)
  rather than through SYNTAX_ERROR.*)
94 fun eof_err () = error "Unexpected end-of-file";
96 (*Similar to Prolog's cut: reports any syntax error instead of backtracking
97 through a superior || *)
(*parse: the parser to protect; toks: its input.  A SYNTAX_ERROR escaping
  from parse is turned into a call of error whose message names the line,
  the expected item and the item actually found.*)
98 fun !! parse toks = parse toks
99 handle SYNTAX_ERROR (s1, s2, n) => error ("Syntax error on line " ^
100 string_of_int n ^ ": " ^ s1 ^ " expected and " ^ s2 ^ " was found");
103 (* parser combinators *)
(*Apply f to the result of parse; the remaining input is passed on
  unchanged.*)
105 fun (parse >> f) toks = apfst f (parse toks);
(*Alternative: try parse1; if it raises SYNTAX_ERROR, run parse2 on the
  same input instead.  No other exception is handled here.*)
107 fun (parse1 || parse2) toks =
108 parse1 toks handle SYNTAX_ERROR _ => parse2 toks;
(*Sequence: run parse1, then run parse2 on the input left over by parse1.
  According to the signature, both results are returned as a pair together
  with the input remaining after parse2.*)
110 fun (parse1 -- parse2) toks =
112 val (x, toks') = parse1 toks;
113 val (y, toks'') = parse2 toks';
(*Like --, but for parsers yielding strings: the two results are
  concatenated.*)
118 fun (parse1 ^^ parse2) = parse1 -- parse2 >> op ^;
121 (* generic parsers *)
(*Accept the keyword a: succeeds when the next token has kind Keyword and
  text a, yielding a and the remaining tokens.  Any other token is a syntax
  error at that token's line; empty input goes to eof_err.*)
123 fun $$ a ((k, b, n) :: toks) =
124 if k = Keyword andalso a = b then (a, toks)
125 else syn_err (quote a) (quote b) n
126 | $$ _ [] = eof_err ();
(*Keyword a followed by parse; only the result of parse is kept.*)
128 fun (a $$-- parse) = $$ a -- parse >> #2;
(*parse followed by keyword a; only the result of parse is kept.*)
130 fun (parse --$$ a) = parse -- $$ a >> #1;
(*Accept one token of kind k1 and yield its text.  A token of any other
  kind is a syntax error that names the expected kind via name_of_kind;
  empty input goes to eof_err.*)
133 fun kind k1 ((k2, s, n) :: toks) =
134 if k1 = k2 then (s, toks)
135 else syn_err (name_of_kind k1) (quote s) n
136 | kind _ [] = eof_err ();
(*Parsers for a single token of a given kind; each yields the token text.*)
138 val ident = kind Ident;
139 val long_ident = kind LongIdent;
(*either a plain or a long identifier*)
140 val long_id = ident || long_ident;
(*a type variable; its text is passed through quote*)
141 val type_var = kind TypeVar >> quote;
143 val string = kind String;
144 val verbatim = kind Verbatim;
(*Consume nothing and yield the empty list.*)
147 fun empty toks = ([], toks);
(*Try parse; if it fails with SYNTAX_ERROR, consume nothing and yield def.
  NOTE(review): this reading assumes >> binds tighter than ||; their fixity
  declarations are not visible here -- confirm.*)
149 fun optional parse def = parse || empty >> K def;
(*Zero or more occurrences of parse, collected into a list.  The toks
  argument is written out so that the recursive call is only evaluated once
  input is actually supplied.*)
151 fun repeat parse toks = (parse -- repeat parse >> op :: || empty) toks;
(*One or more occurrences of parse, collected into a list.*)
152 fun repeat1 parse = parse -- repeat parse >> op ::;
(*One or more items accepted by parse, separated by the keyword sep; the
  separators themselves are dropped from the result.*)
154 fun enum1 sep parse = parse -- repeat (sep $$-- parse) >> op ::;
(*As enum1, but falls back to the empty list when enum1 fails.*)
155 fun enum sep parse = enum1 sep parse || empty;
(*The comma-separated instances of enum1 and enum.*)
157 fun list1 parse = enum1 "," parse;
158 fun list parse = enum "," parse;
162 (** theory parsers **)
(*Join two strings with a single space.*)
166 fun cat s1 s2 = s1 ^ " " ^ s2;
(*Wrap a string in round or in square brackets; enclose is defined
  elsewhere.*)
168 val parens = enclose "(" ")";
169 val brackets = enclose "[" "]";
(*Render a list of strings inside square brackets.  mk_big_list separates
  the items by a comma, a newline and a space.*)
171 val mk_list = brackets o commas;
172 val mk_big_list = brackets o space_implode ",\n ";
(*Render tuples of strings inside parentheses.  The variants 1 and 2 accept
  their three components as a nested pair and flatten it first.*)
174 fun mk_pair (x, y) = parens (commas [x, y]);
175 fun mk_triple (x, y, z) = parens (commas [x, y, z]);
176 fun mk_triple1 ((x, y), z) = mk_triple (x, y, z);
177 fun mk_triple2 (x, (y, z)) = mk_triple (x, y, z);
(*Flatten a list of (xs, y) into one list in which every element of each xs
  is combined with its y by rpair (defined elsewhere; presumably
  rpair y x = (x, y), as the later use with mk_triple2 suggests).*)
179 fun split_decls l = flat (map (fn (xs, y) => map (rpair y) xs) l);
181 (*Remove the leading and trailing characters. Actually called to
182 remove quotation marks.*)
(*Yields the substring of s from index 1 with length size s - 2.  For s
  shorter than two characters String.substring raises an exception.*)
183 fun strip_quotes s = String.substring (s, 1, size s - 2);
(*A name is an identifier, whose text is passed through quote, or a string
  token, whose text is kept as it is.*)
188 val name = ident >> quote || string;
(*Comma-separated names: names may be empty, names1 needs at least one.*)
189 val names = list name;
190 val names1 = list1 name;
(*The same two parsers with the result rendered by mk_list.*)
191 val name_list = names >> mk_list;
192 val name_list1 = names1 >> mk_list;
(*Parser that consumes nothing and yields the empty string.*)
197 fun empty_decl toks = (empty >> K "") toks;
(*A name, optionally followed by the keyword < and a non-empty name list.
  Without the < part the second component is the text [].  Once < has been
  read, errors are reported through !! instead of backtracking.*)
202 val subclass = name -- optional ("<" $$-- !! name_list1) "[]";
(*One or more subclass items, each rendered as a pair, all rendered as one
  big list.*)
204 val class_decls = repeat1 (subclass >> mk_pair) >> mk_big_list;
(*a name list enclosed in braces*)
211 "{" $$-- name_list --$$ "}";
(*a non-empty comma-separated list of sorts, rendered by mk_list*)
213 val sort_list1 = list1 sort >> mk_list;
(*An optional parenthesised list of sorts (the text [] when absent)
  followed by a sort.*)
216 val arity = optional ("(" $$-- !! (sort_list1 --$$")")) "[]" -- sort;
(*One or more declarations of the form: names :: arity.  split_decls pairs
  every name with the arity; each result is rendered as a triple.*)
218 val arity_decls = repeat1 (names1 --$$ "::" -- !! arity)
219 >> (mk_big_list o map mk_triple2 o split_decls);
222 (* mixfix annotations *)
(*Keyword infixl followed by a nat, yielding Infixl n, or by a string and a
  nat, yielding InfixlName applied to the pair.*)
225 "infixl" $$-- !! (nat >> cat "Infixl" || string -- nat >> (cat "InfixlName" o mk_pair));
(*The same for the keyword infixr.*)
227 "infixr" $$-- !! (nat >> cat "Infixr" || string -- nat >> (cat "InfixrName" o mk_pair));
(*Keyword binder, a string, and then either a nat in square brackets
  followed by a nat, or a single nat n that stands for the pair (n, n).*)
229 val binder = "binder" $$--
230 !! (string -- (("[" $$-- nat --$$ "]") -- nat || nat >> (fn n => (n, n))))
231 >> (cat "Binder" o mk_triple2);
(*An optional list of nats in square brackets, empty when absent, rendered
  by mk_list.*)
233 val opt_pris = optional ("[" $$-- !! (list nat --$$ "]")) [] >> mk_list;
(*A string, then opt_pris, then an optional nat defaulting to the text
  Syntax.max_pri.*)
235 val mixfix = string -- !! (opt_pris -- optional nat "Syntax.max_pri")
236 >> (cat "Mixfix" o mk_triple2);
(*An optional parenthesised annotation parsed by fx; the text NoSyn when
  absent.*)
238 fun opt_syn fx = optional ("(" $$-- fx --$$ ")") "NoSyn";
240 val opt_infix = opt_syn (infxl || infxr);
241 val opt_mixfix = opt_syn (mixfix || infxl || infxr || binder);
248 (*Parse an identifier, but only if it is not followed by "::", "=" or ",";
249 the exclusion of a postfix comma can be controlled to allow expressions
250 like "(id, id)" but disallow ones like "'a => id id,id :: ..."*)
(*allow_comma: when false, an identifier directly followed by the keyword
  "," is rejected as well.  A rejection raises SYNTAX_ERROR with the line
  number of the following keyword.  Any non-identifier token is a syntax
  error; empty input goes to eof_err.*)
251 fun ident_no_colon _ [] = eof_err()
252 | ident_no_colon allow_comma ((Ident, s, n) :: (rest as (Keyword, s2, n2) ::
254 if s2 = "::" orelse s2 = "=" orelse (not allow_comma andalso s2 = ",")
255 then syn_err (name_of_kind Ident) (quote s2) n2
257 | ident_no_colon _ ((Ident, s, n) :: toks) = (s, toks)
258 | ident_no_colon _ ((k, s, n) :: _) =
259 syn_err (name_of_kind Ident) (quote s) n;
261 (*type used in types, consts and syntax sections*)
(*Recursive parser for type expressions that yields their text.
  allow_comma is handed on to ident_no_colon and to the recursive calls in
  result position; the recursive calls inside parentheses and square
  brackets always pass true.  A bracketed list followed by => is rendered
  as the mk_list of its items, then " => ", then the result type.*)
262 fun const_type allow_comma toks =
265 (ident || kind TypeVar ^^ optional ($$ "::" ^^ ident) "") --
266 repeat (ident_no_colon allow_comma)
267 >> (fn (args, ts) => cat args (space_implode " " ts)) ||
268 ("(" $$-- (list1 (const_type true)) --$$ ")" >> (parens o commas)) --
269 repeat1 (ident_no_colon allow_comma)
270 >> (fn (args, ts) => cat args (space_implode " " ts));
273 simple_type || "(" $$-- const_type true --$$ ")" >> parens ||
274 "[" $$-- (list1 (const_type true)) --$$ "]" --$$ "=>" --
275 const_type allow_comma >>
276 (fn (src, dest) => mk_list src ^ " => " ^ dest);
277 in ("[" $$-- (list1 (const_type true)) --$$ "]" --$$ "=>" --
278 const_type allow_comma >>
279 (fn (src, dest) => mk_list src ^ " => " ^ dest) ||
280 repeat1 (appl_param --$$ "=>") -- const_type allow_comma >>
281 (fn (src, dest) => space_implode " => " (src@[dest])) ||
283 "(" $$-- const_type true --$$ ")" >> parens) toks
(*A type is a string token, or a const_type with allow_comma = false whose
  text is passed through quote.*)
286 val typ = string || (const_type false >> quote);
(*Old-style declaration: one triple (t, n, syn) per name t in ts, each
  tagged false.*)
289 fun mk_old_type_decl ((ts, n), syn) =
290 map (fn t => (mk_triple (t, n, syn), false)) ts;
(*Without a right-hand side: the triple of t, the number of arguments xs
  and syn, tagged false.  With a right-hand side: the 4-tuple of t, the
  argument list, rhs and syn, tagged true.*)
292 fun mk_type_decl (((xs, t), None), syn) =
293 [(mk_triple (t, string_of_int (length xs), syn), false)]
294 | mk_type_decl (((xs, t), Some rhs), syn) =
295 [(parens (commas [t, mk_list xs, rhs, syn]), true)];
(*Emit text for Theory.add_types and Theory.add_tyabbrs.  keyfilter is
  defined elsewhere; presumably it selects the entries carrying the given
  tag, so entries tagged false go to add_types and those tagged true to
  add_tyabbrs.*)
297 fun mk_type_decls tys =
298 "|> Theory.add_types\n" ^ mk_big_list (keyfilter tys false) ^ "\n\n\
299 \|> Theory.add_tyabbrs\n" ^ mk_big_list (keyfilter tys true);
(*names, a nat, and an optional infix annotation*)
302 val old_type_decl = names1 -- nat -- opt_infix >> mk_old_type_decl;
(*a single type variable, or a non-empty parenthesised list of them*)
305 type_var >> (fn x => [x]) ||
306 "(" $$-- !! (list1 type_var --$$ ")") ||
(*type arguments, a name, an optional right-hand side introduced by the
  keyword =, and an optional infix annotation*)
309 val type_decl = type_args -- name --
310 optional ("=" $$-- typ >> Some) None -- opt_infix >> mk_type_decl;
(*one or more declarations of either form, flattened and rendered by
  mk_type_decls*)
313 repeat1 (old_type_decl || type_decl) >> (mk_type_decls o flat);
(*One or more declarations of the form: names :: typ, with an optional
  mixfix annotation.  split_decls pairs every name with the rest; each
  result is rendered as a triple.*)
319 repeat1 (names1 --$$ "::" -- !! (typ -- opt_mixfix))
320 >> (mk_big_list o map mk_triple2 o split_decls);
(*A parenthesised name, optionally followed by the keyword output.  The
  second component is the text false when output is present, and the text
  true otherwise.*)
324 ("(" $$-- !! (name -- optional ($$ "output" >> K "false") "true" --$$ ")"))
(*The mode text and the constant declarations, joined by a newline.*)
328 val syntax_decls = opt_mode -- const_decls >> (fn (mode, txt) => mode ^ "\n" ^ txt);
(*An optional parenthesised name, defaulting to the quoted word logic,
  followed by a string; both rendered as a pair.*)
334 optional ("(" $$-- !! (name --$$ ")")) "\"logic\"" -- string >> mk_pair;
(*Each arrow keyword is mapped to the text of a Syntax rule constructor.*)
337 $$ "=>" >> K "Syntax.ParseRule " ||
338 $$ "<=" >> K "Syntax.PrintRule " ||
339 $$ "==" >> K "Syntax.ParsePrintRule ";
(*A pattern, an arrow and a pattern, yielding the arrow text followed by
  the pair of the two patterns.*)
342 trans_pat -- !! (trans_arrow -- trans_pat)
343 >> (fn (left, (arr, right)) => arr ^ mk_pair (left, right));
(*one or more translation lines, rendered as one big list*)
345 val trans_decls = repeat1 trans_line >> mk_big_list;
348 (* ML translations *)
(*ML text that binds six translation values to the empty list.*)
351 " val parse_ast_translation = [];\n\
352 \ val parse_translation = [];\n\
353 \ val print_translation = [];\n\
354 \ val typed_print_translation = [];\n\
355 \ val print_ast_translation = [];\n\
356 \ val token_translation = [];";
(*ML text of a 4-tuple naming four of those values; the typed print and
  token translations are not part of it.*)
359 "(parse_ast_translation, parse_translation, \
360 \print_translation, print_ast_translation)";
(*Render (name, text) pairs: the name is passed through quote, each pair is
  rendered by mk_pair, and all of them by mk_big_list.*)
365 val mk_axms = mk_big_list o map (mk_pair o apfst quote);
(*The rendered axioms together with the list of their names.*)
367 fun mk_axiom_decls axms = (mk_axms axms, map fst axms);
(*One or more items, each an identifier followed by a string.*)
369 val axiom_decls = repeat1 (ident -- !! string) >> mk_axiom_decls;
(*A name, the keyword =, and a possibly long identifier, rendered as a
  pair.*)
374 val oracle_decl = (name --$$ "=" -- long_id) >> mk_pair;
377 (* combined consts and axioms *)
(*From items ((id, const part), def): every def becomes an axiom named
  id ^ "_def", and every constant part is rendered as a triple with the
  identifier quoted.  The text is the constants, then the add_defs line,
  then the definitions; the second component is the list of axiom names.*)
379 fun mk_constaxiom_decls x =
381 val (axms_defs, axms_names) =
382 mk_axiom_decls (map (fn ((id, _), def) => (id ^ "_def", def)) x);
383 in ((mk_big_list o map mk_triple2 o map (apfst quote o fst)) x ^
384 "\n\n|> (PureThy.add_defs o map Thm.no_attributes)\n" ^ axms_defs, axms_names)
(*One or more items of the form: ident :: typ, an optional mixfix
  annotation, and a string.*)
387 val constaxiom_decls =
388 repeat1 (ident --$$ "::" -- !! (typ -- opt_mixfix) -- !! string)
389 >> mk_constaxiom_decls;
(*Text: the pair (c, cs), a newline, then the rendered axioms.  Names: c
  with its first and last character removed and "I" appended, followed by
  the axiom names.*)
394 fun mk_axclass_decl ((c, cs), axms) =
395 (mk_pair (c, cs) ^ "\n" ^ mk_axms axms,
396 (strip_quotes c ^ "I") :: map fst axms);
(*A subclass item followed by zero or more axioms, each an identifier
  followed by a string.*)
398 val axclass_decl = subclass -- repeat (ident -- !! string) >> mk_axclass_decl;
(*Renders the entries of axths selected by keyfilter (defined elsewhere)
  for the tag false and for the tag true as two lists on separate lines;
  the text that follows the second newline is not visible here.*)
403 fun mk_witness (axths, opt_tac) =
404 mk_list (keyfilter axths false) ^ "\n" ^
405 mk_list (keyfilter axths true) ^ "\n" ^
(*A string token is tagged false, a possibly long identifier true.*)
409 string >> rpair false ||
410 long_id >> rpair true;
(*An optional parenthesised, non-empty list of such items, empty when
  absent, followed by optional verbatim text that is wrapped as
  (Some (text)) and defaults to the text None.*)
414 optional ("(" $$-- list1 axm_or_thm --$$ ")") [] --
415 optional (verbatim >> (parens o cat "Some" o parens)) "None"
(*Either name < name, for AxClass.add_inst_subclass, or name :: arity, for
  AxClass.add_inst_arity; the final result joins three texts by
  cat_lines.*)
419 (name --$$ "<" -- name >> (pair "|> AxClass.add_inst_subclass" o mk_pair) ||
420 name --$$ "::" -- arity >> (pair "|> AxClass.add_inst_arity" o mk_triple2))
422 >> (fn ((x, y), z) => (cat_lines [x, y, z]));
(*An optional identifier followed by the keyword +, rendered as
  (Some"id"), or the text None when absent.*)
429 (optional ((ident >> (fn x => parens ("Some" ^ quote x))) --$$ "+") ("None")) --
(*Zero or more items of the form: name :: typ, with an optional mixfix
  annotation, each rendered as a triple.*)
431 (repeat (name --$$ "::" -- !! (typ -- opt_mixfix))
432 >> (mk_big_list o map mk_triple2))) --
(*Keyword assumes, then zero or more pairs of a quoted identifier and a
  string.*)
434 ("assumes" $$-- (repeat ((ident >> quote) -- !! string)
435 >> (mk_list o map mk_pair)))
(*Keyword defines, followed by pairs of the same shape.*)
438 ("defines" $$-- (repeat ((ident >> quote) -- !! string)
439 >> (mk_list o map mk_pair)))
(*The five parts are joined by cat_lines.*)
441 >> (fn ((((nm, ext), cs), asms), defs) => cat_lines [nm, ext, cs, asms, defs]);
445 (** theory syntax **)
(*A lexicon paired with a symbol table of section parsers.*)
448 Scan.lexicon * (token list -> (string * string) * token list) Symtab.table;
(*The lexicon component of a syntax.*)
450 fun get_lexicon (lex, _) = lex;
(*Build a syntax from extra keywords and named section parsers.  The
  lexicon is made from the exploded section names plus the keywords, the
  table from the sections with distinct names (gen_distinct eq_fst).
  Section names occurring more than once are listed in a warning.*)
452 fun make_syntax keywords sects =
454 val dups = duplicates (map fst sects);
455 val sects' = gen_distinct eq_fst sects;
456 val keys = map Symbol.explode (map fst sects' @ keywords);
459 else warning ("Duplicate declaration of theory file sections:\n" ^ commas_quote dups);
460 (Scan.make_lexicon keys, Symtab.make sects')
(*The theory name paired with ML text that applies ThyInfo.begin_theory to
  the quoted name and the list of parents.*)
466 fun mk_header (thy_name, parents) =
467 (thy_name, "ThyInfo.begin_theory " ^ cat (quote thy_name) (mk_list parents));
(*An identifier, the keyword =, and one or more names separated by +.*)
469 val header = ident --$$ "=" -- enum1 "+" name >> mk_header;
(*txts is a list of pairs of texts, one pair per section.  The first and
  the second components are collected separately; within each collection
  the empty texts are dropped and the rest joined by a blank line.  mltxt
  is passed through unchanged.*)
474 fun mk_extension (txts, mltxt) =
476 val cat_sects = space_implode "\n\n" o filter_out (equal "");
477 val (extxts, postxts) = split_list txts;
479 (cat_sects extxts, cat_sects postxts, mltxt)
(*Parse one section: the next token must be a keyword found in tab.  The
  parser stored there runs under !!, so syntax errors inside a section are
  reported rather than backtracked.  An unknown keyword or a token of any
  other kind is a syntax error; empty input goes to eof_err.*)
482 fun sect tab ((Keyword, s, n) :: toks) =
483 (case Symtab.lookup (tab, s) of
484 Some parse => !! parse toks
485 | None => syn_err "section" s n)
486 | sect _ ((_, s, n) :: _) = syn_err "section" s n
487 | sect _ [] = eof_err ();
(*The keyword +, zero or more sections, the keyword end, and optional
  verbatim text that defaults to the empty string.*)
489 fun extension sectab = "+" $$-- !!
490 (repeat (sect sectab) --$$ "end" -- optional verbatim "")
(*As extension, but yields three empty strings when there is none.*)
493 fun opt_extension sectab = optional (extension sectab) ("", "", "");
496 (* theory definition *)
(*Assemble the ML text generated for a theory from its name, the text that
  begins the theory, and the three texts of the extension.  The visible
  pieces declare a structure called thy_name, bind thy to bg_theory with
  further steps piped on, end with ThyInfo.end_theory, and finally open the
  structure.*)
498 fun mk_structure ((thy_name, bg_theory), (extxt, postxt, mltxt)) =
499 "structure " ^ thy_name ^ " =\n\
508 \val thy = " ^ bg_theory ^ "\n\
510 \|> Theory.add_trfuns\n"
512 \|> Theory.add_trfunsT typed_print_translation\n\
513 \|> Theory.add_tokentrfuns token_translation\n\
517 \|> ThyInfo.end_theory;\n\
519 \val _ = context thy;\n\
527 \open " ^ thy_name ^ ";\n";
(*A whole theory file: the header, an optional extension, then eof.  The
  result of eof is dropped by #1 before mk_structure is applied.*)
529 fun theory_defn sectab =
530 header -- opt_extension sectab -- eof >> (mk_structure o #1);
(*Tokenize chs with the lexicon, parse the tokens under !!, and return the
  generated text, discarding the remaining tokens.*)
532 fun parse_thy (lex, sectab) chs = #1 (!! (theory_defn sectab) (tokenize lex chs));
535 (* standard sections *)
(*ML text that binds the name ax to the result of get_axiom thy applied to
  the quoted name.*)
537 fun mk_val ax = "val " ^ ax ^ " = get_axiom thy " ^ quote ax ^ ";";
(*One such binding per name, joined by cat_lines.*)
538 val mk_vals = cat_lines o map mk_val;
(*Pair the section text with the bindings for its axiom names.  A
  non-empty pretxt is put on a line of its own before the text.*)
540 fun mk_axm_sect "" (txt, axs) = (txt, mk_vals axs)
541 | mk_axm_sect pretxt (txt, axs) = (pretxt ^ "\n" ^ txt, mk_vals axs);
(*A section entry: the section name paired with a parser whose result is
  processed by mk_axm_sect pretxt.*)
543 fun axm_section name pretxt parse =
544 (name, parse >> mk_axm_sect pretxt);
(*A section entry for a parser that yields text only; the list of axiom
  names is empty.*)
546 fun section name pretxt parse =
547 axm_section name pretxt (parse >> rpair []);
(*A list of keyword strings; presumably the value of pure_keywords, whose
  binding line is not visible here.*)
551 ["end", "ML", "mixfix", "infixr", "infixl", "binder", "output", "=",
552 "+", ",", "<", "{", "}", "(", ")", "[", "]", "::", "==", "=>",
553 "<=", "fixes", "assumes", "defines"];
(*A list of section entries.  Each one gives the section name, the ML text
  put in front of the section's generated text (it may be empty), and the
  parser for the section body.  Entries built with axm_section also yield
  axiom names.*)
556 [section "classes" "|> Theory.add_classes" class_decls,
557 section "default" "|> Theory.add_defsort" sort,
558 section "types" "" type_decls,
559 section "nonterminals" "|> Theory.add_nonterminals" (repeat1 name >> mk_list),
560 section "arities" "|> Theory.add_arities" arity_decls,
561 section "consts" "|> Theory.add_consts" const_decls,
562 section "syntax" "|> Theory.add_modesyntax" syntax_decls,
563 section "translations" "|> Theory.add_trrules" trans_decls,
564 axm_section "rules" "|> (PureThy.add_axioms o map Thm.no_attributes)" axiom_decls,
565 axm_section "defs" "|> (PureThy.add_defs o map Thm.no_attributes)" axiom_decls,
566 section "oracle" "|> Theory.add_oracle" oracle_decl,
567 axm_section "constdefs" "|> Theory.add_consts" constaxiom_decls,
568 axm_section "axclass" "|> AxClass.add_axclass" axclass_decl,
569 section "instance" "" instance_decl,
570 section "path" "|> Theory.add_path" name,
571 section "global" "|> PureThy.global_path" empty_decl,
572 section "local" "|> PureThy.local_path" empty_decl,
573 section "setup" "|> Library.apply" long_id,
574 section "MLtext" "" verbatim,
575 section "locale" "|> Locale.add_locale" locale_decl];