src/Pure/Thy/thy_parse.ML
author wenzelm
Wed May 12 17:26:56 1999 +0200 (1999-05-12)
changeset 6642 732af87c0650
parent 6378 5780d71203bb
child 6667 58b9785f8534
permissions -rw-r--r--
strip_quotes replaced by unenclose;
     1 (*  Title:      Pure/Thy/thy_parse.ML
     2     ID:         $Id$
     3     Author:     Markus Wenzel, TU Muenchen
     4 
     5 The parser for theory files.
     6 *)
     7 
     8 infix 5 -- --$$ $$-- ^^;
     9 infix 3 >>;
    10 infix 0 ||;
    11 
    12 signature THY_PARSE =
    13 sig
    14   type token
    15   val !! : ('a -> 'b * 'c) -> 'a -> 'b * 'c
    16   val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
    17   val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
    18   val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
    19   val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
    20   val $$ : string -> token list -> string * token list
    21   val $$-- : string * (token list -> 'b * 'c) -> token list -> 'b * 'c
    22   val --$$ : ('a -> 'b * token list) * string -> 'a -> 'b * token list
    23   val ident: token list -> string * token list
    24   val long_ident: token list -> string * token list
    25   val long_id: token list -> string * token list
    26   val type_var: token list -> string * token list
    27   val type_args: token list -> string list * token list
    28   val nat: token list -> string * token list
    29   val string: token list -> string * token list
    30   val verbatim: token list -> string * token list
    31   val empty: 'a -> 'b list * 'a
    32   val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
    33   val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    34   val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    35   val enum: string -> (token list -> 'a * token list)
    36     -> token list -> 'a list * token list
    37   val enum1: string -> (token list -> 'a * token list)
    38     -> token list -> 'a list * token list
    39   val list: (token list -> 'a * token list)
    40     -> token list -> 'a list * token list
    41   val list1: (token list -> 'a * token list)
    42     -> token list -> 'a list * token list
    43   val name: token list -> string * token list
    44   val sort: token list -> string * token list
    45   val typ: token list -> string * token list
    46   val opt_infix: token list -> string * token list
    47   val opt_mixfix: token list -> string * token list
    48   val opt_witness: token list -> string * token list
    49   val const_decls: token list -> string * token list
    50   type syntax
    51   val get_lexicon: syntax -> Scan.lexicon;
    52   val make_syntax: string list ->
    53     (string * (token list -> (string * string) * token list)) list -> syntax
    54   val parse_thy: syntax -> string list -> string
    55   val section: string -> string -> (token list -> string * token list)
    56     -> (string * (token list -> (string * string) * token list))
    57   val axm_section: string -> string
    58     -> (token list -> (string * string list) * token list)
    59     -> (string * (token list -> (string * string) * token list))
    60   val pure_keywords: string list
    61   val pure_sections:
    62     (string * (token list -> (string * string) * token list)) list
    63   (*items for building strings*)
    64   val cat: string -> string -> string
    65   val parens: string -> string
    66   val brackets: string -> string
    67   val mk_list: string list -> string
    68   val mk_big_list: string list -> string
    69   val mk_pair: string * string -> string
    70   val mk_triple: string * string * string -> string
    71   val mk_triple1: (string * string) * string -> string
    72   val mk_triple2: string * (string * string) -> string
    73 end;
    74 
    75 
    76 structure ThyParse : THY_PARSE=
    77 struct
    78 
    79 open ThyScan;
    80 
    81 
    82 (** parser toolbox **)
    83 
    84 type token = token_kind * string * int;
    85 
    86 
    87 (* errors *)
    88 
    89 exception SYNTAX_ERROR of string * string * int;
    90 
    91 fun syn_err s1 s2 n = raise SYNTAX_ERROR (s1, s2, n);
    92 
    93 fun eof_err () = error "Unexpected end-of-file";
    94 
    95 (*Similar to Prolog's cut: reports any syntax error instead of backtracking
    96   through a superior || *)
    97 fun !! parse toks = parse toks
    98   handle SYNTAX_ERROR (s1, s2, n) => error ("Syntax error on line " ^
    99     string_of_int n ^ ": " ^ s1 ^ " expected and " ^ s2 ^ " was found");
   100 
   101 
   102 (* parser combinators *)
   103 
   104 fun (parse >> f) toks = apfst f (parse toks);
   105 
   106 fun (parse1 || parse2) toks =
   107   parse1 toks handle SYNTAX_ERROR _ => parse2 toks;
   108 
   109 fun (parse1 -- parse2) toks =
   110   let
   111     val (x, toks') = parse1 toks;
   112     val (y, toks'') = parse2 toks';
   113   in
   114     ((x, y), toks'')
   115   end;
   116 
   117 fun (parse1 ^^ parse2) = parse1 -- parse2 >> op ^;
   118 
   119 
   120 (* generic parsers *)
   121 
   122 fun $$ a ((k, b, n) :: toks) =
   123       if k = Keyword andalso a = b then (a, toks)
   124       else syn_err (quote a) (quote b) n
   125   | $$ _ [] = eof_err ();
   126 
   127 fun (a $$-- parse) = $$ a -- parse >> #2;
   128 
   129 fun (parse --$$ a) = parse -- $$ a >> #1;
   130 
   131 
   132 fun kind k1 ((k2, s, n) :: toks) =
   133       if k1 = k2 then (s, toks)
   134       else syn_err (name_of_kind k1) (quote s) n
   135   | kind _ [] = eof_err ();
   136 
   137 val ident = kind Ident;
   138 val long_ident = kind LongIdent;
   139 val long_id = ident || long_ident;
   140 val type_var = kind TypeVar >> quote;
   141 val nat = kind Nat;
   142 val string = kind String;
   143 val verbatim = kind Verbatim;
   144 val eof = kind EOF;
   145 
   146 fun empty toks = ([], toks);
   147 
   148 fun optional parse def = parse || empty >> K def;
   149 
   150 fun repeat parse toks = (parse -- repeat parse >> op :: || empty) toks;
   151 fun repeat1 parse = parse -- repeat parse >> op ::;
   152 
   153 fun enum1 sep parse = parse -- repeat (sep $$-- parse) >> op ::;
   154 fun enum sep parse = enum1 sep parse || empty;
   155 
   156 fun list1 parse = enum1 "," parse;
   157 fun list parse = enum "," parse;
   158 
   159 
   160 
   161 (** theory parsers **)
   162 
   163 (* misc utilities *)
   164 
   165 fun cat s1 s2 = s1 ^ " " ^ s2;
   166 
   167 val parens = enclose "(" ")";
   168 val brackets = enclose "[" "]";
   169 
   170 val mk_list = brackets o commas;
   171 val mk_big_list = brackets o space_implode ",\n ";
   172 
   173 fun mk_pair (x, y) = parens (commas [x, y]);
   174 fun mk_triple (x, y, z) = parens (commas [x, y, z]);
   175 fun mk_triple1 ((x, y), z) = mk_triple (x, y, z);
   176 fun mk_triple2 (x, (y, z)) = mk_triple (x, y, z);
   177 
   178 fun split_decls l = flat (map (fn (xs, y) => map (rpair y) xs) l);
   179 
   180 
   181 (* names *)
   182 
   183 val name = ident >> quote || string;
   184 val names = list name;
   185 val names1 = list1 name;
   186 val name_list = names >> mk_list;
   187 val name_list1 = names1 >> mk_list;
   188 
   189 
   190 (* empty *)
   191 
   192 fun empty_decl toks = (empty >> K "") toks;
   193 
   194 
   195 (* classes *)
   196 
   197 val subclass = name -- optional ("<" $$-- !! name_list1) "[]";
   198 
   199 val class_decls = repeat1 (subclass >> mk_pair) >> mk_big_list;
   200 
   201 
   202 (* arities *)
   203 
   204 val sort =
   205   name >> brackets ||
   206   "{" $$-- name_list --$$ "}";
   207 
   208 val sort_list1 = list1 sort >> mk_list;
   209 
   210 
   211 val arity = optional ("(" $$-- !! (sort_list1 --$$")")) "[]" -- sort;
   212 
   213 val arity_decls = repeat1 (names1 --$$ "::" -- !! arity)
   214   >> (mk_big_list o map mk_triple2 o split_decls);
   215 
   216 
   217 (* mixfix annotations *)
   218 
   219 val infxl =
   220   "infixl" $$-- !! (nat >> cat "Infixl" || string -- nat >> (cat "InfixlName" o mk_pair));
   221 val infxr =
   222   "infixr" $$-- !! (nat >> cat "Infixr" || string -- nat >> (cat "InfixrName" o mk_pair));
   223 
   224 val binder = "binder" $$--
   225   !! (string -- (("[" $$-- nat --$$ "]") -- nat || nat >> (fn n => (n, n))))
   226     >> (cat "Binder" o mk_triple2);
   227 
   228 val opt_pris = optional ("[" $$-- !! (list nat --$$ "]")) [] >> mk_list;
   229 
   230 val mixfix = string -- !! (opt_pris -- optional nat "Syntax.max_pri")
   231   >> (cat "Mixfix" o mk_triple2);
   232 
   233 fun opt_syn fx = optional ("(" $$-- fx --$$ ")") "NoSyn";
   234 
   235 val opt_infix = opt_syn (infxl || infxr);
   236 val opt_mixfix = opt_syn (mixfix || infxl || infxr || binder);
   237 
   238 
   239 (* types *)
   240 
   241 (* FIXME clean!! *)
   242 
   243 (*Parse an identifier, but only if it is not followed by "::", "=" or ",";
   244   the exclusion of a postfix comma can be controlled to allow expressions
   245   like "(id, id)" but disallow ones like "'a => id id,id :: ..."*)
   246 fun ident_no_colon _ [] = eof_err()
   247   | ident_no_colon allow_comma ((Ident, s, n) :: (rest as (Keyword, s2, n2) ::
   248                                 toks)) =
   249       if s2 = "::" orelse s2 = "=" orelse (not allow_comma andalso s2 = ",")
   250       then syn_err (name_of_kind Ident) (quote s2) n2
   251       else (s, rest)
   252   | ident_no_colon _ ((Ident, s, n) :: toks) = (s, toks)
   253   | ident_no_colon _ ((k, s, n) :: _) =
   254       syn_err (name_of_kind Ident) (quote s) n;
   255 
   256 (*type used in types, consts and syntax sections*)
   257 fun const_type allow_comma toks =
   258   let
   259     val simple_type =
   260       (ident || kind TypeVar ^^ optional ($$ "::" ^^ ident) "") --
   261           repeat (ident_no_colon allow_comma)
   262           >> (fn (args, ts) => cat args (space_implode " " ts)) ||
   263         ("(" $$-- (list1 (const_type true)) --$$ ")" >> (parens o commas)) --
   264           repeat1 (ident_no_colon allow_comma)
   265           >> (fn (args, ts) => cat args (space_implode " " ts));
   266 
   267       val appl_param =
   268         simple_type || "(" $$-- const_type true --$$ ")" >> parens || 
   269         "[" $$-- (list1 (const_type true)) --$$ "]" --$$ "=>" --
   270           const_type allow_comma >>
   271           (fn (src, dest) => mk_list src ^ " => " ^ dest);
   272   in ("[" $$-- (list1 (const_type true)) --$$ "]" --$$ "=>" --
   273         const_type allow_comma >>
   274         (fn (src, dest) => mk_list src ^ " => " ^ dest) ||
   275       repeat1 (appl_param --$$ "=>") -- const_type allow_comma >>
   276         (fn (src, dest) => space_implode " => " (src@[dest])) ||
   277       simple_type ||
   278       "(" $$-- const_type true --$$ ")" >> parens) toks
   279   end;
   280 
   281 val typ = string || (const_type false >> quote);
   282 
   283 
   284 fun mk_old_type_decl ((ts, n), syn) =
   285   map (fn t => (mk_triple (t, n, syn), false)) ts;
   286 
   287 fun mk_type_decl (((xs, t), None), syn) =
   288       [(mk_triple (t, string_of_int (length xs), syn), false)]
   289   | mk_type_decl (((xs, t), Some rhs), syn) =
   290       [(parens (commas [t, mk_list xs, rhs, syn]), true)];
   291 
   292 fun mk_type_decls tys =
   293   "|> Theory.add_types\n" ^ mk_big_list (keyfilter tys false) ^ "\n\n\
   294   \|> Theory.add_tyabbrs\n" ^ mk_big_list (keyfilter tys true);
   295 
   296 
   297 val old_type_decl = names1 -- nat -- opt_infix >> mk_old_type_decl;
   298 
   299 val type_args =
   300   type_var >> (fn x => [x]) ||
   301   "(" $$-- !! (list1 type_var --$$ ")") ||
   302   empty >> K [];
   303 
   304 val type_decl = type_args -- name --
   305   optional ("=" $$-- typ >> Some) None -- opt_infix >> mk_type_decl;
   306 
   307 val type_decls =
   308   repeat1 (old_type_decl || type_decl) >> (mk_type_decls o flat);
   309 
   310 
   311 (* consts *)
   312 
   313 val const_decls =
   314   repeat1 (names1 --$$ "::" -- !! (typ -- opt_mixfix))
   315   >> (mk_big_list o map mk_triple2 o split_decls);
   316 
   317 val opt_mode =
   318   optional
   319     ("(" $$-- !! (name -- optional ($$ "output" >> K "false") "true" --$$ ")"))
   320     ("\"\"", "true")
   321   >> mk_pair;
   322 
   323 val syntax_decls = opt_mode -- const_decls >> (fn (mode, txt) => mode ^ "\n" ^ txt);
   324 
   325 
   326 (* translations *)
   327 
   328 val trans_pat =
   329   optional ("(" $$-- !! (name --$$ ")")) "\"logic\"" -- string >> mk_pair;
   330 
   331 val trans_arrow =
   332   $$ "=>" >> K "Syntax.ParseRule " ||
   333   $$ "<=" >> K "Syntax.PrintRule " ||
   334   $$ "==" >> K "Syntax.ParsePrintRule ";
   335 
   336 val trans_line =
   337   trans_pat -- !! (trans_arrow -- trans_pat)
   338     >> (fn (left, (arr, right)) => arr ^ mk_pair (left, right));
   339 
   340 val trans_decls = repeat1 trans_line >> mk_big_list;
   341 
   342 
   343 (* ML translations *)
   344 
   345 val local_defs =
   346   " val parse_ast_translation = [];\n\
   347   \ val parse_translation = [];\n\
   348   \ val print_translation = [];\n\
   349   \ val typed_print_translation = [];\n\
   350   \ val print_ast_translation = [];\n\
   351   \ val token_translation = [];";
   352 
   353 val trfun_args =
   354   "(parse_ast_translation, parse_translation, \
   355   \print_translation, print_ast_translation)";
   356 
   357 
   358 (* axioms *)
   359 
   360 val mk_axms = mk_big_list o map (mk_pair o apfst quote);
   361 val mk_axms' = mk_big_list o map (mk_pair o rpair "[]" o mk_pair o apfst quote);
   362 
   363 fun mk_axiom_decls axms = (mk_axms axms, map fst axms);
   364 
   365 val axiom_decls = repeat1 (ident -- !! string) >> mk_axiom_decls;
   366 
   367 
   368 (* oracle *)
   369 
   370 val oracle_decl = (name --$$ "=" -- long_id) >> mk_pair;
   371 
   372 
   373 (* combined consts and axioms *)
   374 
   375 fun mk_constaxiom_decls x =
   376   let
   377     val (axms_defs, axms_names) =
   378       mk_axiom_decls (map (fn ((id, _), def) => (id ^ "_def", def)) x);
   379   in ((mk_big_list o map mk_triple2 o map (apfst quote o fst)) x ^
   380        "\n\n|> (PureThy.add_defs o map Thm.no_attributes)\n" ^ axms_defs, axms_names)
   381   end;
   382 
   383 val constaxiom_decls =
   384   repeat1 (ident --$$ "::" -- !! (typ -- opt_mixfix) -- !! string)
   385   >> mk_constaxiom_decls;
   386 
   387 
   388 (* axclass *)
   389 
   390 fun mk_axclass_decl ((c, cs), axms) =
   391   (mk_pair (c, cs) ^ "\n" ^ mk_axms' axms, unenclose c ^ "I" :: map fst axms);
   392 
   393 val axclass_decl = subclass -- repeat (ident -- !! string) >> mk_axclass_decl;
   394 
   395 
   396 (* instance *)
   397 
   398 fun mk_witness (axths, opt_tac) =
   399   mk_list (keyfilter axths false) ^ "\n" ^
   400   mk_list (keyfilter axths true) ^ "\n" ^
   401   opt_tac;
   402 
   403 val axm_or_thm =
   404   string >> rpair false ||
   405   long_id >> rpair true;
   406 
   407 
   408 val opt_witness =
   409   optional ("(" $$-- list1 axm_or_thm --$$ ")") [] --
   410   optional (verbatim >> (parens o cat "Some" o parens)) "None"
   411   >> mk_witness;
   412 
   413 val instance_decl =
   414   (name --$$ "<" -- name >> (pair "|> AxClass.add_inst_subclass" o mk_pair) ||
   415     name --$$ "::" -- arity >> (pair "|> AxClass.add_inst_arity" o mk_triple2))
   416   -- opt_witness
   417   >> (fn ((x, y), z) => (cat_lines [x, y, z]));
   418 
   419 
   420 (* locale *)
   421 
   422 val locale_decl =
   423   (name --$$ "=") -- 
   424     (optional ((ident >> (fn x => parens ("Some" ^ quote x))) --$$ "+") ("None")) --
   425     ("fixes" $$--
   426       (repeat (name --$$ "::" -- !! (typ -- opt_mixfix)) 
   427        >> (mk_big_list o map mk_triple2))) --
   428     (optional 
   429      ("assumes" $$-- (repeat ((ident >> quote) -- !! string) 
   430 		     >> (mk_list o map mk_pair)))
   431      "[]") --
   432     (optional 
   433      ("defines" $$-- (repeat ((ident >> quote) -- !! string) 
   434 		      >> (mk_list o map mk_pair)))
   435      "[]")
   436   >> (fn ((((nm, ext), cs), asms), defs) => cat_lines [nm, ext, cs, asms, defs]);
   437 
   438 
   439 
   440 (** theory syntax **)
   441 
   442 type syntax =
   443   Scan.lexicon * (token list -> (string * string) * token list) Symtab.table;
   444 
   445 fun get_lexicon (lex, _) = lex;
   446 
   447 fun make_syntax keywords sects =
   448   let
   449     val dups = duplicates (map fst sects);
   450     val sects' = gen_distinct eq_fst sects;
   451     val keys = map Symbol.explode (map fst sects' @ keywords);
   452   in
   453     if null dups then ()
   454     else warning ("Duplicate declaration of theory file sections:\n" ^ commas_quote dups);
   455     (Scan.make_lexicon keys, Symtab.make sects')
   456   end;
   457 
   458 
   459 (* header *)
   460 
   461 fun mk_header (thy_name, parents) =
   462   (thy_name, "IsarThy.begin_theory " ^ cat (quote thy_name) (mk_list parents) ^ " []");
   463 
   464 val header = ident --$$ "=" -- enum1 "+" name >> mk_header;
   465 
   466 
   467 (* extension *)
   468 
   469 fun mk_extension (txts, mltxt) =
   470   let
   471     val cat_sects = space_implode "\n\n" o filter_out (equal "");
   472     val (extxts, postxts) = split_list txts;
   473   in
   474     (cat_sects extxts, cat_sects postxts, mltxt)
   475   end;
   476 
   477 fun sect tab ((Keyword, s, n) :: toks) =
   478       (case Symtab.lookup (tab, s) of
   479         Some parse => !! parse toks
   480       | None => syn_err "section" s n)
   481   | sect _ ((_, s, n) :: _) = syn_err "section" s n
   482   | sect _ [] = eof_err ();
   483 
   484 fun extension sectab = "+" $$-- !!
   485   (repeat (sect sectab) --$$ "end" -- optional verbatim "")
   486     >> mk_extension;
   487 
   488 fun opt_extension sectab = optional (extension sectab) ("", "", "");
   489 
   490 
   491 (* theory definition *)
   492 
   493 fun mk_structure ((thy_name, bg_theory), (extxt, postxt, mltxt)) =
   494   "structure " ^ thy_name ^ " =\n\
   495   \struct\n\
   496   \\n\
   497   \local\n"
   498   ^ local_defs ^ "\n\
   499   \in\n\
   500   \\n"
   501   ^ mltxt ^ "\n\
   502   \\n\
   503   \val thy = " ^ bg_theory ^ "\n\
   504   \\n\
   505   \|> Theory.add_trfuns\n"
   506   ^ trfun_args ^ "\n\
   507   \|> Theory.add_trfunsT typed_print_translation\n\
   508   \|> Theory.add_tokentrfuns token_translation\n\
   509   \\n"
   510   ^ extxt ^ "\n\
   511   \\n\
   512   \|> IsarThy.end_theory;\n\
   513   \\n\
   514   \val _ = context thy;\n\
   515   \\n\
   516   \\n"
   517   ^ postxt ^ "\n\
   518   \\n\
   519   \end;\n\
   520   \end;\n\
   521   \\n\
   522   \open " ^ thy_name ^ ";\n";
   523 
   524 fun theory_defn sectab =
   525   header -- opt_extension sectab -- eof >> (mk_structure o #1);
   526 
   527 fun parse_thy (lex, sectab) chs = #1 (!! (theory_defn sectab) (tokenize lex chs));
   528 
   529 
   530 (* standard sections *)
   531 
   532 fun mk_val ax = "val " ^ ax ^ " = PureThy.get_thm thy " ^ quote ax ^ ";";
   533 val mk_vals = cat_lines o map mk_val;
   534 
   535 fun mk_axm_sect "" (txt, axs) = (txt, mk_vals axs)
   536   | mk_axm_sect pretxt (txt, axs) = (pretxt ^ "\n" ^ txt, mk_vals axs);
   537 
   538 fun axm_section name pretxt parse =
   539   (name, parse >> mk_axm_sect pretxt);
   540 
   541 fun section name pretxt parse =
   542   axm_section name pretxt (parse >> rpair []);
   543 
   544 
   545 val pure_keywords =
   546  ["end", "ML", "mixfix", "infixr", "infixl", "binder", "output", "=",
   547   "+", ",", "<", "{", "}", "(", ")", "[", "]", "::", "==", "=>",
   548   "<=", "fixes", "assumes", "defines"];
   549 
   550 val pure_sections =
   551  [section "classes" "|> Theory.add_classes" class_decls,
   552   section "default" "|> Theory.add_defsort" sort,
   553   section "types" "" type_decls,
   554   section "nonterminals" "|> Theory.add_nonterminals" (repeat1 name >> mk_list),
   555   section "arities" "|> Theory.add_arities" arity_decls,
   556   section "consts" "|> Theory.add_consts" const_decls,
   557   section "syntax" "|> Theory.add_modesyntax" syntax_decls,
   558   section "translations" "|> Theory.add_trrules" trans_decls,
   559   axm_section "rules" "|> (PureThy.add_axioms o map Thm.no_attributes)" axiom_decls,
   560   axm_section "defs" "|> (PureThy.add_defs o map Thm.no_attributes)" axiom_decls,
   561   section "oracle" "|> Theory.add_oracle" oracle_decl,
   562   axm_section "constdefs" "|> Theory.add_consts" constaxiom_decls,
   563   axm_section "axclass" "|> (#1 ooo AxClass.add_axclass)" axclass_decl,
   564   section "instance" "" instance_decl,
   565   section "path" "|> Theory.add_path" name,
   566   section "global" "|> PureThy.global_path" empty_decl,
   567   section "local" "|> PureThy.local_path" empty_decl,
   568   section "setup" "|> Library.apply" long_id,
   569   section "MLtext" "" verbatim,
   570   section "locale" "|> Locale.add_locale" locale_decl];
   571 
   572 
   573 end;