src/Pure/Thy/thy_parse.ML
author wenzelm
Mon Oct 27 15:43:53 1997 +0100 (1997-10-27)
changeset 4011 c161162bc8c5
parent 3977 9b3cbfd6a936
child 4020 f88775cc8d17
permissions -rw-r--r--
flipped global_names default;
     1 (*  Title:      Pure/Thy/thy_parse.ML
     2     ID:         $Id$
     3     Author:     Markus Wenzel, TU Muenchen
     4 
     5 The parser for theory files.
     6 *)
     7 
(* FIXME tmp *)
(*flag read by the generated theory text: while it is true, the emitted
  "global" and "local" path steps return the theory unchanged instead of
  calling Theory.add_path*)
val global_names = ref false;
    10 
    11 
(*fixity of the parser combinators, all left-associative: the sequencing
  operators bind tightest, then result mapping >>, then alternative ||*)
infix 5 -- --$$ $$-- ^^;
infix 3 >>;
infix 0 ||;
    15 
signature THY_PARSE =
sig
  type token
  (*error handling and basic combinators*)
  val !! : ('a -> 'b * 'c) -> 'a -> 'b * 'c
  val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
  val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
  val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
  val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
  (*keyword parsers*)
  val $$ : string -> token list -> string * token list
  val $$-- : string * (token list -> 'b * 'c) -> token list -> 'b * 'c
  val --$$ : ('a -> 'b * token list) * string -> 'a -> 'b * token list
  (*parsers for single tokens of a given kind*)
  val ident: token list -> string * token list
  val long_ident: token list -> string * token list
  val long_id: token list -> string * token list
  val type_var: token list -> string * token list
  val type_args: token list -> string list * token list
  val nat: token list -> string * token list
  val string: token list -> string * token list
  val verbatim: token list -> string * token list
  (*optional items, repetition and separated lists*)
  val empty: 'a -> 'b list * 'a
  val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
  val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val enum: string -> (token list -> 'a * token list)
    -> token list -> 'a list * token list
  val enum1: string -> (token list -> 'a * token list)
    -> token list -> 'a list * token list
  val list: (token list -> 'a * token list)
    -> token list -> 'a list * token list
  val list1: (token list -> 'a * token list)
    -> token list -> 'a list * token list
  (*parsers for common theory items; results are pieces of ML text*)
  val name: token list -> string * token list
  val sort: token list -> string * token list
  val typ: token list -> string * token list
  val opt_infix: token list -> string * token list
  val opt_mixfix: token list -> string * token list
  val opt_witness: token list -> string * token list
  (*theory file syntax: keywords plus a parser for each section*)
  type syntax
  val make_syntax: string list ->
    (string * (token list -> (string * string) * token list)) list -> syntax
  val parse_thy: syntax -> string -> string -> string
  (*constructors for section entries, and the keywords and sections of Pure*)
  val section: string -> string -> (token list -> string * token list)
    -> (string * (token list -> (string * string) * token list))
  val axm_section: string -> string
    -> (token list -> (string * string list) * token list)
    -> (string * (token list -> (string * string) * token list))
  val pure_keywords: string list
  val pure_sections:
    (string * (token list -> (string * string) * token list)) list
  (*items for building strings*)
  val cat: string -> string -> string
  val parens: string -> string
  val brackets: string -> string
  val mk_list: string list -> string
  val mk_big_list: string list -> string
  val mk_pair: string * string -> string
  val mk_triple: string * string * string -> string
  val strip_quotes: string -> string
end;
    75 
    76 
    77 structure ThyParse : THY_PARSE=
    78 struct
    79 
    80 open ThyScan;
    81 
    82 
    83 (** parser toolbox **)
    84 
    85 type token = token_kind * string * int;
    86 
    87 
    88 (* errors *)
    89 
    90 exception SYNTAX_ERROR of string * string * int;
    91 
    92 fun syn_err s1 s2 n = raise SYNTAX_ERROR (s1, s2, n);
    93 
    94 fun eof_err () = error "Unexpected end-of-file";
    95 
    96 (*Similar to Prolog's cut: reports any syntax error instead of backtracking
    97   through a superior || *)
    98 fun !! parse toks = parse toks
    99   handle SYNTAX_ERROR (s1, s2, n) => error ("Syntax error on line " ^
   100     string_of_int n ^ ": " ^ s1 ^ " expected and " ^ s2 ^ " was found");
   101 
   102 
   103 (* parser combinators *)
   104 
   105 fun (parse >> f) toks = apfst f (parse toks);
   106 
   107 fun (parse1 || parse2) toks =
   108   parse1 toks handle SYNTAX_ERROR _ => parse2 toks;
   109 
   110 fun (parse1 -- parse2) toks =
   111   let
   112     val (x, toks') = parse1 toks;
   113     val (y, toks'') = parse2 toks';
   114   in
   115     ((x, y), toks'')
   116   end;
   117 
   118 fun (parse1 ^^ parse2) = parse1 -- parse2 >> op ^;
   119 
   120 
   121 (* generic parsers *)
   122 
   123 fun $$ a ((k, b, n) :: toks) =
   124       if k = Keyword andalso a = b then (a, toks)
   125       else syn_err (quote a) (quote b) n
   126   | $$ _ [] = eof_err ();
   127 
   128 fun (a $$-- parse) = $$ a -- parse >> #2;
   129 
   130 fun (parse --$$ a) = parse -- $$ a >> #1;
   131 
   132 
   133 fun kind k1 ((k2, s, n) :: toks) =
   134       if k1 = k2 then (s, toks)
   135       else syn_err (name_of_kind k1) (quote s) n
   136   | kind _ [] = eof_err ();
   137 
   138 val ident = kind Ident;
   139 val long_ident = kind LongIdent;
   140 val long_id = ident || long_ident;
   141 val type_var = kind TypeVar >> quote;
   142 val nat = kind Nat;
   143 val string = kind String;
   144 val verbatim = kind Verbatim;
   145 val eof = kind EOF;
   146 
   147 fun empty toks = ([], toks);
   148 
   149 fun optional parse def = parse || empty >> K def;
   150 
   151 fun repeat parse toks = (parse -- repeat parse >> op :: || empty) toks;
   152 fun repeat1 parse = parse -- repeat parse >> op ::;
   153 
   154 fun enum1 sep parse = parse -- repeat (sep $$-- parse) >> op ::;
   155 fun enum sep parse = enum1 sep parse || empty;
   156 
   157 fun list1 parse = enum1 "," parse;
   158 fun list parse = enum "," parse;
   159 
   160 
   161 
   162 (** theory parsers **)
   163 
   164 (* misc utilities *)
   165 
   166 fun cat s1 s2 = s1 ^ " " ^ s2;
   167 
   168 val parens = enclose "(" ")";
   169 val brackets = enclose "[" "]";
   170 
   171 val mk_list = brackets o commas;
   172 val mk_big_list = brackets o space_implode ",\n ";
   173 
   174 fun mk_pair (x, y) = parens (commas [x, y]);
   175 fun mk_triple (x, y, z) = parens (commas [x, y, z]);
   176 fun mk_triple1 ((x, y), z) = mk_triple (x, y, z);
   177 fun mk_triple2 (x, (y, z)) = mk_triple (x, y, z);
   178 
   179 fun split_decls l = flat (map (fn (xs, y) => map (rpair y) xs) l);
   180 
   181 fun strip_quotes str =
   182   implode (tl (take (size str - 1, explode str)));
   183 
   184 
   185 (* names *)
   186 
   187 val name = ident >> quote || string;
   188 val names = list name;
   189 val names1 = list1 name;
   190 val name_list = names >> mk_list;
   191 val name_list1 = names1 >> mk_list;
   192 
   193 
   194 (* classes *)
   195 
   196 val subclass = name -- optional ("<" $$-- !! name_list1) "[]";
   197 
   198 val class_decls = repeat1 (subclass >> mk_pair) >> mk_big_list;
   199 
   200 
   201 (* arities *)
   202 
   203 val sort =
   204   name >> brackets ||
   205   "{" $$-- name_list --$$ "}";
   206 
   207 val sort_list1 = list1 sort >> mk_list;
   208 
   209 
   210 val arity = optional ("(" $$-- !! (sort_list1 --$$")")) "[]" -- sort;
   211 
   212 val arity_decls = repeat1 (names1 --$$ "::" -- !! arity)
   213   >> (mk_big_list o map mk_triple2 o split_decls);
   214 
   215 
   216 (* mixfix annotations *)
   217 
   218 val infxl =
   219   "infixl" $$-- !! (nat >> cat "Infixl" || string -- nat >> (cat "InfixlName" o mk_pair));
   220 val infxr =
   221   "infixr" $$-- !! (nat >> cat "Infixr" || string -- nat >> (cat "InfixrName" o mk_pair));
   222 
   223 val binder = "binder" $$--
   224   !! (string -- (("[" $$-- nat --$$ "]") -- nat || nat >> (fn n => (n, n))))
   225     >> (cat "Binder" o mk_triple2);
   226 
   227 val opt_pris = optional ("[" $$-- !! (list nat --$$ "]")) [] >> mk_list;
   228 
   229 val mixfix = string -- !! (opt_pris -- optional nat "max_pri")
   230   >> (cat "Mixfix" o mk_triple2);
   231 
   232 fun opt_syn fx = optional ("(" $$-- fx --$$ ")") "NoSyn";
   233 
   234 val opt_infix = opt_syn (infxl || infxr);
   235 val opt_mixfix = opt_syn (mixfix || infxl || infxr || binder);
   236 
   237 
   238 (* types *)
   239 
   240 (* FIXME clean!! *)
   241 
   242 (*Parse an identifier, but only if it is not followed by "::", "=" or ",";
   243   the exclusion of a postfix comma can be controlled to allow expressions
   244   like "(id, id)" but disallow ones like "'a => id id,id :: ..."*)
   245 fun ident_no_colon _ [] = eof_err()
   246   | ident_no_colon allow_comma ((Ident, s, n) :: (rest as (Keyword, s2, n2) ::
   247                                 toks)) =
   248       if s2 = "::" orelse s2 = "=" orelse (not allow_comma andalso s2 = ",")
   249       then syn_err (name_of_kind Ident) (quote s2) n2
   250       else (s, rest)
   251   | ident_no_colon _ ((Ident, s, n) :: toks) = (s, toks)
   252   | ident_no_colon _ ((k, s, n) :: _) =
   253       syn_err (name_of_kind Ident) (quote s) n;
   254 
   255 (*type used in types, consts and syntax sections*)
   256 fun const_type allow_comma toks =
   257   let
   258     val simple_type =
   259       (ident || kind TypeVar ^^ optional ($$ "::" ^^ ident) "") --
   260           repeat (ident_no_colon allow_comma)
   261           >> (fn (args, ts) => cat args (space_implode " " ts)) ||
   262         ("(" $$-- (list1 (const_type true)) --$$ ")" >> (parens o commas)) --
   263           repeat1 (ident_no_colon allow_comma)
   264           >> (fn (args, ts) => cat args (space_implode " " ts));
   265 
   266       val appl_param =
   267         simple_type || "(" $$-- const_type true --$$ ")" >> parens || 
   268         "[" $$-- (list1 (const_type true)) --$$ "]" --$$ "=>" --
   269           const_type allow_comma >>
   270           (fn (src, dest) => mk_list src ^ " => " ^ dest);
   271   in ("[" $$-- (list1 (const_type true)) --$$ "]" --$$ "=>" --
   272         const_type allow_comma >>
   273         (fn (src, dest) => mk_list src ^ " => " ^ dest) ||
   274       repeat1 (appl_param --$$ "=>") -- const_type allow_comma >>
   275         (fn (src, dest) => space_implode " => " (src@[dest])) ||
   276       simple_type ||
   277       "(" $$-- const_type true --$$ ")" >> parens) toks
   278   end;
   279 
   280 val typ = string || (const_type false >> quote);
   281 
   282 
   283 fun mk_old_type_decl ((ts, n), syn) =
   284   map (fn t => (mk_triple (t, n, syn), false)) ts;
   285 
   286 fun mk_type_decl (((xs, t), None), syn) =
   287       [(mk_triple (t, string_of_int (length xs), syn), false)]
   288   | mk_type_decl (((xs, t), Some rhs), syn) =
   289       [(parens (commas [t, mk_list xs, rhs, syn]), true)];
   290 
   291 fun mk_type_decls tys =
   292   "|> Theory.add_types\n" ^ mk_big_list (keyfilter tys false) ^ "\n\n\
   293   \|> Theory.add_tyabbrs\n" ^ mk_big_list (keyfilter tys true);
   294 
   295 
   296 val old_type_decl = names1 -- nat -- opt_infix >> mk_old_type_decl;
   297 
   298 val type_args =
   299   type_var >> (fn x => [x]) ||
   300   "(" $$-- !! (list1 type_var --$$ ")") ||
   301   empty >> K [];
   302 
   303 val type_decl = type_args -- name --
   304   optional ("=" $$-- typ >> Some) None -- opt_infix >> mk_type_decl;
   305 
   306 val type_decls =
   307   repeat1 (old_type_decl || type_decl) >> (mk_type_decls o flat);
   308 
   309 
   310 (* consts *)
   311 
   312 val const_decls =
   313   repeat1 (names1 --$$ "::" -- !! (typ -- opt_mixfix))
   314   >> (mk_big_list o map mk_triple2 o split_decls);
   315 
   316 val opt_mode =
   317   optional
   318     ("(" $$-- !! (name -- optional ($$ "output" >> K "false") "true" --$$ ")"))
   319     ("\"\"", "true")
   320   >> mk_pair;
   321 
   322 val syntax_decls = opt_mode -- const_decls >> (fn (mode, txt) => mode ^ "\n" ^ txt);
   323 
   324 
   325 (* translations *)
   326 
   327 val trans_pat =
   328   optional ("(" $$-- !! (name --$$ ")")) "\"logic\"" -- string >> mk_pair;
   329 
   330 val trans_arrow =
   331   $$ "=>" >> K "Syntax.ParseRule " ||
   332   $$ "<=" >> K "Syntax.PrintRule " ||
   333   $$ "==" >> K "Syntax.ParsePrintRule ";
   334 
   335 val trans_line =
   336   trans_pat -- !! (trans_arrow -- trans_pat)
   337     >> (fn (left, (arr, right)) => arr ^ mk_pair (left, right));
   338 
   339 val trans_decls = repeat1 trans_line >> mk_big_list;
   340 
   341 
   342 (* ML translations *)
   343 
   344 val trfun_defs =
   345   " val parse_ast_translation = [];\n\
   346   \ val parse_translation = [];\n\
   347   \ val print_translation = [];\n\
   348   \ val typed_print_translation = [];\n\
   349   \ val print_ast_translation = [];\n\
   350   \ val token_translation = [];";
   351 
   352 val trfun_args =
   353   "(parse_ast_translation, parse_translation, \
   354   \print_translation, print_ast_translation)";
   355 
   356 
   357 (* axioms *)
   358 
   359 val mk_axms = mk_big_list o map (mk_pair o apfst quote);
   360 
   361 fun mk_axiom_decls axms = (mk_axms axms, map fst axms);
   362 
   363 val axiom_decls = repeat1 (ident -- !! string) >> mk_axiom_decls;
   364 
   365 
   366 (* oracle *)
   367 
   368 val oracle_decl = (name --$$ "=" -- long_id) >> mk_pair;
   369 
   370 
   371 (* combined consts and axioms *)
   372 
   373 fun mk_constaxiom_decls x =
   374   let
   375     val (axms_defs, axms_names) =
   376       mk_axiom_decls (map (fn ((id, _), def) => (id ^ "_def", def)) x);
   377   in ((mk_big_list o map mk_triple2 o map (apfst quote o fst)) x ^
   378        "\n\n|> Theory.add_defs\n" ^ axms_defs, axms_names)
   379   end;
   380 
   381 val constaxiom_decls =
   382   repeat1 (ident --$$ "::" -- !! (typ -- opt_mixfix) -- !! string)
   383   >> mk_constaxiom_decls;
   384 
   385 
   386 (* axclass *)
   387 
   388 fun mk_axclass_decl ((c, cs), axms) =
   389   (mk_pair (c, cs) ^ "\n" ^ mk_axms axms,
   390     (strip_quotes c ^ "I") :: map fst axms);
   391 
   392 val axclass_decl = subclass -- repeat (ident -- !! string) >> mk_axclass_decl;
   393 
   394 
   395 (* instance *)
   396 
   397 fun mk_witness (axths, opt_tac) =
   398   mk_list (keyfilter axths false) ^ "\n" ^
   399   mk_list (keyfilter axths true) ^ "\n" ^
   400   opt_tac;
   401 
   402 val axm_or_thm =
   403   string >> rpair false ||
   404   long_id >> rpair true;
   405 
   406 
   407 val opt_witness =
   408   optional ("(" $$-- list1 axm_or_thm --$$ ")") [] --
   409   optional (verbatim >> (parens o cat "Some" o parens)) "None"
   410   >> mk_witness;
   411 
   412 val instance_decl =
   413   (name --$$ "<" -- name >> (pair "|> AxClass.add_inst_subclass" o mk_pair) ||
   414     name --$$ "::" -- arity >> (pair "|> AxClass.add_inst_arity" o mk_triple2))
   415   -- opt_witness
   416   >> (fn ((x, y), z) => (cat_lines [x, y, z]));
   417 
   418 
   419 (* local, global path *)
   420 
   421 fun empty_decl toks = (empty >> K "") toks;
   422 
   423 val global_path =
   424   "|> (fn thy => if ! global_names then thy else Theory.add_path \"/\" thy)";
   425 
   426 val local_path =
   427   global_path ^ "\n\
   428   \|> (fn thy => if ! global_names then thy\
   429   \ else Theory.add_path thy_name thy)";
   430 
   431 
   432 
   433 (** theory syntax **)
   434 
   435 type syntax =
   436   lexicon * (token list -> (string * string) * token list) Symtab.table;
   437 
   438 fun make_syntax keywords sects =
   439   (make_lexicon (map fst sects @ keywords),
   440     Symtab.make sects handle Symtab.DUPS dups =>
   441       error ("Duplicate sections in theory file syntax: " ^ commas_quote dups));
   442 
   443 
   444 (* header *)
   445 
   446 fun mk_header (thy_name, bases) =
   447   (thy_name, "mk_base " ^ mk_list bases ^ " " ^ quote thy_name);
   448 
   449 val base =
   450   ident >> (cat "Thy" o quote) ||
   451   string >> cat "File";
   452 
   453 val header = ident --$$ "=" -- enum1 "+" base >> mk_header;
   454 
   455 
   456 (* extension *)
   457 
   458 fun mk_extension (txts, mltxt) =
   459   let
   460     val cat_sects = space_implode "\n\n" o filter_out (equal "");
   461     val (extxts, postxts) = split_list txts;
   462   in
   463     (cat_sects extxts, cat_sects postxts, mltxt)
   464   end;
   465 
   466 fun sect tab ((Keyword, s, n) :: toks) =
   467       (case Symtab.lookup (tab, s) of
   468         Some parse => !! parse toks
   469       | None => syn_err "section" s n)
   470   | sect _ ((_, s, n) :: _) = syn_err "section" s n
   471   | sect _ [] = eof_err ();
   472 
   473 fun extension sectab = "+" $$-- !!
   474   (repeat (sect sectab) --$$ "end" -- optional ("ML" $$-- verbatim) "")
   475     >> mk_extension;
   476 
   477 fun opt_extension sectab = optional (extension sectab) ("", "", "");
   478 
   479 
   480 (* theory definition *)
   481 
   482 fun mk_structure tname ((thy_name, old_thys), (extxt, postxt, mltxt)) =
   483   if thy_name <> tname then
   484     error ("Filename \"" ^ tname ^ ".thy\" and theory name "
   485       ^ quote thy_name ^ " are different")
   486   else
   487     "val thy = " ^ old_thys ^ ";\n\n\
   488     \structure " ^ thy_name ^ " =\n\
   489     \struct\n\
   490     \\n\
   491     \local\n\
   492     \ val thy_name = \"" ^ tname ^ "\";\n"
   493     ^ trfun_defs ^ "\n\
   494     \in\n\
   495     \\n"
   496     ^ mltxt ^ "\n\
   497     \\n\
   498     \val thy = thy\n\
   499     \\n"
   500     ^ local_path ^
   501     "\n\
   502     \|> Theory.add_trfuns\n"
   503     ^ trfun_args ^ "\n\
   504     \|> Theory.add_trfunsT typed_print_translation \n\
   505     \|> Theory.add_tokentrfuns token_translation \n\
   506     \\n"
   507     ^ extxt ^ "\n\
   508     \\n\
   509     \|> Theory.add_name " ^ quote thy_name ^ ";\n\
   510     \\n\
   511     \val _ = store_theory (thy, " ^ quote thy_name ^ ");\n\
   512     \\n\
   513     \\n"
   514     ^ postxt ^ "\n\
   515     \\n\
   516     \end;\n\
   517     \end;\n\
   518     \\n\
   519     \open " ^ thy_name ^ ";\n\
   520     \\n";
   521 
   522 fun theory_defn sectab tname =
   523   header -- opt_extension sectab -- eof >> (mk_structure tname o #1);
   524 
   525 fun parse_thy (lex, sectab) tname str =
   526   #1 (!! (theory_defn sectab tname) (tokenize lex str));
   527 
   528 
   529 (* standard sections *)
   530 
   531 fun mk_val ax = "val " ^ ax ^ " = get_axiom thy " ^ quote ax ^ ";";
   532 val mk_vals = cat_lines o map mk_val;
   533 
   534 fun mk_axm_sect "" (txt, axs) = (txt, mk_vals axs)
   535   | mk_axm_sect pretxt (txt, axs) = (pretxt ^ "\n" ^ txt, mk_vals axs);
   536 
   537 fun axm_section name pretxt parse =
   538   (name, parse >> mk_axm_sect pretxt);
   539 
   540 fun section name pretxt parse =
   541   axm_section name pretxt (parse >> rpair []);
   542 
   543 
   544 val pure_keywords =
   545  ["end", "ML", "mixfix", "infixr", "infixl", "binder", "global",
   546   "local", "output", "=", "+", ",", "<", "{", "}", "(", ")", "[", "]",
   547   "::", "==", "=>", "<="];
   548 
(*the sections of Pure theory files.  Each entry gives the section keyword,
  the ML text put on a line in front of the parsed section text (nothing is
  put in front when it is ""), and the parser for the section body; entries
  built with axm_section additionally bind the names returned by their
  parser via get_axiom*)
val pure_sections =
 [section "classes" "|> Theory.add_classes" class_decls,
  section "default" "|> Theory.add_defsort" sort,
  section "types" "" type_decls,
  section "arities" "|> Theory.add_arities" arity_decls,
  section "consts" "|> Theory.add_consts" const_decls,
  section "syntax" "|> Theory.add_modesyntax" syntax_decls,
  section "translations" "|> Theory.add_trrules" trans_decls,
  axm_section "rules" "|> Theory.add_axioms" axiom_decls,
  axm_section "defs" "|> Theory.add_defs" axiom_decls,
  section "oracle" "|> Theory.add_oracle" oracle_decl,
  axm_section "constdefs" "|> Theory.add_consts" constaxiom_decls,
  axm_section "axclass" "|> AxClass.add_axclass" axclass_decl,
  section "instance" "" instance_decl,
  section "path" "|> Theory.add_path" name,
  section "global" global_path empty_decl,
  section "local" local_path empty_decl];
   566 
   567 
   568 end;