src/Pure/Thy/thy_parse.ML
author clasohm
Fri Dec 01 12:22:07 1995 +0100 (1995-12-01)
changeset 1377 f800f533aa83
parent 1371 2f97fc253763
child 1383 be42217b0b5c
permissions -rw-r--r--
simplified parser for constType
     1 (*  Title:      Pure/Thy/thy_parse.ML
     2     ID:         $Id$
     3     Author:     Markus Wenzel, TU Muenchen
     4 
     5 The parser for theory files.
     6 *)
     7 
(*Fixity of the parser combinators defined in this file: the sequencing
  operators bind tightest (5), the result transformer >> comes next (3) and
  the alternative || binds loosest (0).  All of them associate to the left.*)
infix 5 -- --$$ $$-- ^^;
infix 3 >>;
infix 0 ||;
    11 
(*Interface of the theory file parser: the combinator toolbox, parsers for
  single tokens and common theory file items, construction of theory
  syntaxes, and helpers for building the generated text.*)
signature THY_PARSE =
sig
  type token
  (*parser combinators: cut, result transformation, alternative, sequencing
    and string concatenating sequencing*)
  val !! : ('a -> 'b * 'c) -> 'a -> 'b * 'c
  val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
  val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
  val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
  val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
  (*keyword parsers; the two infix forms drop the keyword from the result*)
  val $$ : string -> token list -> string * token list
  val $$-- : string * (token list -> 'b * 'c) -> token list -> 'b * 'c
  val --$$ : ('a -> 'b * token list) * string -> 'a -> 'b * token list
  (*parsers for single tokens and type arguments*)
  val ident: token list -> string * token list
  val long_ident: token list -> string * token list
  val long_id: token list -> string * token list
  val type_var: token list -> string * token list
  val type_args: token list -> string list * token list
  val nat: token list -> string * token list
  val string: token list -> string * token list
  val verbatim: token list -> string * token list
  (*optional and repeated items; enum/enum1 take the separating keyword,
    list/list1 use "," as separator*)
  val empty: 'a -> 'b list * 'a
  val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
  val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  val enum: string -> (token list -> 'a * token list)
    -> token list -> 'a list * token list
  val enum1: string -> (token list -> 'a * token list)
    -> token list -> 'a list * token list
  val list: (token list -> 'a * token list)
    -> token list -> 'a list * token list
  val list1: (token list -> 'a * token list)
    -> token list -> 'a list * token list
  (*parsers for common theory file items*)
  val name: token list -> string * token list
  val sort: token list -> string * token list
  val opt_infix: token list -> string * token list
  val opt_mixfix: token list -> string * token list
  val opt_witness: token list -> string * token list
  (*theory syntax: built from keywords and named section parsers; parse_thy
    maps a syntax and two strings to the resulting text*)
  type syntax
  val make_syntax: string list ->
    (string * (token list -> (string * string) * token list)) list -> syntax
  val parse_thy: syntax -> string -> string -> string
  (*construction of section parsers from a section name, a text prefix and
    a parser for the section body*)
  val section: string -> string -> (token list -> string * token list)
    -> (string * (token list -> (string * string) * token list))
  val axm_section: string -> string
    -> (token list -> (string * string list) * token list)
    -> (string * (token list -> (string * string) * token list))
  (*keywords and sections provided by this file*)
  val pure_keywords: string list
  val pure_sections:
    (string * (token list -> (string * string) * token list)) list
  (*items for building strings*)
  val cat: string -> string -> string
  val parens: string -> string
  val brackets: string -> string
  val mk_list: string list -> string
  val mk_big_list: string list -> string
  val mk_pair: string * string -> string
  val mk_triple: string * string * string -> string
  val strip_quotes: string -> string
end;
    70 
    71 functor ThyParseFun(structure Symtab: SYMTAB and ThyScan: THY_SCAN): THY_PARSE=
    72 struct
    73 
    74 open ThyScan;
    75 
    76 
    77 (** parser toolbox **)
    78 
(*A token consists of its kind, its text and an integer that is reported as
  the line number in syntax error messages.*)
type token = token_kind * string * int;
    80 
    81 
    82 (* errors *)
    83 
    84 exception SYNTAX_ERROR of string * string * int;
    85 
    86 fun syn_err s1 s2 n = raise SYNTAX_ERROR (s1, s2, n);
    87 
    88 fun eof_err () = error "Unexpected end-of-file";
    89 
    90 (*Similar to Prolog's cut: reports any syntax error instead of backtracking
    91   through a superior || *)
    92 fun !! parse toks = parse toks
    93   handle SYNTAX_ERROR (s1, s2, n) => error ("Syntax error on line " ^
    94     string_of_int n ^ ": " ^ s1 ^ " expected and " ^ s2 ^ " was found");
    95 
    96 
    97 (* parser combinators *)
    98 
    99 fun (parse >> f) toks = apfst f (parse toks);
   100 
   101 fun (parse1 || parse2) toks =
   102   parse1 toks handle SYNTAX_ERROR _ => parse2 toks;
   103 
   104 fun (parse1 -- parse2) toks =
   105   let
   106     val (x, toks') = parse1 toks;
   107     val (y, toks'') = parse2 toks';
   108   in
   109     ((x, y), toks'')
   110   end;
   111 
   112 fun (parse1 ^^ parse2) = parse1 -- parse2 >> op ^;
   113 
   114 
   115 (* generic parsers *)
   116 
   117 fun $$ a ((k, b, n) :: toks) =
   118       if k = Keyword andalso a = b then (a, toks)
   119       else syn_err (quote a) (quote b) n
   120   | $$ _ [] = eof_err ();
   121 
   122 fun (a $$-- parse) = $$ a -- parse >> #2;
   123 
   124 fun (parse --$$ a) = parse -- $$ a >> #1;
   125 
   126 
   127 fun kind k1 ((k2, s, n) :: toks) =
   128       if k1 = k2 then (s, toks)
   129       else syn_err (name_of_kind k1) (quote s) n
   130   | kind _ [] = eof_err ();
   131 
(*One parser per token kind; each returns the text of the accepted token.*)
val ident = kind Ident;
val long_ident = kind LongIdent;
(*either form of identifier; ident is tried first*)
val long_id = ident || long_ident;
(*the text of a type variable is passed through quote*)
val type_var = kind TypeVar >> quote;
val nat = kind Nat;
val string = kind String;
val verbatim = kind Verbatim;
val eof = kind EOF;
   140 
   141 fun empty toks = ([], toks);
   142 
   143 fun optional parse def = parse || empty >> K def;
   144 
   145 fun repeat parse toks = (parse -- repeat parse >> op :: || empty) toks;
   146 fun repeat1 parse = parse -- repeat parse >> op ::;
   147 
   148 fun enum1 sep parse = parse -- repeat (sep $$-- parse) >> op ::;
   149 fun enum sep parse = enum1 sep parse || empty;
   150 
   151 val list = enum ",";
   152 val list1 = enum1 ",";
   153 
   154 
   155 (** theory parsers **)
   156 
   157 (* misc utilities *)
   158 
   159 fun cat s1 s2 = s1 ^ " " ^ s2;
   160 
   161 val parens = enclose "(" ")";
   162 val brackets = enclose "[" "]";
   163 
   164 val mk_list = brackets o commas;
   165 val mk_big_list = brackets o space_implode ",\n ";
   166 
   167 fun mk_pair (x, y) = parens (commas [x, y]);
   168 fun mk_triple (x, y, z) = parens (commas [x, y, z]);
   169 fun mk_triple1 ((x, y), z) = mk_triple (x, y, z);
   170 fun mk_triple2 (x, (y, z)) = mk_triple (x, y, z);
   171 
   172 val split_decls = flat o map (fn (xs, y) => map (rpair y) xs);
   173 
   174 fun strip_quotes str =
   175   implode (tl (take (size str - 1, explode str)));
   176 
   177 
   178 (* names *)
   179 
(*A name is either an identifier, which is passed through quote, or a
  string token, which is returned as it is.*)
val name = ident >> quote || string;
(*comma separated names: names accepts the empty list, names1 does not*)
val names = list name;
val names1 = list1 name;
(*the same, rendered through mk_list*)
val name_list = names >> mk_list;
val name_list1 = names1 >> mk_list;
   185 
   186 
   187 (* classes *)
   188 
(*A name, optionally followed by "<" and a non-empty list of names; when
  the "<" part is missing the second component is "[]".  After "<" the
  parser is committed by !!.*)
val subclass = name -- optional ("<" $$-- !! name_list1) "[]";

(*One or more such declarations, each rendered by mk_pair and all of them
  collected by mk_big_list.*)
val class_decls = repeat1 (subclass >> mk_pair) >> mk_big_list;
   192 
   193 
   194 (* arities *)
   195 
(*A sort is either a single name, which is put in brackets, or a possibly
  empty list of names enclosed in "{" and "}".*)
val sort =
  name >> brackets ||
  "{" $$-- name_list --$$ "}";

(*non-empty comma separated list of sorts, rendered through mk_list*)
val sort_list1 = list1 sort >> mk_list;


(*An optional parenthesized list of sorts (default "[]") followed by a
  sort.*)
val arity = optional ("(" $$-- !! (sort_list1 --$$")")) "[]" -- sort;

(*One or more declarations of the form  names :: arity.  Every name is
  combined with its arity into a triple; the triples are collected by
  mk_big_list.*)
val arity_decls = repeat1 (names1 --$$ "::" -- !! arity)
  >> (mk_big_list o map mk_triple2 o split_decls);
   207 
   208 
   209 (* mixfix annotations *)
   210 
(*The keyword followed by a number; yields "Infixl" resp. "Infixr" and the
  number, joined by cat.*)
val infxl = "infixl" $$-- !! nat >> cat "Infixl";
val infxr = "infixr" $$-- !! nat >> cat "Infixr";

(*The keyword "binder", a string, and then either a bracketed number
  followed by a number, or a single number that is used for both
  positions.  The result is "Binder" followed by the triple.*)
val binder = "binder" $$--
  !! (string -- ( ("[" $$-- nat --$$ "]") -- nat
                || nat >> (fn n => (n,n))
     )          )
  >> (cat "Binder" o mk_triple2);

(*optional bracketed list of numbers (default: empty), rendered by mk_list*)
val opt_pris = optional ("[" $$-- !! (list nat --$$ "]")) [] >> mk_list;

(*A string, the optional list parsed by opt_pris, and an optional number
  that defaults to "max_pri"; the result is "Mixfix" followed by the
  triple.*)
val mixfix = string -- !! (opt_pris -- optional nat "max_pri")
  >> (cat "Mixfix" o mk_triple2);

(*An optional annotation fx in parentheses; "NoSyn" if there is none.*)
fun opt_syn fx = optional ("(" $$-- fx --$$ ")") "NoSyn";

val opt_infix = opt_syn (infxl || infxr);
val opt_mixfix = opt_syn (mixfix || infxl || infxr || binder);
   229 
   230 
   231 (* types *)
   232 
   233 fun mk_old_type_decl ((ts, n), syn) =
   234   map (fn t => (mk_triple (t, n, syn), false)) ts;
   235 
   236 fun mk_type_decl (((xs, t), None), syn) =
   237       [(mk_triple (t, string_of_int (length xs), syn), false)]
   238   | mk_type_decl (((xs, t), Some rhs), syn) =
   239       [(parens (commas [t, mk_list xs, rhs, syn]), true)];
   240 
   241 fun mk_type_decls tys =
   242   "|> add_types\n" ^ mk_big_list (keyfilter tys false) ^ "\n\n\
   243   \|> add_tyabbrs\n" ^ mk_big_list (keyfilter tys true);
   244 
   245 
(*Old form: a non-empty list of names, a number and an optional infix
  annotation.*)
val old_type_decl = names1 -- nat -- opt_infix >> mk_old_type_decl;

(*Type arguments: a single type variable, a parenthesized non-empty list of
  type variables, or nothing at all.*)
val type_args =
  type_var >> (fn x => [x]) ||
  "(" $$-- !! (list1 type_var --$$ ")") ||
  empty >> K [];

(*New form: type arguments, a name, an optional "=" followed by a string,
  and an optional infix annotation.*)
val type_decl = type_args -- name -- optional ("=" $$-- !! string >> Some) None
  -- opt_infix >> mk_type_decl;

(*One or more declarations; for each one the old form is tried first.*)
val type_decls = repeat1 (old_type_decl || type_decl) >>
                 (mk_type_decls o flat);
   258 
   259 
   260 (* consts *)
   261 
   262 (*Parse an identifier, but only if it is not followed by colons or a comma;
   263   the exclusion of a postfix comma can be controlled to allow expressions
   264   like "(id, id)" but disallow ones like "'a => id id,id :: ..."*)
   265 fun ident_no_colon _ [] = eof_err()
   266   | ident_no_colon allow_comma ((Ident, s, n) :: (rest as (Keyword, s2, n2) ::
   267                                 toks)) =
   268       if s2 = "::" orelse (not allow_comma andalso s2 = ",") then
   269         syn_err (name_of_kind Ident) (quote s2) n2
   270       else (s, rest)
   271   | ident_no_colon _ ((Ident, s, n) :: toks) = (s, toks)
   272   | ident_no_colon _ ((k, s, n) :: _) =
   273       syn_err (name_of_kind Ident) (quote s) n;
   274 
(*Parse the type of a constant that is written without string quotes and
  return it as text.  allow_comma is handed on to ident_no_colon and to the
  recursive calls in result position; types nested inside parentheses or
  brackets are always parsed with allow_comma = true.*)
fun const_type allow_comma toks =
  (*simple_type: an identifier, or a type variable with an optional
    "::" ident part, followed by any number of identifiers; or else a
    parenthesized list of types followed by at least one identifier*)
  let val simple_type =
        (ident ||
         kind TypeVar -- optional ("::" $$-- ident >> cat "::") "" >>
           (fn (tv, cl) => cat tv cl)) --
           repeat (ident_no_colon allow_comma) >>
           (fn (args, ts) => cat args (space_implode " " ts)) ||
         ("(" $$-- (list1 (const_type true)) --$$ ")" >> (parens o commas)) --
           repeat1 (ident_no_colon allow_comma) >>
           (fn (args, ts) => cat args (space_implode " " ts));

      (*appl_param: what may stand to the left of "=>": a simple type, a
        parenthesized type, or a bracketed list of types with its own
        "=>" and result*)
      val appl_param =
        simple_type || "(" $$-- const_type true --$$ ")" >> parens || 
        "[" $$-- (list1 (const_type true)) --$$ "]" --$$ "=>" --
          const_type allow_comma >>
          (fn (src, dest) => mk_list src ^ " => " ^ dest);
  (*the alternatives are tried in this order: bracketed list "=>" type,
    a chain of appl_params each followed by "=>" and a final type, a simple
    type, a parenthesized type*)
  in ("[" $$-- (list1 (const_type true)) --$$ "]" --$$ "=>" --
        const_type allow_comma >>
        (fn (src, dest) => mk_list src ^ " => " ^ dest) ||
      repeat1 (appl_param --$$ "=>") -- const_type allow_comma >>
        (fn (src, dest) => space_implode " => " (src@[dest])) ||
      simple_type ||
      "(" $$-- const_type true --$$ ")" >> parens) toks
  end;
   299 
(*One or more declarations of the form  names :: type  with an optional
  mixfix annotation.  The type is either a string token or an unquoted type
  parsed by const_type false and passed through quote.  Every name is
  combined with type and annotation into a triple.*)
val const_decls = repeat1 (names1 --$$ "::" -- !!
                    ((string || const_type false >> quote) -- opt_mixfix)) >>
                  (mk_big_list o map mk_triple2 o split_decls);
   303 
   304 
   305 (* translations *)
   306 
(*An optional parenthesized name (default: the quoted text logic) followed
  by a string, rendered as a pair.*)
val trans_pat =
  optional ("(" $$-- !! (name --$$ ")")) "\"logic\"" -- string >> mk_pair;

(*Maps each of the three arrow keywords to the text emitted for it.*)
val trans_arrow =
  $$ "=>" >> K " |-> " ||
  $$ "<=" >> K " <-| " ||
  $$ "==" >> K " <-> ";

(*One or more rules  pattern arrow pattern, each concatenated into a single
  string; the parser is committed once the first pattern has been read.*)
val trans_decls = repeat1 (trans_pat ^^ !! (trans_arrow ^^ trans_pat))
  >> mk_big_list;
   317 
   318 
   319 (* ML translations *)
   320 
(*Text of four value declarations binding each of the translation function
  lists to the empty list.*)
val trfun_defs =
  " val parse_ast_translation = [];\n\
  \ val parse_translation = [];\n\
  \ val print_translation = [];\n\
  \ val print_ast_translation = [];";

(*Text of the tuple made of the four names declared by trfun_defs.*)
val trfun_args =
  "(parse_ast_translation, parse_translation, \
  \print_translation, print_ast_translation)";

(*Wrap txt into a let expression: the declarations of trfun_defs come
  first, then txt, and the body is the tuple trfun_args.*)
fun mk_mltrans txt =
  "let\n"
  ^ trfun_defs ^ "\n"
  ^ txt ^ "\n\
  \in\n\
  \ " ^ trfun_args ^ "\n\
  \end";

(*A verbatim token, wrapped by mk_mltrans.*)
val mltrans = verbatim >> mk_mltrans;
   340 
   341 
   342 (* axioms *)
   343 
   344 val mk_axms = mk_big_list o map (mk_pair o apfst quote);
   345 
   346 fun mk_axiom_decls axms = (mk_axms axms, map fst axms);
   347 
   348 val axiom_decls = repeat1 (ident -- !! string) >> mk_axiom_decls;
   349 
   350 
   351 (* axclass *)
   352 
   353 fun mk_axclass_decl ((c, cs), axms) =
   354   (mk_pair (c, cs) ^ "\n" ^ mk_axms axms,
   355     (strip_quotes c ^ "I") :: map fst axms);
   356 
   357 val axclass_decl = subclass -- repeat (ident -- !! string) >> mk_axclass_decl;
   358 
   359 
   360 (* instance *)
   361 
(*Three lines of text: the list of entries flagged false, the list of
  entries flagged true, and opt_tac.*)
fun mk_witness (axths, opt_tac) =
  mk_list (keyfilter axths false) ^ "\n" ^
  mk_list (keyfilter axths true) ^ "\n" ^
  opt_tac;

(*A string token is flagged false, an identifier (short or long) true.*)
val axm_or_thm =
  string >> rpair false ||
  long_id >> rpair true;


(*An optional parenthesized non-empty list of axm_or_thm items (default:
  empty), followed by an optional verbatim token that is rendered as
  (Some (...)) and defaults to "None".*)
val opt_witness =
  optional ("(" $$-- list1 axm_or_thm --$$ ")") [] --
  optional (verbatim >> (parens o cat "Some" o parens)) "None"
  >> mk_witness;

(*Either  name < name  or  name :: arity, each paired with the text of the
  function to apply, followed by the optional witness; the three parts are
  joined by cat_lines.*)
val instance_decl =
  (name --$$ "<" -- name >> (pair "|> AxClass.add_inst_subclass" o mk_pair) ||
    name --$$ "::" -- arity >> (pair "|> AxClass.add_inst_arity" o mk_triple2))
  -- opt_witness
  >> (fn ((x, y), z) => (cat_lines [x, y, z]));
   382 
   383 
   384 
   385 (** theory syntax **)
   386 
   387 type syntax =
   388   lexicon * (token list -> (string * string) * token list) Symtab.table;
   389 
   390 fun make_syntax keywords sects =
   391   (make_lexicon (map fst sects @ keywords),
   392     Symtab.make sects handle Symtab.DUPS dups =>
   393       error ("Duplicate sections in theory file syntax: " ^ commas_quote dups));
   394 
   395 
   396 (* header *)
   397 
(*Pairs the theory name with the text of a mk_base call on the list of
  bases and the quoted theory name.*)
fun mk_header (thy_name, bases) =
  (thy_name, "mk_base " ^ mk_list bases ^ " " ^ quote thy_name);

(*A base is an identifier, rendered as "Thy" plus the quoted identifier, or
  a string token, rendered as "File" plus the string.*)
val base =
  ident >> (cat "Thy" o quote) ||
  string >> cat "File";

(*An identifier, "=", and one or more bases separated by "+".*)
val header = ident --$$ "=" -- enum1 "+" base >> mk_header;
   406 
   407 
   408 (* extension *)
   409 
   410 fun mk_extension (txts, mltxt) =
   411   let
   412     val cat_sects = space_implode "\n\n" o filter_out (equal "");
   413     val (extxts, postxts) = split_list txts;
   414   in
   415     (cat_sects extxts, cat_sects postxts, mltxt)
   416   end;
   417 
   418 fun sect tab ((Keyword, s, n) :: toks) =
   419       (case Symtab.lookup (tab, s) of
   420         Some parse => !! parse toks
   421       | None => syn_err "section" s n)
   422   | sect _ ((_, s, n) :: _) = syn_err "section" s n
   423   | sect _ [] = eof_err ();
   424 
   425 fun extension sectab = "+" $$-- !! (repeat (sect sectab) --$$ "end") --
   426   optional ("ML" $$-- verbatim) "" >> mk_extension;
   427 
   428 
   429 (* theory definition *)
   430 
(*Build the text that is generated for a theory definition.  tname is
  compared with the theory name from the header and a mismatch is reported
  as an error.  old_thys is the text produced for the header; opt_txts
  holds the three texts of the extension part, if there is one.*)
fun mk_structure tname ((thy_name, old_thys), opt_txts) =
  if thy_name <> tname then
    error ("Filename \"" ^ tname ^ ".thy\" and theory name "
      ^ quote thy_name ^ " are different")
  else
    (case opt_txts of
      (*with extension: a structure containing the translation function
        defaults, the ML text, the extended theory and finally postxt*)
      Some (extxt, postxt, mltxt) =>
        "val thy = " ^ old_thys ^ " true;\n\n\
        \structure " ^ thy_name ^ " =\n\
        \struct\n\
        \\n\
        \local\n"
        ^ trfun_defs ^ "\n\
        \in\n\
        \\n"
        ^ mltxt ^ "\n\
        \\n\
        \val thy = thy\n\
        \\n\
        \|> add_trfuns\n"
        ^ trfun_args ^ "\n\
        \\n"
        ^ extxt ^ "\n\
        \\n\
        \|> add_thyname " ^ quote thy_name ^ ";\n\
        \\n\
        \val _ = store_theory (thy, " ^ quote thy_name ^ ");\n\
        \\n\
        \\n"
        ^ postxt ^ "\n\
        \\n\
        \end;\n\
        \end;\n"
      (*without extension: a structure that merely stores the theory*)
    | None =>
        "val thy = " ^ old_thys ^ " false;\n\
        \\n\
        \structure " ^ thy_name ^ " =\n\
        \struct\n\
        \\n\
        \val thy = thy\n\
        \\n\
        \val _ = store_theory (thy, " ^ quote thy_name ^ ");\n\
        \\n\
        \end;\n");
   475 
   476 fun theory_defn sectab tname =
   477   header -- optional (extension sectab >> Some) None -- eof
   478   >> (mk_structure tname o #1);
   479 
   480 fun parse_thy (lex, sectab) tname str =
   481   #1 (!! (theory_defn sectab tname) (tokenize lex str));
   482 
   483 
   484 (* standard sections *)
   485 
   486 fun mk_val ax = "val " ^ ax ^ " = get_axiom thy " ^ quote ax ^ ";";
   487 val mk_vals = cat_lines o map mk_val;
   488 
   489 fun mk_axm_sect "" (txt, axs) = (txt, mk_vals axs)
   490   | mk_axm_sect pretxt (txt, axs) = (pretxt ^ "\n" ^ txt, mk_vals axs);
   491 
   492 fun axm_section name pretxt parse =
   493   (name, parse >> mk_axm_sect pretxt);
   494 
   495 fun section name pretxt parse =
   496   axm_section name pretxt (parse >> rpair []);
   497 
   498 
(*Keywords besides the section names, which make_syntax adds to the lexicon
  on its own.*)
val pure_keywords =
 ["end", "ML", "mixfix", "infixr", "infixl", "binder", "=", "+", ",", "<",
  "{", "}", "(", ")", "[", "]", "::", "==", "=>", "<="];

(*The sections defined in this file: section name, text put in front of the
  parsed section (none if empty) and the parser for the section body.  The
  entries built with axm_section also yield names that become value
  declarations.*)
val pure_sections =
 [section "classes" "|> add_classes" class_decls,
  section "default" "|> add_defsort" sort,
  section "types" "" type_decls,
  section "arities" "|> add_arities" arity_decls,
  section "consts" "|> add_consts" const_decls,
  section "syntax" "|> add_syntax" const_decls,
  section "translations" "|> add_trrules" trans_decls,
  section "MLtrans" "|> add_trfuns" mltrans,
  section "MLtext" "" verbatim,
  axm_section "rules" "|> add_axioms" axiom_decls,
  axm_section "defs" "|> add_defs" axiom_decls,
  axm_section "axclass" "|> AxClass.add_axclass" axclass_decl,
  section "instance" "" instance_decl];
   517 
   518 
   519 end;