src/Pure/Syntax/parser.ML
changeset 624 33b9b5da3e6f
parent 552 fc92fac8b0de
child 682 c36f49c76d22
equal deleted inserted replaced
623:ca9f5dbab880 624:33b9b5da3e6f
    10   structure Lexicon: LEXICON
    10   structure Lexicon: LEXICON
    11   structure SynExt: SYN_EXT
    11   structure SynExt: SYN_EXT
    12   local open Lexicon SynExt SynExt.Ast in
    12   local open Lexicon SynExt SynExt.Ast in
    13     type gram
    13     type gram
    14     val empty_gram: gram
    14     val empty_gram: gram
    15     val extend_gram: gram -> xprod list -> gram
    15     val extend_gram: gram -> string list -> xprod list -> gram
    16     val merge_grams: gram -> gram -> gram
    16     val merge_grams: gram -> gram -> gram
    17     val pretty_gram: gram -> Pretty.T list
    17     val pretty_gram: gram -> Pretty.T list
    18     datatype parsetree =
    18     datatype parsetree =
    19       Node of string * parsetree list |
    19       Node of string * parsetree list |
    20       Tip of token
    20       Tip of token
   255 
   255 
   256 (* empty, extend, merge grams *)
   256 (* empty, extend, merge grams *)
   257 
   257 
   258 val empty_gram = mk_gram [];
   258 val empty_gram = mk_gram [];
   259 
   259 
   260 fun extend_gram (gram1 as Gram (prods1, _)) xprods2 =
   260 fun extend_gram (gram1 as Gram (prods1, _)) roots xprods2 =
   261   let
   261   let
   262     fun symb_of (Delim s) = Some (Terminal (Token s))
   262     fun simplify preserve s = 
   263       | symb_of (Argument (s, p)) =
   263       if preserve then 
       
   264         (if s mem (roots \\ ["type", "prop", "any"]) then "logic" else s)
       
   265       else (if s = "prop" then "@prop_H" else
       
   266               (if s mem (roots \\ ["type", "prop", "any"]) then 
       
   267                  "@logic_H" 
       
   268                else s));
       
   269 
       
   270     fun not_delim (Delim _) = false
       
   271       | not_delim _ = true
       
   272 
       
   273     fun symb_of _ (Delim s) = Some (Terminal (Token s))
       
   274       | symb_of preserve (Argument (s, p)) =
   264           (case predef_term s of
   275           (case predef_term s of
   265             None => Some (Nonterminal (s, p))
   276             None => Some (Nonterminal (simplify preserve s, p))
   266           | Some tk => Some (Terminal tk))
   277           | Some tk => Some (Terminal tk))
   267       | symb_of _ = None;
   278       | symb_of _ _ = None;
   268 
   279 
   269     fun prod_of (XProd (lhs, xsymbs, const, pri)) =
   280     fun prod_of (XProd (lhs, xsymbs, const, pri)) =
   270       (lhs, (mapfilter symb_of xsymbs, const, pri));
   281       let val copy_prod = (lhs = "prop" andalso forall not_delim xsymbs andalso
       
   282                            const <> "");
       
   283 
       
   284           val preserve = copy_prod 
       
   285                          orelse pri = chain_pri andalso lhs = "logic"
       
   286                          orelse lhs mem ["@prop_H", "@logic_H", "any"];
       
   287 
       
   288           val lhs' = if copy_prod then "@prop_H" else
       
   289                      if lhs = "logic" andalso pri = chain_pri
       
   290                         then "@logic_H"
       
   291                      else if lhs mem ("logic1" :: (roots \\ ["type", "prop"])) 
       
   292                         then "logic"
       
   293                      else lhs;
       
   294       in (lhs', (mapfilter (symb_of preserve) xsymbs, const, pri))
       
   295       end;
   271 
   296 
   272     val prods2 = distinct (map prod_of xprods2);
   297     val prods2 = distinct (map prod_of xprods2);
   273   in
   298   in
   274     if prods2 subset prods1 then gram1
   299     if prods2 subset prods1 then gram1
   275     else (writeln "Building new grammar...";
   300     else (writeln "Building new grammar...";
   551 val get_trees = mapfilter (fn (_, _, [pt], _, _, _) => Some pt | _ => None);
   576 val get_trees = mapfilter (fn (_, _, [pt], _, _, _) => Some pt | _ => None);
   552 
   577 
   553 
   578 
   554 fun earley grammar startsymbol indata =
   579 fun earley grammar startsymbol indata =
   555   let
   580   let
       
   581     val startsymbol' = case startsymbol of
       
   582                            "logic" => "@logic_H"
       
   583                          | "prop"  => "@prop_H"
       
   584                          | other   => other;
   556     val rhss_ref = case assoc (grammar, startsymbol) of
   585     val rhss_ref = case assoc (grammar, startsymbol) of
   557                        Some r => r
   586                        Some r => r
   558                      | None => error ("parse: Unknown startsymbol " ^ 
   587                      | None   => error ("parse: Unknown startsymbol " ^ 
   559                                       quote startsymbol);
   588                                         quote startsymbol);
   560     val S0 = [(ref [], 0, [], [Nonterm (rhss_ref, 0), Term EndToken], "", 0)];
   589     val S0 = [(ref [], 0, [], [Nonterm (rhss_ref, 0), Term EndToken], "", 0)];
   561     val s = length indata + 1;
   590     val s = length indata + 1;
   562     val Estate = Array.array (s, []);
   591     val Estate = Array.array (s, []);
   563   in
   592   in
   564     Array.update (Estate, 0, S0);
   593     Array.update (Estate, 0, S0);
   565     let
   594     let
   566       val l = produce Estate 0 indata EndToken(*dummy*);
   595       val l = produce Estate 0 indata EndToken(*dummy*);
       
   596 
   567       val p_trees = get_trees l;
   597       val p_trees = get_trees l;
   568     in p_trees end
   598     in p_trees end
   569   end;
   599   end;
   570 
   600 
   571 
   601 
   572 fun parse (Gram (_, prod_tab)) start toks =
   602 fun parse (Gram (_, prod_tab)) start toks =
       
   603 let val r =
   573   (case earley prod_tab start toks of
   604   (case earley prod_tab start toks of
   574     [] => sys_error "parse: no parse trees"
   605     [] => sys_error "parse: no parse trees"
   575   | pts => pts);
   606   | pts => pts);
       
   607 in r end
   576 
   608 
   577 end;
   609 end;
   578