src/Pure/Syntax/parser.ML
changeset 81001 0c6a600c8939
parent 81000 fc36180a68e9
child 81002 1f5462055655
equal deleted inserted replaced
81000:fc36180a68e9 81001:0c6a600c8939
    11   val empty_gram: gram
    11   val empty_gram: gram
    12   val make_gram: Syntax_Ext.xprod list -> Syntax_Ext.xprod list -> gram option -> gram
    12   val make_gram: Syntax_Ext.xprod list -> Syntax_Ext.xprod list -> gram option -> gram
    13   val pretty_gram: gram -> Pretty.T list
    13   val pretty_gram: gram -> Pretty.T list
    14   datatype tree =
    14   datatype tree =
    15     Markup of Markup.T list * tree list |
    15     Markup of Markup.T list * tree list |
    16     Node of string * tree list |
    16     Node of {nonterm: string, const: string} * tree list |
    17     Tip of Lexicon.token
    17     Tip of Lexicon.token
    18   val pretty_tree: tree -> Pretty.T list
    18   val pretty_tree: tree -> Pretty.T list
    19   val parse: gram -> string -> Lexicon.token list -> tree list
    19   val parse: gram -> string -> Lexicon.token list -> tree list
    20 end;
    20 end;
    21 
    21 
   511 
   511 
   512 (* parse tree *)
   512 (* parse tree *)
   513 
   513 
   514 datatype tree =
   514 datatype tree =
   515   Markup of Markup.T list * tree list |
   515   Markup of Markup.T list * tree list |
   516   Node of string * tree list |
   516   Node of {nonterm: string, const: string} * tree list |
   517   Tip of Lexicon.token;
   517   Tip of Lexicon.token;
   518 
   518 
   519 datatype tree_op =
   519 datatype tree_op =
   520   Markup_Push |
   520   Markup_Push |
   521   Markup_Pop of Markup.T list |
   521   Markup_Pop of Markup.T list |
   522   Node_Op of string * tree_op list |
   522   Node_Op of {nonterm: int, const: string} * tree_op list |
   523   Tip_Op of Lexicon.token;
   523   Tip_Op of Lexicon.token;
   524 
   524 
   525 local
   525 local
   526   fun drop_markup (Markup_Push :: ts) = drop_markup ts
   526   fun drop_markup (Markup_Push :: ts) = drop_markup ts
   527     | drop_markup (Markup_Pop _ :: ts) = drop_markup ts
   527     | drop_markup (Markup_Pop _ :: ts) = drop_markup ts
   529 in
   529 in
   530   fun tree_ops_ord arg =
   530   fun tree_ops_ord arg =
   531     if pointer_eq arg then EQUAL
   531     if pointer_eq arg then EQUAL
   532     else
   532     else
   533       (case apply2 drop_markup arg of
   533       (case apply2 drop_markup arg of
   534         (Node_Op (s, ts) :: rest, Node_Op (s', ts') :: rest') =>
   534         (Node_Op ({const = s, ...}, ts) :: rest, Node_Op ({const = s', ...}, ts') :: rest') =>
   535           (case fast_string_ord (s, s') of
   535           (case fast_string_ord (s, s') of
   536             EQUAL =>
   536             EQUAL =>
   537               (case tree_ops_ord (ts, ts') of
   537               (case tree_ops_ord (ts, ts') of
   538                 EQUAL => tree_ops_ord (rest, rest')
   538                 EQUAL => tree_ops_ord (rest, rest')
   539               | ord => ord)
   539               | ord => ord)
   548       | ([], _ :: _) => LESS
   548       | ([], _ :: _) => LESS
   549       | (_ :: _, []) => GREATER
   549       | (_ :: _, []) => GREATER
   550       | _ => raise Match);
   550       | _ => raise Match);
   551 end;
   551 end;
   552 
   552 
   553 fun make_tree tree_ops =
   553 fun make_tree names tree_ops =
   554   let
   554   let
   555     fun top [] = []
   555     fun top [] = []
   556       | top (xs :: _) = xs;
   556       | top (xs :: _) = xs;
   557 
   557 
   558     fun pop [] = []
   558     fun pop [] = []
   559       | pop (_ :: xs) = xs;
   559       | pop (_ :: xs) = xs;
       
   560 
       
   561     fun make_name {nonterm = nt, const} =
       
   562       {nonterm = the_name names nt, const = const};
   560 
   563 
   561     fun make body stack ops =
   564     fun make body stack ops =
   562       (case ops of
   565       (case ops of
   563         Markup_Push :: rest => make [] (body :: stack) rest
   566         Markup_Push :: rest => make [] (body :: stack) rest
   564       | Markup_Pop markup :: rest => make (Markup (markup, body) :: top stack) (pop stack) rest
   567       | Markup_Pop markup :: rest => make (Markup (markup, body) :: top stack) (pop stack) rest
   565       | Node_Op (id, ts) :: rest => make (Node (id, make [] [] ts) :: body) stack rest
   568       | Node_Op (name, ts) :: rest => make (Node (make_name name, make [] [] ts) :: body) stack rest
   566       | Tip_Op tok :: rest => make (Tip tok :: body) stack rest
   569       | Tip_Op tok :: rest => make (Tip tok :: body) stack rest
   567       | [] => if null stack then body else raise Fail "Pending markup blocks");
   570       | [] => if null stack then body else raise Fail "Pending markup blocks");
   568  in (case make [] [] tree_ops of [t] => SOME t | _ => NONE) end;
   571  in (case make [] [] tree_ops of [t] => SOME t | _ => NONE) end;
   569 
   572 
   570 fun pretty_tree (Markup (_, ts)) = maps pretty_tree ts
   573 fun pretty_tree (Markup (_, ts)) = maps pretty_tree ts
   571   | pretty_tree (Node (c, ts)) =
   574   | pretty_tree (Node ({const = c, ...}, ts)) =
   572       [Pretty.enclose "(" ")" (Pretty.breaks (Pretty.quote (Pretty.str c) :: maps pretty_tree ts))]
   575       [Pretty.enclose "(" ")" (Pretty.breaks (Pretty.quote (Pretty.str c) :: maps pretty_tree ts))]
   573   | pretty_tree (Tip tok) =
   576   | pretty_tree (Tip tok) =
   574       if Lexicon.valued_token tok then [Pretty.str (Lexicon.str_of_token tok)] else [];
   577       if Lexicon.valued_token tok then [Pretty.str (Lexicon.str_of_token tok)] else [];
   575 
   578 
   576 
   579 
   591 fun append_output (Output (m, ss)) (Output (n, ts)) = Output (m + n, ss @ ts);
   594 fun append_output (Output (m, ss)) (Output (n, ts)) = Output (m + n, ss @ ts);
   592 
   595 
   593 val output_ord = pointer_eq_ord (fn (Output (m, ss), Output (n, tt)) =>
   596 val output_ord = pointer_eq_ord (fn (Output (m, ss), Output (n, tt)) =>
   594   (case int_ord (m, n) of EQUAL => tree_ops_ord (ss, tt) | ord => ord));
   597   (case int_ord (m, n) of EQUAL => tree_ops_ord (ss, tt) | ord => ord));
   595 
   598 
   596 fun output_tree (Output (_, ts)) = make_tree ts;
   599 fun output_tree names (Output (_, ts)) = make_tree names ts;
   597 
   600 
   598 end;
   601 end;
   599 
   602 
   600 structure Output = Table(type key = output val ord = output_ord);
   603 structure Output = Table(type key = output val ord = output_ord);
   601 
   604 
   691           | Bg markup :: sa => process used ((info, sa, pop_output markup out) :: states) (Si, Sii)
   694           | Bg markup :: sa => process used ((info, sa, pop_output markup out) :: states) (Si, Sii)
   692           | En :: sa => process used ((info, sa, push_output out) :: states) (Si, Sii)
   695           | En :: sa => process used ((info, sa, push_output out) :: states) (Si, Sii)
   693           | [] => (*completer operation*)
   696           | [] => (*completer operation*)
   694               let
   697               let
   695                 val (A, p, id, j) = info;
   698                 val (A, p, id, j) = info;
   696                 val out' = if id = "" then out else node_output id out;
   699                 val out' = node_output {nonterm = A, const = id} out;
   697                 val (used', Slist) =
   700                 val (used', Slist) =
   698                   if j = i then (*lambda production?*)
   701                   if j = i then (*lambda production?*)
   699                     let val (q, used') = update_output (A, (out', p)) used
   702                     let val (q, used') = update_output (A, (out', p)) used
   700                     in (used', get_states A q p Si) end
   703                     in (used', get_states A q p Si) end
   701                   else (used, get_states A no_prec p (get j));
   704                   else (used, get_states A no_prec p (get j));
   730         [] => states
   733         [] => states
   731       | c :: cs => (process_states gram stateset i c states; produce gram stateset (i + 1) c cs)));
   734       | c :: cs => (process_states gram stateset i c states; produce gram stateset (i + 1) c cs)));
   732 
   735 
   733 in
   736 in
   734 
   737 
   735 fun parse (gram as Gram {tags, ...}) start toks =
   738 fun parse (gram as Gram {tags, names, ...}) start toks =
   736   let
   739   let
   737     val start_tag =
   740     val start_tag =
   738       (case tags_lookup tags start of
   741       (case tags_lookup tags start of
   739         SOME tag => tag
   742         SOME tag => tag
   740       | NONE => error ("Inner syntax: bad grammar root symbol " ^ quote start));
   743       | NONE => error ("Inner syntax: bad grammar root symbol " ^ quote start));
   750     val stateset = Array.array (length input + 1, []);
   753     val stateset = Array.array (length input + 1, []);
   751     val _ = Array.upd stateset 0 [S0];
   754     val _ = Array.upd stateset 0 [S0];
   752 
   755 
   753     val result =
   756     val result =
   754       produce gram stateset 0 Lexicon.dummy input
   757       produce gram stateset 0 Lexicon.dummy input
   755       |> map_filter (output_tree o #3);
   758       |> map_filter (output_tree names o #3);
   756   in if null result then raise Fail "Inner syntax: no parse trees" else result end;
   759   in if null result then raise Fail "Inner syntax: no parse trees" else result end;
   757 
   760 
   758 end;
   761 end;
   759 
   762 
   760 end;
   763 end;