11 val empty_gram: gram |
11 val empty_gram: gram |
12 val make_gram: Syntax_Ext.xprod list -> Syntax_Ext.xprod list -> gram option -> gram |
12 val make_gram: Syntax_Ext.xprod list -> Syntax_Ext.xprod list -> gram option -> gram |
13 val pretty_gram: gram -> Pretty.T list |
13 val pretty_gram: gram -> Pretty.T list |
14 datatype tree = |
14 datatype tree = |
15 Markup of Markup.T list * tree list | |
15 Markup of Markup.T list * tree list | |
16 Node of string * tree list | |
16 Node of {nonterm: string, const: string} * tree list | |
17 Tip of Lexicon.token |
17 Tip of Lexicon.token |
18 val pretty_tree: tree -> Pretty.T list |
18 val pretty_tree: tree -> Pretty.T list |
19 val parse: gram -> string -> Lexicon.token list -> tree list |
19 val parse: gram -> string -> Lexicon.token list -> tree list |
20 end; |
20 end; |
21 |
21 |
511 |
511 |
512 (* parse tree *) |
512 (* parse tree *) |
513 |
513 |
514 datatype tree = |
514 datatype tree = |
515 Markup of Markup.T list * tree list | |
515 Markup of Markup.T list * tree list | |
516 Node of string * tree list | |
516 Node of {nonterm: string, const: string} * tree list | |
517 Tip of Lexicon.token; |
517 Tip of Lexicon.token; |
518 |
518 |
519 datatype tree_op = |
519 datatype tree_op = |
520 Markup_Push | |
520 Markup_Push | |
521 Markup_Pop of Markup.T list | |
521 Markup_Pop of Markup.T list | |
522 Node_Op of string * tree_op list | |
522 Node_Op of {nonterm: int, const: string} * tree_op list | |
523 Tip_Op of Lexicon.token; |
523 Tip_Op of Lexicon.token; |
524 |
524 |
525 local |
525 local |
526 fun drop_markup (Markup_Push :: ts) = drop_markup ts |
526 fun drop_markup (Markup_Push :: ts) = drop_markup ts |
527 | drop_markup (Markup_Pop _ :: ts) = drop_markup ts |
527 | drop_markup (Markup_Pop _ :: ts) = drop_markup ts |
529 in |
529 in |
530 fun tree_ops_ord arg = |
530 fun tree_ops_ord arg = |
531 if pointer_eq arg then EQUAL |
531 if pointer_eq arg then EQUAL |
532 else |
532 else |
533 (case apply2 drop_markup arg of |
533 (case apply2 drop_markup arg of |
534 (Node_Op (s, ts) :: rest, Node_Op (s', ts') :: rest') => |
534 (Node_Op ({const = s, ...}, ts) :: rest, Node_Op ({const = s', ...}, ts') :: rest') => |
535 (case fast_string_ord (s, s') of |
535 (case fast_string_ord (s, s') of |
536 EQUAL => |
536 EQUAL => |
537 (case tree_ops_ord (ts, ts') of |
537 (case tree_ops_ord (ts, ts') of |
538 EQUAL => tree_ops_ord (rest, rest') |
538 EQUAL => tree_ops_ord (rest, rest') |
539 | ord => ord) |
539 | ord => ord) |
548 | ([], _ :: _) => LESS |
548 | ([], _ :: _) => LESS |
549 | (_ :: _, []) => GREATER |
549 | (_ :: _, []) => GREATER |
550 | _ => raise Match); |
550 | _ => raise Match); |
551 end; |
551 end; |
552 |
552 |
553 fun make_tree tree_ops = |
553 fun make_tree names tree_ops = |
554 let |
554 let |
555 fun top [] = [] |
555 fun top [] = [] |
556 | top (xs :: _) = xs; |
556 | top (xs :: _) = xs; |
557 |
557 |
558 fun pop [] = [] |
558 fun pop [] = [] |
559 | pop (_ :: xs) = xs; |
559 | pop (_ :: xs) = xs; |
|
560 |
|
561 fun make_name {nonterm = nt, const} = |
|
562 {nonterm = the_name names nt, const = const}; |
560 |
563 |
561 fun make body stack ops = |
564 fun make body stack ops = |
562 (case ops of |
565 (case ops of |
563 Markup_Push :: rest => make [] (body :: stack) rest |
566 Markup_Push :: rest => make [] (body :: stack) rest |
564 | Markup_Pop markup :: rest => make (Markup (markup, body) :: top stack) (pop stack) rest |
567 | Markup_Pop markup :: rest => make (Markup (markup, body) :: top stack) (pop stack) rest |
565 | Node_Op (id, ts) :: rest => make (Node (id, make [] [] ts) :: body) stack rest |
568 | Node_Op (name, ts) :: rest => make (Node (make_name name, make [] [] ts) :: body) stack rest |
566 | Tip_Op tok :: rest => make (Tip tok :: body) stack rest |
569 | Tip_Op tok :: rest => make (Tip tok :: body) stack rest |
567 | [] => if null stack then body else raise Fail "Pending markup blocks"); |
570 | [] => if null stack then body else raise Fail "Pending markup blocks"); |
568 in (case make [] [] tree_ops of [t] => SOME t | _ => NONE) end; |
571 in (case make [] [] tree_ops of [t] => SOME t | _ => NONE) end; |
569 |
572 |
570 fun pretty_tree (Markup (_, ts)) = maps pretty_tree ts |
573 fun pretty_tree (Markup (_, ts)) = maps pretty_tree ts |
571 | pretty_tree (Node (c, ts)) = |
574 | pretty_tree (Node ({const = c, ...}, ts)) = |
572 [Pretty.enclose "(" ")" (Pretty.breaks (Pretty.quote (Pretty.str c) :: maps pretty_tree ts))] |
575 [Pretty.enclose "(" ")" (Pretty.breaks (Pretty.quote (Pretty.str c) :: maps pretty_tree ts))] |
573 | pretty_tree (Tip tok) = |
576 | pretty_tree (Tip tok) = |
574 if Lexicon.valued_token tok then [Pretty.str (Lexicon.str_of_token tok)] else []; |
577 if Lexicon.valued_token tok then [Pretty.str (Lexicon.str_of_token tok)] else []; |
575 |
578 |
576 |
579 |
591 fun append_output (Output (m, ss)) (Output (n, ts)) = Output (m + n, ss @ ts); |
594 fun append_output (Output (m, ss)) (Output (n, ts)) = Output (m + n, ss @ ts); |
592 |
595 |
593 val output_ord = pointer_eq_ord (fn (Output (m, ss), Output (n, tt)) => |
596 val output_ord = pointer_eq_ord (fn (Output (m, ss), Output (n, tt)) => |
594 (case int_ord (m, n) of EQUAL => tree_ops_ord (ss, tt) | ord => ord)); |
597 (case int_ord (m, n) of EQUAL => tree_ops_ord (ss, tt) | ord => ord)); |
595 |
598 |
596 fun output_tree (Output (_, ts)) = make_tree ts; |
599 fun output_tree names (Output (_, ts)) = make_tree names ts; |
597 |
600 |
598 end; |
601 end; |
599 |
602 |
600 structure Output = Table(type key = output val ord = output_ord); |
603 structure Output = Table(type key = output val ord = output_ord); |
601 |
604 |
691 | Bg markup :: sa => process used ((info, sa, pop_output markup out) :: states) (Si, Sii) |
694 | Bg markup :: sa => process used ((info, sa, pop_output markup out) :: states) (Si, Sii) |
692 | En :: sa => process used ((info, sa, push_output out) :: states) (Si, Sii) |
695 | En :: sa => process used ((info, sa, push_output out) :: states) (Si, Sii) |
693 | [] => (*completer operation*) |
696 | [] => (*completer operation*) |
694 let |
697 let |
695 val (A, p, id, j) = info; |
698 val (A, p, id, j) = info; |
696 val out' = if id = "" then out else node_output id out; |
699 val out' = node_output {nonterm = A, const = id} out; |
697 val (used', Slist) = |
700 val (used', Slist) = |
698 if j = i then (*lambda production?*) |
701 if j = i then (*lambda production?*) |
699 let val (q, used') = update_output (A, (out', p)) used |
702 let val (q, used') = update_output (A, (out', p)) used |
700 in (used', get_states A q p Si) end |
703 in (used', get_states A q p Si) end |
701 else (used, get_states A no_prec p (get j)); |
704 else (used, get_states A no_prec p (get j)); |
730 [] => states |
733 [] => states |
731 | c :: cs => (process_states gram stateset i c states; produce gram stateset (i + 1) c cs))); |
734 | c :: cs => (process_states gram stateset i c states; produce gram stateset (i + 1) c cs))); |
732 |
735 |
733 in |
736 in |
734 |
737 |
735 fun parse (gram as Gram {tags, ...}) start toks = |
738 fun parse (gram as Gram {tags, names, ...}) start toks = |
736 let |
739 let |
737 val start_tag = |
740 val start_tag = |
738 (case tags_lookup tags start of |
741 (case tags_lookup tags start of |
739 SOME tag => tag |
742 SOME tag => tag |
740 | NONE => error ("Inner syntax: bad grammar root symbol " ^ quote start)); |
743 | NONE => error ("Inner syntax: bad grammar root symbol " ^ quote start)); |
750 val stateset = Array.array (length input + 1, []); |
753 val stateset = Array.array (length input + 1, []); |
751 val _ = Array.upd stateset 0 [S0]; |
754 val _ = Array.upd stateset 0 [S0]; |
752 |
755 |
753 val result = |
756 val result = |
754 produce gram stateset 0 Lexicon.dummy input |
757 produce gram stateset 0 Lexicon.dummy input |
755 |> map_filter (output_tree o #3); |
758 |> map_filter (output_tree names o #3); |
756 in if null result then raise Fail "Inner syntax: no parse trees" else result end; |
759 in if null result then raise Fail "Inner syntax: no parse trees" else result end; |
757 |
760 |
758 end; |
761 end; |
759 |
762 |
760 end; |
763 end; |