MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
now much leaner (eliminated gramgraph, all data except tables of old
parser are shared); simplified the internal interfaces for syntax
extension;
--- a/src/Pure/Syntax/earley0A.ML Wed Jan 19 14:15:01 1994 +0100
+++ b/src/Pure/Syntax/earley0A.ML Wed Jan 19 14:21:26 1994 +0100
@@ -7,34 +7,39 @@
signature PARSER =
sig
- structure XGram: XGRAM
- structure ParseTree: PARSE_TREE
- local open XGram ParseTree ParseTree.Lexicon in
+ structure Lexicon: LEXICON
+ structure SynExt: SYN_EXT
+ local open Lexicon SynExt SynExt.Ast in
type gram
- val mk_gram: string list -> string prod list -> gram
+ val empty_gram: gram
+ val extend_gram: gram -> string list -> xprod list -> gram
+ val merge_grams: gram -> gram -> gram
+ val pretty_gram: gram -> Pretty.T list
+ datatype parsetree =
+ Node of string * parsetree list |
+ Tip of token
val parse: gram -> string -> token list -> parsetree
end
-
-(* exception SYNTAX_ERROR of token list *)
-(* val compile_xgram: string list * token prod list -> Gram *)
-(* val parsable: Gram * string -> bool *)
-(* val print_gram: Gram * lexicon -> unit *)
end;
-functor EarleyFun(structure Symtab: SYMTAB and XGram: XGRAM
- and ParseTree: PARSE_TREE): PARSER =
+functor EarleyFun(structure Symtab: SYMTAB and Lexicon: LEXICON
+ and SynExt: SYN_EXT)(*: PARSER *) = (* FIXME *)
struct
-structure ParseTree = ParseTree;
-structure Lexicon = ParseTree.Lexicon;
-structure XGram = XGram;
-open ParseTree Lexicon;
+structure Pretty = SynExt.Ast.Pretty;
+structure Lexicon = Lexicon;
+structure SynExt = SynExt;
+open Lexicon;
-(** mk_pt (from parse_tree.ML) **)
+(** datatype parsetree **)
+
+datatype parsetree =
+ Node of string * parsetree list |
+ Tip of token;
fun mk_pt ("", [pt]) = pt
- | mk_pt ("", _) = error "mk_pt: funny copy op in parse tree"
+ | mk_pt ("", _) = sys_error "mk_pt: funny copy op in parse tree"
| mk_pt (s, ptl) = Node (s, ptl);
@@ -88,7 +93,7 @@
val len = length;
-local open Array XGram ParseTree ParseTree.Lexicon in
+local open Array SynExt in
nonfix sub;
fun forA(p: int -> unit, a: 'a array) : unit =
@@ -101,10 +106,16 @@
fun acc(a,i) = if i=ub then a else acc(f(a,sub(b,i)),i+1)
in acc(a0,0) end;
-(*
-gram: name of nt -> number, nt number -> productions array,
- nt number -> list of nt's reachable via copy ops
-*)
+
+
+(** grammars **)
+
+datatype 'a symb =
+ Dlm of 'a |
+ Arg of string * int;
+
+datatype 'a prod = Prod of string * 'a symb list * string * int;
+
datatype Symbol = T of token | NT of int * int
and Op = Op of OpSyn * string * int
@@ -112,56 +123,68 @@
and OpListA = Op array * int TokenMap
and FastAcc = int TokenMap;
-type gram = int Symtab.table * OpListA array * int list array;
+(*gram_tabs: name of nt -> number, nt number -> productions array,
+ nt number -> list of nt's reachable via copy ops*)
-fun non_term(Nonterminal(s,_)) = if predef_term(s)=EndToken then [s] else []
- | non_term(_) = [];
+type gram_tabs = int Symtab.table * OpListA array * int list array;
+
+type gram = string list * string prod list * gram_tabs;
+
-fun non_terms(Prod(_,symbs,_,_)) = flat(map non_term symbs);
+fun non_term (Arg (s, _)) = if is_terminal s then None else Some s
+ | non_term _ = None;
+
+fun non_terms (Prod (_, symbs, _, _)) = mapfilter non_term symbs;
-(* mk_pre_grammar converts a list of productions in external format into an
-internal gram object. *)
+
+(* mk_pre_grammar *)
+(* converts a list of productions in external format into an
+ internal gram object. *)
+
val dummyTM:FastAcc = mkTokenMap[];
-fun mk_pre_grammar(prods): gram =
-let fun same_res(Prod(t1,_,_,_), Prod(t2,_,_,_)) = t1=t2;
+fun mk_pre_grammar prods : gram_tabs =
+ let
+ fun same_res (Prod(t1, _, _, _), Prod(t2, _, _, _)) = t1=t2;
val partitioned0 = partition_eq same_res prods;
- val nts0 = map (fn Prod(ty,_,_,_)::_ => ty) partitioned0;
+ val nts0 = map (fn Prod(ty, _, _, _)::_ => ty) partitioned0;
val nts' = distinct(flat(map non_terms prods)) \\ nts0;
val nts = nts' @ nts0;
val partitioned = (replicate (len nts') []) @ partitioned0;
val ntis = nts ~~ (0 upto (len(nts)-1));
- val tab = foldr Symtab.update (ntis,Symtab.null);
+ val tab = foldr Symtab.update (ntis, Symtab.null);
- fun nt_or_vt(s,p) =
- (case predef_term(s) of
- EndToken => let val Some(i) = Symtab.lookup(tab,s) in NT(i,p) end
- | tk => T(tk));
+ fun nt_or_vt (s, p) =
+ (case predef_term s of
+ None => NT (the (Symtab.lookup (tab, s)), p)
+ | Some tk => T tk);
- fun mksyn(Terminal(t)) = [T(t)]
- | mksyn(Nonterminal(t)) = [nt_or_vt t]
- | mksyn(_) = [];
+ fun mksyn(Dlm(t)) = T(t)
+ | mksyn(Arg(t)) = nt_or_vt t;
- fun prod2op(Prod(nt,sy,opn,p)) =
- let val syA = arrayoflist(flat(map mksyn sy)) in Op(syA,opn,p) end;
+ fun prod2op(Prod(nt, sy, opn, p)) =
+ let val syA = arrayoflist(map mksyn sy) in Op(syA, opn, p) end;
- fun mkops prods = (arrayoflist(map prod2op prods),dummyTM);
+ fun mkops prods = (arrayoflist(map prod2op prods), dummyTM);
val opLA = arrayoflist(map mkops partitioned);
- val subs = array(length opLA,[]) : int list array;
- fun newcp v (a,Op(syA,_,p)) =
+ val subs = array(length opLA, []) : int list array;
+ fun newcp v (a, Op(syA, _, p)) =
if p=chain_pri
- then let val NT(k,_) = sub(syA,0)
+ then let val NT(k, _) = sub(syA, 0)
in if k mem v then a else k ins a end
else a;
fun reach v i =
- let val new = itA ([],#1(sub(opLA,i))) (newcp v)
+ let val new = itA ([], #1(sub(opLA, i))) (newcp v)
val v' = new union v
in flat(map (reach v') new) union v' end;
- fun rch(i) = update(subs,i,reach[i]i);
+ fun rch(i) = update(subs, i, reach[i]i);
-in forA(rch,subs); (tab,opLA,subs) end;
+ in
+ forA(rch, subs); (tab, opLA, subs)
+ end;
+
val RootPref = "__";
@@ -170,7 +193,7 @@
type Lkhd = token list list list;
-(* subst_syn(s,k) syn = [ pref k ts | ts is a token string derivable from sy
+(* subst_syn(s, k) syn = [ pref k ts | ts is a token string derivable from sy
under substitution s ] *)
(* This is the general version.
fun cross l1 l2 = flat(map (fn e2 => (map (fn e1 => e1@e2) l1)) l2);
@@ -212,46 +235,107 @@
in writeln"Computing lookahead tables ...";
iterate (replicate (length opLA) []) end;
-(* create look ahead tables *)
-fun mk_earley_gram(g as (tab,opLA,_):gram):gram =
- let val lkhd = mk_lkhd(opLA,1);
+
+(* mk_earley_gram *) (* create look ahead tables *)
+
+fun mk_earley_gram (g as (tab, opLA, _):gram_tabs):gram_tabs =
+ let val lkhd = mk_lkhd(opLA, 1);
fun mk_fa(i):FastAcc =
- let val opA = #1(sub(opLA,i));
- fun start(j) = let val Op(sy,_,_) = sub(opA,j);
- val pre = subst_syn(lkhd,1) sy
- in (j,if [] mem pre then [] else map hd pre) end;
+ let val opA = #1(sub(opLA, i));
+ fun start(j) = let val Op(sy, _, _) = sub(opA, j);
+ val pre = subst_syn(lkhd, 1) sy
+ in (j, if [] mem pre then [] else map hd pre) end;
in mkTokenMap(map start (0 upto(length(opA)-1))) end;
- fun updt(i) = update(opLA,i,(#1(sub(opLA,i)),mk_fa(i)));
+ fun updt(i) = update(opLA, i, (#1(sub(opLA, i)), mk_fa(i)));
- in forA(updt,opLA); g end;
+ in forA(updt, opLA); g end;
+
+
+(* compile_xgram *)
-fun compile_xgram(roots,prods) =
- let fun mk_root nt = Prod(RootPref^nt,
- [Nonterminal(nt,0),Terminal(EndToken)],"",0);
- val prods' = (map mk_root roots) @ prods
- in mk_earley_gram(mk_pre_grammar(prods')) end;
+fun compile_xgram (roots, prods) =
+ let
+ fun mk_root nt =
+ Prod (RootPref ^ nt, [Arg (nt, 0), Dlm EndToken], "", 0);
+
+ val prods' = (map mk_root roots) @ prods;
+ in
+ mk_earley_gram (mk_pre_grammar prods')
+ end;
(* translate (from xgram.ML) *)
fun translate trfn =
- map
- (fn Terminal t => Terminal (trfn t)
- | Nonterminal s => Nonterminal s
- | Space s => Space s
- | Bg i => Bg i
- | Brk i => Brk i
- | En => En);
+ map (fn Dlm t => Dlm (trfn t) | Arg s => Arg s);
-(* mk_gram (from syntax.ML) *)
+(* mk_gram_tabs *)
fun str_to_tok (opl: string prod list): token prod list =
map
(fn Prod (t, syn, s, pa) => Prod (t, translate Token syn, s, pa))
opl;
-fun mk_gram roots prods = compile_xgram (roots, str_to_tok prods);
+fun mk_gram_tabs roots prods = compile_xgram (roots, str_to_tok prods);
+
+
+
+(** build gram **)
+
+fun mk_gram roots prods = (roots, prods, mk_gram_tabs roots prods);
+
+fun sub_gram (roots1, prods1, _) (roots2, prods2, _) =
+ roots1 subset roots2 andalso prods1 subset prods2;
+
+
+(* empty, extend, merge grams *)
+
+val empty_gram = mk_gram [] [];
+
+fun extend_gram (gram1 as (roots1, prods1, _)) roots2 xprods2 =
+ let
+ fun symb_of (Delim s) = Some (Dlm s)
+ | symb_of (Argument s_p) = Some (Arg s_p)
+ | symb_of _ = None;
+
+ fun prod_of (XProd (lhs, xsymbs, const, pri)) =
+ Prod (lhs, mapfilter symb_of xsymbs, const, pri);
+
+ val prods2 = distinct (map prod_of xprods2);
+ in
+ if roots2 subset roots1 andalso prods2 subset prods1 then gram1
+ else mk_gram (extend_list roots1 roots2) (extend_list prods1 prods2)
+ end;
+
+fun merge_grams (gram1 as (roots1, prods1, _)) (gram2 as (roots2, prods2, _)) =
+ if sub_gram gram2 gram1 then gram1
+ else if sub_gram gram1 gram2 then gram2
+ else mk_gram (merge_lists roots1 roots2) (merge_lists prods1 prods2);
+
+
+(* pretty_gram *)
+
+fun pretty_gram (_, prods, _) =
+ let
+ fun pretty_name name = [Pretty.str (name ^ " =")];
+
+ fun pretty_symb (Dlm s) = Pretty.str (quote s)
+ | pretty_symb (Arg (s, p)) =
+ if is_terminal s then Pretty.str s
+ else Pretty.str (s ^ "[" ^ string_of_int p ^ "]");
+
+ fun pretty_const "" = []
+ | pretty_const c = [Pretty.str ("=> " ^ quote c)];
+
+ fun pretty_pri p = [Pretty.str ("(" ^ string_of_int p ^ ")")];
+
+ fun pretty_prod (Prod (name, symbs, const, pri)) =
+ Pretty.block (Pretty.breaks (pretty_name name @
+ map pretty_symb symbs @ pretty_const const @ pretty_pri pri));
+ in
+ map pretty_prod prods
+ end;
@@ -324,85 +408,58 @@
in seq add (applyTokenMap(tm,tk)) end;
-(*(* FIXME *)
-fun parsable((tab,_,_):gram, root:string) =
- not(Symtab.lookup(tab,RootPref^root) = None);
-*)
-
-(* exception SYNTAX_ERROR of token list; *) (* FIXME *)
fun unknown c = error ("Unparsable category: " ^ c);
-fun syn_err toks = error ("Syntax error at: " ^ space_implode " " (map str_of_token toks));
-
-fun parse ((tab,opLA,rchA):gram) (root:string) (tl: token list): parsetree =
- let val tl' = tl; (* FIXME was ..@[EndToken] *)
- val state_sets = mt_states(len tl' +1);
- val s0 = mt_stateS();
- val rooti = case Symtab.lookup(tab,RootPref^root) of
- Some(ri) => ri | None => unknown root;
+fun syn_err toks =
+ error ("Syntax error at: " ^ space_implode " " (map str_of_token toks));
- fun lr (tl,isi,si,t) =
-(* if ismt_stateS(si) then raise SYNTAX_ERROR(t::tl) else *) (* FIXME *)
- if ismt_stateS(si) then syn_err (t::tl) else
- case tl of
- [] => () |
- t::tl =>
- let val si1 = mt_stateS();
- fun process(St(nti,pi,ip,from,ptl)) =
- let val opA = #1(sub(opLA,nti))
- val Op(syA,opn,p) = sub(opA,pi) in
- if ip = length(syA)
- then complete(nti,syA,opn,p,ptl,
- sub(state_sets,from),si,opLA,rchA)
- else case sub(syA,ip) of
- NT(ntj,p) =>
- seq (predict(t,isi,si,p,opLA)) (sub(rchA,ntj))
- | T(t') =>
- if matching_tokens(t,t')
- then add_state(nti,pi,ip+1,from,
- if valued_token(t)
- then ptl@[Tip(t)] else ptl,
- si1)
- else () end;
+fun parse ((_, _, (tab, opLA, rchA)):gram) (root:string) (tl: token list): parsetree =
+ let
+ val tl' = tl;
+ val state_sets = mt_states(len tl' + 1);
+ val s0 = mt_stateS();
+ val rooti = case Symtab.lookup(tab, RootPref^root) of
+ Some(ri) => ri | None => unknown root;
- in apply_all_states(process,si);
- update(state_sets,isi+1,si1);
- lr(tl,isi+1,si1,t) end
-
- in update(state_sets,0,s0);
- add_state(rooti,0,0,0,[],s0);
- lr(tl',0,s0,EndToken(*dummy*));
- (*print_stat state_sets;*)
- let val St(_,_,_,_,[pt]) = fst_state(sub(state_sets,len tl'))
- in pt end
- end;
-
+ fun lr (tl, isi, si, t) =
+ if ismt_stateS(si) then syn_err (t::tl) else
+ case tl of
+ [] => () |
+ t::tl =>
+ let val si1 = mt_stateS();
+ fun process(St(nti,pi,ip,from,ptl)) =
+ let val opA = #1(sub(opLA,nti))
+ val Op(syA,opn,p) = sub(opA,pi) in
+ if ip = length(syA)
+ then complete(nti,syA,opn,p,ptl,
+ sub(state_sets,from),si,opLA,rchA)
+ else case sub(syA,ip) of
+ NT(ntj,p) =>
+ seq (predict(t,isi,si,p,opLA)) (sub(rchA,ntj))
+ | T(t') =>
+ if matching_tokens(t,t')
+ then add_state(nti,pi,ip+1,from,
+ if valued_token(t)
+ then ptl@[Tip(t)] else ptl,
+ si1)
+ else () end;
-(*(* FIXME *)
-fun print_gram ((st,opAA,rchA):gram,lex) =
- let val tts = Lexicon.name_of_token;
- val al = map (fn (x,y)=>(y,x)) (Symtab.alist_of st);
- fun nt i = let val Some(s) = assoc(al,i) in s end;
- fun print_sy(T(EndToken)) = prs". " |
- print_sy(T(tk)) = (prs(tts tk); prs" ") |
- print_sy(NT(i,p)) = (prs((nt i)^"[");print_int p;prs"] ");
- fun print_opA(i) =
- let val lhs = nt i;
- val (opA,_)=sub(opAA,i);
- fun print_op(j) =
- let val Op(sy,n,p) = sub(opA,j)
- in prs(lhs^" = "); forA(fn i=>print_sy(sub(sy,i)),sy);
- if n="" then () else prs(" => \""^n^"\"");
- prs" (";print_int p;prs")\n"
- end;
- in forA(print_op,opA) end;
- fun print_rch(i) = (print_int i; prs" -> ";
- print_list("[","]\n",print_int) (sub(rchA,i)))
- in forA(print_opA,opAA) (*; forA(print_rch,rchA) *) end;
-*)
+ in apply_all_states(process,si);
+ update(state_sets,isi+1,si1);
+ lr(tl,isi+1,si1,t) end
+
+ in
+ update (state_sets, 0, s0);
+ add_state (rooti, 0, 0, 0, [], s0);
+ lr (tl', 0, s0, EndToken(*dummy*));
+ (*print_stat state_sets;*)
+ let val St(_, _, _, _, [pt]) = fst_state(sub(state_sets, len tl'))
+ in pt end
+ end;
end;
+
end;
--- a/src/Pure/Syntax/lexicon.ML Wed Jan 19 14:15:01 1994 +0100
+++ b/src/Pure/Syntax/lexicon.ML Wed Jan 19 14:21:26 1994 +0100
@@ -48,20 +48,22 @@
VarSy of string |
TFreeSy of string |
TVarSy of string |
- EndToken;
+ EndToken
val id: string
val var: string
val tfree: string
val tvar: string
+ val terminals: string list
+ val is_terminal: string -> bool
val str_of_token: token -> string
val display_token: token -> string
val matching_tokens: token * token -> bool
val valued_token: token -> bool
- val predef_term: string -> token
- val is_terminal: string -> bool
+ val predef_term: string -> token option
+ val dest_lexicon: lexicon -> string list
val empty_lexicon: lexicon
val extend_lexicon: lexicon -> string list -> lexicon
- val mk_lexicon: string list -> lexicon
+ val merge_lexicons: lexicon -> lexicon -> lexicon
val tokenize: lexicon -> bool -> string -> token list
end;
@@ -112,13 +114,17 @@
EndToken;
-(* names of valued tokens *)
+(* terminal arguments *)
val id = "id";
val var = "var";
val tfree = "tfree";
val tvar = "tvar";
+val terminals = [id, var, tfree, tvar];
+
+fun is_terminal s = s mem terminals;
+
(* str_of_token *)
@@ -164,16 +170,11 @@
(* predef_term *)
fun predef_term name =
- if name = id then IdentSy name
- else if name = var then VarSy name
- else if name = tfree then TFreeSy name
- else if name = tvar then TVarSy name
- else EndToken;
-
-
-(* is_terminal **)
-
-fun is_terminal s = s mem [id, var, tfree, tvar];
+ if name = id then Some (IdentSy name)
+ else if name = var then Some (VarSy name)
+ else if name = tfree then Some (TFreeSy name)
+ else if name = tvar then Some (TVarSy name)
+ else None;
@@ -186,13 +187,19 @@
val no_token = "";
-(* empty_lexicon *)
+(* dest_lexicon *)
+
+fun dest_lexicon Empty = []
+ | dest_lexicon (Branch (_, "", lt, eq, gt)) =
+ dest_lexicon eq @ dest_lexicon lt @ dest_lexicon gt
+ | dest_lexicon (Branch (_, str, lt, eq, gt)) =
+ str :: (dest_lexicon eq @ dest_lexicon lt @ dest_lexicon gt);
+
+
+(* empty, extend, merge lexicons *)
val empty_lexicon = Empty;
-
-(* extend_lexicon *)
-
fun extend_lexicon lexicon strs =
let
fun ext (lex, s) =
@@ -210,17 +217,22 @@
val cs = explode s;
in
if exists is_blank cs then
- error ("extend_lexicon: blank in literal " ^ quote s)
+ sys_error ("extend_lexicon: blank in delimiter " ^ quote s)
else add lex cs
end;
in
- foldl ext (lexicon, strs)
+ foldl ext (lexicon, strs \\ dest_lexicon lexicon)
end;
-
-(* mk_lexicon *)
-
-val mk_lexicon = extend_lexicon empty_lexicon;
+fun merge_lexicons lex1 lex2 =
+ let
+ val strs1 = dest_lexicon lex1;
+ val strs2 = dest_lexicon lex2;
+ in
+ if strs2 subset strs1 then lex1
+ else if strs1 subset strs2 then lex2
+ else extend_lexicon lex1 strs2
+ end;
--- a/src/Pure/Syntax/parser.ML Wed Jan 19 14:15:01 1994 +0100
+++ b/src/Pure/Syntax/parser.ML Wed Jan 19 14:21:26 1994 +0100
@@ -5,94 +5,111 @@
Isabelle's main parser (used for terms and typs).
TODO:
+ fix ambiguous grammar problem
~1 --> chain_pri
- rename T, NT
improve syntax error
- fix ambiguous grammar problem
+ extend_gram: remove 'roots' arg
*)
signature PARSER =
sig
- structure XGram: XGRAM
- structure ParseTree: PARSE_TREE
- local open XGram ParseTree ParseTree.Lexicon in
+ structure Lexicon: LEXICON
+ structure SynExt: SYN_EXT
+ local open Lexicon SynExt SynExt.Ast in
type gram
val empty_gram: gram
- val extend_gram: gram -> string list -> string prod list -> gram
- val mk_gram: string list -> string prod list -> gram
+ val extend_gram: gram -> string list -> xprod list -> gram
+ val merge_grams: gram -> gram -> gram
+ val pretty_gram: gram -> Pretty.T list
+ datatype parsetree =
+ Node of string * parsetree list |
+ Tip of token
val parse: gram -> string -> token list -> parsetree
end
end;
-functor ParserFun(structure Symtab: SYMTAB and XGram: XGRAM
- and ParseTree: PARSE_TREE)(*: PARSER *) = (* FIXME *)
+functor ParserFun(structure Symtab: SYMTAB and Lexicon: LEXICON
+ and SynExt: SYN_EXT)(*: PARSER *) = (* FIXME *)
struct
-structure XGram = XGram;
-structure ParseTree = ParseTree;
-structure Lexicon = ParseTree.Lexicon;
-open XGram ParseTree Lexicon;
+structure Pretty = SynExt.Ast.Pretty;
+structure Lexicon = Lexicon;
+structure SynExt = SynExt;
+open Lexicon SynExt;
(** datatype gram **)
-datatype symb = T of token | NT of string * int;
+datatype symb =
+ Terminal of token |
+ Nonterminal of string * int;
datatype gram =
- Gram of string list * (symb list * string * int) list Symtab.table;
-
+ Gram of (string * (symb list * string * int)) list
+ * (symb list * string * int) list Symtab.table;
-(* empty_gram *)
-
-val empty_gram = Gram ([], Symtab.null);
+fun mk_gram prods = Gram (prods, Symtab.make_multi prods);
-(* extend_gram *)
+(* empty, extend, merge grams *)
-(*prods are stored in reverse order*)
+val empty_gram = mk_gram [];
-fun extend_gram (Gram (roots, tab)) new_roots xprods =
+fun extend_gram (gram1 as Gram (prods1, _)) _ xprods2 =
let
- fun symb_of (Terminal s) = Some (T (Token s))
- | symb_of (Nonterminal (s, p)) =
+ fun symb_of (Delim s) = Some (Terminal (Token s))
+ | symb_of (Argument (s, p)) =
(case predef_term s of
- EndToken => Some (NT (s, p))
- | tk => Some (T tk))
+ None => Some (Nonterminal (s, p))
+ | Some tk => Some (Terminal tk))
| symb_of _ = None;
- fun prod_of (Prod (s, xsyms, c, p)) = (s, mapfilter symb_of xsyms, c, p);
+ fun prod_of (XProd (lhs, xsymbs, const, pri)) =
+ (lhs, (mapfilter symb_of xsymbs, const, pri));
- fun add_prod (tab, (s, syms, c, p)) =
- (case Symtab.lookup (tab, s) of
- None => Symtab.update ((s, [(syms, c, p)]), tab)
- | Some prods => Symtab.update ((s, (syms, c, p) :: prods), tab));
+ val prods2 = distinct (map prod_of xprods2);
in
- Gram (new_roots union roots,
- Symtab.balance (foldl add_prod (tab, map prod_of xprods)))
+ if prods2 subset prods1 then gram1
+ else mk_gram (extend_list prods1 prods2)
end;
+fun merge_grams (gram1 as Gram (prods1, _)) (gram2 as Gram (prods2, _)) =
+ if prods2 subset prods1 then gram1
+ else if prods1 subset prods2 then gram2
+ else mk_gram (merge_lists prods1 prods2);
+
-(* mk_gram *)
+(* pretty_gram *)
-val mk_gram = extend_gram empty_gram;
+fun pretty_gram (Gram (prods, _)) =
+ let
+ fun pretty_name name = [Pretty.str (name ^ " =")];
-
-(* get_prods *)
+ fun pretty_symb (Terminal (Token s)) = Pretty.str (quote s)
+ | pretty_symb (Terminal tok) = Pretty.str (str_of_token tok)
+ | pretty_symb (Nonterminal (s, p)) =
+ Pretty.str (s ^ "[" ^ string_of_int p ^ "]");
-fun get_prods tab s pred =
- let
- fun rfilter [] ys = ys
- | rfilter (x :: xs) ys = rfilter xs (if pred x then x :: ys else ys);
+ fun pretty_const "" = []
+ | pretty_const c = [Pretty.str ("=> " ^ quote c)];
+
+ fun pretty_pri p = [Pretty.str ("(" ^ string_of_int p ^ ")")];
+
+ fun pretty_prod (name, (symbs, const, pri)) =
+ Pretty.block (Pretty.breaks (pretty_name name @
+ map pretty_symb symbs @ pretty_const const @ pretty_pri pri));
in
- (case Symtab.lookup (tab, s) of
- Some prods => rfilter prods []
- | None => [])
+ map pretty_prod prods
end;
(** parse **)
+datatype parsetree =
+ Node of string * parsetree list |
+ Tip of token;
+
type state =
string * int
* parsetree list (*before point*)
@@ -103,6 +120,9 @@
+fun get_prods tab lhs pred =
+ filter pred (Symtab.lookup_multi (tab, lhs));
+
fun getProductions tab A i =
get_prods tab A (fn (_, _, j) => j >= i orelse j = ~1);
@@ -111,183 +131,141 @@
-fun mkState i jj A ([NT(B,~1)],id,~1) =
- (A,max_pri,[],[NT (B,jj)],id,i)
- | mkState i jj A (ss,id,j) = (A,j,[],ss,id,i) ;
+fun mkState i jj A ([Nonterminal (B, ~1)], id, ~1) =
+ (A, max_pri, [], [Nonterminal (B, jj)], id, i)
+ | mkState i jj A (ss, id, j) = (A, j, [], ss, id, i);
-fun conc (t,(k:int)) [] = (None, [(t,k)])
- | conc (t,k) ((t',k')::ts) =
- if (t = t')
- then (Some k', ( if k' >= k
- then (t',k')::ts
- else (t,k)::ts )
- )
- else let val (n, ts') = conc (t,k) ts
- in (n, (t',k')::ts')
- end;
+fun conc (t, k:int) [] = (None, [(t, k)])
+ | conc (t, k) ((t', k') :: ts) =
+ if t = t' then
+ (Some k', if k' >= k then (t', k') :: ts else (t, k) :: ts)
+ else
+ let val (n, ts') = conc (t, k) ts
+ in (n, (t', k') :: ts') end;
-fun update_tree ((B,(i,ts))::used) (A,t) =
- if (A = B)
- then
- let val (n,ts') = conc t ts
- in ((A,(i,ts'))::used, n)
- end
- else
- let val (used', n) = update_tree used (A,t)
- in ((B,(i,ts)) :: used', n)
- end;
+fun update_tree ((B, (i, ts)) :: used) (A, t) =
+ if A = B then
+ let val (n, ts') = conc t ts
+ in ((A, (i, ts')) :: used, n) end
+ else
+ let val (used', n) = update_tree used (A, t)
+ in ((B, (i, ts)) :: used', n) end;
-
-fun update_index ((B,(i,ts))::used) (A,j) =
- if (A = B)
- then (A,(j,ts)) :: used
- else (B,(i,ts)) :: (update_index used (A,j));
+fun update_index ((B, (i, ts)) :: used) (A, j) =
+ if A = B then (A, (j, ts)) :: used
+ else (B, (i, ts)) :: (update_index used (A, j));
fun getS A i Si =
- filter (fn (_,_,_,(NT(B,j))::_,_,_)
- => (A=B andalso j<=i)
- | _ => false
- ) Si;
-
-
+ filter
+ (fn (_, _, _, Nonterminal (B, j) :: _, _, _) => A = B andalso j <= i
+ | _ => false) Si;
fun getS' A k n Si =
- filter (fn (_,_,_,(NT(B,j))::_,_,_) => (A=B andalso
- j<=k andalso
- j>n )
- | _ => false
- ) Si;
-
-
+ filter
+ (fn (_, _, _, Nonterminal (B, j) :: _, _, _) => A = B andalso j <= k andalso j > n
+ | _ => false) Si;
fun getStates Estate i ii A k =
- filter (fn (_,_,_,(NT(B,j))::_,_,_)
- => (A=B andalso j<=k)
- | _ => false
- )
- (Array.sub (Estate, ii))
-;
-
+ filter
+ (fn (_, _, _, Nonterminal (B, j) :: _, _, _) => A = B andalso j <= k
+ | _ => false)
+ (Array.sub (Estate, ii));
(* MMW *)(* FIXME *)
-fun movedot_term (A,j,ts,T a::sa,id,i) c =
- if (valued_token c)
- then (A,j,(ts@[Tip c]),sa,id,i)
- else (A,j,ts,sa,id,i) ;
-
-
-
-fun movedot_nonterm ts (A,j,tss,NT(B,k) ::sa,id,i) =
- (A,j,tss@ts,sa,id,i) ;
+fun movedot_term (A, j, ts, Terminal a :: sa, id, i) c =
+ if valued_token c then
+ (A, j, (ts @ [Tip c]), sa, id, i)
+ else (A, j, ts, sa, id, i);
-
+fun movedot_nonterm ts (A, j, tss, Nonterminal (B, k) :: sa, id, i) =
+ (A, j, tss @ ts, sa, id, i);
-fun movedot_lambda _ [] = []
- | movedot_lambda (B,j,tss,(NT(A,k))::sa,id,i) ((t,ki)::ts) =
- if k <= ki
- then (B,j,tss@t,sa,id,i) ::
- (movedot_lambda (B,j,tss,(NT(A,k))::sa,id,i) ts)
- else movedot_lambda (B,j,tss,(NT(A,k))::sa,id,i) ts
- ;
-
+fun movedot_lambda _ [] = []
+ | movedot_lambda (B, j, tss, Nonterminal (A, k) :: sa, id, i) ((t, ki) :: ts) =
+ if k <= ki then
+ (B, j, tss @ t, sa, id, i) ::
+ movedot_lambda (B, j, tss, Nonterminal (A, k) :: sa, id, i) ts
+ else movedot_lambda (B, j, tss, Nonterminal (A, k) :: sa, id, i) ts;
fun PROCESSS Estate grammar i c states =
-
let
-fun processS used [] (Si,Sii) = (Si,Sii)
- | processS used (S::States) (Si,Sii) =
- ( case S of
-
- (_,_,_,(NT (A,j))::_,_,_) =>
+fun processS used [] (Si, Sii) = (Si, Sii)
+ | processS used (S :: States) (Si, Sii) =
+ (case S of
+ (_, _, _, Nonterminal (A, j) :: _, _, _) =>
let
val (used_neu, States_neu) =
- ( case assoc (used, A) of
- Some (k,l) => (* A gehoert zu used *)
-
- if (k <= j) (* Prioritaet wurde
- behandelt *)
- then
- (used, movedot_lambda S l)
-
- else (* Prioritaet wurde noch nicht
- behandelt *)
- let val L =
- getProductions' grammar A j k
- val States' = map (mkState i j A) L
- in
- (update_index used (A,j),
- States'@ movedot_lambda S l
- )
- end
+ (case assoc (used, A) of
+ Some (k, l) => (*A gehoert zu used*)
+ if k <= j (*Prioritaet wurde behandelt*)
+ then (used, movedot_lambda S l)
+ else (*Prioritaet wurde noch nicht behandelt*)
+ let
+ val L = getProductions' grammar A j k;
+ val States' = map (mkState i j A) L;
+ in
+ (update_index used (A, j), States' @ movedot_lambda S l)
+ end
- | None => (* A gehoert nicht zu used *)
- let val L = getProductions grammar A j
- val States' = map (mkState i j A) L
- in
- ((A,(j,[]))::used, States')
- end
- )
+ | None => (*A gehoert nicht zu used*)
+ let
+ val L = getProductions grammar A j;
+ val States' = map (mkState i j A) L;
+ in
+ ((A, (j, [])) :: used, States')
+ end)
in
- processS used_neu (States @ States_neu) (S::Si,Sii)
+ processS used_neu (States @ States_neu) (S :: Si, Sii)
end
-
- | (_,_,_,(T a)::_,_,_) =>
- processS used States
- (S::Si, if (matching_tokens (a, c))
- then (movedot_term S c)::Sii (* MMW *)(* FIXME *)
- else Sii
- )
-
+ | (_, _, _, Terminal a :: _, _, _) =>
+ processS used States
+ (S :: Si,
+ if matching_tokens (a, c) then movedot_term S c :: Sii else Sii)
+ (* MMW *)(* FIXME *)
- | (A,k,ts,[],id,j) =>
- let val tt = if id = ""
- then ts
- else [Node(id,ts)]
+ | (A, k, ts, [], id, j) =>
+ let
+ val tt = if id = "" then ts else [Node (id, ts)]
in
- if (j = i)
- then
- let val (used',O) = update_tree used (A,(tt,k))
- in ( case O of
- None =>
- let val Slist = getS A k Si
- val States' =
- map (movedot_nonterm tt ) Slist
- in processS used'
- (States @ States') (S::Si,Sii)
- end
- | Some n =>
- if (n >= k)
- then
- processS used' States (S::Si,Sii)
- else
- let val Slist = getS' A k n Si
- val States' =
- map (movedot_nonterm tt ) Slist
- in
- processS used'
- (States @ States') (S::Si,Sii)
- end
- )
- end
-
- else
- processS used (States @
- map (movedot_nonterm tt)
- (getStates Estate i j A k))
- (S::Si,Sii)
- end
- )
+ if j = i then
+ let
+ val (used', O) = update_tree used (A, (tt, k));
+ in
+ (case O of
+ None =>
+ let
+ val Slist = getS A k Si;
+ val States' = map (movedot_nonterm tt) Slist;
+ in
+ processS used' (States @ States') (S :: Si, Sii)
+ end
+ | Some n =>
+ if n >= k then
+ processS used' States (S :: Si, Sii)
+ else
+ let
+ val Slist = getS' A k n Si;
+ val States' = map (movedot_nonterm tt) Slist;
+ in
+ processS used' (States @ States') (S :: Si, Sii)
+ end)
+ end
+ else
+ processS used
+ (States @ map (movedot_nonterm tt) (getStates Estate i j A k))
+ (S :: Si, Sii)
+ end)
in
- processS [] states ([],[])
+ processS [] states ([], [])
end;
@@ -295,53 +273,53 @@
fun syntax_error toks =
error ("Syntax error at: " ^ space_implode " " (map str_of_token toks));
-fun Produce grammar stateset i indata =
- case (Array.sub (stateset,i)) of
- [] => syntax_error indata (* MMW *)(* FIXME *)
- | s =>
- (case indata of
- [] => Array.sub (stateset,i)
- | (c::cs) =>
- let val (si,sii) =
- PROCESSS stateset grammar i c s
- in
- Array.update (stateset,i,si);
- Array.update (stateset,i+1,sii);
- Produce grammar stateset (i+1) cs
- end
- );
+fun produce grammar stateset i indata =
+ (case Array.sub (stateset, i) of
+ [] => syntax_error indata (* MMW *)(* FIXME *)
+ | s =>
+ (case indata of
+ [] => Array.sub (stateset, i)
+ | c :: cs =>
+ let
+ val (si, sii) = PROCESSS stateset grammar i c s;
+ in
+ Array.update (stateset, i, si);
+ Array.update (stateset, i + 1, sii);
+ produce grammar stateset (i + 1) cs
+ end));
+(*(* FIXME new *)
+val get_trees = mapfilter (fn (_, _, [pt], _, _, _) => Some pt | _ => None);
+*)
+
fun get_trees [] = []
- | get_trees ((_,_,pt_lst,_,_,_) :: stateset) =
- let val l = get_trees stateset
- in case pt_lst of
- [ptree] => ptree :: l
- | _ => l
- end;
-
-
+ | get_trees ((_, _, pt_lst, _, _, _) :: stateset) =
+ let val l = get_trees stateset
+ in case pt_lst of
+ [ptree] => ptree :: l
+ | _ => l
+ end;
fun earley grammar startsymbol indata =
- let val S0 =
- [("S'",0,[],[NT (startsymbol,0), T EndToken],"",0)]
- val s = length indata + 1 (* MMW *)(* FIXME was .. + 2 *)
- val Estate = Array.array (s, [])
- in Array.update (Estate,0,S0);
- let val l = Produce grammar Estate 0 indata (* MMW *)(* FIXME was .. @ [EndToken] *)
- val p_trees = get_trees l
- in p_trees
- end
- end;
+ let
+ val S0 = [("S'", 0, [], [Nonterminal (startsymbol, 0), Terminal EndToken], "", 0)];
+ val s = length indata + 1; (* MMW *)(* FIXME was .. + 2 *)
+ val Estate = Array.array (s, []);
+ in
+ Array.update (Estate, 0, S0);
+ let
+ val l = produce grammar Estate 0 indata; (* MMW *)(* FIXME was .. @ [EndToken] *)
+ val p_trees = get_trees l;
+ in p_trees end
+ end;
(* FIXME demo *)
-fun parse (Gram (roots, prod_tab)) root toks =
- if root mem roots then
- (case earley prod_tab root toks of
- [] => error "parse: no parse trees"
- | pt :: _ => pt)
- else error ("Unparsable category: " ^ root);
+fun parse (Gram (_, prod_tab)) start toks =
+ (case earley prod_tab start toks of
+ [] => sys_error "parse: no parse trees"
+ | pt :: _ => pt);
end;
--- a/src/Pure/Syntax/printer.ML Wed Jan 19 14:15:01 1994 +0100
+++ b/src/Pure/Syntax/printer.ML Wed Jan 19 14:21:26 1994 +0100
@@ -15,31 +15,29 @@
sig
include PRINTER0
structure Symtab: SYMTAB
- structure XGram: XGRAM
- structure Pretty: PRETTY
- local open XGram XGram.Ast in
+ structure SynExt: SYN_EXT
+ local open SynExt SynExt.Ast SynExt.Ast.Pretty in
val term_to_ast: (string -> (term list -> term) option) -> term -> ast
val typ_to_ast: (string -> (term list -> term) option) -> typ -> ast
type prtab
val empty_prtab: prtab
- val extend_prtab: prtab -> (string prod list) -> (string * (ast list -> ast)) list
- -> prtab
- val mk_prtab: (string prod list) -> (string * (ast list -> ast)) list -> prtab
- val pretty_term_ast: prtab -> ast -> Pretty.T
- val pretty_typ_ast: prtab -> ast -> Pretty.T
+ val extend_prtab: prtab -> xprod list -> prtab
+ val merge_prtabs: prtab -> prtab -> prtab
+ val pretty_term_ast: prtab -> (string -> (ast list -> ast) option)
+ -> ast -> Pretty.T
+ val pretty_typ_ast: prtab -> (string -> (ast list -> ast) option)
+ -> ast -> Pretty.T
end
end;
-functor PrinterFun(structure Symtab: SYMTAB and Lexicon: LEXICON
- and TypeExt: TYPE_EXT and SExtension: SEXTENSION and Pretty: PRETTY
- sharing TypeExt.Extension = SExtension.Extension): PRINTER =
+functor PrinterFun(structure Symtab: SYMTAB and TypeExt: TYPE_EXT
+ and SExtension: SEXTENSION sharing TypeExt.SynExt = SExtension.Parser.SynExt)
+ (*: PRINTER *) = (* FIXME *)
struct
structure Symtab = Symtab;
-structure Extension = TypeExt.Extension;
-structure XGram = Extension.XGram;
-structure Pretty = Pretty;
-open XGram XGram.Ast Lexicon TypeExt Extension SExtension;
+structure SynExt = TypeExt.SynExt;
+open SExtension.Parser.Lexicon SynExt.Ast SynExt TypeExt SExtension;
(** options for printing **)
@@ -147,101 +145,77 @@
Break of int |
Block of int * symb list;
-datatype format =
- Prnt of symb list * int * int |
- Trns of ast list -> ast |
- TorP of (ast list -> ast) * (symb list * int * int);
-
-type prtab = format Symtab.table;
+type prtab = (symb list * int * int) Symtab.table;
-(* empty_prtab *)
+(* xprods_to_fmts *)
+
+fun xprod_to_fmt (XProd (_, _, "", _)) = None
+ | xprod_to_fmt (XProd (_, xsymbs, const, pri)) =
+ let
+ fun cons_str s (String s' :: syms) = String (s ^ s') :: syms
+ | cons_str s syms = String s :: syms;
+
+ fun arg (s, p) =
+ (if s = "type" then TypArg else Arg)
+ (if is_terminal s then 1000 else p); (* FIXME 1000 vs. 0 vs. p (?) *)
+
+ fun xsyms_to_syms (Delim s :: xsyms) =
+ apfst (cons_str s) (xsyms_to_syms xsyms)
+ | xsyms_to_syms (Argument s_p :: xsyms) =
+ apfst (cons (arg s_p)) (xsyms_to_syms xsyms)
+ | xsyms_to_syms (Space s :: xsyms) =
+ apfst (cons_str s) (xsyms_to_syms xsyms)
+ | xsyms_to_syms (Bg i :: xsyms) =
+ let
+ val (bsyms, xsyms') = xsyms_to_syms xsyms;
+ val (syms, xsyms'') = xsyms_to_syms xsyms';
+ in
+ (Block (i, bsyms) :: syms, xsyms'')
+ end
+ | xsyms_to_syms (Brk i :: xsyms) =
+ apfst (cons (Break i)) (xsyms_to_syms xsyms)
+ | xsyms_to_syms (En :: xsyms) = ([], xsyms)
+ | xsyms_to_syms [] = ([], []);
+
+ fun nargs (Arg _ :: syms) = nargs syms + 1
+ | nargs (TypArg _ :: syms) = nargs syms + 1
+ | nargs (String _ :: syms) = nargs syms
+ | nargs (Break _ :: syms) = nargs syms
+ | nargs (Block (_, bsyms) :: syms) = nargs syms + nargs bsyms
+ | nargs [] = 0;
+ in
+ (case xsyms_to_syms xsymbs of
+ (symbs, []) => Some (const, (symbs, nargs symbs, pri))
+ | _ => sys_error "xprod_to_fmt: unbalanced blocks")
+ end;
+
+fun xprods_to_fmts xprods =
+ gen_distinct eq_fst (mapfilter xprod_to_fmt xprods);
+
+
+(* empty, extend, merge prtabs *)
+
+fun err_dup_fmt c =
+ sys_error ("duplicate fmt in prtab for " ^ quote c);
val empty_prtab = Symtab.null;
+fun extend_prtab tab xprods =
+ Symtab.extend (op =) (tab, xprods_to_fmts xprods)
+ handle Symtab.DUPLICATE c => err_dup_fmt c;
+
+fun merge_prtabs tab1 tab2 =
+ Symtab.merge (op =) (tab1, tab2)
+ handle Symtab.DUPLICATE c => err_dup_fmt c;
+
-(** extend_prtab **)
-
-fun extend_prtab prtab prods trfuns =
- let
- fun nargs (Arg _ :: symbs) = nargs symbs + 1
- | nargs (TypArg _ :: symbs) = nargs symbs + 1
- | nargs (String _ :: symbs) = nargs symbs
- | nargs (Break _ :: symbs) = nargs symbs
- | nargs (Block (_, bsymbs) :: symbs) = nargs symbs + nargs bsymbs
- | nargs [] = 0;
-
- fun merge (s, String s' :: l) = String (s ^ s') :: l
- | merge (s, l) = String s :: l;
-
- fun mk_prnp sy c p =
- let
- val constr = (c = constrainC orelse c = constrainIdtC);
+(** pretty term or typ asts **)
- fun syn2pr (Terminal s :: sy) =
- let val (symbs, sy') = syn2pr sy;
- in (merge (s, symbs), sy') end
- | syn2pr (Space s :: sy) =
- let val (symbs, sy') = syn2pr sy;
- in (merge (s, symbs), sy') end
- | syn2pr (Nonterminal (s, p) :: sy) =
- let
- val (symbs, sy') = syn2pr sy;
- val symb =
- (if constr andalso s = "type" then TypArg else Arg)
- (if is_terminal s then 0 else p);
- in (symb :: symbs, sy') end
- | syn2pr (Bg i :: sy) =
- let
- val (bsymbs, sy') = syn2pr sy;
- val (symbs, sy'') = syn2pr sy';
- in (Block (i, bsymbs) :: symbs, sy'') end
- | syn2pr (Brk i :: sy) =
- let val (symbs, sy') = syn2pr sy;
- in (Break i :: symbs, sy') end
- | syn2pr (En :: sy) = ([], sy)
- | syn2pr [] = ([], []);
-
- val (pr, _) = syn2pr sy;
- in
- (pr, nargs pr, p)
- end;
-
- fun trns_err c = error ("More than one parse ast translation for " ^ quote c);
+(*assumes a syntax derived from Pure, otherwise may loop forever*)
- fun add_fmt tab c fmt x = Symtab.update ((c, fmt x), tab);
-
- fun add_prod (tab, Prod (_, _, "", _)) = tab
- | add_prod (tab, Prod (_, sy, c, p)) =
- (case Symtab.lookup (tab, c) of
- None => add_fmt tab c Prnt (mk_prnp sy c p)
- | Some (Prnt _) => add_fmt tab c Prnt (mk_prnp sy c p)
- | Some (Trns f) => add_fmt tab c TorP (f, mk_prnp sy c p)
- | Some (TorP (f, _)) => add_fmt tab c TorP (f, mk_prnp sy c p));
-
- fun add_tr (tab, (c, f)) =
- (case Symtab.lookup (tab, c) of
- None => add_fmt tab c Trns f
- | Some (Prnt pr) => add_fmt tab c TorP (f, pr)
- | Some (Trns _) => trns_err c
- | Some (TorP _) => trns_err c);
- in
- Symtab.balance (foldl add_prod (foldl add_tr (prtab, trfuns), prods))
- end;
-
-
-(* mk_prtab *)
-
-val mk_prtab = extend_prtab empty_prtab;
-
-
-
-(** pretty term/typ asts **)
-
-(*pretty assumes a syntax derived from Pure, otherwise it may loop forever*)
-
-fun pretty tab type_mode ast0 p0 =
+fun pretty prtab trf type_mode ast0 p0 =
let
val trans = apply_trans "print ast translation";
@@ -252,8 +226,12 @@
let val (Ts, args') = synT (symbs, args);
in (astT (t, p) @ Ts, args') end
| synT (TypArg p :: symbs, t :: args) =
- let val (Ts, args') = synT (symbs, args);
- in (pretty tab true t p @ Ts, args') end
+ let
+ val (Ts, args') = synT (symbs, args);
+ in
+ if type_mode then (astT (t, p) @ Ts, args')
+ else (pretty prtab trf true t p @ Ts, args')
+ end
| synT (String s :: symbs, args) =
let val (Ts, args') = synT (symbs, args);
in (Pretty.str s :: Ts, args') end
@@ -265,7 +243,7 @@
| synT (Break i :: symbs, args) =
let val (Ts, args') = synT (symbs, args);
in ((if i < 0 then Pretty.fbrk else Pretty.brk i) :: Ts, args') end
- | synT (_ :: _, []) = error "synT"
+ | synT (_ :: _, []) = sys_error "synT"
and parT (pr, args, p, p': int) =
if p > p' then
@@ -278,7 +256,7 @@
and splitT 0 ([x], ys) = (x, ys)
| splitT 0 (rev_xs, ys) = (Appl (rev rev_xs), ys)
| splitT n (rev_xs, y :: ys) = splitT (n - 1) (y :: rev_xs, ys)
- | splitT _ _ = error "splitT"
+ | splitT _ _ = sys_error "splitT"
and combT (tup as (c, a, args, p)) =
let
@@ -289,12 +267,12 @@
else if nargs < n orelse type_mode then prefixT tup
else astT (appT (splitT n ([c], args)), p);
in
- (case Symtab.lookup (tab, a) of
- None => prefixT tup
- | Some (Prnt prnp) => prnt prnp
- | Some (Trns f) =>
+ (case (trf a, Symtab.lookup (prtab, a)) of
+ (None, None) => prefixT tup
+ | (None, Some prnp) => prnt prnp
+ | (Some f, None) =>
(astT (trans a f args, p) handle Match => prefixT tup)
- | Some (TorP (f, prnp)) =>
+ | (Some f, Some prnp) =>
(astT (trans a f args, p) handle Match => prnt prnp))
end
@@ -311,14 +289,14 @@
(* pretty_term_ast *)
-fun pretty_term_ast tab ast =
- Pretty.blk (0, pretty tab false ast 0);
+fun pretty_term_ast prtab trf ast =
+ Pretty.blk (0, pretty prtab trf false ast 0);
(* pretty_typ_ast *)
-fun pretty_typ_ast tab ast =
- Pretty.blk (0, pretty tab true ast 0);
+fun pretty_typ_ast prtab trf ast =
+ Pretty.blk (0, pretty prtab trf true ast 0);
end;
--- a/src/Pure/Syntax/syntax.ML Wed Jan 19 14:15:01 1994 +0100
+++ b/src/Pure/Syntax/syntax.ML Wed Jan 19 14:21:26 1994 +0100
@@ -3,17 +3,13 @@
Author: Tobias Nipkow and Markus Wenzel, TU Muenchen
Root of Isabelle's syntax module.
-
-TODO:
- fix empty_tables, extend_tables, mk_tables (requires empty_gram, extend_gram)
- fix extend (requires extend_tables)
*)
signature SYNTAX =
sig
include AST0
include LEXICON0
- include EXTENSION0
+ include SYN_EXT0
include TYPE_EXT0
include SEXTENSION1
include PRINTER0
@@ -27,6 +23,7 @@
val test_read: syntax -> string -> string -> unit
val read: syntax -> typ -> string -> term
val read_typ: syntax -> (indexname -> sort) -> string -> typ
+ val simple_read_typ: string -> typ
val pretty_term: syntax -> term -> Pretty.T
val pretty_typ: syntax -> typ -> Pretty.T
val string_of_term: syntax -> term -> string
@@ -34,118 +31,120 @@
end;
functor SyntaxFun(structure Symtab: SYMTAB and TypeExt: TYPE_EXT
- and Parser: PARSER and SExtension: SEXTENSION and Printer: PRINTER
- sharing TypeExt.Extension = SExtension.Extension
- and Parser.XGram = TypeExt.Extension.XGram = Printer.XGram
- and Parser.XGram.Ast = Parser.ParseTree.Ast): SYNTAX =
+ and SExtension: SEXTENSION and Printer: PRINTER
+ sharing SExtension.Parser.SynExt = TypeExt.SynExt = Printer.SynExt)(*: SYNTAX *) = (* FIXME *)
struct
-structure Extension = TypeExt.Extension;
-structure XGram = Extension.XGram;
-structure Lexicon = Parser.ParseTree.Lexicon;
-open Lexicon Parser Parser.ParseTree Extension TypeExt SExtension Printer
- XGram XGram.Ast;
+structure SynExt = TypeExt.SynExt;
+structure Parser = SExtension.Parser;
+structure Lexicon = Parser.Lexicon;
+open Lexicon SynExt SynExt.Ast Parser TypeExt SExtension Printer;
+
+
+(** tables of translation functions **)
+
+(*the ref serves as unique id*)
+type 'a trtab = (('a list -> 'a) * unit ref) Symtab.table;
+
+val dest_trtab = Symtab.alist_of;
+
+fun lookup_trtab tab c =
+ apsome fst (Symtab.lookup (tab, c));
-fun lookup tab a = Symtab.lookup (tab, a);
+(* empty, extend, merge trtabs *)
+
+fun err_dup_trfun name c =
+ error ("More than one " ^ name ^ " for " ^ quote c);
+
+val empty_trtab = Symtab.null;
+
+fun extend_trtab tab trfuns name =
+ Symtab.extend eq_snd (tab, map (fn (c, f) => (c, (f, ref ()))) trfuns)
+ handle Symtab.DUPLICATE c => err_dup_trfun name c;
+
+fun merge_trtabs tab1 tab2 name =
+ Symtab.merge eq_snd (tab1, tab2)
+ handle Symtab.DUPLICATE c => err_dup_trfun name c;
+
+
+
+(** tables of translation rules **)
+
+type ruletab = (ast * ast) list Symtab.table;
+
+fun dest_ruletab tab = flat (map snd (Symtab.alist_of tab));
+
+
+(* lookup_ruletab *)
+
+fun lookup_ruletab tab =
+ if Symtab.is_null tab then None
+ else Some (fn a => Symtab.lookup_multi (tab, a));
+
+
+(* empty, extend, merge ruletabs *)
+
+val empty_ruletab = Symtab.null;
+
+fun extend_ruletab tab rules =
+ generic_extend (op =) Symtab.dest_multi Symtab.make_multi tab
+ (map (fn r => (head_of_rule r, r)) (distinct rules));
+
+fun merge_ruletabs tab1 tab2 =
+ generic_merge (op =) Symtab.dest_multi Symtab.make_multi tab1 tab2;
(** datatype syntax **)
-datatype tables =
- Tabs of {
+datatype syntax =
+ Syntax of {
lexicon: lexicon,
roots: string list,
gram: gram,
consts: string list,
- parse_ast_trtab: (ast list -> ast) Symtab.table,
- parse_ruletab: (ast * ast) list Symtab.table,
- parse_trtab: (term list -> term) Symtab.table,
- print_trtab: (term list -> term) Symtab.table,
- print_ruletab: (ast * ast) list Symtab.table,
+ parse_ast_trtab: ast trtab,
+ parse_ruletab: ruletab,
+ parse_trtab: term trtab,
+ print_trtab: term trtab,
+ print_ruletab: ruletab,
+ print_ast_trtab: ast trtab,
prtab: prtab};
-datatype gramgraph =
- EmptyGG |
- ExtGG of gramgraph ref * ext |
- MergeGG of gramgraph ref * gramgraph ref;
-
-datatype syntax = Syntax of gramgraph ref * tables;
-
-
-
-(*** compile syntax ***)
-
-(* translation funs *)
-
-fun extend_trtab tab trfuns name =
- Symtab.balance (Symtab.st_of_alist (trfuns, tab)) handle Symtab.DUPLICATE s
- => error ("More than one " ^ name ^ " for " ^ quote s);
-
-val mk_trtab = extend_trtab Symtab.null;
-
-(* translation rules *)
+(* empty_syntax *)
-fun mk_ruletab rules =
- let
- fun add_rule (r, tab) =
- let val a = head_of_rule r;
- in
- (case lookup tab a of
- None => Symtab.update ((a, [r]), tab)
- | Some rs => Symtab.update ((a, r :: rs), tab))
- end;
- in
- Symtab.balance (foldr add_rule (rules, Symtab.null))
- end;
-
-fun extend_ruletab tab rules =
- mk_ruletab (flat (map #2 (Symtab.alist_of tab)) @ rules);
-
-
-
-(** tables **)
-
-(* empty_tables *)
-
-(*(* FIXME *)
-val empty_tables =
- Tabs {
+val empty_syntax =
+ Syntax {
lexicon = empty_lexicon,
roots = [],
gram = empty_gram,
consts = [],
- parse_ast_trtab = Symtab.null,
- parse_ruletab = Symtab.null,
- parse_trtab = Symtab.null,
- print_trtab = Symtab.null,
- print_ruletab = Symtab.null,
+ parse_ast_trtab = empty_trtab,
+ parse_ruletab = empty_ruletab,
+ parse_trtab = empty_trtab,
+ print_trtab = empty_trtab,
+ print_ruletab = empty_ruletab,
+ print_ast_trtab = empty_trtab,
prtab = empty_prtab};
-*)
-(* extend_tables *)
+(* extend_syntax *)
-fun extend_tables (Tabs tabs) (XGram xgram) =
+fun extend_syntax (Syntax tabs) syn_ext =
let
val {lexicon, roots = roots1, gram, consts = consts1, parse_ast_trtab,
- parse_ruletab, parse_trtab, print_trtab, print_ruletab, prtab} = tabs;
- val {roots = roots2, prods, consts = consts2, parse_ast_translation,
+ parse_ruletab, parse_trtab, print_trtab, print_ruletab, print_ast_trtab,
+ prtab} = tabs;
+ val SynExt {roots = roots2, xprods, consts = consts2, parse_ast_translation,
parse_rules, parse_translation, print_translation, print_rules,
- print_ast_translation} = xgram;
+ print_ast_translation} = syn_ext;
in
- (* FIXME *)
- if not (null prods) then
- error "extend_tables: called with non-empty prods"
- else
-
- Tabs {
- lexicon = extend_lexicon lexicon (literals_of prods),
- roots = roots2 union roots1,
- (* gram = extend_gram gram roots2 prods, *) (* FIXME *)
- gram = gram, (* FIXME *)
+ Syntax {
+ lexicon = extend_lexicon lexicon (delims_of xprods),
+ roots = extend_list roots1 roots2,
+ gram = extend_gram gram roots2 xprods,
consts = consts2 union consts1,
parse_ast_trtab =
extend_trtab parse_ast_trtab parse_ast_translation "parse ast translation",
@@ -153,176 +152,116 @@
parse_trtab = extend_trtab parse_trtab parse_translation "parse translation",
print_trtab = extend_trtab print_trtab print_translation "print translation",
print_ruletab = extend_ruletab print_ruletab print_rules,
- prtab = extend_prtab prtab prods print_ast_translation}
- end;
-
-
-(* mk_tables *)
-
-(* FIXME *)
-(* val mk_tables = extend_tables empty_tables; *)
-
-(* FIXME *)
-fun mk_tables (XGram xgram) =
- let
- val {roots, prods, consts, parse_ast_translation, parse_rules,
- parse_translation, print_translation, print_rules,
- print_ast_translation} = xgram;
- in
- Tabs {
- lexicon = mk_lexicon (literals_of prods),
- roots = roots,
- gram = mk_gram roots prods,
- consts = consts,
- parse_ast_trtab = mk_trtab parse_ast_translation "parse ast translation",
- parse_ruletab = mk_ruletab parse_rules,
- parse_trtab = mk_trtab parse_translation "parse translation",
- print_trtab = mk_trtab print_translation "print translation",
- print_ruletab = mk_ruletab print_rules,
- prtab = mk_prtab prods print_ast_translation}
+ print_ast_trtab =
+ extend_trtab print_ast_trtab print_ast_translation "print ast translation",
+ prtab = extend_prtab prtab xprods}
end;
-(* ggr_to_xgram *)
+(* merge_syntaxes *)
-fun ggr_to_xgram ggr =
+fun merge_syntaxes (Syntax tabs1) (Syntax tabs2) =
let
- fun flatGG ggr (xg, v) =
- if ggr mem v then (xg, v) else flatGG' ggr (xg, ggr :: v)
- and flatGG' (ref EmptyGG) xgv = xgv
- | flatGG' (ref (ExtGG (ggr, ext))) xgv =
- let
- val (xg', v') = flatGG ggr xgv;
- in
- (extend_xgram xg' ext, v')
- end
- | flatGG' (ref (MergeGG (ggr1, ggr2))) xgv =
- flatGG ggr1 (flatGG ggr2 xgv);
+ val {lexicon = lexicon1, roots = roots1, gram = gram1, consts = consts1,
+ parse_ast_trtab = parse_ast_trtab1, parse_ruletab = parse_ruletab1,
+ parse_trtab = parse_trtab1, print_trtab = print_trtab1,
+ print_ruletab = print_ruletab1, print_ast_trtab = print_ast_trtab1,
+ prtab = prtab1} = tabs1;
+
+ val {lexicon = lexicon2, roots = roots2, gram = gram2, consts = consts2,
+ parse_ast_trtab = parse_ast_trtab2, parse_ruletab = parse_ruletab2,
+ parse_trtab = parse_trtab2, print_trtab = print_trtab2,
+ print_ruletab = print_ruletab2, print_ast_trtab = print_ast_trtab2,
+ prtab = prtab2} = tabs2;
in
- #1 (flatGG ggr (empty_xgram, []))
+ Syntax {
+ lexicon = merge_lexicons lexicon1 lexicon2,
+ roots = merge_lists roots1 roots2,
+ gram = merge_grams gram1 gram2,
+ consts = merge_lists consts1 consts2,
+ parse_ast_trtab =
+ merge_trtabs parse_ast_trtab1 parse_ast_trtab2 "parse ast translation",
+ parse_ruletab = merge_ruletabs parse_ruletab1 parse_ruletab2,
+ parse_trtab = merge_trtabs parse_trtab1 parse_trtab2 "parse translation",
+ print_trtab = merge_trtabs print_trtab1 print_trtab2 "print translation",
+ print_ruletab = merge_ruletabs print_ruletab1 print_ruletab2,
+ print_ast_trtab =
+ merge_trtabs print_ast_trtab1 print_ast_trtab2 "print ast translation",
+ prtab = merge_prtabs prtab1 prtab2}
end;
-(* mk_syntax *)
-fun mk_syntax ggr = Syntax (ggr, mk_tables (ggr_to_xgram ggr));
-
-
-
-(*** inspect syntax ***)
-
-fun xgram_of (Syntax (ggr, _)) = ggr_to_xgram ggr;
+(** inspect syntax **)
fun string_of_big_list name prts =
- Pretty.string_of (Pretty.blk (2,
- separate Pretty.fbrk (Pretty.str name :: prts)));
+ Pretty.string_of (Pretty.block (Pretty.fbreaks (Pretty.str name :: prts)));
fun string_of_strings name strs =
- Pretty.string_of (Pretty.blk (2,
- separate (Pretty.brk 1)
- (map Pretty.str (name :: map quote (sort_strings strs)))));
+ Pretty.string_of (Pretty.block (Pretty.breaks
+ (map Pretty.str (name :: map quote (sort_strings strs)))));
(* print_gram *)
-fun prt_gram (XGram {roots, prods, ...}) =
+fun print_gram (Syntax tabs) =
let
- fun pretty_name name = [Pretty.str (name ^ " ="), Pretty.brk 1];
-
- fun pretty_xsymbs (Terminal s :: xs) =
- Pretty.str (quote s) :: Pretty.brk 1 :: pretty_xsymbs xs
- | pretty_xsymbs (Nonterminal (s, p) :: xs) =
- (if is_terminal s then Pretty.str s
- else Pretty.str (s ^ "[" ^ string_of_int p ^ "]"))
- :: Pretty.brk 1 :: pretty_xsymbs xs
- | pretty_xsymbs (_ :: xs) = pretty_xsymbs xs
- | pretty_xsymbs [] = [];
-
- fun pretty_const "" = [Pretty.brk 1]
- | pretty_const c = [Pretty.str (" => " ^ quote c), Pretty.brk 1];
-
- fun pretty_pri p = [Pretty.str ("(" ^ string_of_int p ^ ")")];
-
- fun pretty_prod (Prod (name, xsymbs, const, pri)) =
- Pretty.blk (2, pretty_name name @ pretty_xsymbs xsymbs @
- pretty_const const @ pretty_pri pri);
+ val {lexicon, roots, gram, ...} = tabs;
in
- writeln (string_of_strings "lexicon:" (literals_of prods));
- writeln (Pretty.string_of (Pretty.blk (2,
- separate (Pretty.brk 1) (map Pretty.str ("roots:" :: roots)))));
- writeln (string_of_big_list "prods:" (map pretty_prod prods))
+ writeln (string_of_strings "lexicon:" (dest_lexicon lexicon));
+ writeln (Pretty.string_of (Pretty.block (Pretty.breaks
+ (map Pretty.str ("roots:" :: roots)))));
+ writeln (string_of_big_list "prods:" (pretty_gram gram))
end;
-val print_gram = prt_gram o xgram_of;
-
(* print_trans *)
-fun prt_trans (XGram xgram) =
+fun print_trans (Syntax tabs) =
let
- fun string_of_trs name trs = string_of_strings name (map fst trs);
+ fun string_of_trtab name tab =
+ string_of_strings name (map fst (dest_trtab tab));
- fun string_of_rules name rules =
- string_of_big_list name (map pretty_rule rules);
+ fun string_of_ruletab name tab =
+ string_of_big_list name (map pretty_rule (dest_ruletab tab));
- val {consts, parse_ast_translation, parse_rules, parse_translation,
- print_translation, print_rules, print_ast_translation, ...} = xgram;
+ val {consts, parse_ast_trtab, parse_ruletab, parse_trtab, print_trtab,
+ print_ruletab, print_ast_trtab, ...} = tabs;
in
writeln (string_of_strings "consts:" consts);
- writeln (string_of_trs "parse_ast_translation:" parse_ast_translation);
- writeln (string_of_rules "parse_rules:" parse_rules);
- writeln (string_of_trs "parse_translation:" parse_translation);
- writeln (string_of_trs "print_translation:" print_translation);
- writeln (string_of_rules "print_rules:" print_rules);
- writeln (string_of_trs "print_ast_translation:" print_ast_translation)
+ writeln (string_of_trtab "parse_ast_translation:" parse_ast_trtab);
+ writeln (string_of_ruletab "parse_rules:" parse_ruletab);
+ writeln (string_of_trtab "parse_translation:" parse_trtab);
+ writeln (string_of_trtab "print_translation:" print_trtab);
+ writeln (string_of_ruletab "print_rules:" print_ruletab);
+ writeln (string_of_trtab "print_ast_translation:" print_ast_trtab)
end;
-val print_trans = prt_trans o xgram_of;
-
(* print_syntax *)
-fun print_syntax syn =
- let
- val xgram = xgram_of syn;
- in
- prt_gram xgram; prt_trans xgram
- end;
+fun print_syntax syn = (print_gram syn; print_trans syn);
-(*** parsing and printing ***)
-
-(* mk_get_rules *)
-
-fun mk_get_rules ruletab =
- let
- fun get_rules a =
- (case lookup ruletab a of
- Some rules => rules
- | None => []);
- in
- if Symtab.is_null ruletab then None else Some get_rules
- end;
-
+(** read **)
(* read_ast *)
-fun read_ast (Syntax (_, tabs)) xids root str =
+fun read_ast (Syntax tabs) xids root str =
let
- val Tabs {lexicon, gram, parse_ast_trtab, ...} = tabs;
+ val {lexicon, gram, parse_ast_trtab, ...} = tabs;
in
- pt_to_ast (lookup parse_ast_trtab)
+ pt_to_ast (lookup_trtab parse_ast_trtab)
(parse gram root (tokenize lexicon xids str))
end;
-
-(** test_read **)
+(* test_read *)
-fun test_read (Syntax (_, tabs)) root str =
+fun test_read (Syntax tabs) root str =
let
- val Tabs {lexicon, gram, parse_ast_trtab, parse_ruletab, ...} = tabs;
+ val {lexicon, gram, parse_ast_trtab, parse_ruletab, ...} = tabs;
val toks = tokenize lexicon false str;
val _ = writeln ("tokens: " ^ space_implode " " (map display_token toks));
@@ -331,36 +270,38 @@
val raw_ast = pt_to_ast (K None) pt;
val _ = writeln ("raw: " ^ str_of_ast raw_ast);
- val pre_ast = pt_to_ast (lookup parse_ast_trtab) pt;
- val _ = normalize true true (mk_get_rules parse_ruletab) pre_ast;
+ val pre_ast = pt_to_ast (lookup_trtab parse_ast_trtab) pt;
+ val _ = normalize true true (lookup_ruletab parse_ruletab) pre_ast;
in () end;
-
-(** read **)
+(* read *)
-fun read (syn as Syntax (_, tabs)) ty str =
+fun read (syn as Syntax tabs) ty str =
let
- val Tabs {parse_ruletab, parse_trtab, ...} = tabs;
+ val {parse_ruletab, parse_trtab, ...} = tabs;
val ast = read_ast syn false (typ_to_nonterm ty) str;
in
- ast_to_term (lookup parse_trtab)
- (normalize_ast (mk_get_rules parse_ruletab) ast)
+ ast_to_term (lookup_trtab parse_trtab)
+ (normalize_ast (lookup_ruletab parse_ruletab) ast)
end;
+(* read types *)
-(** read_typ **)
+fun read_typ syn def_sort str =
+ typ_of_term def_sort (read syn typeT str);
-fun read_typ syn def_sort str = typ_of_term def_sort (read syn typeT str);
+val type_syn = extend_syntax empty_syntax type_ext;
+
+fun simple_read_typ str = read_typ type_syn (K []) str;
-
-(** read_rule **)
+(* read rules *)
fun read_rule syn (xrule as ((_, lhs_src), (_, rhs_src))) =
let
- val Syntax (_, Tabs {consts, ...}) = syn;
+ val Syntax {consts, ...} = syn;
fun constantify (ast as Constant _) = ast
| constantify (ast as Variable x) =
@@ -381,10 +322,6 @@
| None => rule)
end;
-
-
-(** read_xrules **)
-
fun read_xrules syn xrules =
let
fun right_rule (xpat1 |-> xpat2) = Some (xpat1, xpat2)
@@ -403,12 +340,13 @@
(** pretty terms or typs **)
-fun pretty_t t_to_ast pretty_t (syn as Syntax (_, tabs)) t =
+fun pretty_t t_to_ast pretty_t (syn as Syntax tabs) t =
let
- val Tabs {print_trtab, print_ruletab, prtab, ...} = tabs;
- val ast = t_to_ast (lookup print_trtab) t;
+ val {print_trtab, print_ruletab, print_ast_trtab, prtab, ...} = tabs;
+ val ast = t_to_ast (lookup_trtab print_trtab) t;
in
- pretty_t prtab (normalize_ast (mk_get_rules print_ruletab) ast)
+ pretty_t prtab (lookup_trtab print_ast_trtab)
+ (normalize_ast (lookup_ruletab print_ruletab) ast)
end;
val pretty_term = pretty_t term_to_ast pretty_term_ast;
@@ -421,45 +359,32 @@
-(*** build syntax ***)
-
-(* type_syn *)
-
-val type_syn = mk_syntax (ref (ExtGG (ref EmptyGG, type_ext)));
+(** build syntax **)
-
-(* extend *) (* FIXME *)
-
-fun extend syn read_ty (roots, xconsts, sext) =
- let
- val Syntax (ggr0, tabs0 as Tabs {roots = roots0, ...}) = syn;
+(* extend *)
- val ext1 = ext_of_sext ((distinct roots) \\ roots0) xconsts read_ty sext;
+fun extend syn0 read_ty (all_roots, xconsts, sext) =
+ let
+ val Syntax {roots, ...} = syn0;
- val (syn1 as Syntax (ggr1, tabs1)) =
- (case ext1 of
- Ext {roots = [], mfix = [], ...} =>
- Syntax (ref (ExtGG (ggr0, ext1)), extend_tables tabs0 (mk_xgram ext1))
- | _ => mk_syntax (ref (ExtGG (ggr0, ext1))));
+ val syn1 = extend_syntax syn0
+ (syn_ext_of_sext (all_roots \\ roots) xconsts read_ty sext);
- val (parse_rules, print_rules) = read_xrules syn1 (xrules_of sext);
- val ext2 = ExtRules {parse_rules = parse_rules, print_rules = print_rules};
- in
- Syntax (ref (ExtGG (ggr1, ext2)), extend_tables tabs1 (mk_xgram ext2))
- end;
+ val syn2 = extend_syntax syn1
+ (syn_ext_rules (read_xrules syn1 (xrules_of sext)));
+ in syn2 end;
(* merge *)
-fun merge roots syn1 syn2 =
+fun merge all_roots syn1 syn2 =
let
- val Syntax (ggr1, Tabs {roots = roots1, ...}) = syn1;
- val Syntax (ggr2, Tabs {roots = roots2, ...}) = syn2;
- val mggr = ref (MergeGG (ggr1, ggr2));
+ val syn as (Syntax {roots, ...}) = merge_syntaxes syn1 syn2;
in
- (case ((distinct roots) \\ roots1) \\ roots2 of
- [] => mk_syntax mggr
- | new_roots => mk_syntax (ref (ExtGG (mggr, ExtRoots new_roots))))
+ (case all_roots \\ roots of
+ [] => syn
+ | new_roots => (writeln (string_of_strings "DEBUG new roots:" new_roots); (* FIXME debug *)
+ extend_syntax syn (syn_ext_roots new_roots)))
end;