# HG changeset patch # User wenzelm # Date 758985686 -3600 # Node ID a7d3e712767a37fd2f2bcb1ab271cb72e2d31220 # Parent d33cd0011e486d3808e8fcac974e0aa88ff5bbc5 MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables now much leaner (eliminated gramgraph, all data except tables of old parser are shared); simplified the internal interfaces for syntax extension; diff -r d33cd0011e48 -r a7d3e712767a src/Pure/Syntax/earley0A.ML --- a/src/Pure/Syntax/earley0A.ML Wed Jan 19 14:15:01 1994 +0100 +++ b/src/Pure/Syntax/earley0A.ML Wed Jan 19 14:21:26 1994 +0100 @@ -7,34 +7,39 @@ signature PARSER = sig - structure XGram: XGRAM - structure ParseTree: PARSE_TREE - local open XGram ParseTree ParseTree.Lexicon in + structure Lexicon: LEXICON + structure SynExt: SYN_EXT + local open Lexicon SynExt SynExt.Ast in type gram - val mk_gram: string list -> string prod list -> gram + val empty_gram: gram + val extend_gram: gram -> string list -> xprod list -> gram + val merge_grams: gram -> gram -> gram + val pretty_gram: gram -> Pretty.T list + datatype parsetree = + Node of string * parsetree list | + Tip of token val parse: gram -> string -> token list -> parsetree end - -(* exception SYNTAX_ERROR of token list *) -(* val compile_xgram: string list * token prod list -> Gram *) -(* val parsable: Gram * string -> bool *) -(* val print_gram: Gram * lexicon -> unit *) end; -functor EarleyFun(structure Symtab: SYMTAB and XGram: XGRAM - and ParseTree: PARSE_TREE): PARSER = +functor EarleyFun(structure Symtab: SYMTAB and Lexicon: LEXICON + and SynExt: SYN_EXT)(*: PARSER *) = (* FIXME *) struct -structure ParseTree = ParseTree; -structure Lexicon = ParseTree.Lexicon; -structure XGram = XGram; -open ParseTree Lexicon; +structure Pretty = SynExt.Ast.Pretty; +structure Lexicon = Lexicon; +structure SynExt = SynExt; +open Lexicon; -(** mk_pt (from parse_tree.ML) **) +(** datatype parsetree **) + +datatype parsetree = + Node of string * parsetree list | + Tip of token; fun mk_pt ("", [pt]) = pt - | mk_pt ("", _) = error "mk_pt: funny copy op in parse tree" + | mk_pt ("", _) = sys_error "mk_pt: funny copy op in parse tree" | mk_pt (s, ptl) = Node (s, ptl); @@ -88,7 +93,7 @@ val len = length; -local open Array XGram ParseTree ParseTree.Lexicon in +local open Array SynExt in nonfix sub; fun forA(p: int -> unit, a: 'a array) : unit = @@ -101,10 +106,16 @@ fun acc(a,i) = if i=ub then a else acc(f(a,sub(b,i)),i+1) in acc(a0,0) end; -(* -gram: name of nt -> number, nt number -> productions array, - nt number -> list of nt's reachable via copy ops -*) + + +(** grammars **) + +datatype 'a symb = + Dlm of 'a | + Arg of string * int; + +datatype 'a prod = Prod of string * 'a symb list * string * int; + datatype Symbol = T of token | NT of int * int and Op = Op of OpSyn * string * int @@ -112,56 +123,68 @@ and OpListA = Op array * int TokenMap and FastAcc = int TokenMap; -type gram = int Symtab.table * OpListA array * int list array; +(*gram_tabs: name of nt -> number, nt number -> productions array, + nt number -> list of nt's reachable via copy ops*) -fun non_term(Nonterminal(s,_)) = if predef_term(s)=EndToken then [s] else [] - | non_term(_) = []; +type gram_tabs = int Symtab.table * OpListA array * int list array; + +type gram = string list * string prod list * gram_tabs; + -fun non_terms(Prod(_,symbs,_,_)) = flat(map non_term symbs); +fun non_term (Arg (s, _)) = if is_terminal s then None else Some s + | non_term _ = None; + +fun non_terms (Prod (_, symbs, _, _)) = mapfilter non_term symbs; -(* mk_pre_grammar converts a list of productions in external format into an -internal gram object. *) + +(* mk_pre_grammar *) +(* converts a list of productions in external format into an + internal gram object. *) + val dummyTM:FastAcc = mkTokenMap[]; -fun mk_pre_grammar(prods): gram = -let fun same_res(Prod(t1,_,_,_), Prod(t2,_,_,_)) = t1=t2; +fun mk_pre_grammar prods : gram_tabs = + let + fun same_res (Prod(t1, _, _, _), Prod(t2, _, _, _)) = t1=t2; val partitioned0 = partition_eq same_res prods; - val nts0 = map (fn Prod(ty,_,_,_)::_ => ty) partitioned0; + val nts0 = map (fn Prod(ty, _, _, _)::_ => ty) partitioned0; val nts' = distinct(flat(map non_terms prods)) \\ nts0; val nts = nts' @ nts0; val partitioned = (replicate (len nts') []) @ partitioned0; val ntis = nts ~~ (0 upto (len(nts)-1)); - val tab = foldr Symtab.update (ntis,Symtab.null); + val tab = foldr Symtab.update (ntis, Symtab.null); - fun nt_or_vt(s,p) = - (case predef_term(s) of - EndToken => let val Some(i) = Symtab.lookup(tab,s) in NT(i,p) end - | tk => T(tk)); + fun nt_or_vt (s, p) = + (case predef_term s of + None => NT (the (Symtab.lookup (tab, s)), p) + | Some tk => T tk); - fun mksyn(Terminal(t)) = [T(t)] - | mksyn(Nonterminal(t)) = [nt_or_vt t] - | mksyn(_) = []; + fun mksyn(Dlm(t)) = T(t) + | mksyn(Arg(t)) = nt_or_vt t; - fun prod2op(Prod(nt,sy,opn,p)) = - let val syA = arrayoflist(flat(map mksyn sy)) in Op(syA,opn,p) end; + fun prod2op(Prod(nt, sy, opn, p)) = + let val syA = arrayoflist(map mksyn sy) in Op(syA, opn, p) end; - fun mkops prods = (arrayoflist(map prod2op prods),dummyTM); + fun mkops prods = (arrayoflist(map prod2op prods), dummyTM); val opLA = arrayoflist(map mkops partitioned); - val subs = array(length opLA,[]) : int list array; - fun newcp v (a,Op(syA,_,p)) = + val subs = array(length opLA, []) : int list array; + fun newcp v (a, Op(syA, _, p)) = if p=chain_pri - then let val NT(k,_) = sub(syA,0) + then let val NT(k, _) = sub(syA, 0) in if k mem v then a else k ins a end else a; fun reach v i = - let val new = itA ([],#1(sub(opLA,i))) (newcp v) + let val new = itA ([], #1(sub(opLA, i))) (newcp v) val v' = new union v in flat(map (reach v') new) union v' end; - fun rch(i) = update(subs,i,reach[i]i); + fun rch(i) = update(subs, i, reach[i]i); -in forA(rch,subs); (tab,opLA,subs) end; + in + forA(rch, subs); (tab, opLA, subs) + end; + val RootPref = "__"; @@ -170,7 +193,7 @@ type Lkhd = token list list list; -(* subst_syn(s,k) syn = [ pref k ts | ts is a token string derivable from sy +(* subst_syn(s, k) syn = [ pref k ts | ts is a token string derivable from sy under substitution s ] *) (* This is the general version. fun cross l1 l2 = flat(map (fn e2 => (map (fn e1 => e1@e2) l1)) l2); @@ -212,46 +235,107 @@ in writeln"Computing lookahead tables ..."; iterate (replicate (length opLA) []) end; -(* create look ahead tables *) -fun mk_earley_gram(g as (tab,opLA,_):gram):gram = - let val lkhd = mk_lkhd(opLA,1); + +(* mk_earley_gram *) (* create look ahead tables *) + +fun mk_earley_gram (g as (tab, opLA, _):gram_tabs):gram_tabs = + let val lkhd = mk_lkhd(opLA, 1); fun mk_fa(i):FastAcc = - let val opA = #1(sub(opLA,i)); - fun start(j) = let val Op(sy,_,_) = sub(opA,j); - val pre = subst_syn(lkhd,1) sy - in (j,if [] mem pre then [] else map hd pre) end; + let val opA = #1(sub(opLA, i)); + fun start(j) = let val Op(sy, _, _) = sub(opA, j); + val pre = subst_syn(lkhd, 1) sy + in (j, if [] mem pre then [] else map hd pre) end; in mkTokenMap(map start (0 upto(length(opA)-1))) end; - fun updt(i) = update(opLA,i,(#1(sub(opLA,i)),mk_fa(i))); + fun updt(i) = update(opLA, i, (#1(sub(opLA, i)), mk_fa(i))); - in forA(updt,opLA); g end; + in forA(updt, opLA); g end; + + +(* compile_xgram *) -fun compile_xgram(roots,prods) = - let fun mk_root nt = Prod(RootPref^nt, - [Nonterminal(nt,0),Terminal(EndToken)],"",0); - val prods' = (map mk_root roots) @ prods - in mk_earley_gram(mk_pre_grammar(prods')) end; +fun compile_xgram (roots, prods) = + let + fun mk_root nt = + Prod (RootPref ^ nt, [Arg (nt, 0), Dlm EndToken], "", 0); + + val prods' = (map mk_root roots) @ prods; + in + mk_earley_gram (mk_pre_grammar prods') + end; (* translate (from xgram.ML) *) fun translate trfn = - map - (fn Terminal t => Terminal (trfn t) - | Nonterminal s => Nonterminal s - | Space s => Space s - | Bg i => Bg i - | Brk i => Brk i - | En => En); + map (fn Dlm t => Dlm (trfn t) | Arg s => Arg s); -(* mk_gram (from syntax.ML) *) +(* mk_gram_tabs *) fun str_to_tok (opl: string prod list): token prod list = map (fn Prod (t, syn, s, pa) => Prod (t, translate Token syn, s, pa)) opl; -fun mk_gram roots prods = compile_xgram (roots, str_to_tok prods); +fun mk_gram_tabs roots prods = compile_xgram (roots, str_to_tok prods); + + + +(** build gram **) + +fun mk_gram roots prods = (roots, prods, mk_gram_tabs roots prods); + +fun sub_gram (roots1, prods1, _) (roots2, prods2, _) = + roots1 subset roots2 andalso prods1 subset prods2; + + +(* empty, extend, merge grams *) + +val empty_gram = mk_gram [] []; + +fun extend_gram (gram1 as (roots1, prods1, _)) roots2 xprods2 = + let + fun symb_of (Delim s) = Some (Dlm s) + | symb_of (Argument s_p) = Some (Arg s_p) + | symb_of _ = None; + + fun prod_of (XProd (lhs, xsymbs, const, pri)) = + Prod (lhs, mapfilter symb_of xsymbs, const, pri); + + val prods2 = distinct (map prod_of xprods2); + in + if roots2 subset roots1 andalso prods2 subset prods1 then gram1 + else mk_gram (extend_list roots1 roots2) (extend_list prods1 prods2) + end; + +fun merge_grams (gram1 as (roots1, prods1, _)) (gram2 as (roots2, prods2, _)) = + if sub_gram gram2 gram1 then gram1 + else if sub_gram gram1 gram2 then gram2 + else mk_gram (merge_lists roots1 roots2) (merge_lists prods1 prods2); + + +(* pretty_gram *) + +fun pretty_gram (_, prods, _) = + let + fun pretty_name name = [Pretty.str (name ^ " =")]; + + fun pretty_symb (Dlm s) = Pretty.str (quote s) + | pretty_symb (Arg (s, p)) = + if is_terminal s then Pretty.str s + else Pretty.str (s ^ "[" ^ string_of_int p ^ "]"); + + fun pretty_const "" = [] + | pretty_const c = [Pretty.str ("=> " ^ quote c)]; + + fun pretty_pri p = [Pretty.str ("(" ^ string_of_int p ^ ")")]; + + fun pretty_prod (Prod (name, symbs, const, pri)) = + Pretty.block (Pretty.breaks (pretty_name name @ + map pretty_symb symbs @ pretty_const const @ pretty_pri pri)); + in + map pretty_prod prods + end; @@ -324,85 +408,58 @@ in seq add (applyTokenMap(tm,tk)) end; -(*(* FIXME *) -fun parsable((tab,_,_):gram, root:string) = - not(Symtab.lookup(tab,RootPref^root) = None); -*) - -(* exception SYNTAX_ERROR of token list; *) (* FIXME *) fun unknown c = error ("Unparsable category: " ^ c); -fun syn_err toks = error ("Syntax error at: " ^ space_implode " " (map str_of_token toks)); - -fun parse ((tab,opLA,rchA):gram) (root:string) (tl: token list): parsetree = - let val tl' = tl; (* FIXME was ..@[EndToken] *) - val state_sets = mt_states(len tl' +1); - val s0 = mt_stateS(); - val rooti = case Symtab.lookup(tab,RootPref^root) of - Some(ri) => ri | None => unknown root; +fun syn_err toks = + error ("Syntax error at: " ^ space_implode " " (map str_of_token toks)); - fun lr (tl,isi,si,t) = -(* if ismt_stateS(si) then raise SYNTAX_ERROR(t::tl) else *) (* FIXME *) - if ismt_stateS(si) then syn_err (t::tl) else - case tl of - [] => () | - t::tl => - let val si1 = mt_stateS(); - fun process(St(nti,pi,ip,from,ptl)) = - let val opA = #1(sub(opLA,nti)) - val Op(syA,opn,p) = sub(opA,pi) in - if ip = length(syA) - then complete(nti,syA,opn,p,ptl, - sub(state_sets,from),si,opLA,rchA) - else case sub(syA,ip) of - NT(ntj,p) => - seq (predict(t,isi,si,p,opLA)) (sub(rchA,ntj)) - | T(t') => - if matching_tokens(t,t') - then add_state(nti,pi,ip+1,from, - if valued_token(t) - then ptl@[Tip(t)] else ptl, - si1) - else () end; +fun parse ((_, _, (tab, opLA, rchA)):gram) (root:string) (tl: token list): parsetree = + let + val tl' = tl; + val state_sets = mt_states(len tl' + 1); + val s0 = mt_stateS(); + val rooti = case Symtab.lookup(tab, RootPref^root) of + Some(ri) => ri | None => unknown root; - in apply_all_states(process,si); - update(state_sets,isi+1,si1); - lr(tl,isi+1,si1,t) end - - in update(state_sets,0,s0); - add_state(rooti,0,0,0,[],s0); - lr(tl',0,s0,EndToken(*dummy*)); - (*print_stat state_sets;*) - let val St(_,_,_,_,[pt]) = fst_state(sub(state_sets,len tl')) - in pt end - end; - + fun lr (tl, isi, si, t) = + if ismt_stateS(si) then syn_err (t::tl) else + case tl of + [] => () | + t::tl => + let val si1 = mt_stateS(); + fun process(St(nti,pi,ip,from,ptl)) = + let val opA = #1(sub(opLA,nti)) + val Op(syA,opn,p) = sub(opA,pi) in + if ip = length(syA) + then complete(nti,syA,opn,p,ptl, + sub(state_sets,from),si,opLA,rchA) + else case sub(syA,ip) of + NT(ntj,p) => + seq (predict(t,isi,si,p,opLA)) (sub(rchA,ntj)) + | T(t') => + if matching_tokens(t,t') + then add_state(nti,pi,ip+1,from, + if valued_token(t) + then ptl@[Tip(t)] else ptl, + si1) + else () end; -(*(* FIXME *) -fun print_gram ((st,opAA,rchA):gram,lex) = - let val tts = Lexicon.name_of_token; - val al = map (fn (x,y)=>(y,x)) (Symtab.alist_of st); - fun nt i = let val Some(s) = assoc(al,i) in s end; - fun print_sy(T(EndToken)) = prs". " | - print_sy(T(tk)) = (prs(tts tk); prs" ") | - print_sy(NT(i,p)) = (prs((nt i)^"[");print_int p;prs"] "); - fun print_opA(i) = - let val lhs = nt i; - val (opA,_)=sub(opAA,i); - fun print_op(j) = - let val Op(sy,n,p) = sub(opA,j) - in prs(lhs^" = "); forA(fn i=>print_sy(sub(sy,i)),sy); - if n="" then () else prs(" => \""^n^"\""); - prs" (";print_int p;prs")\n" - end; - in forA(print_op,opA) end; - fun print_rch(i) = (print_int i; prs" -> "; - print_list("[","]\n",print_int) (sub(rchA,i))) - in forA(print_opA,opAA) (*; forA(print_rch,rchA) *) end; -*) + in apply_all_states(process,si); + update(state_sets,isi+1,si1); + lr(tl,isi+1,si1,t) end + + in + update (state_sets, 0, s0); + add_state (rooti, 0, 0, 0, [], s0); + lr (tl', 0, s0, EndToken(*dummy*)); + (*print_stat state_sets;*) + let val St(_, _, _, _, [pt]) = fst_state(sub(state_sets, len tl')) + in pt end + end; end; + end; diff -r d33cd0011e48 -r a7d3e712767a src/Pure/Syntax/lexicon.ML --- a/src/Pure/Syntax/lexicon.ML Wed Jan 19 14:15:01 1994 +0100 +++ b/src/Pure/Syntax/lexicon.ML Wed Jan 19 14:21:26 1994 +0100 @@ -48,20 +48,22 @@ VarSy of string | TFreeSy of string | TVarSy of string | - EndToken; + EndToken val id: string val var: string val tfree: string val tvar: string + val terminals: string list + val is_terminal: string -> bool val str_of_token: token -> string val display_token: token -> string val matching_tokens: token * token -> bool val valued_token: token -> bool - val predef_term: string -> token - val is_terminal: string -> bool + val predef_term: string -> token option + val dest_lexicon: lexicon -> string list val empty_lexicon: lexicon val extend_lexicon: lexicon -> string list -> lexicon - val mk_lexicon: string list -> lexicon + val merge_lexicons: lexicon -> lexicon -> lexicon val tokenize: lexicon -> bool -> string -> token list end; @@ -112,13 +114,17 @@ EndToken; -(* names of valued tokens *) +(* terminal arguments *) val id = "id"; val var = "var"; val tfree = "tfree"; val tvar = "tvar"; +val terminals = [id, var, tfree, tvar]; + +fun is_terminal s = s mem terminals; + (* str_of_token *) @@ -164,16 +170,11 @@ (* predef_term *) fun predef_term name = - if name = id then IdentSy name - else if name = var then VarSy name - else if name = tfree then TFreeSy name - else if name = tvar then TVarSy name - else EndToken; - - -(* is_terminal **) - -fun is_terminal s = s mem [id, var, tfree, tvar]; + if name = id then Some (IdentSy name) + else if name = var then Some (VarSy name) + else if name = tfree then Some (TFreeSy name) + else if name = tvar then Some (TVarSy name) + else None; @@ -186,13 +187,19 @@ val no_token = ""; -(* empty_lexicon *) +(* dest_lexicon *) + +fun dest_lexicon Empty = [] + | dest_lexicon (Branch (_, "", lt, eq, gt)) = + dest_lexicon eq @ dest_lexicon lt @ dest_lexicon gt + | dest_lexicon (Branch (_, str, lt, eq, gt)) = + str :: (dest_lexicon eq @ dest_lexicon lt @ dest_lexicon gt); + + +(* empty, extend, merge lexicons *) val empty_lexicon = Empty; - -(* extend_lexicon *) - fun extend_lexicon lexicon strs = let fun ext (lex, s) = @@ -210,17 +217,22 @@ val cs = explode s; in if exists is_blank cs then - error ("extend_lexicon: blank in literal " ^ quote s) + sys_error ("extend_lexicon: blank in delimiter " ^ quote s) else add lex cs end; in - foldl ext (lexicon, strs) + foldl ext (lexicon, strs \\ dest_lexicon lexicon) end; - -(* mk_lexicon *) - -val mk_lexicon = extend_lexicon empty_lexicon; +fun merge_lexicons lex1 lex2 = + let + val strs1 = dest_lexicon lex1; + val strs2 = dest_lexicon lex2; + in + if strs2 subset strs1 then lex1 + else if strs1 subset strs2 then lex2 + else extend_lexicon lex1 strs2 + end; diff -r d33cd0011e48 -r a7d3e712767a src/Pure/Syntax/parser.ML --- a/src/Pure/Syntax/parser.ML Wed Jan 19 14:15:01 1994 +0100 +++ b/src/Pure/Syntax/parser.ML Wed Jan 19 14:21:26 1994 +0100 @@ -5,94 +5,111 @@ Isabelle's main parser (used for terms and typs). TODO: + fix ambiguous grammar problem ~1 --> chain_pri - rename T, NT improve syntax error - fix ambiguous grammar problem + extend_gram: remove 'roots' arg *) signature PARSER = sig - structure XGram: XGRAM - structure ParseTree: PARSE_TREE - local open XGram ParseTree ParseTree.Lexicon in + structure Lexicon: LEXICON + structure SynExt: SYN_EXT + local open Lexicon SynExt SynExt.Ast in type gram val empty_gram: gram - val extend_gram: gram -> string list -> string prod list -> gram - val mk_gram: string list -> string prod list -> gram + val extend_gram: gram -> string list -> xprod list -> gram + val merge_grams: gram -> gram -> gram + val pretty_gram: gram -> Pretty.T list + datatype parsetree = + Node of string * parsetree list | + Tip of token val parse: gram -> string -> token list -> parsetree end end; -functor ParserFun(structure Symtab: SYMTAB and XGram: XGRAM - and ParseTree: PARSE_TREE)(*: PARSER *) = (* FIXME *) +functor ParserFun(structure Symtab: SYMTAB and Lexicon: LEXICON + and SynExt: SYN_EXT)(*: PARSER *) = (* FIXME *) struct -structure XGram = XGram; -structure ParseTree = ParseTree; -structure Lexicon = ParseTree.Lexicon; -open XGram ParseTree Lexicon; +structure Pretty = SynExt.Ast.Pretty; +structure Lexicon = Lexicon; +structure SynExt = SynExt; +open Lexicon SynExt; (** datatype gram **) -datatype symb = T of token | NT of string * int; +datatype symb = + Terminal of token | + Nonterminal of string * int; datatype gram = - Gram of string list * (symb list * string * int) list Symtab.table; - + Gram of (string * (symb list * string * int)) list + * (symb list * string * int) list Symtab.table; -(* empty_gram *) - -val empty_gram = Gram ([], Symtab.null); +fun mk_gram prods = Gram (prods, Symtab.make_multi prods); -(* extend_gram *) +(* empty, extend, merge grams *) -(*prods are stored in reverse order*) +val empty_gram = mk_gram []; -fun extend_gram (Gram (roots, tab)) new_roots xprods = +fun extend_gram (gram1 as Gram (prods1, _)) _ xprods2 = let - fun symb_of (Terminal s) = Some (T (Token s)) - | symb_of (Nonterminal (s, p)) = + fun symb_of (Delim s) = Some (Terminal (Token s)) + | symb_of (Argument (s, p)) = (case predef_term s of - EndToken => Some (NT (s, p)) - | tk => Some (T tk)) + None => Some (Nonterminal (s, p)) + | Some tk => Some (Terminal tk)) | symb_of _ = None; - fun prod_of (Prod (s, xsyms, c, p)) = (s, mapfilter symb_of xsyms, c, p); + fun prod_of (XProd (lhs, xsymbs, const, pri)) = + (lhs, (mapfilter symb_of xsymbs, const, pri)); - fun add_prod (tab, (s, syms, c, p)) = - (case Symtab.lookup (tab, s) of - None => Symtab.update ((s, [(syms, c, p)]), tab) - | Some prods => Symtab.update ((s, (syms, c, p) :: prods), tab)); + val prods2 = distinct (map prod_of xprods2); in - Gram (new_roots union roots, - Symtab.balance (foldl add_prod (tab, map prod_of xprods))) + if prods2 subset prods1 then gram1 + else mk_gram (extend_list prods1 prods2) end; +fun merge_grams (gram1 as Gram (prods1, _)) (gram2 as Gram (prods2, _)) = + if prods2 subset prods1 then gram1 + else if prods1 subset prods2 then gram2 + else mk_gram (merge_lists prods1 prods2); + -(* mk_gram *) +(* pretty_gram *) -val mk_gram = extend_gram empty_gram; +fun pretty_gram (Gram (prods, _)) = + let + fun pretty_name name = [Pretty.str (name ^ " =")]; - -(* get_prods *) + fun pretty_symb (Terminal (Token s)) = Pretty.str (quote s) + | pretty_symb (Terminal tok) = Pretty.str (str_of_token tok) + | pretty_symb (Nonterminal (s, p)) = + Pretty.str (s ^ "[" ^ string_of_int p ^ "]"); -fun get_prods tab s pred = - let - fun rfilter [] ys = ys - | rfilter (x :: xs) ys = rfilter xs (if pred x then x :: ys else ys); + fun pretty_const "" = [] + | pretty_const c = [Pretty.str ("=> " ^ quote c)]; + + fun pretty_pri p = [Pretty.str ("(" ^ string_of_int p ^ ")")]; + + fun pretty_prod (name, (symbs, const, pri)) = + Pretty.block (Pretty.breaks (pretty_name name @ + map pretty_symb symbs @ pretty_const const @ pretty_pri pri)); in - (case Symtab.lookup (tab, s) of - Some prods => rfilter prods [] - | None => []) + map pretty_prod prods end; (** parse **) +datatype parsetree = + Node of string * parsetree list | + Tip of token; + type state = string * int * parsetree list (*before point*) @@ -103,6 +120,9 @@ +fun get_prods tab lhs pred = + filter pred (Symtab.lookup_multi (tab, lhs)); + fun getProductions tab A i = get_prods tab A (fn (_, _, j) => j >= i orelse j = ~1); @@ -111,183 +131,141 @@ -fun mkState i jj A ([NT(B,~1)],id,~1) = - (A,max_pri,[],[NT (B,jj)],id,i) - | mkState i jj A (ss,id,j) = (A,j,[],ss,id,i) ; +fun mkState i jj A ([Nonterminal (B, ~1)], id, ~1) = + (A, max_pri, [], [Nonterminal (B, jj)], id, i) + | mkState i jj A (ss, id, j) = (A, j, [], ss, id, i); -fun conc (t,(k:int)) [] = (None, [(t,k)]) - | conc (t,k) ((t',k')::ts) = - if (t = t') - then (Some k', ( if k' >= k - then (t',k')::ts - else (t,k)::ts ) - ) - else let val (n, ts') = conc (t,k) ts - in (n, (t',k')::ts') - end; +fun conc (t, k:int) [] = (None, [(t, k)]) + | conc (t, k) ((t', k') :: ts) = + if t = t' then + (Some k', if k' >= k then (t', k') :: ts else (t, k) :: ts) + else + let val (n, ts') = conc (t, k) ts + in (n, (t', k') :: ts') end; -fun update_tree ((B,(i,ts))::used) (A,t) = - if (A = B) - then - let val (n,ts') = conc t ts - in ((A,(i,ts'))::used, n) - end - else - let val (used', n) = update_tree used (A,t) - in ((B,(i,ts)) :: used', n) - end; +fun update_tree ((B, (i, ts)) :: used) (A, t) = + if A = B then + let val (n, ts') = conc t ts + in ((A, (i, ts')) :: used, n) end + else + let val (used', n) = update_tree used (A, t) + in ((B, (i, ts)) :: used', n) end; - -fun update_index ((B,(i,ts))::used) (A,j) = - if (A = B) - then (A,(j,ts)) :: used - else (B,(i,ts)) :: (update_index used (A,j)); +fun update_index ((B, (i, ts)) :: used) (A, j) = + if A = B then (A, (j, ts)) :: used + else (B, (i, ts)) :: (update_index used (A, j)); fun getS A i Si = - filter (fn (_,_,_,(NT(B,j))::_,_,_) - => (A=B andalso j<=i) - | _ => false - ) Si; - - + filter + (fn (_, _, _, Nonterminal (B, j) :: _, _, _) => A = B andalso j <= i + | _ => false) Si; fun getS' A k n Si = - filter (fn (_,_,_,(NT(B,j))::_,_,_) => (A=B andalso - j<=k andalso - j>n ) - | _ => false - ) Si; - - + filter + (fn (_, _, _, Nonterminal (B, j) :: _, _, _) => A = B andalso j <= k andalso j > n + | _ => false) Si; fun getStates Estate i ii A k = - filter (fn (_,_,_,(NT(B,j))::_,_,_) - => (A=B andalso j<=k) - | _ => false - ) - (Array.sub (Estate, ii)) -; - + filter + (fn (_, _, _, Nonterminal (B, j) :: _, _, _) => A = B andalso j <= k + | _ => false) + (Array.sub (Estate, ii)); (* MMW *)(* FIXME *) -fun movedot_term (A,j,ts,T a::sa,id,i) c = - if (valued_token c) - then (A,j,(ts@[Tip c]),sa,id,i) - else (A,j,ts,sa,id,i) ; - - - -fun movedot_nonterm ts (A,j,tss,NT(B,k) ::sa,id,i) = - (A,j,tss@ts,sa,id,i) ; +fun movedot_term (A, j, ts, Terminal a :: sa, id, i) c = + if valued_token c then + (A, j, (ts @ [Tip c]), sa, id, i) + else (A, j, ts, sa, id, i); - +fun movedot_nonterm ts (A, j, tss, Nonterminal (B, k) :: sa, id, i) = + (A, j, tss @ ts, sa, id, i); -fun movedot_lambda _ [] = [] - | movedot_lambda (B,j,tss,(NT(A,k))::sa,id,i) ((t,ki)::ts) = - if k <= ki - then (B,j,tss@t,sa,id,i) :: - (movedot_lambda (B,j,tss,(NT(A,k))::sa,id,i) ts) - else movedot_lambda (B,j,tss,(NT(A,k))::sa,id,i) ts - ; - +fun movedot_lambda _ [] = [] + | movedot_lambda (B, j, tss, Nonterminal (A, k) :: sa, id, i) ((t, ki) :: ts) = + if k <= ki then + (B, j, tss @ t, sa, id, i) :: + movedot_lambda (B, j, tss, Nonterminal (A, k) :: sa, id, i) ts + else movedot_lambda (B, j, tss, Nonterminal (A, k) :: sa, id, i) ts; fun PROCESSS Estate grammar i c states = - let -fun processS used [] (Si,Sii) = (Si,Sii) - | processS used (S::States) (Si,Sii) = - ( case S of - - (_,_,_,(NT (A,j))::_,_,_) => +fun processS used [] (Si, Sii) = (Si, Sii) + | processS used (S :: States) (Si, Sii) = + (case S of + (_, _, _, Nonterminal (A, j) :: _, _, _) => let val (used_neu, States_neu) = - ( case assoc (used, A) of - Some (k,l) => (* A gehoert zu used *) - - if (k <= j) (* Prioritaet wurde - behandelt *) - then - (used, movedot_lambda S l) - - else (* Prioritaet wurde noch nicht - behandelt *) - let val L = - getProductions' grammar A j k - val States' = map (mkState i j A) L - in - (update_index used (A,j), - States'@ movedot_lambda S l - ) - end + (case assoc (used, A) of + Some (k, l) => (*A gehoert zu used*) + if k <= j (*Prioritaet wurde behandelt*) + then (used, movedot_lambda S l) + else (*Prioritaet wurde noch nicht behandelt*) + let + val L = getProductions' grammar A j k; + val States' = map (mkState i j A) L; + in + (update_index used (A, j), States' @ movedot_lambda S l) + end - | None => (* A gehoert nicht zu used *) - let val L = getProductions grammar A j - val States' = map (mkState i j A) L - in - ((A,(j,[]))::used, States') - end - ) + | None => (*A gehoert nicht zu used*) + let + val L = getProductions grammar A j; + val States' = map (mkState i j A) L; + in + ((A, (j, [])) :: used, States') + end) in - processS used_neu (States @ States_neu) (S::Si,Sii) + processS used_neu (States @ States_neu) (S :: Si, Sii) end - - | (_,_,_,(T a)::_,_,_) => - processS used States - (S::Si, if (matching_tokens (a, c)) - then (movedot_term S c)::Sii (* MMW *)(* FIXME *) - else Sii - ) - + | (_, _, _, Terminal a :: _, _, _) => + processS used States + (S :: Si, + if matching_tokens (a, c) then movedot_term S c :: Sii else Sii) + (* MMW *)(* FIXME *) - | (A,k,ts,[],id,j) => - let val tt = if id = "" - then ts - else [Node(id,ts)] + | (A, k, ts, [], id, j) => + let + val tt = if id = "" then ts else [Node (id, ts)] in - if (j = i) - then - let val (used',O) = update_tree used (A,(tt,k)) - in ( case O of - None => - let val Slist = getS A k Si - val States' = - map (movedot_nonterm tt ) Slist - in processS used' - (States @ States') (S::Si,Sii) - end - | Some n => - if (n >= k) - then - processS used' States (S::Si,Sii) - else - let val Slist = getS' A k n Si - val States' = - map (movedot_nonterm tt ) Slist - in - processS used' - (States @ States') (S::Si,Sii) - end - ) - end - - else - processS used (States @ - map (movedot_nonterm tt) - (getStates Estate i j A k)) - (S::Si,Sii) - end - ) + if j = i then + let + val (used', O) = update_tree used (A, (tt, k)); + in + (case O of + None => + let + val Slist = getS A k Si; + val States' = map (movedot_nonterm tt) Slist; + in + processS used' (States @ States') (S :: Si, Sii) + end + | Some n => + if n >= k then + processS used' States (S :: Si, Sii) + else + let + val Slist = getS' A k n Si; + val States' = map (movedot_nonterm tt) Slist; + in + processS used' (States @ States') (S :: Si, Sii) + end) + end + else + processS used + (States @ map (movedot_nonterm tt) (getStates Estate i j A k)) + (S :: Si, Sii) + end) in - processS [] states ([],[]) + processS [] states ([], []) end; @@ -295,53 +273,53 @@ fun syntax_error toks = error ("Syntax error at: " ^ space_implode " " (map str_of_token toks)); -fun Produce grammar stateset i indata = - case (Array.sub (stateset,i)) of - [] => syntax_error indata (* MMW *)(* FIXME *) - | s => - (case indata of - [] => Array.sub (stateset,i) - | (c::cs) => - let val (si,sii) = - PROCESSS stateset grammar i c s - in - Array.update (stateset,i,si); - Array.update (stateset,i+1,sii); - Produce grammar stateset (i+1) cs - end - ); +fun produce grammar stateset i indata = + (case Array.sub (stateset, i) of + [] => syntax_error indata (* MMW *)(* FIXME *) + | s => + (case indata of + [] => Array.sub (stateset, i) + | c :: cs => + let + val (si, sii) = PROCESSS stateset grammar i c s; + in + Array.update (stateset, i, si); + Array.update (stateset, i + 1, sii); + produce grammar stateset (i + 1) cs + end)); +(*(* FIXME new *) +val get_trees = mapfilter (fn (_, _, [pt], _, _, _) => Some pt | _ => None); +*) + fun get_trees [] = [] - | get_trees ((_,_,pt_lst,_,_,_) :: stateset) = - let val l = get_trees stateset - in case pt_lst of - [ptree] => ptree :: l - | _ => l - end; - - + | get_trees ((_, _, pt_lst, _, _, _) :: stateset) = + let val l = get_trees stateset + in case pt_lst of + [ptree] => ptree :: l + | _ => l + end; fun earley grammar startsymbol indata = - let val S0 = - [("S'",0,[],[NT (startsymbol,0), T EndToken],"",0)] - val s = length indata + 1 (* MMW *)(* FIXME was .. + 2 *) - val Estate = Array.array (s, []) - in Array.update (Estate,0,S0); - let val l = Produce grammar Estate 0 indata (* MMW *)(* FIXME was .. @ [EndToken] *) - val p_trees = get_trees l - in p_trees - end - end; + let + val S0 = [("S'", 0, [], [Nonterminal (startsymbol, 0), Terminal EndToken], "", 0)]; + val s = length indata + 1; (* MMW *)(* FIXME was .. + 2 *) + val Estate = Array.array (s, []); + in + Array.update (Estate, 0, S0); + let + val l = produce grammar Estate 0 indata; (* MMW *)(* FIXME was .. @ [EndToken] *) + val p_trees = get_trees l; + in p_trees end + end; (* FIXME demo *) -fun parse (Gram (roots, prod_tab)) root toks = - if root mem roots then - (case earley prod_tab root toks of - [] => error "parse: no parse trees" - | pt :: _ => pt) - else error ("Unparsable category: " ^ root); +fun parse (Gram (_, prod_tab)) start toks = + (case earley prod_tab start toks of + [] => sys_error "parse: no parse trees" + | pt :: _ => pt); end; diff -r d33cd0011e48 -r a7d3e712767a src/Pure/Syntax/printer.ML --- a/src/Pure/Syntax/printer.ML Wed Jan 19 14:15:01 1994 +0100 +++ b/src/Pure/Syntax/printer.ML Wed Jan 19 14:21:26 1994 +0100 @@ -15,31 +15,29 @@ sig include PRINTER0 structure Symtab: SYMTAB - structure XGram: XGRAM - structure Pretty: PRETTY - local open XGram XGram.Ast in + structure SynExt: SYN_EXT + local open SynExt SynExt.Ast SynExt.Ast.Pretty in val term_to_ast: (string -> (term list -> term) option) -> term -> ast val typ_to_ast: (string -> (term list -> term) option) -> typ -> ast type prtab val empty_prtab: prtab - val extend_prtab: prtab -> (string prod list) -> (string * (ast list -> ast)) list - -> prtab - val mk_prtab: (string prod list) -> (string * (ast list -> ast)) list -> prtab - val pretty_term_ast: prtab -> ast -> Pretty.T - val pretty_typ_ast: prtab -> ast -> Pretty.T + val extend_prtab: prtab -> xprod list -> prtab + val merge_prtabs: prtab -> prtab -> prtab + val pretty_term_ast: prtab -> (string -> (ast list -> ast) option) + -> ast -> Pretty.T + val pretty_typ_ast: prtab -> (string -> (ast list -> ast) option) + -> ast -> Pretty.T end end; -functor PrinterFun(structure Symtab: SYMTAB and Lexicon: LEXICON - and TypeExt: TYPE_EXT and SExtension: SEXTENSION and Pretty: PRETTY - sharing TypeExt.Extension = SExtension.Extension): PRINTER = +functor PrinterFun(structure Symtab: SYMTAB and TypeExt: TYPE_EXT + and SExtension: SEXTENSION sharing TypeExt.SynExt = SExtension.Parser.SynExt) + (*: PRINTER *) = (* FIXME *) struct structure Symtab = Symtab; -structure Extension = TypeExt.Extension; -structure XGram = Extension.XGram; -structure Pretty = Pretty; -open XGram XGram.Ast Lexicon TypeExt Extension SExtension; +structure SynExt = TypeExt.SynExt; +open SExtension.Parser.Lexicon SynExt.Ast SynExt TypeExt SExtension; (** options for printing **) @@ -147,101 +145,77 @@ Break of int | Block of int * symb list; -datatype format = - Prnt of symb list * int * int | - Trns of ast list -> ast | - TorP of (ast list -> ast) * (symb list * int * int); - -type prtab = format Symtab.table; +type prtab = (symb list * int * int) Symtab.table; -(* empty_prtab *) +(* xprods_to_fmts *) + +fun xprod_to_fmt (XProd (_, _, "", _)) = None + | xprod_to_fmt (XProd (_, xsymbs, const, pri)) = + let + fun cons_str s (String s' :: syms) = String (s ^ s') :: syms + | cons_str s syms = String s :: syms; + + fun arg (s, p) = + (if s = "type" then TypArg else Arg) + (if is_terminal s then 1000 else p); (* FIXME 1000 vs. 0 vs. p (?) *) + + fun xsyms_to_syms (Delim s :: xsyms) = + apfst (cons_str s) (xsyms_to_syms xsyms) + | xsyms_to_syms (Argument s_p :: xsyms) = + apfst (cons (arg s_p)) (xsyms_to_syms xsyms) + | xsyms_to_syms (Space s :: xsyms) = + apfst (cons_str s) (xsyms_to_syms xsyms) + | xsyms_to_syms (Bg i :: xsyms) = + let + val (bsyms, xsyms') = xsyms_to_syms xsyms; + val (syms, xsyms'') = xsyms_to_syms xsyms'; + in + (Block (i, bsyms) :: syms, xsyms'') + end + | xsyms_to_syms (Brk i :: xsyms) = + apfst (cons (Break i)) (xsyms_to_syms xsyms) + | xsyms_to_syms (En :: xsyms) = ([], xsyms) + | xsyms_to_syms [] = ([], []); + + fun nargs (Arg _ :: syms) = nargs syms + 1 + | nargs (TypArg _ :: syms) = nargs syms + 1 + | nargs (String _ :: syms) = nargs syms + | nargs (Break _ :: syms) = nargs syms + | nargs (Block (_, bsyms) :: syms) = nargs syms + nargs bsyms + | nargs [] = 0; + in + (case xsyms_to_syms xsymbs of + (symbs, []) => Some (const, (symbs, nargs symbs, pri)) + | _ => sys_error "xprod_to_fmt: unbalanced blocks") + end; + +fun xprods_to_fmts xprods = + gen_distinct eq_fst (mapfilter xprod_to_fmt xprods); + + +(* empty, extend, merge prtabs *) + +fun err_dup_fmt c = + sys_error ("duplicate fmt in prtab for " ^ quote c); val empty_prtab = Symtab.null; +fun extend_prtab tab xprods = + Symtab.extend (op =) (tab, xprods_to_fmts xprods) + handle Symtab.DUPLICATE c => err_dup_fmt c; + +fun merge_prtabs tab1 tab2 = + Symtab.merge (op =) (tab1, tab2) + handle Symtab.DUPLICATE c => err_dup_fmt c; + -(** extend_prtab **) - -fun extend_prtab prtab prods trfuns = - let - fun nargs (Arg _ :: symbs) = nargs symbs + 1 - | nargs (TypArg _ :: symbs) = nargs symbs + 1 - | nargs (String _ :: symbs) = nargs symbs - | nargs (Break _ :: symbs) = nargs symbs - | nargs (Block (_, bsymbs) :: symbs) = nargs symbs + nargs bsymbs - | nargs [] = 0; - - fun merge (s, String s' :: l) = String (s ^ s') :: l - | merge (s, l) = String s :: l; - - fun mk_prnp sy c p = - let - val constr = (c = constrainC orelse c = constrainIdtC); +(** pretty term or typ asts **) - fun syn2pr (Terminal s :: sy) = - let val (symbs, sy') = syn2pr sy; - in (merge (s, symbs), sy') end - | syn2pr (Space s :: sy) = - let val (symbs, sy') = syn2pr sy; - in (merge (s, symbs), sy') end - | syn2pr (Nonterminal (s, p) :: sy) = - let - val (symbs, sy') = syn2pr sy; - val symb = - (if constr andalso s = "type" then TypArg else Arg) - (if is_terminal s then 0 else p); - in (symb :: symbs, sy') end - | syn2pr (Bg i :: sy) = - let - val (bsymbs, sy') = syn2pr sy; - val (symbs, sy'') = syn2pr sy'; - in (Block (i, bsymbs) :: symbs, sy'') end - | syn2pr (Brk i :: sy) = - let val (symbs, sy') = syn2pr sy; - in (Break i :: symbs, sy') end - | syn2pr (En :: sy) = ([], sy) - | syn2pr [] = ([], []); - - val (pr, _) = syn2pr sy; - in - (pr, nargs pr, p) - end; - - fun trns_err c = error ("More than one parse ast translation for " ^ quote c); +(*assumes a syntax derived from Pure, otherwise may loop forever*) - fun add_fmt tab c fmt x = Symtab.update ((c, fmt x), tab); - - fun add_prod (tab, Prod (_, _, "", _)) = tab - | add_prod (tab, Prod (_, sy, c, p)) = - (case Symtab.lookup (tab, c) of - None => add_fmt tab c Prnt (mk_prnp sy c p) - | Some (Prnt _) => add_fmt tab c Prnt (mk_prnp sy c p) - | Some (Trns f) => add_fmt tab c TorP (f, mk_prnp sy c p) - | Some (TorP (f, _)) => add_fmt tab c TorP (f, mk_prnp sy c p)); - - fun add_tr (tab, (c, f)) = - (case Symtab.lookup (tab, c) of - None => add_fmt tab c Trns f - | Some (Prnt pr) => add_fmt tab c TorP (f, pr) - | Some (Trns _) => trns_err c - | Some (TorP _) => trns_err c); - in - Symtab.balance (foldl add_prod (foldl add_tr (prtab, trfuns), prods)) - end; - - -(* mk_prtab *) - -val mk_prtab = extend_prtab empty_prtab; - - - -(** pretty term/typ asts **) - -(*pretty assumes a syntax derived from Pure, otherwise it may loop forever*) - -fun pretty tab type_mode ast0 p0 = +fun pretty prtab trf type_mode ast0 p0 = let val trans = apply_trans "print ast translation"; @@ -252,8 +226,12 @@ let val (Ts, args') = synT (symbs, args); in (astT (t, p) @ Ts, args') end | synT (TypArg p :: symbs, t :: args) = - let val (Ts, args') = synT (symbs, args); - in (pretty tab true t p @ Ts, args') end + let + val (Ts, args') = synT (symbs, args); + in + if type_mode then (astT (t, p) @ Ts, args') + else (pretty prtab trf true t p @ Ts, args') + end | synT (String s :: symbs, args) = let val (Ts, args') = synT (symbs, args); in (Pretty.str s :: Ts, args') end @@ -265,7 +243,7 @@ | synT (Break i :: symbs, args) = let val (Ts, args') = synT (symbs, args); in ((if i < 0 then Pretty.fbrk else Pretty.brk i) :: Ts, args') end - | synT (_ :: _, []) = error "synT" + | synT (_ :: _, []) = sys_error "synT" and parT (pr, args, p, p': int) = if p > p' then @@ -278,7 +256,7 @@ and splitT 0 ([x], ys) = (x, ys) | splitT 0 (rev_xs, ys) = (Appl (rev rev_xs), ys) | splitT n (rev_xs, y :: ys) = splitT (n - 1) (y :: rev_xs, ys) - | splitT _ _ = error "splitT" + | splitT _ _ = sys_error "splitT" and combT (tup as (c, a, args, p)) = let @@ -289,12 +267,12 @@ else if nargs < n orelse type_mode then prefixT tup else astT (appT (splitT n ([c], args)), p); in - (case Symtab.lookup (tab, a) of - None => prefixT tup - | Some (Prnt prnp) => prnt prnp - | Some (Trns f) => + (case (trf a, Symtab.lookup (prtab, a)) of + (None, None) => prefixT tup + | (None, Some prnp) => prnt prnp + | (Some f, None) => (astT (trans a f args, p) handle Match => prefixT tup) - | Some (TorP (f, prnp)) => + | (Some f, Some prnp) => (astT (trans a f args, p) handle Match => prnt prnp)) end @@ -311,14 +289,14 @@ (* pretty_term_ast *) -fun pretty_term_ast tab ast = - Pretty.blk (0, pretty tab false ast 0); +fun pretty_term_ast prtab trf ast = + Pretty.blk (0, pretty prtab trf false ast 0); (* pretty_typ_ast *) -fun pretty_typ_ast tab ast = - Pretty.blk (0, pretty tab true ast 0); +fun pretty_typ_ast prtab trf ast = + Pretty.blk (0, pretty prtab trf true ast 0); end; diff -r d33cd0011e48 -r a7d3e712767a src/Pure/Syntax/syntax.ML --- a/src/Pure/Syntax/syntax.ML Wed Jan 19 14:15:01 1994 +0100 +++ b/src/Pure/Syntax/syntax.ML Wed Jan 19 14:21:26 1994 +0100 @@ -3,17 +3,13 @@ Author: Tobias Nipkow and Markus Wenzel, TU Muenchen Root of Isabelle's syntax module. - -TODO: - fix empty_tables, extend_tables, mk_tables (requires empty_gram, extend_gram) - fix extend (requires extend_tables) *) signature SYNTAX = sig include AST0 include LEXICON0 - include EXTENSION0 + include SYN_EXT0 include TYPE_EXT0 include SEXTENSION1 include PRINTER0 @@ -27,6 +23,7 @@ val test_read: syntax -> string -> string -> unit val read: syntax -> typ -> string -> term val read_typ: syntax -> (indexname -> sort) -> string -> typ + val simple_read_typ: string -> typ val pretty_term: syntax -> term -> Pretty.T val pretty_typ: syntax -> typ -> Pretty.T val string_of_term: syntax -> term -> string @@ -34,118 +31,120 @@ end; functor SyntaxFun(structure Symtab: SYMTAB and TypeExt: TYPE_EXT - and Parser: PARSER and SExtension: SEXTENSION and Printer: PRINTER - sharing TypeExt.Extension = SExtension.Extension - and Parser.XGram = TypeExt.Extension.XGram = Printer.XGram - and Parser.XGram.Ast = Parser.ParseTree.Ast): SYNTAX = + and SExtension: SEXTENSION and Printer: PRINTER + sharing SExtension.Parser.SynExt = TypeExt.SynExt = Printer.SynExt)(*: SYNTAX *) = (* FIXME *) struct -structure Extension = TypeExt.Extension; -structure XGram = Extension.XGram; -structure Lexicon = Parser.ParseTree.Lexicon; -open Lexicon Parser Parser.ParseTree Extension TypeExt SExtension Printer - XGram XGram.Ast; +structure SynExt = TypeExt.SynExt; +structure Parser = SExtension.Parser; +structure Lexicon = Parser.Lexicon; +open Lexicon SynExt SynExt.Ast Parser TypeExt SExtension Printer; + + +(** tables of translation functions **) + +(*the ref serves as unique id*) +type 'a trtab = (('a list -> 'a) * unit ref) Symtab.table; + +val dest_trtab = Symtab.alist_of; + +fun lookup_trtab tab c = + apsome fst (Symtab.lookup (tab, c)); -fun lookup tab a = Symtab.lookup (tab, a); +(* empty, extend, merge trtabs *) + +fun err_dup_trfun name c = + error ("More than one " ^ name ^ " for " ^ quote c); + +val empty_trtab = Symtab.null; + +fun extend_trtab tab trfuns name = + Symtab.extend eq_snd (tab, map (fn (c, f) => (c, (f, ref ()))) trfuns) + handle Symtab.DUPLICATE c => err_dup_trfun name c; + +fun merge_trtabs tab1 tab2 name = + Symtab.merge eq_snd (tab1, tab2) + handle Symtab.DUPLICATE c => err_dup_trfun name c; + + + +(** tables of translation rules **) + +type ruletab = (ast * ast) list Symtab.table; + +fun dest_ruletab tab = flat (map snd (Symtab.alist_of tab)); + + +(* lookup_ruletab *) + +fun lookup_ruletab tab = + if Symtab.is_null tab then None + else Some (fn a => Symtab.lookup_multi (tab, a)); + + +(* empty, extend, merge ruletabs *) + +val empty_ruletab = Symtab.null; + +fun extend_ruletab tab rules = + generic_extend (op =) Symtab.dest_multi Symtab.make_multi tab + (map (fn r => (head_of_rule r, r)) (distinct rules)); + +fun merge_ruletabs tab1 tab2 = + generic_merge (op =) Symtab.dest_multi Symtab.make_multi tab1 tab2; (** datatype syntax **) -datatype tables = - Tabs of { +datatype syntax = + Syntax of { lexicon: lexicon, roots: string list, gram: gram, consts: string list, - parse_ast_trtab: (ast list -> ast) Symtab.table, - parse_ruletab: (ast * ast) list Symtab.table, - parse_trtab: (term list -> term) Symtab.table, - print_trtab: (term list -> term) Symtab.table, - print_ruletab: (ast * ast) list Symtab.table, + parse_ast_trtab: ast trtab, + parse_ruletab: ruletab, + parse_trtab: term trtab, + print_trtab: term trtab, + print_ruletab: ruletab, + print_ast_trtab: ast trtab, prtab: prtab}; -datatype gramgraph = - EmptyGG | - ExtGG of gramgraph ref * ext | - MergeGG of gramgraph ref * gramgraph ref; - -datatype syntax = Syntax of gramgraph ref * tables; - - - -(*** compile syntax ***) - -(* translation funs *) - -fun extend_trtab tab trfuns name = - Symtab.balance (Symtab.st_of_alist (trfuns, tab)) handle Symtab.DUPLICATE s - => error ("More than one " ^ name ^ " for " ^ quote s); - -val mk_trtab = extend_trtab Symtab.null; - -(* translation rules *) +(* empty_syntax *) -fun mk_ruletab rules = - let - fun add_rule (r, tab) = - let val a = head_of_rule r; - in - (case lookup tab a of - None => Symtab.update ((a, [r]), tab) - | Some rs => Symtab.update ((a, r :: rs), tab)) - end; - in - Symtab.balance (foldr add_rule (rules, Symtab.null)) - end; - -fun extend_ruletab tab rules = - mk_ruletab (flat (map #2 (Symtab.alist_of tab)) @ rules); - - - -(** tables **) - -(* empty_tables *) - -(*(* FIXME *) -val empty_tables = - Tabs { +val empty_syntax = + Syntax { lexicon = empty_lexicon, roots = [], gram = empty_gram, consts = [], - parse_ast_trtab = Symtab.null, - parse_ruletab = Symtab.null, - parse_trtab = Symtab.null, - print_trtab = Symtab.null, - print_ruletab = Symtab.null, + parse_ast_trtab = empty_trtab, + parse_ruletab = empty_ruletab, + parse_trtab = empty_trtab, + print_trtab = empty_trtab, + print_ruletab = empty_ruletab, + print_ast_trtab = empty_trtab, prtab = empty_prtab}; -*) -(* extend_tables *) +(* extend_syntax *) -fun extend_tables (Tabs tabs) (XGram xgram) = +fun extend_syntax (Syntax tabs) syn_ext = let val {lexicon, roots = roots1, gram, consts = consts1, parse_ast_trtab, - parse_ruletab, parse_trtab, print_trtab, print_ruletab, prtab} = tabs; - val {roots = roots2, prods, consts = consts2, parse_ast_translation, + parse_ruletab, parse_trtab, print_trtab, print_ruletab, print_ast_trtab, + prtab} = tabs; + val SynExt {roots = roots2, xprods, consts = consts2, parse_ast_translation, parse_rules, parse_translation, print_translation, print_rules, - print_ast_translation} = xgram; + print_ast_translation} = syn_ext; in - (* FIXME *) - if not (null prods) then - error "extend_tables: called with non-empty prods" - else - - Tabs { - lexicon = extend_lexicon lexicon (literals_of prods), - roots = roots2 union roots1, - (* gram = extend_gram gram roots2 prods, *) (* FIXME *) - gram = gram, (* FIXME *) + Syntax { + lexicon = extend_lexicon lexicon (delims_of xprods), + roots = extend_list roots1 roots2, + gram = extend_gram gram roots2 xprods, consts = consts2 union consts1, parse_ast_trtab = extend_trtab parse_ast_trtab parse_ast_translation "parse ast translation", @@ -153,176 +152,116 @@ parse_trtab = extend_trtab parse_trtab parse_translation "parse translation", print_trtab = extend_trtab print_trtab print_translation "print translation", print_ruletab = extend_ruletab print_ruletab print_rules, - prtab = extend_prtab prtab prods print_ast_translation} - end; - - -(* mk_tables *) - -(* FIXME *) -(* val mk_tables = extend_tables empty_tables; *) - -(* FIXME *) -fun mk_tables (XGram xgram) = - let - val {roots, prods, consts, parse_ast_translation, parse_rules, - parse_translation, print_translation, print_rules, - print_ast_translation} = xgram; - in - Tabs { - lexicon = mk_lexicon (literals_of prods), - roots = roots, - gram = mk_gram roots prods, - consts = consts, - parse_ast_trtab = mk_trtab parse_ast_translation "parse ast translation", - parse_ruletab = mk_ruletab parse_rules, - parse_trtab = mk_trtab parse_translation "parse translation", - print_trtab = mk_trtab print_translation "print translation", - print_ruletab = mk_ruletab print_rules, - prtab = mk_prtab prods print_ast_translation} + print_ast_trtab = + extend_trtab print_ast_trtab print_ast_translation "print ast translation", + prtab = extend_prtab prtab xprods} end; -(* ggr_to_xgram *) +(* merge_syntaxes *) -fun ggr_to_xgram ggr = +fun merge_syntaxes (Syntax tabs1) (Syntax tabs2) = let - fun flatGG ggr (xg, v) = - if ggr mem v then (xg, v) else flatGG' ggr (xg, ggr :: v) - and flatGG' (ref EmptyGG) xgv = xgv - | flatGG' (ref (ExtGG (ggr, ext))) xgv = - let - val (xg', v') = flatGG ggr xgv; - in - (extend_xgram xg' ext, v') - end - | flatGG' (ref (MergeGG (ggr1, ggr2))) xgv = - flatGG ggr1 (flatGG ggr2 xgv); + val {lexicon = lexicon1, roots = roots1, gram = gram1, consts = consts1, + parse_ast_trtab = parse_ast_trtab1, parse_ruletab = parse_ruletab1, + parse_trtab = parse_trtab1, print_trtab = print_trtab1, + print_ruletab = print_ruletab1, print_ast_trtab = print_ast_trtab1, + prtab = prtab1} = tabs1; + + val {lexicon = lexicon2, roots = roots2, gram = gram2, consts = consts2, + parse_ast_trtab = parse_ast_trtab2, parse_ruletab = parse_ruletab2, + parse_trtab = parse_trtab2, print_trtab = print_trtab2, + print_ruletab = print_ruletab2, print_ast_trtab = print_ast_trtab2, + prtab = prtab2} = tabs2; in - #1 (flatGG ggr (empty_xgram, [])) + Syntax { + lexicon = merge_lexicons lexicon1 lexicon2, + roots = merge_lists roots1 roots2, + gram = merge_grams gram1 gram2, + consts = merge_lists consts1 consts2, + parse_ast_trtab = + merge_trtabs parse_ast_trtab1 parse_ast_trtab2 "parse ast translation", + parse_ruletab = merge_ruletabs parse_ruletab1 parse_ruletab2, + parse_trtab = merge_trtabs parse_trtab1 parse_trtab2 "parse translation", + print_trtab = merge_trtabs print_trtab1 print_trtab2 "print translation", + print_ruletab = merge_ruletabs print_ruletab1 print_ruletab2, + print_ast_trtab = + merge_trtabs print_ast_trtab1 print_ast_trtab2 "print ast translation", + prtab = merge_prtabs prtab1 prtab2} end; -(* mk_syntax *) -fun mk_syntax ggr = Syntax (ggr, mk_tables (ggr_to_xgram ggr)); - - - -(*** inspect syntax ***) - -fun xgram_of (Syntax (ggr, _)) = ggr_to_xgram ggr; +(** inspect syntax **) fun string_of_big_list name prts = - Pretty.string_of (Pretty.blk (2, - separate Pretty.fbrk (Pretty.str name :: prts))); + Pretty.string_of (Pretty.block (Pretty.fbreaks (Pretty.str name :: prts))); fun string_of_strings name strs = - Pretty.string_of (Pretty.blk (2, - separate (Pretty.brk 1) - (map Pretty.str (name :: map quote (sort_strings strs))))); + Pretty.string_of (Pretty.block (Pretty.breaks + (map Pretty.str (name :: map quote (sort_strings strs))))); (* print_gram *) -fun prt_gram (XGram {roots, prods, ...}) = +fun print_gram (Syntax tabs) = let - fun pretty_name name = [Pretty.str (name ^ " ="), Pretty.brk 1]; - - fun pretty_xsymbs (Terminal s :: xs) = - Pretty.str (quote s) :: Pretty.brk 1 :: pretty_xsymbs xs - | pretty_xsymbs (Nonterminal (s, p) :: xs) = - (if is_terminal s then Pretty.str s - else Pretty.str (s ^ "[" ^ string_of_int p ^ "]")) - :: Pretty.brk 1 :: pretty_xsymbs xs - | pretty_xsymbs (_ :: xs) = pretty_xsymbs xs - | pretty_xsymbs [] = []; - - fun pretty_const "" = [Pretty.brk 1] - | pretty_const c = [Pretty.str (" => " ^ quote c), Pretty.brk 1]; - - fun pretty_pri p = [Pretty.str ("(" ^ string_of_int p ^ ")")]; - - fun pretty_prod (Prod (name, xsymbs, const, pri)) = - Pretty.blk (2, pretty_name name @ pretty_xsymbs xsymbs @ - pretty_const const @ pretty_pri pri); + val {lexicon, roots, gram, ...} = tabs; in - writeln (string_of_strings "lexicon:" (literals_of prods)); - writeln (Pretty.string_of (Pretty.blk (2, - separate (Pretty.brk 1) (map Pretty.str ("roots:" :: roots))))); - writeln (string_of_big_list "prods:" (map pretty_prod prods)) + writeln (string_of_strings "lexicon:" (dest_lexicon lexicon)); + writeln (Pretty.string_of (Pretty.block (Pretty.breaks + (map Pretty.str ("roots:" :: roots))))); + writeln (string_of_big_list "prods:" (pretty_gram gram)) end; -val print_gram = prt_gram o xgram_of; - (* print_trans *) -fun prt_trans (XGram xgram) = +fun print_trans (Syntax tabs) = let - fun string_of_trs name trs = string_of_strings name (map fst trs); + fun string_of_trtab name tab = + string_of_strings name (map fst (dest_trtab tab)); - fun string_of_rules name rules = - string_of_big_list name (map pretty_rule rules); + fun string_of_ruletab name tab = + string_of_big_list name (map pretty_rule (dest_ruletab tab)); - val {consts, parse_ast_translation, parse_rules, parse_translation, - print_translation, print_rules, print_ast_translation, ...} = xgram; + val {consts, parse_ast_trtab, parse_ruletab, parse_trtab, print_trtab, + print_ruletab, print_ast_trtab, ...} = tabs; in writeln (string_of_strings "consts:" consts); - writeln (string_of_trs "parse_ast_translation:" parse_ast_translation); - writeln (string_of_rules "parse_rules:" parse_rules); - writeln (string_of_trs "parse_translation:" parse_translation); - writeln (string_of_trs "print_translation:" print_translation); - writeln (string_of_rules "print_rules:" print_rules); - writeln (string_of_trs "print_ast_translation:" print_ast_translation) + writeln (string_of_trtab "parse_ast_translation:" parse_ast_trtab); + writeln (string_of_ruletab "parse_rules:" parse_ruletab); + writeln (string_of_trtab "parse_translation:" parse_trtab); + writeln (string_of_trtab "print_translation:" print_trtab); + writeln (string_of_ruletab "print_rules:" print_ruletab); + writeln (string_of_trtab "print_ast_translation:" print_ast_trtab) end; -val print_trans = prt_trans o xgram_of; - (* print_syntax *) -fun print_syntax syn = - let - val xgram = xgram_of syn; - in - prt_gram xgram; prt_trans xgram - end; +fun print_syntax syn = (print_gram syn; print_trans syn); -(*** parsing and printing ***) - -(* mk_get_rules *) - -fun mk_get_rules ruletab = - let - fun get_rules a = - (case lookup ruletab a of - Some rules => rules - | None => []); - in - if Symtab.is_null ruletab then None else Some get_rules - end; - +(** read **) (* read_ast *) -fun read_ast (Syntax (_, tabs)) xids root str = +fun read_ast (Syntax tabs) xids root str = let - val Tabs {lexicon, gram, parse_ast_trtab, ...} = tabs; + val {lexicon, gram, parse_ast_trtab, ...} = tabs; in - pt_to_ast (lookup parse_ast_trtab) + pt_to_ast (lookup_trtab parse_ast_trtab) (parse gram root (tokenize lexicon xids str)) end; - -(** test_read **) +(* test_read *) -fun test_read (Syntax (_, tabs)) root str = +fun test_read (Syntax tabs) root str = let - val Tabs {lexicon, gram, parse_ast_trtab, parse_ruletab, ...} = tabs; + val {lexicon, gram, parse_ast_trtab, parse_ruletab, ...} = tabs; val toks = tokenize lexicon false str; val _ = writeln ("tokens: " ^ space_implode " " (map display_token toks)); @@ -331,36 +270,38 @@ val raw_ast = pt_to_ast (K None) pt; val _ = writeln ("raw: " ^ str_of_ast raw_ast); - val pre_ast = pt_to_ast (lookup parse_ast_trtab) pt; - val _ = normalize true true (mk_get_rules parse_ruletab) pre_ast; + val pre_ast = pt_to_ast (lookup_trtab parse_ast_trtab) pt; + val _ = normalize true true (lookup_ruletab parse_ruletab) pre_ast; in () end; - -(** read **) +(* read *) -fun read (syn as Syntax (_, tabs)) ty str = +fun read (syn as Syntax tabs) ty str = let - val Tabs {parse_ruletab, parse_trtab, ...} = tabs; + val {parse_ruletab, parse_trtab, ...} = tabs; val ast = read_ast syn false (typ_to_nonterm ty) str; in - ast_to_term (lookup parse_trtab) - (normalize_ast (mk_get_rules parse_ruletab) ast) + ast_to_term (lookup_trtab parse_trtab) + (normalize_ast (lookup_ruletab parse_ruletab) ast) end; +(* read types *) -(** read_typ **) +fun read_typ syn def_sort str = + typ_of_term def_sort (read syn typeT str); -fun read_typ syn def_sort str = typ_of_term def_sort (read syn typeT str); +val type_syn = extend_syntax empty_syntax type_ext; + +fun simple_read_typ str = read_typ type_syn (K []) str; - -(** read_rule **) +(* read rules *) fun read_rule syn (xrule as ((_, lhs_src), (_, rhs_src))) = let - val Syntax (_, Tabs {consts, ...}) = syn; + val Syntax {consts, ...} = syn; fun constantify (ast as Constant _) = ast | constantify (ast as Variable x) = @@ -381,10 +322,6 @@ | None => rule) end; - - -(** read_xrules **) - fun read_xrules syn xrules = let fun right_rule (xpat1 |-> xpat2) = Some (xpat1, xpat2) @@ -403,12 +340,13 @@ (** pretty terms or typs **) -fun pretty_t t_to_ast pretty_t (syn as Syntax (_, tabs)) t = +fun pretty_t t_to_ast pretty_t (syn as Syntax tabs) t = let - val Tabs {print_trtab, print_ruletab, prtab, ...} = tabs; - val ast = t_to_ast (lookup print_trtab) t; + val {print_trtab, print_ruletab, print_ast_trtab, prtab, ...} = tabs; + val ast = t_to_ast (lookup_trtab print_trtab) t; in - pretty_t prtab (normalize_ast (mk_get_rules print_ruletab) ast) + pretty_t prtab (lookup_trtab print_ast_trtab) + (normalize_ast (lookup_ruletab print_ruletab) ast) end; val pretty_term = pretty_t term_to_ast pretty_term_ast; @@ -421,45 +359,32 @@ -(*** build syntax ***) - -(* type_syn *) - -val type_syn = mk_syntax (ref (ExtGG (ref EmptyGG, type_ext))); +(** build syntax **) - -(* extend *) (* FIXME *) - -fun extend syn read_ty (roots, xconsts, sext) = - let - val Syntax (ggr0, tabs0 as Tabs {roots = roots0, ...}) = syn; +(* extend *) - val ext1 = ext_of_sext ((distinct roots) \\ roots0) xconsts read_ty sext; +fun extend syn0 read_ty (all_roots, xconsts, sext) = + let + val Syntax {roots, ...} = syn0; - val (syn1 as Syntax (ggr1, tabs1)) = - (case ext1 of - Ext {roots = [], mfix = [], ...} => - Syntax (ref (ExtGG (ggr0, ext1)), extend_tables tabs0 (mk_xgram ext1)) - | _ => mk_syntax (ref (ExtGG (ggr0, ext1)))); + val syn1 = extend_syntax syn0 + (syn_ext_of_sext (all_roots \\ roots) xconsts read_ty sext); - val (parse_rules, print_rules) = read_xrules syn1 (xrules_of sext); - val ext2 = ExtRules {parse_rules = parse_rules, print_rules = print_rules}; - in - Syntax (ref (ExtGG (ggr1, ext2)), extend_tables tabs1 (mk_xgram ext2)) - end; + val syn2 = extend_syntax syn1 + (syn_ext_rules (read_xrules syn1 (xrules_of sext))); + in syn2 end; (* merge *) -fun merge roots syn1 syn2 = +fun merge all_roots syn1 syn2 = let - val Syntax (ggr1, Tabs {roots = roots1, ...}) = syn1; - val Syntax (ggr2, Tabs {roots = roots2, ...}) = syn2; - val mggr = ref (MergeGG (ggr1, ggr2)); + val syn as (Syntax {roots, ...}) = merge_syntaxes syn1 syn2; in - (case ((distinct roots) \\ roots1) \\ roots2 of - [] => mk_syntax mggr - | new_roots => mk_syntax (ref (ExtGG (mggr, ExtRoots new_roots)))) + (case all_roots \\ roots of + [] => syn + | new_roots => (writeln (string_of_strings "DEBUG new roots:" new_roots); (* FIXME debug *) + extend_syntax syn (syn_ext_roots new_roots))) end;