author  wenzelm 
Mon, 09 Nov 1998 15:42:08 +0100  
changeset 5840  e2d2b896c717 
parent 330  2fda15dd1e0f 
permissions  rwrr 
18  1 
(* Title: Pure/Syntax/earley0A.ML 
0  2 
ID: $Id$ 
3 
Author: Tobias Nipkow 

4 

18  5 
WARNING: This file is about to disappear. 
0  6 
*) 
7 

8 
signature PARSER = 

9 
sig 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

10 
structure Lexicon: LEXICON 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

11 
structure SynExt: SYN_EXT 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

12 
local open Lexicon SynExt SynExt.Ast in 
18  13 
type gram 
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

14 
val empty_gram: gram 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

15 
val extend_gram: gram > string list > xprod list > gram 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

16 
val merge_grams: gram > gram > gram 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

17 
val pretty_gram: gram > Pretty.T list 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

18 
datatype parsetree = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

19 
Node of string * parsetree list  
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

20 
Tip of token 
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset

21 
val parse: gram > string > token list > parsetree list 
0  22 
end 
23 
end; 

24 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

25 
functor EarleyFun(structure Symtab: SYMTAB and Lexicon: LEXICON 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

26 
and SynExt: SYN_EXT)(*: PARSER *) = (* FIXME *) 
0  27 
struct 
28 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

29 
structure Pretty = SynExt.Ast.Pretty; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

30 
structure Lexicon = Lexicon; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

31 
structure SynExt = SynExt; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

32 
open Lexicon; 
18  33 

34 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

35 
(** datatype parsetree **) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

36 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

37 
datatype parsetree = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

38 
Node of string * parsetree list  
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

39 
Tip of token; 
18  40 

41 
fun mk_pt ("", [pt]) = pt 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

42 
 mk_pt ("", _) = sys_error "mk_pt: funny copy op in parse tree" 
18  43 
 mk_pt (s, ptl) = Node (s, ptl); 
44 

45 

46 

47 
(** token maps (from lexicon.ML) **) 

48 

49 
type 'b TokenMap = (token * 'b list) list * 'b list; 

50 
val first_token = 0; 

51 

52 
fun int_of_token(Token(tk)) = first_token  

53 
int_of_token(IdentSy _) = first_token  1  

54 
int_of_token(VarSy _) = first_token  2  

55 
int_of_token(TFreeSy _) = first_token  3  

56 
int_of_token(TVarSy _) = first_token  4  

57 
int_of_token(EndToken) = first_token  5; 

58 

59 
fun lesstk(s, t) = int_of_token(s) < int_of_token(t) orelse 

60 
(case (s, t) of (Token(a), Token(b)) => a<b  _ => false); 

61 

62 
fun mkTokenMap(atll) = 

63 
let val aill = atll; 

64 
val dom = sort lesstk (distinct(flat(map snd aill))); 

65 
val mt = map fst (filter (null o snd) atll); 

66 
fun mktm(i) = 

67 
let fun add(l, (a, il)) = if i mem il then a::l else l 

68 
in (i, foldl add ([], aill)) end; 

69 
in (map mktm dom, mt) end; 

70 

71 
fun find_al (i) = 

72 
let fun find((j, al)::l) = if lesstk(i, j) then [] else 

73 
if matching_tokens(i, j) then al else find l  

74 
find [] = []; 

75 
in find end; 

76 
fun applyTokenMap((l, mt), tk:token) = mt@(find_al tk l); 

77 

78 

0  79 

80 
(* Linked lists: *) 

81 
infix 5 &; 

82 
datatype 'a LList = nilL  op & of 'a * ('a LListR) 

83 
withtype 'a LListR = 'a LList ref; 

84 

85 
(* Apply proc to all elements in a linked list *) 

86 
fun seqll (proc: '_a > unit) : ('_a LListR > unit) = 

87 
let fun seq (ref nilL) = ()  

88 
seq (ref((a:'_a)&l)) = (proc a; seq l) 

89 
in seq end; 

90 

91 
fun llist_to_list (ref nilL) = []  

92 
llist_to_list (ref(a&ll)) = a::(llist_to_list ll); 

93 

94 
val len = length; 

95 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

96 
local open Array SynExt in 
0  97 
nonfix sub; 
98 

99 
fun forA(p: int > unit, a: 'a array) : unit = 

100 
let val ub = length a 

101 
fun step(i) = if i=ub then () else (p(i); step(i+1)); 

102 
in step 0 end; 

103 

104 
fun itA(a0:'a, b: 'b array)(f:'a * 'b > 'a) : 'a = 

105 
let val ub = length b 

106 
fun acc(a,i) = if i=ub then a else acc(f(a,sub(b,i)),i+1) 

107 
in acc(a0,0) end; 

108 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

109 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

110 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

111 
(** grammars **) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

112 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

113 
datatype 'a symb = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

114 
Dlm of 'a  
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

115 
Arg of string * int; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

116 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

117 
datatype 'a prod = Prod of string * 'a symb list * string * int; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

118 

0  119 

18  120 
datatype Symbol = T of token  NT of int * int 
0  121 
and Op = Op of OpSyn * string * int 
122 
withtype OpSyn = Symbol array 

123 
and OpListA = Op array * int TokenMap 

124 
and FastAcc = int TokenMap; 

125 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

126 
(*gram_tabs: name of nt > number, nt number > productions array, 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

127 
nt number > list of nt's reachable via copy ops*) 
0  128 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

129 
type gram_tabs = int Symtab.table * OpListA array * int list array; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

130 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

131 
type gram = string list * string prod list * gram_tabs; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

132 

0  133 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

134 
fun non_term (Arg (s, _)) = if is_terminal s then None else Some s 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

135 
 non_term _ = None; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

136 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

137 
fun non_terms (Prod (_, symbs, _, _)) = mapfilter non_term symbs; 
0  138 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

139 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

140 
(* mk_pre_grammar *) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

141 
(* converts a list of productions in external format into an 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

142 
internal gram object. *) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

143 

0  144 
val dummyTM:FastAcc = mkTokenMap[]; 
145 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

146 
fun mk_pre_grammar prods : gram_tabs = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

147 
let 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

148 
fun same_res (Prod(t1, _, _, _), Prod(t2, _, _, _)) = t1=t2; 
0  149 
val partitioned0 = partition_eq same_res prods; 
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

150 
val nts0 = map (fn Prod(ty, _, _, _)::_ => ty) partitioned0; 
0  151 
val nts' = distinct(flat(map non_terms prods)) \\ nts0; 
152 
val nts = nts' @ nts0; 

153 
val partitioned = (replicate (len nts') []) @ partitioned0; 

154 
val ntis = nts ~~ (0 upto (len(nts)1)); 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

155 
val tab = foldr Symtab.update (ntis, Symtab.null); 
0  156 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

157 
fun nt_or_vt (s, p) = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

158 
(case predef_term s of 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

159 
None => NT (the (Symtab.lookup (tab, s)), p) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

160 
 Some tk => T tk); 
0  161 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

162 
fun mksyn(Dlm(t)) = T(t) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

163 
 mksyn(Arg(t)) = nt_or_vt t; 
0  164 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

165 
fun prod2op(Prod(nt, sy, opn, p)) = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

166 
let val syA = arrayoflist(map mksyn sy) in Op(syA, opn, p) end; 
0  167 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

168 
fun mkops prods = (arrayoflist(map prod2op prods), dummyTM); 
0  169 

170 
val opLA = arrayoflist(map mkops partitioned); 

171 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

172 
val subs = array(length opLA, []) : int list array; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

173 
fun newcp v (a, Op(syA, _, p)) = 
18  174 
if p=chain_pri 
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

175 
then let val NT(k, _) = sub(syA, 0) 
0  176 
in if k mem v then a else k ins a end 
177 
else a; 

178 
fun reach v i = 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

179 
let val new = itA ([], #1(sub(opLA, i))) (newcp v) 
0  180 
val v' = new union v 
181 
in flat(map (reach v') new) union v' end; 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

182 
fun rch(i) = update(subs, i, reach[i]i); 
0  183 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

184 
in 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

185 
forA(rch, subs); (tab, opLA, subs) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

186 
end; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

187 

0  188 

189 
val RootPref = "__"; 

190 

191 
(* Lookahead tables for speeding up parsing. Lkhd is a mapping from 

192 
nonterminals (in the form of OpList) to sets (lists) of token strings *) 

193 

18  194 
type Lkhd = token list list list; 
0  195 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

196 
(* subst_syn(s, k) syn = [ pref k ts  ts is a token string derivable from sy 
0  197 
under substitution s ] *) 
198 
(* This is the general version. 

199 
fun cross l1 l2 = flat(map (fn e2 => (map (fn e1 => e1@e2) l1)) l2); 

200 

201 
(* pref k [x1,...,xn] is [x1,...,xk] if 0<=k<=n and [x1,...,xn] otherwise *) 

202 
fun pref 0 l = [] 

203 
 pref _ [] = [] 

204 
 pref k (e::l) = e::(pref (k1) l); 

205 

206 
fun subst_syn(s:Lkhd,k) = 

18  207 
let fun subst(ref(symb & syn)):token list list = 
0  208 
let val l1 = case symb of T t => [[t]]  
209 
NT(oplr,_) => let val Some l = assoc(s,!oplr) in l end 

210 
in distinct(map (pref k) (cross l1 (subst syn))) end  

211 
subst _ = [[]] 

212 
in subst end; 

213 
*) 

214 
(* This one is specialized to lookahead 1 and a bit too generous *) 

215 
fun subst_syn(s:Lkhd,1) syA = 

216 
let fun subst i = if i = length(syA) then [[]] else 

217 
case sub(syA,i) of 

218 
NT(j,_) => let val pre = nth_elem(j,s) 

219 
in if [] mem pre then (pre \ []) union subst(i+1) 

220 
else pre end  

221 
T(tk) => [[tk]]; 

222 
in subst 0 end; 

223 

224 
(* mk_lkhd(G,k) returns a table which associates with every nonterminal N in 

225 
G the list of pref k s for all token strings s with N G>* s *) 

226 

227 
fun mk_lkhd(opLA:OpListA array,k:int):Lkhd = 

228 
let fun step(s:Lkhd):Lkhd = 

229 
let fun subst_op(l,Op(sy,_,_)) = subst_syn(s,k)sy union l; 

230 
fun step2(l,(opA,_)) = l@[itA([],opA)subst_op]; 

231 
in writeln"."; itA([],opLA)step2 end; 

232 
fun iterate(s:Lkhd):Lkhd = let val s' = step s 

233 
in if map len s = map len s' then s 

234 
else iterate s' end 

235 
in writeln"Computing lookahead tables ..."; 

236 
iterate (replicate (length opLA) []) end; 

237 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

238 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

239 
(* mk_earley_gram *) (* create look ahead tables *) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

240 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

241 
fun mk_earley_gram (g as (tab, opLA, _):gram_tabs):gram_tabs = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

242 
let val lkhd = mk_lkhd(opLA, 1); 
0  243 
fun mk_fa(i):FastAcc = 
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

244 
let val opA = #1(sub(opLA, i)); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

245 
fun start(j) = let val Op(sy, _, _) = sub(opA, j); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

246 
val pre = subst_syn(lkhd, 1) sy 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

247 
in (j, if [] mem pre then [] else map hd pre) end; 
0  248 
in mkTokenMap(map start (0 upto(length(opA)1))) end; 
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

249 
fun updt(i) = update(opLA, i, (#1(sub(opLA, i)), mk_fa(i))); 
0  250 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

251 
in forA(updt, opLA); g end; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

252 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

253 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

254 
(* compile_xgram *) 
0  255 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

256 
fun compile_xgram (roots, prods) = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

257 
let 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

258 
fun mk_root nt = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

259 
Prod (RootPref ^ nt, [Arg (nt, 0), Dlm EndToken], "", 0); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

260 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

261 
val prods' = (map mk_root roots) @ prods; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

262 
in 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

263 
mk_earley_gram (mk_pre_grammar prods') 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

264 
end; 
0  265 

18  266 

267 
(* translate (from xgram.ML) *) 

268 

269 
fun translate trfn = 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

270 
map (fn Dlm t => Dlm (trfn t)  Arg s => Arg s); 
18  271 

272 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

273 
(* mk_gram_tabs *) 
18  274 

275 
fun str_to_tok (opl: string prod list): token prod list = 

276 
map 

277 
(fn Prod (t, syn, s, pa) => Prod (t, translate Token syn, s, pa)) 

278 
opl; 

279 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

280 
fun mk_gram_tabs roots prods = compile_xgram (roots, str_to_tok prods); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

281 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

282 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

283 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

284 
(** build gram **) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

285 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

286 
fun mk_gram roots prods = (roots, prods, mk_gram_tabs roots prods); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

287 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

288 
fun sub_gram (roots1, prods1, _) (roots2, prods2, _) = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

289 
roots1 subset roots2 andalso prods1 subset prods2; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

290 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

291 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

292 
(* empty, extend, merge grams *) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

293 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

294 
val empty_gram = mk_gram [] []; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

295 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

296 
fun extend_gram (gram1 as (roots1, prods1, _)) roots2 xprods2 = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

297 
let 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

298 
fun symb_of (Delim s) = Some (Dlm s) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

299 
 symb_of (Argument s_p) = Some (Arg s_p) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

300 
 symb_of _ = None; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

301 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

302 
fun prod_of (XProd (lhs, xsymbs, const, pri)) = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

303 
Prod (lhs, mapfilter symb_of xsymbs, const, pri); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

304 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

305 
val prods2 = distinct (map prod_of xprods2); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

306 
in 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

307 
if roots2 subset roots1 andalso prods2 subset prods1 then gram1 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

308 
else mk_gram (extend_list roots1 roots2) (extend_list prods1 prods2) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

309 
end; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

310 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

311 
fun merge_grams (gram1 as (roots1, prods1, _)) (gram2 as (roots2, prods2, _)) = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

312 
if sub_gram gram2 gram1 then gram1 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

313 
else if sub_gram gram1 gram2 then gram2 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

314 
else mk_gram (merge_lists roots1 roots2) (merge_lists prods1 prods2); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

315 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

316 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

317 
(* pretty_gram *) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

318 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

319 
fun pretty_gram (_, prods, _) = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

320 
let 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

321 
fun pretty_name name = [Pretty.str (name ^ " =")]; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

322 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

323 
fun pretty_symb (Dlm s) = Pretty.str (quote s) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

324 
 pretty_symb (Arg (s, p)) = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

325 
if is_terminal s then Pretty.str s 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

326 
else Pretty.str (s ^ "[" ^ string_of_int p ^ "]"); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

327 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

328 
fun pretty_const "" = [] 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

329 
 pretty_const c = [Pretty.str ("=> " ^ quote c)]; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

330 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

331 
fun pretty_pri p = [Pretty.str ("(" ^ string_of_int p ^ ")")]; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

332 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

333 
fun pretty_prod (Prod (name, symbs, const, pri)) = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

334 
Pretty.block (Pretty.breaks (pretty_name name @ 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

335 
map pretty_symb symbs @ pretty_const const @ pretty_pri pri)); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

336 
in 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

337 
map pretty_prod prods 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

338 
end; 
18  339 

340 

341 

0  342 
(* State: nonterminal#, production#, index in production, 
343 
index of originating state set, 

344 
parse trees generated so far, 

345 
*) 

346 

18  347 
datatype State = St of int * int * int * int * parsetree list 
0  348 
withtype StateSet = State LListR * (State > unit) LListR; 
349 
type Compl = State > unit; 

350 
type StateSetList = StateSet array; 

351 
(* Debugging: 

352 
val print_SL = seqll(fn St(nti,pi,ip,fs,ptl)=> 

353 
(print_int nti; prs" "; print_int pi; prs" "; print_int ip; prs" "; 

354 
print_int fs; prs" "; print_int(len ptl); prs"\n")); 

355 

356 
fun print_SS(s1,delr) = (writeln"================="; print_SL s1); 

357 

358 
fun count_ss(ref nilL) = 0 

359 
 count_ss(ref(_ & ss)) = count_ss(ss)+1; 

360 

361 
fun print_stat(state_sets) = 

362 
let fun pr i = let val(s1,_)=sub(state_sets,i) 

363 
in prs" "; print_int(count_ss s1) end; 

364 
in prs"["; forA(pr,state_sets); prs"]\n" end; 

365 
*) 

366 
fun mt_stateS():StateSet = (ref nilL, ref nilL); 

367 

368 
fun mt_states(n):StateSetList = array(n,mt_stateS()); 

369 

370 
fun ismt_stateS((ref nilL,_):StateSet) = true  ismt_stateS _ = false; 

371 

372 
fun fst_state((ref(st & _),_): StateSet) = st; 

373 

374 
fun apply_all_states(f,(slr,_):StateSet) = seqll f slr; 

375 

376 
fun add_state(nti,pi,ip,from,ptl,(sllr,delr):StateSet) = 

377 
let fun add(ref(St(nti',pi',ip',from',_) & rest)) = 

378 
if nti=nti' andalso pi=pi' andalso ip=ip' andalso from=from' 

379 
then () 

380 
else add rest  

381 
add(last as ref nilL) = 

382 
let val newst = St(nti,pi,ip,from,ptl) 

383 
in last := newst & ref nilL; 

384 
seqll (fn compl => compl newst) delr 

385 
end; 

386 
in add sllr end; 

387 

388 
fun complete(nti,syA,opn,p,ptl,ss,si as (_,delr):StateSet,opLA,rchA) = 

389 
let val pt = mk_pt(opn,ptl) 

390 
fun compl(St(ntj,pj,jp,from,ptl)) = 

391 
let val Op(syj,_,_) = sub(fst(sub(opLA,ntj)),pj) in 

392 
if jp=length(syj) then () else 

393 
case sub(syj,jp) of 

394 
NT(nt,p') => if p >= p' andalso nti mem sub(rchA,nt) 

395 
then add_state(ntj,pj,jp+1,from,ptl@[pt], si) 

396 
else () 

397 
 _ => () 

398 
end 

399 
in apply_all_states(compl,ss); 

400 
if length(syA)=0 (* delayed completion in case of empty production: *) 

401 
then delr := compl & ref(!delr) else () 

402 
end; 

403 

404 
fun predict(tk,isi,si,p',opLA) = fn nti => 

405 
let val (opA,tm) = sub(opLA,nti); 

406 
fun add(pi) = let val opr as Op(syA,_,p) = sub(opA,pi) 

407 
in if p < p' then () else add_state(nti,pi,0,isi,[],si) end 

408 
in seq add (applyTokenMap(tm,tk)) end; 

409 

410 

411 

18  412 
fun unknown c = error ("Unparsable category: " ^ c); 
0  413 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

414 
fun syn_err toks = 
258  415 
error ("Syntax error at: " ^ quote (space_implode " " (map str_of_token toks))); 
0  416 

330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset

417 
fun parse ((_, _, (tab, opLA, rchA)):gram) (root:string) (tl: token list): parsetree list = 
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

418 
let 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

419 
val tl' = tl; 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

420 
val state_sets = mt_states(len tl' + 1); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

421 
val s0 = mt_stateS(); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

422 
val rooti = case Symtab.lookup(tab, RootPref^root) of 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

423 
Some(ri) => ri  None => unknown root; 
0  424 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

425 
fun lr (tl, isi, si, t) = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

426 
if ismt_stateS(si) then syn_err (t::tl) else 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

427 
case tl of 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

428 
[] => ()  
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

429 
t::tl => 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

430 
let val si1 = mt_stateS(); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

431 
fun process(St(nti,pi,ip,from,ptl)) = 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

432 
let val opA = #1(sub(opLA,nti)) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

433 
val Op(syA,opn,p) = sub(opA,pi) in 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

434 
if ip = length(syA) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

435 
then complete(nti,syA,opn,p,ptl, 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

436 
sub(state_sets,from),si,opLA,rchA) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

437 
else case sub(syA,ip) of 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

438 
NT(ntj,p) => 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

439 
seq (predict(t,isi,si,p,opLA)) (sub(rchA,ntj)) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

440 
 T(t') => 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

441 
if matching_tokens(t,t') 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

442 
then add_state(nti,pi,ip+1,from, 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

443 
if valued_token(t) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

444 
then ptl@[Tip(t)] else ptl, 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

445 
si1) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

446 
else () end; 
18  447 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

448 
in apply_all_states(process,si); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

449 
update(state_sets,isi+1,si1); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

450 
lr(tl,isi+1,si1,t) end 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

451 

a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

452 
in 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

453 
update (state_sets, 0, s0); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

454 
add_state (rooti, 0, 0, 0, [], s0); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

455 
lr (tl', 0, s0, EndToken(*dummy*)); 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

456 
(*print_stat state_sets;*) 
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

457 
let val St(_, _, _, _, [pt]) = fst_state(sub(state_sets, len tl')) 
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset

458 
in [pt] end 
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

459 
end; 
0  460 

461 
end; 

462 

237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
18
diff
changeset

463 

0  464 
end; 
18  465 