author | clasohm |
Thu, 02 Feb 1995 13:11:51 +0100 | |
changeset 890 | 2b7275b13bef |
parent 844 | 5e59370243fa |
child 1147 | 57b5f55bf879 |
permissions | -rw-r--r-- |
18 | 1 |
(* Title: Pure/Syntax/parser.ML |
46 | 2 |
ID: $Id$ |
345 | 3 |
Author: Sonia Mahjoub, Markus Wenzel and Carsten Clasohm, TU Muenchen |
18 | 4 |
|
552 | 5 |
Isabelle's main parser (used for terms and types). |
18 | 6 |
*) |
7 |
||
8 |
signature PARSER = |
|
9 |
sig |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
10 |
structure Lexicon: LEXICON |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
11 |
structure SynExt: SYN_EXT |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
12 |
local open Lexicon SynExt SynExt.Ast in |
18 | 13 |
type gram |
14 |
val empty_gram: gram |
|
624 | 15 |
val extend_gram: gram -> string list -> xprod list -> gram |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
16 |
val merge_grams: gram -> gram -> gram |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
17 |
val pretty_gram: gram -> Pretty.T list |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
18 |
datatype parsetree = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
19 |
Node of string * parsetree list | |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
20 |
Tip of token |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
21 |
val parse: gram -> string -> token list -> parsetree list |
18 | 22 |
end |
682 | 23 |
val branching_level: int ref; |
18 | 24 |
end; |
25 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
26 |
functor ParserFun(structure Symtab: SYMTAB and Lexicon: LEXICON |
552 | 27 |
and SynExt: SYN_EXT): PARSER = |
18 | 28 |
struct |
29 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
30 |
structure Pretty = SynExt.Ast.Pretty; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
31 |
structure Lexicon = Lexicon; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
32 |
structure SynExt = SynExt; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
33 |
open Lexicon SynExt; |
18 | 34 |
|
35 |
||
36 |
(** datatype gram **) |
|
37 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
38 |
datatype symb = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
39 |
Terminal of token | |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
40 |
Nonterminal of string * int; |
18 | 41 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
42 |
datatype refsymb = Term of token | Nonterm of rhss_ref * int |
890 | 43 |
(*reference to production list instead of name*) |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
44 |
and gram = Gram of (string * (symb list * string * int)) list * |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
45 |
(string * rhss_ref) list |
890 | 46 |
withtype rhss_ref = (token option * (refsymb list * string *int) list) list ref |
47 |
(*lookahead table: token and productions*) |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
48 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
49 |
(* convert productions to reference grammar with lookaheads and eliminate |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
50 |
chain productions *) |
373
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
51 |
fun mk_gram prods = |
395
712dceb1ecc7
"Building new grammar" message is no longer displayed by empty_gram
clasohm
parents:
377
diff
changeset
|
52 |
let (*get reference on list of all possible rhss for nonterminal lhs |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
53 |
(if it doesn't exist a new one is created and added to the nonterminal |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
54 |
list)*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
55 |
fun get_rhss ref_prods lhs = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
56 |
case assoc (ref_prods, lhs) of |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
57 |
None => |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
58 |
let val l = ref [(None, [])] |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
59 |
in (l, (lhs, l) :: ref_prods) end |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
60 |
| Some l => (l, ref_prods); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
61 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
62 |
(*convert symb list to refsymb list*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
63 |
fun mk_refsymbs ref_prods [] rs = (rs, ref_prods) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
64 |
| mk_refsymbs ref_prods (Terminal tk :: symbs) rs = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
65 |
mk_refsymbs ref_prods symbs (rs @ [Term tk]) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
66 |
| mk_refsymbs ref_prods (Nonterminal (name, prec) :: symbs) rs = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
67 |
let val (rhss, ref_prods') = get_rhss ref_prods name |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
68 |
in mk_refsymbs ref_prods' symbs (rs @ [Nonterm (rhss, prec)]) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
69 |
end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
70 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
71 |
(*convert prod list to (string * rhss_ref) list |
890 | 72 |
without computing lookaheads; at the same time filter out chains*) |
73 |
fun mk_ref_gram [] ref_prods chains = (ref_prods, chains) |
|
74 |
| mk_ref_gram ((lhs, ([Nonterminal (id, ~1)], _, ~1)) :: ps) |
|
75 |
ref_prods chains = (*chain production*) |
|
76 |
let val (rhss_ref, ref_prods') = get_rhss ref_prods lhs; |
|
77 |
val (rhss_ref2, ref_prods'') = get_rhss ref_prods' id; |
|
78 |
in mk_ref_gram ps ref_prods'' ((rhss_ref, rhss_ref2) :: chains) |
|
79 |
end |
|
80 |
| mk_ref_gram ((lhs, (rhs, name, prec)) :: ps) ref_prods chains = |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
81 |
let val (rhs', ref_prods') = get_rhss ref_prods lhs; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
82 |
val (dummy, rhss) = hd (!rhs'); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
83 |
val (ref_symbs, ref_prods'') = mk_refsymbs ref_prods' rhs []; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
84 |
in rhs' := [(dummy, (ref_symbs, name, prec) :: rhss)]; |
890 | 85 |
mk_ref_gram ps ref_prods'' chains |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
86 |
end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
87 |
|
890 | 88 |
(*expand chain productions*) |
89 |
fun exp_chain (ref_gram, chains) = |
|
90 |
let (*convert a list of pairs to an association list |
|
91 |
by using the first element as the key*) |
|
92 |
fun mk_assoc pairs = |
|
93 |
let fun mk [] result = result |
|
94 |
| mk ((id1, id2) :: ps) result = |
|
95 |
mk ps |
|
96 |
(overwrite (result, (id1, id2 :: (assocs result id1)))); |
|
97 |
in mk pairs [] end; |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
98 |
|
890 | 99 |
(*sort chains in the order they have to be expanded *) |
100 |
fun sort [] [] result = result |
|
101 |
| sort [] todo result = sort todo [] result |
|
102 |
| sort ((chain as (nt, nts)) :: chains) todo result = |
|
103 |
let fun occurs _ [] = false |
|
104 |
| occurs id ((_, nts) :: chains) = |
|
105 |
if id mem nts then true |
|
106 |
else occurs id chains |
|
107 |
in if occurs nt chains then |
|
108 |
sort chains (chain :: todo) result |
|
109 |
else |
|
110 |
sort chains todo (chain :: result) |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
111 |
end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
112 |
|
890 | 113 |
(*replace reference by list of rhss*) |
114 |
fun deref (id:rhss_ref) = #2 (hd (!id)); |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
115 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
116 |
(*add new rhss to productions*) |
890 | 117 |
fun expand (rhss_ref, rhss) = |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
118 |
let val (dummy, old_rhss) = hd (!rhss_ref); |
890 | 119 |
in rhss_ref := [(dummy, old_rhss @ (flat (map deref rhss)))] end; |
120 |
in map expand (sort (mk_assoc chains) [] []); |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
121 |
ref_gram |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
122 |
end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
123 |
|
890 | 124 |
val ref_gram = exp_chain (mk_ref_gram prods [] []); |
18 | 125 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
126 |
(*make a list of all lambda NTs |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
127 |
(i.e. nonterminals that can produce lambda)*) |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
128 |
val lambdas = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
129 |
let fun lambda [] result = result |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
130 |
| lambda ((_, rhss_ref) :: nts) result = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
131 |
if rhss_ref mem result then |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
132 |
lambda nts result |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
133 |
else |
345 | 134 |
let (*list all NTs that can be produced by a rhs |
135 |
containing only lambda NTs*) |
|
136 |
fun only_lambdas [] result = result |
|
137 |
| only_lambdas ((_, rhss_ref) :: ps) result = |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
138 |
let fun only (symbs, _, _) = |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
139 |
forall (fn (Nonterm (id, _)) => id mem result |
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
140 |
| (Term _) => false) symbs; |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
141 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
142 |
val (_, rhss) = hd (!rhss_ref); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
143 |
in if not (rhss_ref mem result) andalso |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
144 |
exists only rhss then |
345 | 145 |
only_lambdas ref_gram (rhss_ref :: result) |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
146 |
else |
345 | 147 |
only_lambdas ps result |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
148 |
end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
149 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
150 |
val (_, rhss) = hd (!rhss_ref); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
151 |
in if exists (fn (symbs, _, _) => null symbs) rhss |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
152 |
then lambda nts |
345 | 153 |
(only_lambdas ref_gram (rhss_ref :: result)) |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
154 |
else lambda nts result |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
155 |
end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
156 |
in lambda ref_gram [] end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
157 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
158 |
(*list all nonterminals on which the lookahead depends (due to lambda |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
159 |
NTs this can be more than one) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
160 |
and report if there is a terminal at the 'start'*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
161 |
fun rhss_start [] skipped = (None, skipped) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
162 |
| rhss_start (Term tk :: _) skipped = (Some tk, skipped) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
163 |
| rhss_start (Nonterm (rhss_ref, _) :: rest) skipped = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
164 |
if rhss_ref mem lambdas then |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
165 |
rhss_start rest (rhss_ref ins skipped) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
166 |
else |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
167 |
(None, rhss_ref ins skipped); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
168 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
169 |
(*list all terminals that can start the given rhss*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
170 |
fun look_rhss starts rhss_ref = |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
171 |
let fun look [] _ result = result |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
172 |
| look ((symbs, _, _) :: todos) done result = |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
173 |
let val (start_token, skipped) = rhss_start symbs []; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
174 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
175 |
(*process all nonterminals on which the lookahead |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
176 |
depends and build the new todo and done lists for |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
177 |
the look function*) |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
178 |
fun look2 [] todos result = |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
179 |
look todos (done union skipped) result |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
180 |
| look2 (rhss_ref :: ls) todos result = |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
181 |
if rhss_ref mem done then look2 ls todos result |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
182 |
else case assoc (starts, rhss_ref) of |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
183 |
Some tks => look2 ls todos (tks union result) |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
184 |
| None => |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
185 |
let val (_, rhss) = hd (!rhss_ref); |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
186 |
in look2 ls (rhss @ todos) result end; |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
187 |
in case start_token of |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
188 |
Some tk => look2 skipped todos (start_token ins result) |
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
189 |
| None => look2 skipped todos result |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
190 |
end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
191 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
192 |
val (_, rhss) = hd (!rhss_ref); |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
193 |
in look rhss [rhss_ref] [] end; |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
194 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
195 |
(*make a table that contains all possible starting terminals |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
196 |
for each nonterminal*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
197 |
fun mk_starts [] starts = starts |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
198 |
| mk_starts ((_, rhss_ref) :: ps) starts = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
199 |
mk_starts ps ((rhss_ref, look_rhss starts rhss_ref) :: starts); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
200 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
201 |
val starts = mk_starts ref_gram []; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
202 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
203 |
(*add list of allowed starting tokens to productions*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
204 |
fun mk_lookahead (_, rhss_ref) = |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
205 |
let (*compares two values of type token option |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
206 |
(used for speed reasons)*) |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
207 |
fun matching_opt_tks (Some tk1, Some tk2) = |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
208 |
matching_tokens (tk1, tk2) |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
209 |
| matching_opt_tks _ = false; |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
210 |
|
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
211 |
(*add item to lookahead list (a list containing pairs of token and |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
212 |
rhss that can be started with it)*) |
345 | 213 |
fun add_start new_rhs tokens table = |
214 |
let fun add [] [] = [] |
|
215 |
| add (tk :: tks) [] = |
|
216 |
(tk, [new_rhs]) :: (add tks []) |
|
217 |
| add tokens ((tk, rhss) :: ss) = |
|
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
218 |
if gen_mem matching_opt_tks (tk, tokens) then |
345 | 219 |
(tk, new_rhs :: rhss) :: (add (tokens \ tk) ss) |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
220 |
else |
345 | 221 |
(tk, rhss) :: (add tokens ss); |
222 |
in add tokens table end; |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
223 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
224 |
(*combine all lookaheads of a list of nonterminals*) |
345 | 225 |
fun combine_starts rhss_refs = |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
226 |
foldr (gen_union matching_opt_tks) |
345 | 227 |
((map (fn rhss_ref => let val Some tks = assoc (starts, rhss_ref) |
228 |
in tks end) rhss_refs), []); |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
229 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
230 |
(*get lookahead for a rhs and update lhs' lookahead list*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
231 |
fun look_rhss [] table = table |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
232 |
| look_rhss ((rhs as (symbs, id, prec)) :: rs) table = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
233 |
let val (start_token, skipped) = rhss_start symbs []; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
234 |
val starts = case start_token of |
377
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
235 |
Some tk => gen_ins matching_opt_tks |
ab8917806779
lookaheads are now computed faster (during the grammar is built)
clasohm
parents:
373
diff
changeset
|
236 |
(Some tk, combine_starts skipped) |
345 | 237 |
| None => if skipped subset lambdas then |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
238 |
[None] |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
239 |
else |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
240 |
combine_starts skipped; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
241 |
in look_rhss rs (add_start rhs starts table) end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
242 |
|
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
243 |
val (_, rhss) = hd (!rhss_ref); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
244 |
in rhss_ref := look_rhss rhss [] end; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
245 |
in map mk_lookahead ref_gram; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
246 |
Gram (prods, ref_gram) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
247 |
end; |
18 | 248 |
|
249 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
250 |
(* empty, extend, merge grams *) |
18 | 251 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
252 |
val empty_gram = mk_gram []; |
18 | 253 |
|
624 | 254 |
fun extend_gram (gram1 as Gram (prods1, _)) roots xprods2 = |
18 | 255 |
let |
764
b60e77395d1a
changed Pure's grammar and the way types are converted to nonterminals
clasohm
parents:
697
diff
changeset
|
256 |
fun symb_of (Delim s) = Some (Terminal (Token s)) |
b60e77395d1a
changed Pure's grammar and the way types are converted to nonterminals
clasohm
parents:
697
diff
changeset
|
257 |
| symb_of (Argument (s, p)) = |
18 | 258 |
(case predef_term s of |
764
b60e77395d1a
changed Pure's grammar and the way types are converted to nonterminals
clasohm
parents:
697
diff
changeset
|
259 |
None => Some (Nonterminal (s, p)) |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
260 |
| Some tk => Some (Terminal tk)) |
764
b60e77395d1a
changed Pure's grammar and the way types are converted to nonterminals
clasohm
parents:
697
diff
changeset
|
261 |
| symb_of _ = None; |
18 | 262 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
263 |
fun prod_of (XProd (lhs, xsymbs, const, pri)) = |
764
b60e77395d1a
changed Pure's grammar and the way types are converted to nonterminals
clasohm
parents:
697
diff
changeset
|
264 |
(lhs, (mapfilter symb_of xsymbs, const, pri)); |
18 | 265 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
266 |
val prods2 = distinct (map prod_of xprods2); |
18 | 267 |
in |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
268 |
if prods2 subset prods1 then gram1 |
395
712dceb1ecc7
"Building new grammar" message is no longer displayed by empty_gram
clasohm
parents:
377
diff
changeset
|
269 |
else (writeln "Building new grammar..."; |
712dceb1ecc7
"Building new grammar" message is no longer displayed by empty_gram
clasohm
parents:
377
diff
changeset
|
270 |
mk_gram (extend_list prods1 prods2)) |
18 | 271 |
end; |
272 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
273 |
fun merge_grams (gram1 as Gram (prods1, _)) (gram2 as Gram (prods2, _)) = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
274 |
if prods2 subset prods1 then gram1 |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
275 |
else if prods1 subset prods2 then gram2 |
395
712dceb1ecc7
"Building new grammar" message is no longer displayed by empty_gram
clasohm
parents:
377
diff
changeset
|
276 |
else (writeln "Building new grammar..."; |
712dceb1ecc7
"Building new grammar" message is no longer displayed by empty_gram
clasohm
parents:
377
diff
changeset
|
277 |
mk_gram (merge_lists prods1 prods2)); |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
278 |
|
18 | 279 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
280 |
(* pretty_gram *) |
18 | 281 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
282 |
fun pretty_gram (Gram (prods, _)) = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
283 |
let |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
284 |
fun pretty_name name = [Pretty.str (name ^ " =")]; |
18 | 285 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
286 |
fun pretty_symb (Terminal (Token s)) = Pretty.str (quote s) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
287 |
| pretty_symb (Terminal tok) = Pretty.str (str_of_token tok) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
288 |
| pretty_symb (Nonterminal (s, p)) = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
289 |
Pretty.str (s ^ "[" ^ string_of_int p ^ "]"); |
18 | 290 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
291 |
fun pretty_const "" = [] |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
292 |
| pretty_const c = [Pretty.str ("=> " ^ quote c)]; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
293 |
|
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
294 |
fun pretty_pri p = [Pretty.str ("(" ^ string_of_int p ^ ")")]; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
295 |
|
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
296 |
fun pretty_prod (name, (symbs, const, pri)) = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
297 |
Pretty.block (Pretty.breaks (pretty_name name @ |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
298 |
map pretty_symb symbs @ pretty_const const @ pretty_pri pri)); |
18 | 299 |
in |
844 | 300 |
map pretty_prod (sort (op <= o pairself #1) prods) |
18 | 301 |
end; |
302 |
||
303 |
||
304 |
||
305 |
(** parse **) |
|
306 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
307 |
datatype parsetree = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
308 |
Node of string * parsetree list | |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
309 |
Tip of token; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
310 |
|
18 | 311 |
type state = |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
312 |
rhss_ref * int (*lhs: identification and production precedence*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
313 |
* parsetree list (*already parsed nonterminals on rhs*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
314 |
* refsymb list (*rest of rhs*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
315 |
* string (*name of production*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
316 |
* int; (*index for previous state list*) |
18 | 317 |
|
318 |
type earleystate = state list Array.array; |
|
319 |
||
320 |
||
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
321 |
(*Get all rhss with precedence >= minPrec*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
322 |
fun getRHS minPrec = filter (fn (_, _, prec:int) => prec >= minPrec); |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
323 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
324 |
(*Get all rhss with precedence >= minPrec and < maxPrec*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
325 |
fun getRHS' minPrec maxPrec = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
326 |
filter (fn (_, _, prec:int) => prec >= minPrec andalso prec < maxPrec); |
18 | 327 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
328 |
(*Make states using a list of rhss*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
329 |
fun mkStates i minPrec lhsID rhss = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
330 |
let fun mkState (rhs, id, prodPrec) = (lhsID, prodPrec, [], rhs, id, i); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
331 |
in map mkState rhss end; |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
332 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
333 |
(*Add parse tree to list and eliminate duplicates |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
334 |
saving the maximum precedence*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
335 |
fun conc (t, prec:int) [] = (None, [(t, prec)]) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
336 |
| conc (t, prec) ((t', prec') :: ts) = |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
337 |
if t = t' then |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
338 |
(Some prec', if prec' >= prec then (t', prec') :: ts |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
339 |
else (t, prec) :: ts) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
340 |
else |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
341 |
let val (n, ts') = conc (t, prec) ts |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
342 |
in (n, (t', prec') :: ts') end; |
18 | 343 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
344 |
(*Update entry in used*) |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
345 |
fun update_trees ((B, (i, ts)) :: used) (A, t) = |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
346 |
if A = B then |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
347 |
let val (n, ts') = conc t ts |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
348 |
in ((A, (i, ts')) :: used, n) end |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
349 |
else |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
350 |
let val (used', n) = update_trees used (A, t) |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
351 |
in ((B, (i, ts)) :: used', n) end; |
18 | 352 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
353 |
(*Replace entry in used*) |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
354 |
fun update_prec (A, prec) used = |
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
355 |
let fun update ((hd as (B, (_, ts))) :: used, used') = |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
356 |
if A = B |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
357 |
then used' @ ((A, (prec, ts)) :: used) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
358 |
else update (used, hd :: used') |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
359 |
in update (used, []) end; |
18 | 360 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
361 |
fun getS A maxPrec Si = |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
362 |
filter |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
363 |
(fn (_, _, _, Nonterm (B, prec) :: _, _, _) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
364 |
=> A = B andalso prec <= maxPrec |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
365 |
| _ => false) Si; |
18 | 366 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
367 |
fun getS' A maxPrec minPrec Si = |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
368 |
filter |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
369 |
(fn (_, _, _, Nonterm (B, prec) :: _, _, _) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
370 |
=> A = B andalso prec > minPrec andalso prec <= maxPrec |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
371 |
| _ => false) Si; |
18 | 372 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
373 |
fun getStates Estate i ii A maxPrec = |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
374 |
filter |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
375 |
(fn (_, _, _, Nonterm (B, prec) :: _, _, _) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
376 |
=> A = B andalso prec <= maxPrec |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
377 |
| _ => false) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
378 |
(Array.sub (Estate, ii)); |
18 | 379 |
|
380 |
||
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
381 |
fun movedot_term (A, j, ts, Term a :: sa, id, i) c = |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
382 |
if valued_token c then |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
383 |
(A, j, ts @ [Tip c], sa, id, i) |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
384 |
else (A, j, ts, sa, id, i); |
18 | 385 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
386 |
fun movedot_nonterm ts (A, j, tss, Nonterm _ :: sa, id, i) = |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
387 |
(A, j, tss @ ts, sa, id, i); |
18 | 388 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
389 |
fun movedot_lambda _ [] = [] |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
390 |
| movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ((t, ki) :: ts) = |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
391 |
if k <= ki then |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
392 |
(B, j, tss @ t, sa, id, i) :: |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
393 |
movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ts |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
394 |
else movedot_lambda (B, j, tss, Nonterm (A, k) :: sa, id, i) ts; |
18 | 395 |
|
396 |
||
682 | 397 |
val warned = ref false; (*flag for warning message*) |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
398 |
val branching_level = ref 200; (*trigger value for warnings*) |
18 | 399 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
400 |
fun PROCESSS Estate i c states = |
18 | 401 |
let |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
402 |
fun get_lookahead rhss_ref = token_assoc (!rhss_ref, c); |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
403 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
404 |
fun processS used [] (Si, Sii) = (Si, Sii) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
405 |
| processS used (S :: States) (Si, Sii) = |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
406 |
(case S of |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
407 |
(_, _, _, Nonterm (rhss_ref, minPrec) :: _, _, _) => |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
408 |
let (*predictor operation*) |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
409 |
val (used', new_states) = |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
410 |
(case assoc (used, rhss_ref) of |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
411 |
Some (usedPrec, l) => (*nonterminal has been processed*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
412 |
if usedPrec <= minPrec then |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
413 |
(*wanted precedence has been processed*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
414 |
(used, movedot_lambda S l) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
415 |
else (*wanted precedence hasn't been parsed yet*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
416 |
let val rhss = get_lookahead rhss_ref; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
417 |
val States' = mkStates i minPrec rhss_ref |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
418 |
(getRHS' minPrec usedPrec rhss); |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
419 |
in (update_prec (rhss_ref, minPrec) used, |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
420 |
movedot_lambda S l @ States') |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
421 |
end |
18 | 422 |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
423 |
| None => (*nonterminal is parsed for the first time*) |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
424 |
let val rhss = get_lookahead rhss_ref; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
425 |
val States' = mkStates i minPrec rhss_ref |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
426 |
(getRHS minPrec rhss); |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
427 |
in ((rhss_ref, (minPrec, [])) :: used, States') end); |
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
428 |
|
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
429 |
val _ = if not (!warned) andalso |
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
430 |
length (new_states @ States) > (!branching_level) then |
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
431 |
(writeln "Warning: Currently parsed expression could be \ |
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
432 |
\extremely ambiguous."; |
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
433 |
warned := true) |
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
434 |
else () |
18 | 435 |
in |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
436 |
processS used' (new_states @ States) (S :: Si, Sii) |
18 | 437 |
end |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
438 |
| (_, _, _, Term a :: _, _, _) => (*scanner operation*) |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
439 |
processS used States |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
440 |
(S :: Si, |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
441 |
if matching_tokens (a, c) then movedot_term S c :: Sii else Sii) |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
442 |
| (A, prec, ts, [], id, j) => (*completer operation*) |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
443 |
let val tt = if id = "" then ts else [Node (id, ts)] in |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
444 |
if j = i then (*lambda production?*) |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
445 |
let |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
446 |
val (used', O) = update_trees used (A, (tt, prec)); |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
447 |
in |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
448 |
(case O of |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
449 |
None => |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
450 |
let |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
451 |
val Slist = getS A prec Si; |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
452 |
val States' = map (movedot_nonterm tt) Slist; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
453 |
in |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
454 |
processS used' (States' @ States) (S :: Si, Sii) |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
455 |
end |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
456 |
| Some n => |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
457 |
if n >= prec then |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
458 |
processS used' States (S :: Si, Sii) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
459 |
else |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
460 |
let |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
461 |
val Slist = getS' A prec n Si; |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
462 |
val States' = map (movedot_nonterm tt) Slist; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
463 |
in |
682 | 464 |
processS used' (States' @ States) (S :: Si, Sii) |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
465 |
end) |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
466 |
end |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
467 |
else |
697
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
468 |
let val Slist = getStates Estate i j A prec in |
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
469 |
processS used (map (movedot_nonterm tt) Slist @ States) |
40f72ab196f8
changed warning for extremely ambiguous expressions
clasohm
parents:
682
diff
changeset
|
470 |
(S :: Si, Sii) |
682 | 471 |
end |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
472 |
end) |
682 | 473 |
in processS [] states ([], []) end; |
18 | 474 |
|
475 |
||
362 | 476 |
fun syntax_error toks allowed = |
477 |
error |
|
478 |
((if toks = [] then |
|
479 |
"error: unexpected end of input\n" |
|
480 |
else |
|
367
b734dc03067e
syntax_error now removes duplicate tokens in its output and doesn't
clasohm
parents:
362
diff
changeset
|
481 |
"Syntax error at: " ^ quote (space_implode " " (map str_of_token |
b734dc03067e
syntax_error now removes duplicate tokens in its output and doesn't
clasohm
parents:
362
diff
changeset
|
482 |
((rev o tl o rev) toks))) |
362 | 483 |
^ "\n") |
484 |
^ "Expected tokens: " |
|
485 |
^ space_implode ", " (map (quote o str_of_token) allowed)); |
|
18 | 486 |
|
362 | 487 |
fun produce stateset i indata prev_token = |
488 |
(*the argument prev_token is only used for error messages*) |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
489 |
(case Array.sub (stateset, i) of |
373
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
490 |
[] => let (*similar to token_assoc but does not automatically |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
491 |
include 'None' key*) |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
492 |
fun token_assoc2 (list, key) = |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
493 |
let fun assoc [] = [] |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
494 |
| assoc ((keyi, xi) :: pairs) = |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
495 |
if is_some keyi andalso |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
496 |
matching_tokens (the keyi, key) then |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
497 |
(assoc pairs) @ xi |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
498 |
else assoc pairs; |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
499 |
in assoc list end; |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
500 |
|
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
501 |
(*test if tk is a lookahead for a given minimum precedence*) |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
502 |
fun reduction minPrec tk _ (Term _ :: _, _, prec:int) = |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
503 |
if prec >= minPrec then true |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
504 |
else false |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
505 |
| reduction minPrec tk checked |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
506 |
(Nonterm (rhss_ref, NTprec)::_,_, prec) = |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
507 |
if prec >= minPrec andalso not (rhss_ref mem checked) then |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
508 |
exists (reduction NTprec tk (rhss_ref :: checked)) |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
509 |
(token_assoc2 (!rhss_ref, tk)) |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
510 |
else false; |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
511 |
|
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
512 |
(*compute a list of allowed starting tokens |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
513 |
for a list of nonterminals considering precedence*) |
362 | 514 |
fun get_starts [] = [] |
372
40d565e51dea
syntax_error now checks precedences when computing expected tokens
clasohm
parents:
367
diff
changeset
|
515 |
| get_starts ((rhss_ref, minPrec:int) :: refs) = |
362 | 516 |
let fun get [] = [] |
373
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
517 |
| get ((Some tk, prods) :: rhss) = |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
518 |
if exists (reduction minPrec tk [rhss_ref]) prods |
68400ea32f7b
fixed a bug in syntax_error, added "Building new grammar" message;
clasohm
parents:
372
diff
changeset
|
519 |
then tk :: (get rhss) |
372
40d565e51dea
syntax_error now checks precedences when computing expected tokens
clasohm
parents:
367
diff
changeset
|
520 |
else get rhss |
362 | 521 |
| get ((None, _) :: rhss) = |
522 |
get rhss; |
|
523 |
in (get (!rhss_ref)) union (get_starts refs) end; |
|
524 |
||
372
40d565e51dea
syntax_error now checks precedences when computing expected tokens
clasohm
parents:
367
diff
changeset
|
525 |
val NTs = map (fn (_, _, _, Nonterm (a, prec) :: _, _, _) => |
40d565e51dea
syntax_error now checks precedences when computing expected tokens
clasohm
parents:
367
diff
changeset
|
526 |
(a, prec)) |
362 | 527 |
(filter (fn (_, _, _, Nonterm _ :: _, _, _) => true |
528 |
| _ => false) (Array.sub (stateset, i-1))); |
|
367
b734dc03067e
syntax_error now removes duplicate tokens in its output and doesn't
clasohm
parents:
362
diff
changeset
|
529 |
val allowed = distinct (get_starts NTs @ |
362 | 530 |
(map (fn (_, _, _, Term a :: _, _, _) => a) |
531 |
(filter (fn (_, _, _, Term _ :: _, _, _) => true |
|
367
b734dc03067e
syntax_error now removes duplicate tokens in its output and doesn't
clasohm
parents:
362
diff
changeset
|
532 |
| _ => false) (Array.sub (stateset, i-1))))); |
362 | 533 |
in syntax_error (if prev_token = EndToken then indata |
534 |
else prev_token :: indata) allowed |
|
535 |
end |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
536 |
| s => |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
537 |
(case indata of |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
538 |
[] => Array.sub (stateset, i) |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
539 |
| c :: cs => |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
540 |
let |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
541 |
val (si, sii) = PROCESSS stateset i c s; |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
542 |
in |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
543 |
Array.update (stateset, i, si); |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
544 |
Array.update (stateset, i + 1, sii); |
362 | 545 |
produce stateset (i + 1) cs c |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
546 |
end)); |
18 | 547 |
|
548 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
549 |
val get_trees = mapfilter (fn (_, _, [pt], _, _, _) => Some pt | _ => None); |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
550 |
|
18 | 551 |
|
552 |
fun earley grammar startsymbol indata = |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
553 |
let |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
554 |
val rhss_ref = case assoc (grammar, startsymbol) of |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
555 |
Some r => r |
624 | 556 |
| None => error ("parse: Unknown startsymbol " ^ |
557 |
quote startsymbol); |
|
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
558 |
val S0 = [(ref [], 0, [], [Nonterm (rhss_ref, 0), Term EndToken], "", 0)]; |
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
559 |
val s = length indata + 1; |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
560 |
val Estate = Array.array (s, []); |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
561 |
in |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
562 |
Array.update (Estate, 0, S0); |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
563 |
let |
682 | 564 |
val l = (warned := false; produce Estate 0 indata EndToken(*dummy*)); |
624 | 565 |
|
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
566 |
val p_trees = get_trees l; |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
567 |
in p_trees end |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
568 |
end; |
18 | 569 |
|
570 |
||
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
571 |
fun parse (Gram (_, prod_tab)) start toks = |
624 | 572 |
let val r = |
237
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
573 |
(case earley prod_tab start toks of |
a7d3e712767a
MAJOR INTERNAL CHANGE: extend and merge operations of syntax tables
wenzelm
parents:
46
diff
changeset
|
574 |
[] => sys_error "parse: no parse trees" |
330
2fda15dd1e0f
changed the way a grammar is generated to allow the new parser to work;
clasohm
parents:
258
diff
changeset
|
575 |
| pts => pts); |
624 | 576 |
in r end |
18 | 577 |
|
578 |
end; |