46845
|
1 |
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
|
|
2 |
|
|
3 |
(* base.sig: Base signature file for SML-Yacc. This file contains signatures
|
|
4 |
that must be loaded before any of the files produced by ML-Yacc are loaded
|
|
5 |
*)
|
|
6 |
|
|
7 |
(* STREAM: signature for a lazy stream.*)
|
|
8 |
|
|
9 |
signature STREAM =
  sig
    (* Abstract, element-polymorphic stream type. *)
    type 'xa stream

    (* Build a stream from a function of type unit -> element.
       NOTE(review): presumably the function is called on demand to
       produce successive elements -- confirm against the implementation. *)
    val streamify : (unit -> '_a) -> '_a stream

    (* Combine an element and a stream into a stream.
       NOTE(review): presumably the element becomes the new head -- confirm. *)
    val cons : '_a * '_a stream -> '_a stream

    (* Split a stream into an element and a stream of the same type. *)
    val get : '_a stream -> '_a * '_a stream
  end
|
|
15 |
|
|
16 |
(* LR_TABLE: signature for an LR Table.
|
|
17 |
|
|
18 |
The list of actions and gotos passed to mkLrTable must be ordered by state
|
|
19 |
number. The values for state 0 are the first in the list, the values for
|
|
20 |
state 1 are next, etc.
|
|
21 |
*)
|
|
22 |
|
|
23 |
signature LR_TABLE =
  sig
    (* List of key/value pairs with its own constructors:
       EMPTY ends the list, PAIR holds a key, a value and the rest. *)
    datatype ('a,'b) pairlist = EMPTY | PAIR of 'a * 'b * ('a,'b) pairlist

    (* States, terminals and nonterminals are each an int wrapped in a
       distinct constructor, so they cannot be confused with one another. *)
    datatype state = STATE of int
    datatype term = T of int
    datatype nonterm = NT of int

    (* Parser actions.
       NOTE(review): the int carried by REDUCE is presumably a rule
       number -- confirm against the table generator. *)
    datatype action = SHIFT of state
                    | REDUCE of int
                    | ACCEPT
                    | ERROR

    (* Abstract LR table. *)
    type table

    val numStates : table -> int
    val numRules : table -> int

    (* Returns a (terminal, action) pairlist together with one extra action.
       NOTE(review): the extra action is presumably the default for
       terminals not in the list -- confirm. *)
    val describeActions : table -> state ->
                            (term,action) pairlist * action
    val describeGoto : table -> state -> (nonterm,state) pairlist
    val action : table -> state * term -> action
    val goto : table -> state * nonterm -> state
    val initialState : table -> state

    (* Carries a state and a nonterminal.
       NOTE(review): presumably raised by goto when no entry exists --
       confirm against the implementation. *)
    exception Goto of state * nonterm

    (* The actions and gotos must be ordered by state number: the values
       for state 0 come first, the values for state 1 next, and so on. *)
    val mkLrTable : {actions : ((term,action) pairlist * action) array,
                     gotos : (nonterm,state) pairlist array,
                     numStates : int, numRules : int,
                     initialState : state} -> table
  end
|
|
50 |
|
|
51 |
(* TOKEN: signature revealing the internal structure of a token. This signature
|
|
52 |
TOKEN is distinct from the signature {parser name}_TOKENS produced by ML-Yacc.
|
|
53 |
The {parser name}_TOKENS structures contain some types and functions to
|
|
54 |
construct tokens from values and positions.
|
|
55 |
|
|
56 |
The representation of token was very carefully chosen here to allow the
|
|
57 |
polymorphic parser to work without knowing the types of semantic values
|
|
58 |
or line numbers.
|
|
59 |
|
|
60 |
This has had an impact on the TOKENS structure produced by SML-Yacc, which
|
|
61 |
is a structure parameter to lexer functors. We would like to have some
|
|
62 |
type 'a token which functions to construct tokens would create. A
|
|
63 |
constructor function for an integer token might be
|
|
64 |
|
|
65 |
INT: int * 'a * 'a -> 'a token.
|
|
66 |
|
|
67 |
This is not possible because we need to have tokens with the representation
|
|
68 |
given below for the polymorphic parser.
|
|
69 |
|
|
70 |
Thus our constructor functions for tokens have the form:
|
|
71 |
|
|
72 |
INT: int * 'a * 'a -> (svalue,'a) token
|
|
73 |
|
|
74 |
This in turn has had an impact on the signature that lexers for SML-Yacc
|
|
75 |
must match and the types that a user must declare in the user declarations
|
|
76 |
section of lexers.
|
|
77 |
*)
|
|
78 |
|
|
79 |
signature TOKEN =
  sig
    structure LrTable : LR_TABLE

    (* A token pairs a terminal with a triple holding one 'a and two 'b
       values.  The PARSER and ARG_PARSER signatures in this file
       instantiate 'a with svalue and 'b with pos. *)
    datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b)

    (* NOTE(review): which parts of the two tokens are compared is not
       visible from the signature -- confirm against the implementation. *)
    val sameToken : ('a,'b) token * ('a,'b) token -> bool
  end
|
|
85 |
|
|
86 |
(* LR_PARSER: signature for a polymorphic LR parser *)
|
|
87 |
|
|
88 |
signature LR_PARSER =
  sig
    structure Stream : STREAM
    structure LrTable : LR_TABLE
    structure Token : TOKEN

    (* The parser's table structure and the one inside Token must be the same. *)
    sharing LrTable = Token.LrTable

    exception ParseError

    (* parse takes a single record and returns a '_b value together with a
       token stream.  In the record:
         table, lexer  -- the LR table and the input token stream
         arg           -- a value passed through to saction as its last argument
         saction       -- takes an int, a '_c, a stack of
                          (state, value triple) entries and arg; returns a
                          nonterminal, a value triple and a stack
         void          -- a '_b value
         ec            -- functions and data used for error correction
       NOTE(review): '_b and '_c are presumably the semantic-value and
       position types, matching PARSER_DATA -- confirm. *)
    val parse : {table : LrTable.table,
                 lexer : ('_b,'_c) Token.token Stream.stream,
                 arg : 'arg,
                 saction : int *
                           '_c *
                           (LrTable.state * ('_b * '_c * '_c)) list *
                           'arg ->
                                LrTable.nonterm *
                                ('_b * '_c * '_c) *
                                ((LrTable.state * ('_b * '_c * '_c)) list),
                 void : '_b,
                 ec : { is_keyword : LrTable.term -> bool,
                        noShift : LrTable.term -> bool,
                        preferred_change : (LrTable.term list * LrTable.term list) list,
                        errtermvalue : LrTable.term -> '_b,
                        showTerminal : LrTable.term -> string,
                        terms : LrTable.term list,
                        error : string * '_c * '_c -> unit
                      },
                 lookahead : int  (* max amount of lookahead used in *)
                                  (* error correction *)
                } -> '_b *
                     (('_b,'_c) Token.token Stream.stream)
  end
|
|
122 |
|
|
123 |
(* LEXER: a signature that most lexers produced for use with SML-Yacc's
|
|
124 |
output will match. The user is responsible for declaring type token,
|
|
125 |
type pos, and type svalue in the UserDeclarations section of a lexer.
|
|
126 |
|
|
127 |
Note that type token is abstract in the lexer. This allows SML-Yacc to
|
|
128 |
create a TOKENS signature for use with lexers produced by ML-Lex that
|
|
129 |
treats the type token abstractly. Lexers that are functors parametrized by
|
|
130 |
a Tokens structure matching a TOKENS signature cannot examine the structure
|
|
131 |
of tokens.
|
|
132 |
*)
|
|
133 |
|
|
134 |
signature LEXER =
  sig
    (* Types the user declares in the UserDeclarations section of a lexer.
       All three are abstract here, so code written against LEXER cannot
       examine the structure of tokens. *)
    structure UserDeclarations :
      sig
        type ('a,'b) token
        type pos
        type svalue
      end

    (* Given an (int -> string) function, returns a function from unit
       to a token.
       NOTE(review): the (int -> string) argument is presumably the input
       reader, called with a requested character count -- confirm. *)
    val makeLexer : (int -> string) -> unit ->
                      (UserDeclarations.svalue,UserDeclarations.pos) UserDeclarations.token
  end
|
|
145 |
|
|
146 |
(* ARG_LEXER: the %arg option of ML-Lex allows users to produce lexers which
|
|
147 |
also take an argument before yielding a function from unit to a token
|
|
148 |
*)
|
|
149 |
|
|
150 |
signature ARG_LEXER =
  sig
    (* Same user-declared types as LEXER, plus the type of the extra
       argument the lexer takes. *)
    structure UserDeclarations :
      sig
        type ('a,'b) token
        type pos
        type svalue
        type arg
      end

    (* Like LEXER.makeLexer, but takes a UserDeclarations.arg value before
       yielding the function from unit to a token. *)
    val makeLexer : (int -> string) -> UserDeclarations.arg -> unit ->
                      (UserDeclarations.svalue,UserDeclarations.pos) UserDeclarations.token
  end
|
|
162 |
|
|
163 |
(* PARSER_DATA: the signature of ParserData structures in {parser name}LrValsFun
|
|
164 |
produced by SML-Yacc. All such structures match this signature.
|
|
165 |
|
|
166 |
The {parser name}LrValsFun produces a structure which contains all the values
|
|
167 |
except for the lexer needed to call the polymorphic parser mentioned
|
|
168 |
before.
|
|
169 |
|
|
170 |
*)
|
|
171 |
|
|
172 |
signature PARSER_DATA =
  sig
    (* the type of line numbers *)

    type pos

    (* the type of semantic values *)

    type svalue

    (* the type of the user-supplied argument to the parser *)
    type arg

    (* the intended type of the result of the parser.  This value is
       produced by applying extract from the structure Actions to the
       final semantic value resulting from a parse.
     *)

    type result

    structure LrTable : LR_TABLE
    structure Token : TOKEN
    sharing Token.LrTable = LrTable

    (* structure Actions contains the functions which maintain the
       semantic values stack in the parser.  Void is used to provide
       a default value for the semantic stack.
     *)

    structure Actions :
      sig
        (* Takes an int, a position, a stack of (state, value triple)
           entries and the user argument; returns a nonterminal, a value
           triple and a stack.
           NOTE(review): the int is presumably the number of the rule
           being reduced -- confirm against generated parsers. *)
        val actions : int * pos *
                      (LrTable.state * (svalue * pos * pos)) list * arg ->
                          LrTable.nonterm * (svalue * pos * pos) *
                          ((LrTable.state * (svalue * pos * pos)) list)
        val void : svalue
        val extract : svalue -> result
      end

    (* structure EC contains information used to improve error
       recovery in an error-correcting parser *)

    structure EC :
      sig
        val is_keyword : LrTable.term -> bool
        val noShift : LrTable.term -> bool
        val preferred_change : (LrTable.term list * LrTable.term list) list
        val errtermvalue : LrTable.term -> svalue
        val showTerminal : LrTable.term -> string
        val terms : LrTable.term list
      end

    (* table is the LR table for the parser *)

    val table : LrTable.table
  end
|
|
228 |
|
|
229 |
(* signature PARSER is the signature that most user parsers created by
|
|
230 |
SML-Yacc will match.
|
|
231 |
*)
|
|
232 |
|
|
233 |
signature PARSER =
  sig
    structure Token : TOKEN
    structure Stream : STREAM
    exception ParseError

    (* type pos is the type of line numbers *)

    type pos

    (* type result is the type of the result from the parser *)

    type result

    (* the type of the user-supplied argument to the parser *)
    type arg

    (* type svalue is the type of semantic values for the semantic value
       stack
     *)

    type svalue

    (* val makeLexer is used to create a stream of tokens for the parser *)

    val makeLexer : (int -> string) ->
                      (svalue,pos) Token.token Stream.stream

    (* val parse takes a stream of tokens and a function to print
       errors and returns a value of type result and a stream containing
       the unused tokens.
       NOTE(review): the leading int is presumably the error-correction
       lookahead, as in LR_PARSER.parse -- confirm.
     *)

    val parse : int * ((svalue,pos) Token.token Stream.stream) *
                (string * pos * pos -> unit) * arg ->
                    result * (svalue,pos) Token.token Stream.stream

    val sameToken : (svalue,pos) Token.token * (svalue,pos) Token.token ->
                      bool
  end
|
|
273 |
|
|
274 |
(* signature ARG_PARSER is the signature that will be matched by parsers whose
|
|
275 |
lexer takes an additional argument.
|
|
276 |
*)
|
|
277 |
|
|
278 |
signature ARG_PARSER =
  sig
    structure Token : TOKEN
    structure Stream : STREAM
    exception ParseError

    (* arg is the user-supplied argument to the parser; lexarg is the
       additional argument taken by the lexer.  pos, result and svalue
       play the same roles as in PARSER. *)
    type arg
    type lexarg
    type pos
    type result
    type svalue

    (* Creates the token stream; unlike PARSER.makeLexer it also takes a
       lexarg value. *)
    val makeLexer : (int -> string) -> lexarg ->
                      (svalue,pos) Token.token Stream.stream

    (* Same type as PARSER.parse: takes an int, a token stream, an error
       function and the user argument; returns a result and a token stream. *)
    val parse : int * ((svalue,pos) Token.token Stream.stream) *
                (string * pos * pos -> unit) * arg ->
                    result * (svalue,pos) Token.token Stream.stream

    val sameToken : (svalue,pos) Token.token * (svalue,pos) Token.token ->
                      bool
  end
|
|
299 |
|