| 46845 |      1 | (* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
 | 
|  |      2 | 
 | 
|  |      3 | (* base.sig: Base signature file for SML-Yacc.  This file contains signatures
 | 
|  |      4 |    that must be loaded before any of the files produced by ML-Yacc are loaded
 | 
|  |      5 | *)
 | 
|  |      6 | 
 | 
|  |      7 | (* STREAM: signature for a lazy stream.*)
 | 
|  |      8 | 
 | 
|  |      9 | signature STREAM = (* an abstract stream type plus three operations over it *)
 | 
|  |     10 |  sig type 'xa stream (* the stream type; its representation is not exposed by this signature *)
 | 
|  |     11 |      val streamify : (unit -> '_a) -> '_a stream (* makes a stream out of an element-producing function *)
 | 
|  |     12 |      val cons : '_a * '_a stream -> '_a stream (* combines one element and a stream into a stream; presumably prepends -- confirm *)
 | 
|  |     13 |      val get : '_a stream -> '_a * '_a stream (* yields one element together with a stream; presumably head and remainder -- confirm *)
 | 
|  |     14 |  end
 | 
|  |     15 | 
 | 
|  |     16 | (* LR_TABLE: signature for an LR Table.
 | 
|  |     17 | 
 | 
|  |     18 |    The list of actions and gotos passed to mkLrTable must be ordered by state
 | 
|  |     19 |    number. The values for state 0 are the first in the list, the values for
 | 
|  |     20 |     state 1 are next, etc.
 | 
|  |     21 | *)
 | 
|  |     22 | 
 | 
|  |     23 | signature LR_TABLE = (* datatypes, accessors and a constructor for an abstract LR table *)
 | 
|  |     24 |     sig
 | 
|  |     25 |         datatype ('a,'b) pairlist = EMPTY | PAIR of 'a * 'b * ('a,'b) pairlist (* chain of 'a/'b entries terminated by EMPTY *)
 | 
|  |     26 | 	datatype state = STATE of int (* a state, wrapped around its int number *)
 | 
|  |     27 | 	datatype term = T of int (* int-tagged symbol; presumably a terminal -- confirm *)
 | 
|  |     28 | 	datatype nonterm = NT of int (* int-tagged symbol; presumably a nonterminal -- confirm *)
 | 
|  |     29 | 	datatype action = SHIFT of state (* carries a state; presumably the state to enter -- confirm *)
 | 
|  |     30 | 			| REDUCE of int (* carries an int; presumably a rule number -- confirm *)
 | 
|  |     31 | 			| ACCEPT
 | 
|  |     32 | 			| ERROR
 | 
|  |     33 | 	type table (* abstract; values are built with mkLrTable below *)
 | 
|  |     34 | 	
 | 
|  |     35 | 	val numStates : table -> int
 | 
|  |     36 | 	val numRules : table -> int
 | 
|  |     37 | 	val describeActions : table -> state ->
 | 
|  |     38 | 				(term,action) pairlist * action (* per-state term/action entries plus one extra action; presumably the default -- confirm *)
 | 
|  |     39 | 	val describeGoto : table -> state -> (nonterm,state) pairlist (* per-state nonterm/state entries *)
 | 
|  |     40 | 	val action : table -> state * term -> action (* action for a state/term pair *)
 | 
|  |     41 | 	val goto : table -> state * nonterm -> state (* state for a state/nonterm pair *)
 | 
|  |     42 | 	val initialState : table -> state
 | 
|  |     43 | 	exception Goto of state * nonterm (* presumably raised by goto when no entry exists -- TODO confirm against the implementation *)
 | 
|  |     44 | 
 | 
|  |     45 | 	val mkLrTable : {actions : ((term,action) pairlist * action) array, (* NOTE(review): the header comment says "list", but both fields are arrays *)
 | 
|  |     46 | 			 gotos : (nonterm,state) pairlist array,
 | 
|  |     47 | 			 numStates : int, numRules : int,
 | 
|  |     48 | 			 initialState : state} -> table (* field shapes mirror describeActions, describeGoto, numStates, numRules and initialState *)
 | 
|  |     49 |     end
 | 
|  |     50 | 
 | 
|  |     51 | (* TOKEN: signature revealing the internal structure of a token. This signature
 | 
|  |     52 |    TOKEN is distinct from the signature {parser name}_TOKENS produced by ML-Yacc.
 | 
|  |     53 |    The {parser name}_TOKENS structures contain some types and functions to
 | 
|  |     54 |     construct tokens from values and positions.
 | 
|  |     55 | 
 | 
|  |     56 |    The representation of token was very carefully chosen here to allow the
 | 
|  |     57 |    polymorphic parser to work without knowing the types of semantic values
 | 
|  |     58 |    or line numbers.
 | 
|  |     59 | 
 | 
|  |     60 |    This has had an impact on the TOKENS structure produced by SML-Yacc, which
 | 
|  |     61 |    is a structure parameter to lexer functors.  We would like to have some
 | 
|  |     62 |    type 'a token which functions to construct tokens would create.  A
 | 
|  |     63 |    constructor function for an integer token might be
 | 
|  |     64 | 
 | 
|  |     65 | 	  INT: int * 'a * 'a -> 'a token.
 | 
|  |     66 |  
 | 
|  |     67 |    This is not possible because we need to have tokens with the representation
 | 
|  |     68 |    given below for the polymorphic parser.
 | 
|  |     69 | 
 | 
|  |     70 |    Thus our constructor functions for tokens have the form:
 | 
|  |     71 | 
 | 
|  |     72 | 	  INT: int * 'a * 'a -> (svalue,'a) token
 | 
|  |     73 | 
 | 
|  |     74 |    This in turn has had an impact on the signature that lexers for SML-Yacc
 | 
|  |     75 |    must match and the types that a user must declare in the user declarations
 | 
|  |     76 |    section of lexers.
 | 
|  |     77 | *)
 | 
|  |     78 | 
 | 
|  |     79 | signature TOKEN = (* exposes the concrete representation of a token *)
 | 
|  |     80 |     sig
 | 
|  |     81 | 	structure LrTable : LR_TABLE (* supplies the term type used inside token *)
 | 
|  |     82 |         datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b) (* a term plus a triple; PARSER instantiates 'a as svalue and 'b as pos *)
 | 
|  |     83 | 	val sameToken : ('a,'b) token * ('a,'b) token -> bool (* compares two tokens; which components are compared is not visible here *)
 | 
|  |     84 |     end
 | 
|  |     85 | 
 | 
|  |     86 | (* LR_PARSER: signature for a polymorphic LR parser *)
 | 
|  |     87 | 
 | 
|  |     88 | signature LR_PARSER = (* a parser polymorphic in the token component types '_b and '_c *)
 | 
|  |     89 |     sig
 | 
|  |     90 | 	structure Stream: STREAM
 | 
|  |     91 | 	structure LrTable : LR_TABLE
 | 
|  |     92 | 	structure Token : TOKEN
 | 
|  |     93 | 
 | 
|  |     94 | 	sharing LrTable = Token.LrTable (* the parser and its tokens must use one and the same LrTable *)
 | 
|  |     95 | 
 | 
|  |     96 | 	exception ParseError
 | 
|  |     97 | 
 | 
|  |     98 | 	val parse : {table : LrTable.table,
 | 
|  |     99 | 		     lexer : ('_b,'_c) Token.token Stream.stream, (* the input, as a stream of tokens *)
 | 
|  |    100 | 		     arg: 'arg, (* same type as the last component of saction's argument *)
 | 
|  |    101 | 		     saction : int * (* same shape as PARSER_DATA.Actions.actions with '_b as svalue and '_c as pos *)
 | 
|  |    102 | 			       '_c *
 | 
|  |    103 | 				(LrTable.state * ('_b * '_c * '_c)) list * 
 | 
|  |    104 | 				'arg ->
 | 
|  |    105 | 				     LrTable.nonterm *
 | 
|  |    106 | 				     ('_b * '_c * '_c) *
 | 
|  |    107 | 				     ((LrTable.state *('_b * '_c * '_c)) list),
 | 
|  |    108 | 		     void : '_b, (* a value of the first token component type; PARSER_DATA describes Void as a default stack value *)
 | 
|  |    109 | 		     ec : { is_keyword : LrTable.term -> bool, (* same field names as PARSER_DATA.EC, plus error *)
 | 
|  |    110 | 			    noShift : LrTable.term -> bool,
 | 
|  |    111 | 			    preferred_change : (LrTable.term list * LrTable.term list) list,
 | 
|  |    112 | 			    errtermvalue : LrTable.term -> '_b,
 | 
|  |    113 | 			    showTerminal : LrTable.term -> string,
 | 
|  |    114 | 			    terms: LrTable.term list,
 | 
|  |    115 | 			    error : string * '_c * '_c -> unit (* receives a string and two '_c values; presumably message and positions -- confirm *)
 | 
|  |    116 | 			   },
 | 
|  |    117 | 		     lookahead : int  (* max amount of lookahead used in *)
 | 
|  |    118 | 				      (* error correction *)
 | 
|  |    119 | 			} -> '_b * (* result: a '_b value paired with a token stream *)
 | 
|  |    120 | 			     (('_b,'_c) Token.token Stream.stream)
 | 
|  |    121 |     end
 | 
|  |    122 | 
 | 
|  |    123 | (* LEXER: a signature that most lexers produced for use with SML-Yacc's
 | 
|  |    124 |    output will match.  The user is responsible for declaring type token,
 | 
|  |    125 |    type pos, and type svalue in the UserDeclarations section of a lexer.
 | 
|  |    126 | 
 | 
|  |    127 |    Note that type token is abstract in the lexer.  This allows SML-Yacc to
 | 
|  |    128 |    create a TOKENS signature for use with lexers produced by ML-Lex that
 | 
|  |    129 |    treats the type token abstractly.  Lexers that are functors parametrized by
 | 
|  |    130 |    a Tokens structure matching a TOKENS signature cannot examine the structure
 | 
|  |    131 |    of tokens.
 | 
|  |    132 | *)
 | 
|  |    133 | 
 | 
|  |    134 | signature LEXER = (* user-declared types plus a function that builds a token producer *)
 | 
|  |    135 |    sig
 | 
|  |    136 |        structure UserDeclarations :
 | 
|  |    137 | 	   sig
 | 
|  |    138 | 	        type ('a,'b) token (* abstract here: the signature does not reveal its structure *)
 | 
|  |    139 | 		type pos
 | 
|  |    140 | 		type svalue
 | 
|  |    141 | 	   end
 | 
|  |    142 | 	val makeLexer : (int -> string) -> unit -> 
 | 
|  |    143 |          (UserDeclarations.svalue,UserDeclarations.pos) UserDeclarations.token (* takes an int -> string function, presumably the input reader -- confirm; returns a unit -> token function *)
 | 
|  |    144 |    end
 | 
|  |    145 | 
 | 
|  |    146 | (* ARG_LEXER: the %arg option of ML-Lex allows users to produce lexers which
 | 
|  |    147 |    also take an argument before yielding a function from unit to a token
 | 
|  |    148 | *)
 | 
|  |    149 | 
 | 
|  |    150 | signature ARG_LEXER = (* like LEXER, with an extra arg type and an extra makeLexer parameter *)
 | 
|  |    151 |    sig
 | 
|  |    152 |        structure UserDeclarations :
 | 
|  |    153 | 	   sig
 | 
|  |    154 | 	        type ('a,'b) token (* abstract here: the signature does not reveal its structure *)
 | 
|  |    155 | 		type pos
 | 
|  |    156 | 		type svalue
 | 
|  |    157 | 		type arg (* type of the additional value passed to makeLexer *)
 | 
|  |    158 | 	   end
 | 
|  |    159 | 	val makeLexer : (int -> string) -> UserDeclarations.arg -> unit -> 
 | 
|  |    160 |          (UserDeclarations.svalue,UserDeclarations.pos) UserDeclarations.token (* the arg value is supplied before the unit -> token function is obtained *)
 | 
|  |    161 |    end
 | 
|  |    162 | 
 | 
|  |    163 | (* PARSER_DATA: the signature of ParserData structures in {parser name}LrValsFun
 | 
|  |    164 |    produced by  SML-Yacc.  All such structures match this signature.  
 | 
|  |    165 | 
 | 
|  |    166 |    The {parser name}LrValsFun produces a structure which contains all the values
 | 
|  |    167 |    except for the lexer needed to call the polymorphic parser mentioned
 | 
|  |    168 |    before.
 | 
|  |    169 | 
 | 
|  |    170 | *)
 | 
|  |    171 | 
 | 
|  |    172 | signature PARSER_DATA = (* types, tables and action functions describing one generated parser *)
 | 
|  |    173 |    sig
 | 
|  |    174 |         (* the type of line numbers *)
 | 
|  |    175 | 
 | 
|  |    176 | 	type pos
 | 
|  |    177 | 
 | 
|  |    178 | 	(* the type of semantic values *)
 | 
|  |    179 | 
 | 
|  |    180 | 	type svalue
 | 
|  |    181 | 
 | 
|  |    182 |          (* the type of the user-supplied argument to the parser *)
 | 
|  |    183 |  	type arg
 | 
|  |    184 |  
 | 
|  |    185 | 	(* the intended type of the result of the parser.  This value is
 | 
|  |    186 | 	   produced by applying extract from the structure Actions to the
 | 
|  |    187 | 	   final semantic value resulting from a parse.
 | 
|  |    188 | 	 *)
 | 
|  |    189 | 
 | 
|  |    190 | 	type result
 | 
|  |    191 | 
 | 
|  |    192 | 	structure LrTable : LR_TABLE
 | 
|  |    193 | 	structure Token : TOKEN
 | 
|  |    194 | 	sharing Token.LrTable = LrTable (* tokens and the table must use one and the same LrTable *)
 | 
|  |    195 | 
 | 
|  |    196 | 	(* structure Actions contains the functions which maintain the
 | 
|  |    197 | 	   semantic values stack in the parser.  Void is used to provide
 | 
|  |    198 | 	   a default value for the semantic stack.
 | 
|  |    199 | 	 *)
 | 
|  |    200 | 
 | 
|  |    201 | 	structure Actions : 
 | 
|  |    202 | 	  sig
 | 
|  |    203 | 	      val actions : int * pos * (* same shape as the saction field of LR_PARSER.parse, with svalue and pos filled in *)
 | 
|  |    204 | 		   (LrTable.state * (svalue * pos * pos)) list * arg->
 | 
|  |    205 | 		         LrTable.nonterm * (svalue * pos * pos) *
 | 
|  |    206 | 			 ((LrTable.state *(svalue * pos * pos)) list)
 | 
|  |    207 | 	      val void : svalue
 | 
|  |    208 | 	      val extract : svalue -> result (* converts a semantic value into the parser's result type *)
 | 
|  |    209 | 	  end
 | 
|  |    210 | 
 | 
|  |    211 | 	(* structure EC contains information used to improve error
 | 
|  |    212 | 	   recovery in an error-correcting parser *)
 | 
|  |    213 | 
 | 
|  |    214 | 	structure EC : (* same field names as the ec record of LR_PARSER.parse, without error *)
 | 
|  |    215 | 	   sig
 | 
|  |    216 | 	     val is_keyword : LrTable.term -> bool
 | 
|  |    217 | 	     val noShift : LrTable.term -> bool
 | 
|  |    218 |  	     val preferred_change : (LrTable.term list * LrTable.term list) list
 | 
|  |    219 | 	     val errtermvalue : LrTable.term -> svalue
 | 
|  |    220 | 	     val showTerminal : LrTable.term -> string
 | 
|  |    221 | 	     val terms: LrTable.term list
 | 
|  |    222 | 	   end
 | 
|  |    223 | 
 | 
|  |    224 | 	(* table is the LR table for the parser *)
 | 
|  |    225 | 
 | 
|  |    226 | 	val table : LrTable.table
 | 
|  |    227 |     end
 | 
|  |    228 | 
 | 
|  |    229 | (* signature PARSER is the signature that most user parsers created by 
 | 
|  |    230 |    SML-Yacc will match.
 | 
|  |    231 | *)
 | 
|  |    232 | 
 | 
|  |    233 | signature PARSER = (* user-facing parser interface: a lexer constructor, parse and sameToken *)
 | 
|  |    234 |     sig
 | 
|  |    235 |         structure Token : TOKEN
 | 
|  |    236 | 	structure Stream : STREAM
 | 
|  |    237 | 	exception ParseError
 | 
|  |    238 | 
 | 
|  |    239 | 	(* type pos is the type of line numbers *)
 | 
|  |    240 | 
 | 
|  |    241 | 	type pos
 | 
|  |    242 | 
 | 
|  |    243 | 	(* type result is the type of the result from the parser *)
 | 
|  |    244 | 
 | 
|  |    245 | 	type result
 | 
|  |    246 | 
 | 
|  |    247 |          (* the type of the user-supplied argument to the parser *)
 | 
|  |    248 |  	type arg
 | 
|  |    249 | 	
 | 
|  |    250 | 	(* type svalue is the type of semantic values for the semantic value
 | 
|  |    251 | 	   stack
 | 
|  |    252 | 	 *)
 | 
|  |    253 | 
 | 
|  |    254 | 	type svalue
 | 
|  |    255 | 
 | 
|  |    256 | 	(* val makeLexer is used to create a stream of tokens for the parser *)
 | 
|  |    257 | 
 | 
|  |    258 | 	val makeLexer : (int -> string) ->
 | 
|  |    259 | 			 (svalue,pos) Token.token Stream.stream
 | 
|  |    260 | 
 | 
|  |    261 | 	(* val parse takes a stream of tokens and a function to print
 | 
|  |    262 | 	   errors and returns a value of type result and a stream containing
 | 
|  |    263 | 	   the unused tokens
 | 
|  |    264 | 	 *)
 | 
|  |    265 | 
 | 
|  |    266 | 	val parse : int * ((svalue,pos) Token.token Stream.stream) * (* NOTE(review): the leading int is not described above; presumably the lookahead bound of LR_PARSER.parse -- confirm *)
 | 
|  |    267 | 		    (string * pos * pos -> unit) * arg -> (* the final component is the user-supplied argument of type arg *)
 | 
|  |    268 | 				result * (svalue,pos) Token.token Stream.stream
 | 
|  |    269 | 
 | 
|  |    270 | 	val sameToken : (svalue,pos) Token.token * (svalue,pos) Token.token -> (* Token.sameToken's type with 'a as svalue and 'b as pos *)
 | 
|  |    271 | 				bool
 | 
|  |    272 |      end
 | 
|  |    273 | 
 | 
|  |    274 | (* signature ARG_PARSER is the signature that will be matched by parsers whose
 | 
|  |    275 |     lexer takes an additional argument.
 | 
|  |    276 | *)
 | 
|  |    277 | 
 | 
|  |    278 | signature ARG_PARSER = 
 | 
|  |    279 |     sig
 | 
|  |    280 |         structure Token : TOKEN
 | 
|  |    281 | 	structure Stream : STREAM
 | 
|  |    282 | 	exception ParseError
 | 
|  |    283 | 
 | 
|  |    284 | 	type arg (* type of the last component of parse's argument tuple *)
 | 
|  |    285 | 	type lexarg (* type of the extra value makeLexer takes *)
 | 
|  |    286 | 	type pos (* second type argument of the tokens used below *)
 | 
|  |    287 | 	type result (* type of the first component of parse's result *)
 | 
|  |    288 | 	type svalue (* first type argument of the tokens used below *)
 | 
|  |    289 | 
 | 
|  |    290 | 	val makeLexer : (int -> string) -> lexarg -> (* differs from PARSER.makeLexer only by the lexarg parameter *)
 | 
|  |    291 | 			 (svalue,pos) Token.token Stream.stream
 | 
|  |    292 | 	val parse : int * ((svalue,pos) Token.token Stream.stream) * (* same type as PARSER.parse *)
 | 
|  |    293 | 		    (string * pos * pos -> unit) * arg ->
 | 
|  |    294 | 				result * (svalue,pos) Token.token Stream.stream
 | 
|  |    295 | 
 | 
|  |    296 | 	val sameToken : (svalue,pos) Token.token * (svalue,pos) Token.token -> (* same type as PARSER.sameToken *)
 | 
|  |    297 | 				bool
 | 
|  |    298 |      end
 | 
|  |    299 | 
 |