| 0 |      1 | (*  Title: 	Pure/Thy/parse
 | 
|  |      2 |     ID:         $Id$
 | 
|  |      3 |     Author: 	Sonia Mahjoub / Tobias Nipkow
 | 
|  |      4 |     Copyright   1992  TU Muenchen
 | 
|  |      5 | 
 | 
| 213 |      6 |     modified    Dezember 1993 by Max Breitling (type-variables added)
 | 
|  |      7 | 
 | 
| 0 |      8 | The parser combinators.
 | 
|  |      9 | Adapted from Larry Paulson's ML for the Working Programmer.
 | 
|  |     10 | *)
 | 
|  |     11 | 
 | 
|  |     12 | (*Global infix declarations for the parsing primitives*)
 | 
|  |     13 | infix 5 -- --$$ $$--;
 | 
|  |     14 | infix 3 >>;
 | 
|  |     15 | infix 0 ||;
 | 
|  |     16 | 
 | 
|  |     17 | 
 | 
|  |     18 | signature PARSE = 
 | 
|  |     19 | sig
 | 
|  |     20 | 
 | 
|  |     21 | type token
 | 
|  |     22 | val $$     : string -> 
 | 
|  |     23 |              (token * int) list -> string * (token * int)list 
 | 
|  |     24 | val id     : (token * int) list -> string * (token * int)list 
 | 
|  |     25 | val nat    : (token * int) list -> string * (token * int)list 
 | 
|  |     26 | val stg    : (token * int) list -> string * (token * int)list 
 | 
|  |     27 | val txt    : (token * int) list -> string * (token * int)list 
 | 
| 213 |     28 | val typevar: (token * int) list -> string * (token * int)list
 | 
| 0 |     29 | val >>     : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
 | 
|  |     30 | val ||     : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b 
 | 
|  |     31 | val --     : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 
 | 
|  |     32 |              'a -> ('b * 'd) * 'e
 | 
|  |     33 | val $$--   : string * ((token * int)list -> 'b * 'c) -> 
 | 
|  |     34 |              (token * int) list -> 'b * 'c
 | 
|  |     35 | val --$$   : ( 'a -> 'b * (token * int)list ) * string -> 
 | 
|  |     36 |              'a -> 'b * (token * int)list 
 | 
|  |     37 | val !!     : ((token * int) list -> 'a * (token * int) list ) 
 | 
|  |     38 |              -> (token * int) list -> 'a * (token * int) list 
 | 
|  |     39 | val empty  : 'a -> 'b list * 'a
 | 
|  |     40 | val repeat : ('a -> 'b * 'a) -> 'a -> 'b list * 'a
 | 
|  |     41 | val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
 | 
|  |     42 | val list_of: ((token * int)list -> 'b * (token * int)list ) -> 
 | 
|  |     43 |              (token * int)list  -> 'b list * (token * int)list 
 | 
|  |     44 | val list_of1: ((token * int)list -> 'b * (token * int)list ) -> 
 | 
|  |     45 |              (token * int)list  -> 'b list * (token * int)list 
 | 
|  |     46 | val reader : ((token * int) list -> 'a * ('b * int) list )
 | 
|  |     47 |               -> string list -> 'a
 | 
|  |     48 | 
 | 
|  |     49 | end;
 | 
|  |     50 | 
 | 
|  |     51 | 
 | 
|  |     52 | 
 | 
|  |     53 | functor ParseFUN (Lex: LEXICAL): PARSE =
 | 
|  |     54 | struct
 | 
|  |     55 | 
 | 
|  |     56 | type token = Lex.token;
 | 
|  |     57 | 
 | 
|  |     58 | datatype synerr = Line of string * string * int | EOF of string;
 | 
|  |     59 | 
 | 
|  |     60 | exception SynError of synerr;
 | 
|  |     61 | 
 | 
|  |     62 | fun synerr(Line(s1, s2, n)) =
 | 
|  |     63 |       error("Syntax error on line " ^ (string_of_int n) ^ ": " ^ s1 ^
 | 
|  |     64 |             " expected and " ^ s2 ^ " was found")
 | 
|  |     65 |   | synerr(EOF(s)) = error("Syntax error on last line: " ^ s ^
 | 
|  |     66 |                            " expected and end-of-file was found"); 
 | 
|  |     67 | 
 | 
|  |     68 | fun string_of_token (Lex.Key b) = b
 | 
|  |     69 |   | string_of_token (Lex.Id b)  = b
 | 
|  |     70 |   | string_of_token (Lex.Nat b) = b
 | 
|  |     71 |   | string_of_token (Lex.Txt b) = b
 | 
| 213 |     72 |   | string_of_token (Lex.Stg b) = b
 | 
|  |     73 |   | string_of_token (Lex.TypVar b) = b;
 | 
| 0 |     74 | 
 | 
|  |     75 | fun line_err x = raise SynError(Line x);
 | 
|  |     76 | fun eof_err s =  raise SynError(EOF s);
 | 
|  |     77 | 
 | 
|  |     78 | fun $$a ((Lex.Key b,n) :: toks) =
 | 
|  |     79 |            if a = b then (a,toks) else line_err(a,b,n) 
 | 
|  |     80 |   | $$a ((t,n) :: toks) = line_err (a,string_of_token t, n)
 | 
|  |     81 |   | $$a _ = eof_err a;
 | 
|  |     82 | 
 | 
|  |     83 | 
 | 
|  |     84 | fun id ((Lex.Id b,n) :: toks) = (b, toks)
 | 
|  |     85 |   | id ((t,n) :: toks) = line_err ("identifier", string_of_token t, n)
 | 
|  |     86 |   | id _ = eof_err "identifier";
 | 
|  |     87 | 
 | 
|  |     88 | 
 | 
|  |     89 | fun nat ((Lex.Nat b,n) :: toks) = (b, toks)
 | 
|  |     90 |   | nat ((t,n) :: toks) = 
 | 
|  |     91 | 	line_err ("natural number", string_of_token t, n)
 | 
|  |     92 |   | nat _ = eof_err "natural number";
 | 
|  |     93 | 
 | 
|  |     94 | 
 | 
|  |     95 | fun stg ((Lex.Stg b,n) :: toks) = (b, toks)
 | 
|  |     96 |   | stg ((t,n) :: toks) = line_err("string", string_of_token t, n)
 | 
|  |     97 |   | stg _ = eof_err"string";
 | 
|  |     98 | 
 | 
|  |     99 | 
 | 
|  |    100 | fun txt ((Lex.Txt b,n) :: toks) = (b, toks)
 | 
|  |    101 |   | txt ((t,n) :: toks) = line_err ("ML text", string_of_token t, n)
 | 
|  |    102 |   | txt _ = eof_err "ML text";
 | 
|  |    103 | 
 | 
| 213 |    104 | fun typevar ((Lex.TypVar b,n) :: toks) = (b, toks)
 | 
|  |    105 |   | typevar ((t,n)::toks) = line_err("type variable",string_of_token t,n)
 | 
|  |    106 |   | typevar _ = eof_err "type variable";
 | 
|  |    107 | 
 | 
| 0 |    108 | 
 | 
|  |    109 | fun ( ph >> f) toks = let val (x, toks2) = ph toks in (f x, toks2) end;
 | 
|  |    110 | 
 | 
|  |    111 | fun (ph1 || ph2) toks = ph1 toks handle SynError _ => ph2 toks;
 | 
|  |    112 | 
 | 
|  |    113 | 
 | 
|  |    114 | fun (ph1 -- ph2) toks =
 | 
|  |    115 |     let val (x, toks2) = ph1 toks
 | 
|  |    116 |         val (y, toks3) = ph2 toks2
 | 
|  |    117 |     in ((x,y), toks3) end;
 | 
|  |    118 | 
 | 
|  |    119 | fun (a $$-- ph)  =  $$a -- ph >> #2;
 | 
|  |    120 | 
 | 
|  |    121 | fun (ph --$$ a)  =  ph -- $$a >> #1;
 | 
|  |    122 | 
 | 
|  |    123 | fun !! ph toks = ph toks handle SynError s => synerr s;
 | 
|  |    124 | 
 | 
|  |    125 | fun empty toks = ([], toks);
 | 
|  |    126 | 
 | 
|  |    127 | fun repeat ph toks  = (   ph -- repeat ph >> (op::)
 | 
|  |    128 |                        || empty ) toks; 
 | 
|  |    129 | 
 | 
|  |    130 | fun repeat1 ph  =  ph -- repeat ph >> (op::);
 | 
|  |    131 | 
 | 
|  |    132 | fun list_of1 ph =  ph -- repeat("," $$-- !! ph) >> (op::);
 | 
|  |    133 | fun list_of ph  =  list_of1 ph || empty;
 | 
|  |    134 | 
 | 
|  |    135 | fun reader ph a = 
 | 
|  |    136 |        ( case ph (Lex.scan a) of 
 | 
|  |    137 |                 (x,  []) => x
 | 
|  |    138 |               | (_,(_, n)::_) => error
 | 
|  |    139 |                 ("Syntax error on line " ^ (string_of_int n) ^ ": " ^
 | 
|  |    140 |                  "Extra characters in phrase")
 | 
|  |    141 |        );
 | 
|  |    142 | end;
 |