src/Pure/Thy/parse.ML
changeset 0 a5a9c433f639
child 213 42f2b8a3581f
equal deleted inserted replaced
-1:000000000000 0:a5a9c433f639
       
     1 (*  Title: 	Pure/Thy/parse
       
     2     ID:         $Id$
       
     3     Author: 	Sonia Mahjoub / Tobias Nipkow
       
     4     Copyright   1992  TU Muenchen
       
     5 
       
     6 The parser combinators.
       
     7 Adapted from Larry Paulson's ML for the Working Programmer.
       
     8 *)
       
     9 
       
    10 (*Global infix declarations for the parsing primitives*)
       
    11 infix 5 -- --$$ $$--;
       
    12 infix 3 >>;
       
    13 infix 0 ||;
       
    14 
       
    15 
       
    16 signature PARSE = 
       
    17 sig
       
    18 
       
    19 type token
       
    20 val $$     : string -> 
       
    21              (token * int) list -> string * (token * int)list 
       
    22 val id     : (token * int) list -> string * (token * int)list 
       
    23 val nat    : (token * int) list -> string * (token * int)list 
       
    24 val stg    : (token * int) list -> string * (token * int)list 
       
    25 val txt    : (token * int) list -> string * (token * int)list 
       
    26 val >>     : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
       
    27 val ||     : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b 
       
    28 val --     : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 
       
    29              'a -> ('b * 'd) * 'e
       
    30 val $$--   : string * ((token * int)list -> 'b * 'c) -> 
       
    31              (token * int) list -> 'b * 'c
       
    32 val --$$   : ( 'a -> 'b * (token * int)list ) * string -> 
       
    33              'a -> 'b * (token * int)list 
       
    34 val !!     : ((token * int) list -> 'a * (token * int) list ) 
       
    35              -> (token * int) list -> 'a * (token * int) list 
       
    36 val empty  : 'a -> 'b list * 'a
       
    37 val repeat : ('a -> 'b * 'a) -> 'a -> 'b list * 'a
       
    38 val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
       
    39 val list_of: ((token * int)list -> 'b * (token * int)list ) -> 
       
    40              (token * int)list  -> 'b list * (token * int)list 
       
    41 val list_of1: ((token * int)list -> 'b * (token * int)list ) -> 
       
    42              (token * int)list  -> 'b list * (token * int)list 
       
    43 val reader : ((token * int) list -> 'a * ('b * int) list )
       
    44               -> string list -> 'a
       
    45 
       
    46 end;
       
    47 
       
    48 
       
    49 
       
    50 functor ParseFUN (Lex: LEXICAL): PARSE =
       
    51 struct
       
    52 
       
    53 type token = Lex.token;
       
    54 
       
    55 datatype synerr = Line of string * string * int | EOF of string;
       
    56 
       
    57 exception SynError of synerr;
       
    58 
       
    59 fun synerr(Line(s1, s2, n)) =
       
    60       error("Syntax error on line " ^ (string_of_int n) ^ ": " ^ s1 ^
       
    61             " expected and " ^ s2 ^ " was found")
       
    62   | synerr(EOF(s)) = error("Syntax error on last line: " ^ s ^
       
    63                            " expected and end-of-file was found"); 
       
    64 
       
    65 fun string_of_token (Lex.Key b) = b
       
    66   | string_of_token (Lex.Id b)  = b
       
    67   | string_of_token (Lex.Nat b) = b
       
    68   | string_of_token (Lex.Txt b) = b
       
    69   | string_of_token (Lex.Stg b) = b;
       
    70 
       
    71 fun line_err x = raise SynError(Line x);
       
    72 fun eof_err s =  raise SynError(EOF s);
       
    73 
       
    74 fun $$a ((Lex.Key b,n) :: toks) =
       
    75            if a = b then (a,toks) else line_err(a,b,n) 
       
    76   | $$a ((t,n) :: toks) = line_err (a,string_of_token t, n)
       
    77   | $$a _ = eof_err a;
       
    78 
       
    79 
       
    80 fun id ((Lex.Id b,n) :: toks) = (b, toks)
       
    81   | id ((t,n) :: toks) = line_err ("identifier", string_of_token t, n)
       
    82   | id _ = eof_err "identifier";
       
    83 
       
    84 
       
    85 fun nat ((Lex.Nat b,n) :: toks) = (b, toks)
       
    86   | nat ((t,n) :: toks) = 
       
    87 	line_err ("natural number", string_of_token t, n)
       
    88   | nat _ = eof_err "natural number";
       
    89 
       
    90 
       
    91 fun stg ((Lex.Stg b,n) :: toks) = (b, toks)
       
    92   | stg ((t,n) :: toks) = line_err("string", string_of_token t, n)
       
    93   | stg _ = eof_err"string";
       
    94 
       
    95 
       
    96 fun txt ((Lex.Txt b,n) :: toks) = (b, toks)
       
    97   | txt ((t,n) :: toks) = line_err ("ML text", string_of_token t, n)
       
    98   | txt _ = eof_err "ML text";
       
    99 
       
   100 
       
   101 fun ( ph >> f) toks = let val (x, toks2) = ph toks in (f x, toks2) end;
       
   102 
       
   103 fun (ph1 || ph2) toks = ph1 toks handle SynError _ => ph2 toks;
       
   104 
       
   105 
       
   106 fun (ph1 -- ph2) toks =
       
   107     let val (x, toks2) = ph1 toks
       
   108         val (y, toks3) = ph2 toks2
       
   109     in ((x,y), toks3) end;
       
   110 
       
   111 fun (a $$-- ph)  =  $$a -- ph >> #2;
       
   112 
       
   113 fun (ph --$$ a)  =  ph -- $$a >> #1;
       
   114 
       
   115 fun !! ph toks = ph toks handle SynError s => synerr s;
       
   116 
       
   117 fun empty toks = ([], toks);
       
   118 
       
   119 fun repeat ph toks  = (   ph -- repeat ph >> (op::)
       
   120                        || empty ) toks; 
       
   121 
       
   122 fun repeat1 ph  =  ph -- repeat ph >> (op::);
       
   123 
       
   124 fun list_of1 ph =  ph -- repeat("," $$-- !! ph) >> (op::);
       
   125 fun list_of ph  =  list_of1 ph || empty;
       
   126 
       
   127 fun reader ph a = 
       
   128        ( case ph (Lex.scan a) of 
       
   129                 (x,  []) => x
       
   130               | (_,(_, n)::_) => error
       
   131                 ("Syntax error on line " ^ (string_of_int n) ^ ": " ^
       
   132                  "Extra characters in phrase")
       
   133        );
       
   134 end;