src/Pure/Thy/parse.ML
author wenzelm
Wed Jan 29 15:45:40 1997 +0100 (1997-01-29)
changeset 2564 9d66b758bce5
parent 213 42f2b8a3581f
permissions -rw-r--r--
removed warning for unprintable chars in strings (functionality will
be put into administrative script);
     1 (*  Title: 	Pure/Thy/parse
     2     ID:         $Id$
     3     Author: 	Sonia Mahjoub / Tobias Nipkow
     4     Copyright   1992  TU Muenchen
     5 
     6     modified    Dezember 1993 by Max Breitling (type-variables added)
     7 
     8 The parser combinators.
     9 Adapted from Larry Paulson's ML for the Working Programmer.
    10 *)
    11 
    12 (*Global infix declarations for the parsing primitives*)
    13 infix 5 -- --$$ $$--;
    14 infix 3 >>;
    15 infix 0 ||;
    16 
    17 
    18 signature PARSE = 
    19 sig
    20 
    21 type token
    22 val $$     : string -> 
    23              (token * int) list -> string * (token * int)list 
    24 val id     : (token * int) list -> string * (token * int)list 
    25 val nat    : (token * int) list -> string * (token * int)list 
    26 val stg    : (token * int) list -> string * (token * int)list 
    27 val txt    : (token * int) list -> string * (token * int)list 
    28 val typevar: (token * int) list -> string * (token * int)list
    29 val >>     : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
    30 val ||     : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b 
    31 val --     : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 
    32              'a -> ('b * 'd) * 'e
    33 val $$--   : string * ((token * int)list -> 'b * 'c) -> 
    34              (token * int) list -> 'b * 'c
    35 val --$$   : ( 'a -> 'b * (token * int)list ) * string -> 
    36              'a -> 'b * (token * int)list 
    37 val !!     : ((token * int) list -> 'a * (token * int) list ) 
    38              -> (token * int) list -> 'a * (token * int) list 
    39 val empty  : 'a -> 'b list * 'a
    40 val repeat : ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    41 val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    42 val list_of: ((token * int)list -> 'b * (token * int)list ) -> 
    43              (token * int)list  -> 'b list * (token * int)list 
    44 val list_of1: ((token * int)list -> 'b * (token * int)list ) -> 
    45              (token * int)list  -> 'b list * (token * int)list 
    46 val reader : ((token * int) list -> 'a * ('b * int) list )
    47               -> string list -> 'a
    48 
    49 end;
    50 
    51 
    52 
    53 functor ParseFUN (Lex: LEXICAL): PARSE =
    54 struct
    55 
    56 type token = Lex.token;
    57 
    58 datatype synerr = Line of string * string * int | EOF of string;
    59 
    60 exception SynError of synerr;
    61 
    62 fun synerr(Line(s1, s2, n)) =
    63       error("Syntax error on line " ^ (string_of_int n) ^ ": " ^ s1 ^
    64             " expected and " ^ s2 ^ " was found")
    65   | synerr(EOF(s)) = error("Syntax error on last line: " ^ s ^
    66                            " expected and end-of-file was found"); 
    67 
    68 fun string_of_token (Lex.Key b) = b
    69   | string_of_token (Lex.Id b)  = b
    70   | string_of_token (Lex.Nat b) = b
    71   | string_of_token (Lex.Txt b) = b
    72   | string_of_token (Lex.Stg b) = b
    73   | string_of_token (Lex.TypVar b) = b;
    74 
    75 fun line_err x = raise SynError(Line x);
    76 fun eof_err s =  raise SynError(EOF s);
    77 
    78 fun $$a ((Lex.Key b,n) :: toks) =
    79            if a = b then (a,toks) else line_err(a,b,n) 
    80   | $$a ((t,n) :: toks) = line_err (a,string_of_token t, n)
    81   | $$a _ = eof_err a;
    82 
    83 
    84 fun id ((Lex.Id b,n) :: toks) = (b, toks)
    85   | id ((t,n) :: toks) = line_err ("identifier", string_of_token t, n)
    86   | id _ = eof_err "identifier";
    87 
    88 
    89 fun nat ((Lex.Nat b,n) :: toks) = (b, toks)
    90   | nat ((t,n) :: toks) = 
    91 	line_err ("natural number", string_of_token t, n)
    92   | nat _ = eof_err "natural number";
    93 
    94 
    95 fun stg ((Lex.Stg b,n) :: toks) = (b, toks)
    96   | stg ((t,n) :: toks) = line_err("string", string_of_token t, n)
    97   | stg _ = eof_err"string";
    98 
    99 
   100 fun txt ((Lex.Txt b,n) :: toks) = (b, toks)
   101   | txt ((t,n) :: toks) = line_err ("ML text", string_of_token t, n)
   102   | txt _ = eof_err "ML text";
   103 
   104 fun typevar ((Lex.TypVar b,n) :: toks) = (b, toks)
   105   | typevar ((t,n)::toks) = line_err("type variable",string_of_token t,n)
   106   | typevar _ = eof_err "type variable";
   107 
   108 
   109 fun ( ph >> f) toks = let val (x, toks2) = ph toks in (f x, toks2) end;
   110 
   111 fun (ph1 || ph2) toks = ph1 toks handle SynError _ => ph2 toks;
   112 
   113 
   114 fun (ph1 -- ph2) toks =
   115     let val (x, toks2) = ph1 toks
   116         val (y, toks3) = ph2 toks2
   117     in ((x,y), toks3) end;
   118 
   119 fun (a $$-- ph)  =  $$a -- ph >> #2;
   120 
   121 fun (ph --$$ a)  =  ph -- $$a >> #1;
   122 
   123 fun !! ph toks = ph toks handle SynError s => synerr s;
   124 
   125 fun empty toks = ([], toks);
   126 
   127 fun repeat ph toks  = (   ph -- repeat ph >> (op::)
   128                        || empty ) toks; 
   129 
   130 fun repeat1 ph  =  ph -- repeat ph >> (op::);
   131 
   132 fun list_of1 ph =  ph -- repeat("," $$-- !! ph) >> (op::);
   133 fun list_of ph  =  list_of1 ph || empty;
   134 
   135 fun reader ph a = 
   136        ( case ph (Lex.scan a) of 
   137                 (x,  []) => x
   138               | (_,(_, n)::_) => error
   139                 ("Syntax error on line " ^ (string_of_int n) ^ ": " ^
   140                  "Extra characters in phrase")
   141        );
   142 end;