src/Pure/General/scan.ML
changeset 6116 8ba2f25610f7
child 6118 caa439435666
equal deleted inserted replaced
6115:c70bce7deb0f 6116:8ba2f25610f7
       
     1 (*  Title:	Pure/Syntax/scan.ML
       
     2     ID:		$Id$
       
     3     Author:	Markus Wenzel and Tobias Nipkow, TU Muenchen
       
     4 
       
     5 Generic scanners (for potentially infinite input).
       
     6 *)
       
     7 
       
     8 infix 5 -- :-- |-- --| ^^;
       
     9 infix 3 >>;
       
    10 infix 0 ||;
       
    11 
       
    12 signature BASIC_SCAN =
       
    13 sig
       
    14   val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b
       
    15   val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
       
    16   val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
       
    17   val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
       
    18   val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
       
    19   val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
       
    20   val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
       
    21   val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
       
    22   val $$ : ''a -> ''a list -> ''a * ''a list
       
    23 end;
       
    24 
       
    25 signature SCAN =
       
    26 sig
       
    27   include BASIC_SCAN
       
    28   val fail: 'a -> 'b
       
    29   val fail_with: ('a -> string) -> 'a -> 'b
       
    30   val succeed: 'a -> 'b -> 'a * 'b
       
    31   val one: ('a -> bool) -> 'a list -> 'a * 'a list
       
    32   val any: ('a -> bool) -> 'a list -> 'a list * 'a list
       
    33   val any1: ('a -> bool) -> 'a list -> 'a list * 'a list
       
    34   val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
       
    35   val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
       
    36   val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
       
    37   val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
       
    38   val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
       
    39   val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
       
    40   val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
       
    41   val first: ('a -> 'b) list -> 'a -> 'b
       
    42   val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
       
    43   val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
       
    44   val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
       
    45   val try: ('a -> 'b) -> 'a -> 'b
       
    46   val force: ('a -> 'b) -> 'a -> 'b
       
    47   val prompt: string -> ('a -> 'b) -> 'a -> 'b
       
    48   val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list))
       
    49     -> 'b * 'a list -> 'c * ('d * 'a list)
       
    50   val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
       
    51   val read: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
       
    52   val catch: ('a -> 'b) -> 'a -> 'b
       
    53   val error: ('a -> 'b) -> 'a -> 'b
       
    54   val source': string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
       
    55     'b * ('b -> bool) -> ('d * 'b list -> 'e list * ('d * 'b list)) ->
       
    56     ('d * 'b list -> 'f * ('d * 'b list)) option -> 'd * 'a -> 'e list * ('d * 'c)
       
    57   val source: string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
       
    58     'b * ('b -> bool) -> ('b list -> 'd list * 'b list) ->
       
    59     ('b list -> 'e * 'b list) option -> 'a -> 'd list * 'c
       
    60   val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
       
    61   val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
       
    62   type lexicon
       
    63   val dest_lexicon: lexicon -> string list list
       
    64   val make_lexicon: string list list -> lexicon
       
    65   val empty_lexicon: lexicon
       
    66   val extend_lexicon: lexicon -> string list list -> lexicon
       
    67   val merge_lexicons: lexicon -> lexicon -> lexicon
       
    68   val literal: lexicon -> string list -> string list * string list
       
    69 end;
       
    70 
       
    71 structure Scan: SCAN =
       
    72 struct
       
    73 
       
    74 
       
    75 (** scanners **)
       
    76 
       
    77 exception MORE of string option;	(*need more input (prompt)*)
       
    78 exception FAIL of string option;	(*try alternatives (reason of failure)*)
       
    79 exception ABORT of string;		(*dead end*)
       
    80 
       
    81 
       
    82 (* scanner combinators *)
       
    83 
       
    84 fun (scan >> f) xs = apfst f (scan xs);
       
    85 
       
    86 fun (scan1 || scan2) xs = scan1 xs handle FAIL _ => scan2 xs;
       
    87 
       
    88 (*dependent pairing*)
       
    89 fun (scan1 :-- scan2) xs =
       
    90   let
       
    91     val (x, ys) = scan1 xs;
       
    92     val (y, zs) = scan2 x ys;
       
    93   in ((x, y), zs) end;
       
    94 
       
    95 fun (scan1 -- scan2) = scan1 :-- (fn _ => scan2);
       
    96 fun (scan1 |-- scan2) = scan1 -- scan2 >> #2;
       
    97 fun (scan1 --| scan2) = scan1 -- scan2 >> #1;
       
    98 fun (scan1 ^^ scan2) = scan1 -- scan2 >> op ^;
       
    99 
       
   100 
       
   101 (* generic scanners *)
       
   102 
       
   103 fun fail _ = raise FAIL None;
       
   104 fun fail_with msg_of xs = raise FAIL (Some (msg_of xs));
       
   105 fun succeed y xs = (y, xs);
       
   106 
       
   107 fun one _ [] = raise MORE None
       
   108   | one pred (x :: xs) =
       
   109       if pred x then (x, xs) else raise FAIL None;
       
   110 
       
   111 fun $$ _ [] = raise MORE None
       
   112   | $$ a (x :: xs) =
       
   113       if a = x then (x, xs) else raise FAIL None;
       
   114 
       
   115 fun any _ [] = raise MORE None
       
   116   | any pred (lst as x :: xs) =
       
   117       if pred x then apfst (cons x) (any pred xs)
       
   118       else ([], lst);
       
   119 
       
   120 fun any1 pred = one pred -- any pred >> op ::;
       
   121 
       
   122 fun optional scan def = scan || succeed def;
       
   123 fun option scan = optional (scan >> Some) None;
       
   124 
       
   125 fun repeat scan xs = (scan -- repeat scan >> op :: || succeed []) xs;
       
   126 fun repeat1 scan = scan -- repeat scan >> op ::;
       
   127 
       
   128 fun max leq scan1 scan2 xs =
       
   129   (case (option scan1 xs, option scan2 xs) of
       
   130     ((None, _), (None, _)) => raise FAIL None		(*looses FAIL msg!*)
       
   131   | ((Some tok1, xs'), (None, _)) => (tok1, xs')
       
   132   | ((None, _), (Some tok2, xs')) => (tok2, xs')
       
   133   | ((Some tok1, xs1'), (Some tok2, xs2')) =>
       
   134       if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2'));
       
   135 
       
   136 fun ahead scan xs = (fst (scan xs), xs);
       
   137 
       
   138 fun unless test scan =
       
   139   ahead (option test) :-- (fn None => scan | _ => fail) >> #2;
       
   140 
       
   141 fun first [] = fail
       
   142   | first (scan :: scans) = scan || first scans;
       
   143 
       
   144 
       
   145 (* state based scanners *)
       
   146 
       
   147 fun depend scan (st, xs) =
       
   148   let val ((st', y), xs') = scan st xs
       
   149   in (y, (st', xs')) end;
       
   150 
       
   151 fun lift scan (st, xs) =
       
   152   let val (y, xs') = scan xs
       
   153   in (y, (st, xs')) end;
       
   154 
       
   155 fun pass st scan xs =
       
   156   let val (y, (_, xs')) = scan (st, xs)
       
   157   in (y, xs') end;
       
   158 
       
   159 
       
   160 (* exception handling *)
       
   161 
       
   162 fun !! err scan xs = scan xs handle FAIL msg => raise ABORT (err (xs, msg));
       
   163 fun try scan xs = scan xs handle MORE _ => raise FAIL None | ABORT _ => raise FAIL None;
       
   164 fun force scan xs = scan xs handle MORE _ => raise FAIL None;
       
   165 fun prompt str scan xs = scan xs handle MORE None => raise MORE (Some str);
       
   166 fun catch scan xs = scan xs handle ABORT msg => raise FAIL (Some msg);
       
   167 fun error scan xs = scan xs handle ABORT msg => Library.error msg;
       
   168 
       
   169 
       
   170 (* finite scans *)
       
   171 
       
   172 fun finite' (stopper, is_stopper) scan (state, input) =
       
   173   let
       
   174     fun lost () = raise ABORT "Scanner bug: lost stopper of finite scan!";
       
   175 
       
   176     fun stop [] = lost ()
       
   177       | stop lst =
       
   178           let val (xs, x) = split_last lst
       
   179           in if is_stopper x then ((), xs) else lost () end;
       
   180   in
       
   181     if exists is_stopper input then
       
   182       raise ABORT "Stopper may not occur in input of finite scan!"
       
   183     else (force scan --| lift stop) (state, input @ [stopper])
       
   184   end;
       
   185 
       
   186 fun finite stopper scan xs =
       
   187   let val (y, ((), xs')) = finite' stopper (lift scan) ((), xs)
       
   188   in (y, xs') end;
       
   189 
       
   190 fun read stopper scan xs =
       
   191   (case error (finite stopper (option scan)) xs of
       
   192     (y as Some _, []) => y
       
   193   | _ => None);
       
   194 
       
   195 
       
   196 
       
   197 (* infinite scans -- draining state-based source *)
       
   198 
       
   199 fun drain def_prmpt get stopper scan ((state, xs), src) =
       
   200   (scan (state, xs), src) handle MORE prmpt =>
       
   201     (case get (if_none prmpt def_prmpt) src of
       
   202       ([], _) => (finite' stopper scan (state, xs), src)
       
   203     | (xs', src') => drain def_prmpt get stopper scan ((state, xs @ xs'), src'));
       
   204 
       
   205 fun source' def_prmpt get unget stopper scanner opt_recover (state, src) =
       
   206   let
       
   207     fun drain_with scan = drain def_prmpt get stopper scan;
       
   208 
       
   209     fun drain_loop recover inp =
       
   210       drain_with (catch scanner) inp handle FAIL msg =>
       
   211         (error_msg (if_none msg "Syntax error.");
       
   212           drain_loop recover (apfst snd (drain_with recover inp)));
       
   213 
       
   214     val ((ys, (state', xs')), src') =
       
   215       (case (get def_prmpt src, opt_recover) of
       
   216         (([], s), _) => (([], (state, [])), s)
       
   217       | ((xs, s), None) => drain_with (error scanner) ((state, xs), s)
       
   218       | ((xs, s), Some scan) => drain_loop scan ((state, xs), s));
       
   219   in
       
   220     (ys, (state', unget (xs', src')))
       
   221   end;
       
   222 
       
   223 fun source def_prmpt get unget stopper scan opt_recover src =
       
   224   let val (ys, ((), src')) =
       
   225     source' def_prmpt get unget stopper (lift scan) (apsome lift opt_recover) ((), src)
       
   226   in (ys, src') end;
       
   227 
       
   228 fun single scan = scan >> (fn x => [x]);
       
   229 fun bulk scan = scan -- repeat (try scan) >> (op ::);
       
   230 
       
   231 
       
   232 
       
   233 (** datatype lexicon **)
       
   234 
       
   235 datatype lexicon =
       
   236   Empty |
       
   237   Branch of string * string list * lexicon * lexicon * lexicon;
       
   238 
       
   239 val no_literal = [];
       
   240 
       
   241 
       
   242 (* dest_lexicon *)
       
   243 
       
   244 fun dest_lexicon Empty = []
       
   245   | dest_lexicon (Branch (_, [], lt, eq, gt)) =
       
   246       dest_lexicon lt @ dest_lexicon eq @ dest_lexicon gt
       
   247   | dest_lexicon (Branch (_, cs, lt, eq, gt)) =
       
   248       dest_lexicon lt @ [cs] @ dest_lexicon eq @ dest_lexicon gt;
       
   249 
       
   250 
       
   251 (* empty, extend, make, merge lexicons *)
       
   252 
       
   253 val empty_lexicon = Empty;
       
   254 
       
   255 fun extend_lexicon lexicon chrss =
       
   256   let
       
   257     fun ext (lex, chrs) =
       
   258       let
       
   259 	fun add (Branch (d, a, lt, eq, gt)) (chs as c :: cs) =
       
   260 	      if c < d then Branch (d, a, add lt chs, eq, gt)
       
   261 	      else if c > d then Branch (d, a, lt, eq, add gt chs)
       
   262 	      else Branch (d, if null cs then chrs else a, lt, add eq cs, gt)
       
   263 	  | add Empty [c] =
       
   264 	      Branch (c, chrs, Empty, Empty, Empty)
       
   265 	  | add Empty (c :: cs) =
       
   266 	      Branch (c, no_literal, Empty, add Empty cs, Empty)
       
   267 	  | add lex [] = lex;
       
   268       in add lex chrs end;
       
   269   in foldl ext (lexicon, chrss \\ dest_lexicon lexicon) end;
       
   270 
       
   271 val make_lexicon = extend_lexicon empty_lexicon;
       
   272 
       
   273 fun merge_lexicons lex1 lex2 =
       
   274   let
       
   275     val chss1 = dest_lexicon lex1;
       
   276     val chss2 = dest_lexicon lex2;
       
   277   in
       
   278     if chss2 subset chss1 then lex1
       
   279     else if chss1 subset chss2 then lex2
       
   280     else extend_lexicon lex1 chss2
       
   281   end;
       
   282 
       
   283 
       
   284 (* scan literal *)
       
   285 
       
   286 fun literal lex chrs =
       
   287   let
       
   288     fun lit Empty res _ = res
       
   289       | lit (Branch _) _ [] = raise MORE None
       
   290       | lit (Branch (d, a, lt, eq, gt)) res (chs as c :: cs) =
       
   291 	  if c < d then lit lt res chs
       
   292 	  else if c > d then lit gt res chs
       
   293 	  else lit eq (if a = no_literal then res else Some (a, cs)) cs;
       
   294   in
       
   295     (case lit lex None chrs of
       
   296       None => raise FAIL None
       
   297     | Some res => res)
       
   298   end;
       
   299 
       
   300 
       
   301 end;
       
   302 
       
   303 
       
   304 structure BasicScan: BASIC_SCAN = Scan;
       
   305 open BasicScan;