src/Pure/General/scan.ML
changeset 6116 8ba2f25610f7
child 6118 caa439435666
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/Pure/General/scan.ML	Wed Jan 13 12:44:33 1999 +0100
     1.3 @@ -0,0 +1,305 @@
     1.4 +(*  Title:	Pure/General/scan.ML
     1.5 +    ID:		$Id$
     1.6 +    Author:	Markus Wenzel and Tobias Nipkow, TU Muenchen
     1.7 +
     1.8 +Generic scanners (for potentially infinite input).
     1.9 +*)
    1.10 +
(*Fixity of the scanner combinators (all left-associative): the sequencing
  operators bind tightest (5), result mapping with >> comes next (3), and
  alternatives with || bind loosest (0).  Hence  a -- b >> f || c  parses
  as  ((a -- b) >> f) || c.*)
infix 5 -- :-- |-- --| ^^;
infix 3 >>;
infix 0 ||;
    1.14 +
(*Basic combinators, intended to be opened at top level.  A scanner is a
  function from some input to a pair (result, remaining input).*)
signature BASIC_SCAN =
sig
  (*turn failure of the scanner into an abort; the message is computed from
    the input and the optional failure reason*)
  val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b
  (*apply a function to the result of a scanner*)
  val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
  (*alternative: try the second scanner on the same input if the first fails*)
  val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
  (*sequence two scanners, pairing their results*)
  val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
  (*dependent sequence: the second scanner is selected by the first result*)
  val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
  (*sequence, keeping only the second result*)
  val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
  (*sequence, keeping only the first result*)
  val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
  (*sequence two string-valued scanners, concatenating their results*)
  val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
  (*scan one item equal to the given one*)
  val $$ : ''a -> ''a list -> ''a * ''a list
end;
    1.27 +
(*Full scanner interface.  Scanners signal "need more input", "failure" and
  "abort" through exceptions that are internal to the structure; the
  functions below are the only way to raise or convert them.*)
signature SCAN =
sig
  include BASIC_SCAN

  (* generic scanners *)

  (*always fail (without a reason)*)
  val fail: 'a -> 'b
  (*always fail, with a reason computed from the input*)
  val fail_with: ('a -> string) -> 'a -> 'b
  (*return the given value without consuming input*)
  val succeed: 'a -> 'b -> 'a * 'b
  (*one item satisfying the predicate; empty input requests more input*)
  val one: ('a -> bool) -> 'a list -> 'a * 'a list
  (*longest (possibly empty) prefix of items satisfying the predicate;
    requests more input if the input ends while items still match*)
  val any: ('a -> bool) -> 'a list -> 'a list * 'a list
  (*like any, but at least one item is required*)
  val any1: ('a -> bool) -> 'a list -> 'a list * 'a list
  (*result of the scanner, or the given default if it fails*)
  val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
  (*Some result of the scanner, or None if it fails*)
  val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
  (*zero or more repetitions*)
  val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  (*one or more repetitions*)
  val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  (*run both scanners on the same input; if both succeed, the first result
    is taken when leq (second, first) holds, otherwise the second*)
  val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
  (*result of the scanner, leaving the input untouched*)
  val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
  (*unless test scan: fail if test succeeds, otherwise behave like scan*)
  val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
  (*first scanner of the list that does not fail*)
  val first: ('a -> 'b) list -> 'a -> 'b

  (* state based scanners: the input is a pair (state, items) *)

  (*scanner that is selected by the current state and yields a new state*)
  val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
  (*plain scanner lifted to stateful input; the state is passed through*)
  val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
  (*run a stateful scanner from an initial state, dropping the final state*)
  val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e

  (* exception handling *)

  (*requests for more input and aborts become plain failure*)
  val try: ('a -> 'b) -> 'a -> 'b
  (*requests for more input become plain failure*)
  val force: ('a -> 'b) -> 'a -> 'b
  (*attach a prompt string to requests for more input that have none*)
  val prompt: string -> ('a -> 'b) -> 'a -> 'b

  (* finite scans; the first argument is (stopper, is_stopper) *)

  val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list))
    -> 'b * 'a list -> 'c * ('d * 'a list)
  val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
  (*Some result if the scanner succeeds and consumes all input, else None*)
  val read: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
  (*aborts become failure carrying the abort message*)
  val catch: ('a -> 'b) -> 'a -> 'b
  (*aborts are reported via Library.error*)
  val error: ('a -> 'b) -> 'a -> 'b

  (* infinite scans, draining a source;
     arguments: default prompt, get, unget, stopper, scanner, optional recovery scanner *)

  val source': string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
    'b * ('b -> bool) -> ('d * 'b list -> 'e list * ('d * 'b list)) ->
    ('d * 'b list -> 'f * ('d * 'b list)) option -> 'd * 'a -> 'e list * ('d * 'c)
  val source: string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
    'b * ('b -> bool) -> ('b list -> 'd list * 'b list) ->
    ('b list -> 'e * 'b list) option -> 'a -> 'd list * 'c
  (*wrap the single result of a scanner into a singleton list*)
  val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
  (*one result, followed by as many more as can be scanned via try*)
  val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a

  (* lexicons: sets of literals, each given as a list of strings *)

  type lexicon
  val dest_lexicon: lexicon -> string list list
  val make_lexicon: string list list -> lexicon
  val empty_lexicon: lexicon
  val extend_lexicon: lexicon -> string list list -> lexicon
  val merge_lexicons: lexicon -> lexicon -> lexicon
  (*longest literal of the lexicon that is a prefix of the input*)
  val literal: lexicon -> string list -> string list * string list
end;
    1.73 +
    1.74 +structure Scan: SCAN =
    1.75 +struct
    1.76 +
    1.77 +
    1.78 +(** scanners **)
    1.79 +
(*Control-flow signals used by all scanners below.  They are not listed in
  signature SCAN, so clients can only influence them through the wrappers
  !!, try, force, prompt, catch and error.*)
exception MORE of string option;	(*input exhausted, more is needed (optional prompt)*)
exception FAIL of string option;	(*failure: alternatives may be tried (optional reason)*)
exception ABORT of string;		(*dead end (message)*)
    1.83 +
    1.84 +
    1.85 +(* scanner combinators *)
    1.86 +
    1.87 +fun (scan >> f) xs = apfst f (scan xs);
    1.88 +
    1.89 +fun (scan1 || scan2) xs = scan1 xs handle FAIL _ => scan2 xs;
    1.90 +
    1.91 +(*dependent pairing*)
    1.92 +fun (scan1 :-- scan2) xs =
    1.93 +  let
    1.94 +    val (x, ys) = scan1 xs;
    1.95 +    val (y, zs) = scan2 x ys;
    1.96 +  in ((x, y), zs) end;
    1.97 +
    1.98 +fun (scan1 -- scan2) = scan1 :-- (fn _ => scan2);
    1.99 +fun (scan1 |-- scan2) = scan1 -- scan2 >> #2;
   1.100 +fun (scan1 --| scan2) = scan1 -- scan2 >> #1;
   1.101 +fun (scan1 ^^ scan2) = scan1 -- scan2 >> op ^;
   1.102 +
   1.103 +
   1.104 +(* generic scanners *)
   1.105 +
   1.106 +fun fail _ = raise FAIL None;
   1.107 +fun fail_with msg_of xs = raise FAIL (Some (msg_of xs));
   1.108 +fun succeed y xs = (y, xs);
   1.109 +
   1.110 +fun one _ [] = raise MORE None
   1.111 +  | one pred (x :: xs) =
   1.112 +      if pred x then (x, xs) else raise FAIL None;
   1.113 +
   1.114 +fun $$ _ [] = raise MORE None
   1.115 +  | $$ a (x :: xs) =
   1.116 +      if a = x then (x, xs) else raise FAIL None;
   1.117 +
   1.118 +fun any _ [] = raise MORE None
   1.119 +  | any pred (lst as x :: xs) =
   1.120 +      if pred x then apfst (cons x) (any pred xs)
   1.121 +      else ([], lst);
   1.122 +
   1.123 +fun any1 pred = one pred -- any pred >> op ::;
   1.124 +
   1.125 +fun optional scan def = scan || succeed def;
   1.126 +fun option scan = optional (scan >> Some) None;
   1.127 +
   1.128 +fun repeat scan xs = (scan -- repeat scan >> op :: || succeed []) xs;
   1.129 +fun repeat1 scan = scan -- repeat scan >> op ::;
   1.130 +
   1.131 +fun max leq scan1 scan2 xs =
   1.132 +  (case (option scan1 xs, option scan2 xs) of
   1.133 +    ((None, _), (None, _)) => raise FAIL None		(*looses FAIL msg!*)
   1.134 +  | ((Some tok1, xs'), (None, _)) => (tok1, xs')
   1.135 +  | ((None, _), (Some tok2, xs')) => (tok2, xs')
   1.136 +  | ((Some tok1, xs1'), (Some tok2, xs2')) =>
   1.137 +      if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2'));
   1.138 +
   1.139 +fun ahead scan xs = (fst (scan xs), xs);
   1.140 +
   1.141 +fun unless test scan =
   1.142 +  ahead (option test) :-- (fn None => scan | _ => fail) >> #2;
   1.143 +
   1.144 +fun first [] = fail
   1.145 +  | first (scan :: scans) = scan || first scans;
   1.146 +
   1.147 +
   1.148 +(* state based scanners *)
   1.149 +
   1.150 +fun depend scan (st, xs) =
   1.151 +  let val ((st', y), xs') = scan st xs
   1.152 +  in (y, (st', xs')) end;
   1.153 +
   1.154 +fun lift scan (st, xs) =
   1.155 +  let val (y, xs') = scan xs
   1.156 +  in (y, (st, xs')) end;
   1.157 +
   1.158 +fun pass st scan xs =
   1.159 +  let val (y, (_, xs')) = scan (st, xs)
   1.160 +  in (y, xs') end;
   1.161 +
   1.162 +
   1.163 +(* exception handling *)
   1.164 +
   1.165 +fun !! err scan xs = scan xs handle FAIL msg => raise ABORT (err (xs, msg));
   1.166 +fun try scan xs = scan xs handle MORE _ => raise FAIL None | ABORT _ => raise FAIL None;
   1.167 +fun force scan xs = scan xs handle MORE _ => raise FAIL None;
   1.168 +fun prompt str scan xs = scan xs handle MORE None => raise MORE (Some str);
   1.169 +fun catch scan xs = scan xs handle ABORT msg => raise FAIL (Some msg);
   1.170 +fun error scan xs = scan xs handle ABORT msg => Library.error msg;
   1.171 +
   1.172 +
   1.173 +(* finite scans *)
   1.174 +
   1.175 +fun finite' (stopper, is_stopper) scan (state, input) =
   1.176 +  let
   1.177 +    fun lost () = raise ABORT "Scanner bug: lost stopper of finite scan!";
   1.178 +
   1.179 +    fun stop [] = lost ()
   1.180 +      | stop lst =
   1.181 +          let val (xs, x) = split_last lst
   1.182 +          in if is_stopper x then ((), xs) else lost () end;
   1.183 +  in
   1.184 +    if exists is_stopper input then
   1.185 +      raise ABORT "Stopper may not occur in input of finite scan!"
   1.186 +    else (force scan --| lift stop) (state, input @ [stopper])
   1.187 +  end;
   1.188 +
   1.189 +fun finite stopper scan xs =
   1.190 +  let val (y, ((), xs')) = finite' stopper (lift scan) ((), xs)
   1.191 +  in (y, xs') end;
   1.192 +
   1.193 +fun read stopper scan xs =
   1.194 +  (case error (finite stopper (option scan)) xs of
   1.195 +    (y as Some _, []) => y
   1.196 +  | _ => None);
   1.197 +
   1.198 +
   1.199 +
   1.200 +(* infinite scans -- draining state-based source *)
   1.201 +
   1.202 +fun drain def_prmpt get stopper scan ((state, xs), src) =
   1.203 +  (scan (state, xs), src) handle MORE prmpt =>
   1.204 +    (case get (if_none prmpt def_prmpt) src of
   1.205 +      ([], _) => (finite' stopper scan (state, xs), src)
   1.206 +    | (xs', src') => drain def_prmpt get stopper scan ((state, xs @ xs'), src'));
   1.207 +
(*One step of a state-based source transformer: get an initial chunk of
  input (with the default prompt), run scanner on it while draining the
  source for more input as requested, and give the unconsumed items back
  to the source via unget.  Returns the scanned results together with the
  new state and the new source.

  Without a recovery scanner, aborts of scanner are reported as errors.
  With a recovery scanner, see drain_loop below.*)
fun source' def_prmpt get unget stopper scanner opt_recover (state, src) =
  let
    fun drain_with scan = drain def_prmpt get stopper scan;

    (*aborts of scanner are turned into failures (catch); on any failure the
      message is printed, the recovery scanner is run on the same input, its
      result is discarded (apfst snd keeps state and remaining input only),
      and scanning is retried from there*)
    fun drain_loop recover inp =
      drain_with (catch scanner) inp handle FAIL msg =>
        (error_msg (if_none msg "Syntax error.");
          drain_loop recover (apfst snd (drain_with recover inp)));

    val ((ys, (state', xs')), src') =
      (case (get def_prmpt src, opt_recover) of
        (*an empty initial chunk: no results, state unchanged*)
        (([], s), _) => (([], (state, [])), s)
      | ((xs, s), None) => drain_with (error scanner) ((state, xs), s)
      | ((xs, s), Some scan) => drain_loop scan ((state, xs), s));
  in
    (ys, (state', unget (xs', src')))
  end;
   1.225 +
   1.226 +fun source def_prmpt get unget stopper scan opt_recover src =
   1.227 +  let val (ys, ((), src')) =
   1.228 +    source' def_prmpt get unget stopper (lift scan) (apsome lift opt_recover) ((), src)
   1.229 +  in (ys, src') end;
   1.230 +
   1.231 +fun single scan = scan >> (fn x => [x]);
   1.232 +fun bulk scan = scan -- repeat (try scan) >> (op ::);
   1.233 +
   1.234 +
   1.235 +
   1.236 +(** datatype lexicon **)
   1.237 +
(*A lexicon is a ternary tree over the items of its literals.  In
  Branch (d, a, lt, eq, gt):
    d   is the item tested at this node,
    a   is the literal that ends with d at this node, or no_literal,
    lt  holds the alternatives for items less than d,
    gt  holds the alternatives for items greater than d,
    eq  holds the continuations after d has been consumed.*)
datatype lexicon =
  Empty |
  Branch of string * string list * lexicon * lexicon * lexicon;

(*marker for a node at which no literal ends*)
val no_literal = [];
   1.243 +
   1.244 +
   1.245 +(* dest_lexicon *)
   1.246 +
   1.247 +fun dest_lexicon Empty = []
   1.248 +  | dest_lexicon (Branch (_, [], lt, eq, gt)) =
   1.249 +      dest_lexicon lt @ dest_lexicon eq @ dest_lexicon gt
   1.250 +  | dest_lexicon (Branch (_, cs, lt, eq, gt)) =
   1.251 +      dest_lexicon lt @ [cs] @ dest_lexicon eq @ dest_lexicon gt;
   1.252 +
   1.253 +
   1.254 +(* empty, extend, make, merge lexicons *)
   1.255 +
(*the lexicon without any literals*)
val empty_lexicon = Empty;
   1.257 +
   1.258 +fun extend_lexicon lexicon chrss =
   1.259 +  let
   1.260 +    fun ext (lex, chrs) =
   1.261 +      let
   1.262 +	fun add (Branch (d, a, lt, eq, gt)) (chs as c :: cs) =
   1.263 +	      if c < d then Branch (d, a, add lt chs, eq, gt)
   1.264 +	      else if c > d then Branch (d, a, lt, eq, add gt chs)
   1.265 +	      else Branch (d, if null cs then chrs else a, lt, add eq cs, gt)
   1.266 +	  | add Empty [c] =
   1.267 +	      Branch (c, chrs, Empty, Empty, Empty)
   1.268 +	  | add Empty (c :: cs) =
   1.269 +	      Branch (c, no_literal, Empty, add Empty cs, Empty)
   1.270 +	  | add lex [] = lex;
   1.271 +      in add lex chrs end;
   1.272 +  in foldl ext (lexicon, chrss \\ dest_lexicon lexicon) end;
   1.273 +
   1.274 +val make_lexicon = extend_lexicon empty_lexicon;
   1.275 +
   1.276 +fun merge_lexicons lex1 lex2 =
   1.277 +  let
   1.278 +    val chss1 = dest_lexicon lex1;
   1.279 +    val chss2 = dest_lexicon lex2;
   1.280 +  in
   1.281 +    if chss2 subset chss1 then lex1
   1.282 +    else if chss1 subset chss2 then lex2
   1.283 +    else extend_lexicon lex1 chss2
   1.284 +  end;
   1.285 +
   1.286 +
   1.287 +(* scan literal *)
   1.288 +
   1.289 +fun literal lex chrs =
   1.290 +  let
   1.291 +    fun lit Empty res _ = res
   1.292 +      | lit (Branch _) _ [] = raise MORE None
   1.293 +      | lit (Branch (d, a, lt, eq, gt)) res (chs as c :: cs) =
   1.294 +	  if c < d then lit lt res chs
   1.295 +	  else if c > d then lit gt res chs
   1.296 +	  else lit eq (if a = no_literal then res else Some (a, cs)) cs;
   1.297 +  in
   1.298 +    (case lit lex None chrs of
   1.299 +      None => raise FAIL None
   1.300 +    | Some res => res)
   1.301 +  end;
   1.302 +
   1.303 +
   1.304 +end;
   1.305 +
   1.306 +
(*restricted view of Scan that exposes only the basic combinators of
  BASIC_SCAN; opening it makes them available unqualified at top level,
  while everything else stays accessible as Scan.xxx*)
structure BasicScan: BASIC_SCAN = Scan;
open BasicScan;