src/Pure/General/scan.ML
author wenzelm
Tue Jul 10 00:43:51 2007 +0200 (2007-07-10)
changeset 23683 1fcfb8682209
parent 23682 cf4773532006
child 23699 5a4527f3ac79
permissions -rw-r--r--
tuned;
     1 (*  Title:      Pure/General/scan.ML
     2     ID:         $Id$
     3     Author:     Markus Wenzel and Tobias Nipkow, TU Muenchen
     4 
     5 Generic scanners (for potentially infinite input).
     6 *)
     7 
(*fixity of the scanner combinators: sequencing binds tightest,
  then result transformation, then alternatives*)
infix 5 -- :-- |-- --| ^^;
infix 3 >>;
infix 0 ||;
    11 
    12 signature BASIC_SCAN =
    13 sig
    14   (*error msg handler*)
    15   val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b
    16   (*apply function*)
    17   val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
    18   (*alternative*)
    19   val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
    20   (*sequential pairing*)
    21   val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
    22   (*dependent pairing*)
    23   val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
    24   (*forget fst*)
    25   val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
    26   (*forget snd*)
    27   val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
    28   (*concatenation*)
    29   val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
    30   (*one element literal*)
    31   val $$ : string -> string list -> string * string list
    32   val ~$$ : string -> string list -> string * string list
    33 end;
    34 
    35 signature SCAN =
    36 sig
    37   include BASIC_SCAN
    38   val fail: 'a -> 'b
    39   val fail_with: ('a -> string) -> 'a -> 'b
    40   val succeed: 'a -> 'b -> 'a * 'b
    41   val some: ('a -> 'b option) -> 'a list -> 'b * 'a list
    42   val one: ('a -> bool) -> 'a list -> 'a * 'a list
    43   val this: string list -> string list -> string list * string list
    44   val this_string: string -> string list -> string * string list
    45   val many: ('a -> bool) -> 'a list -> 'a list * 'a list
    46   val many1: ('a -> bool) -> 'a list -> 'a list * 'a list
    47   val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
    48   val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
    49   val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    50   val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    51   val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
    52   val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
    53   val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
    54   val first: ('a -> 'b) list -> 'a -> 'b
    55   val state: 'a * 'b -> 'a * ('a * 'b)
    56   val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
    57   val peek: ('a -> 'b -> 'c * 'd) -> 'a * 'b -> 'c * ('a * 'd)
    58   val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
    59   val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
    60   val trace': ('a * 'b list -> 'c * ('d * 'e list)) -> 'a * 'b list ->
    61     ('c * 'b list) * ('d * 'e list)
    62   val trace: ('a list -> 'b * 'c list) -> 'a list -> ('b * 'a list) * 'c list
    63   val prompt: string -> ('a -> 'b) -> 'a -> 'b
    64   val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list))
    65     -> 'b * 'a list -> 'c * ('d * 'a list)
    66   val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
    67   val read: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
    68   val error: ('a -> 'b) -> 'a -> 'b
    69   val source': string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
    70     'b * ('b -> bool) -> ('d * 'b list -> 'e list * ('d * 'b list)) ->
    71     (bool * (string -> 'd * 'b list -> 'e list * ('d * 'b list))) option ->
    72     'd * 'a -> 'e list * ('d * 'c)
    73   val source: string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
    74     'b * ('b -> bool) -> ('b list -> 'd list * 'b list) ->
    75     (bool * (string -> 'b list -> 'd list * 'b list)) option -> 'a -> 'd list * 'c
    76   val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    77   val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    78   type lexicon
    79   val dest_lexicon: lexicon -> string list
    80   val make_lexicon: string list list -> lexicon
    81   val empty_lexicon: lexicon
    82   val extend_lexicon: string list list -> lexicon -> lexicon
    83   val merge_lexicons: lexicon -> lexicon -> lexicon
    84   val is_literal: lexicon -> string list -> bool
    85   val literal: lexicon -> string list -> string list * string list
    86 end;
    87 
    88 structure Scan: SCAN =
    89 struct
    90 
    91 
    92 (** scanners **)
    93 
    94 exception MORE of string option;        (*need more input (prompt)*)
    95 exception FAIL of string option;        (*try alternatives (reason of failure)*)
    96 exception ABORT of string;              (*dead end*)
    97 
    98 
    99 (* scanner combinators *)
   100 
   101 fun (scan >> f) xs = scan xs |>> f;
   102 
   103 fun (scan1 || scan2) xs = scan1 xs handle FAIL _ => scan2 xs;
   104 
   105 fun (scan1 :-- scan2) xs =
   106   let
   107     val (x, ys) = scan1 xs;
   108     val (y, zs) = scan2 x ys;
   109   in ((x, y), zs) end;
   110 
   111 fun (scan1 -- scan2) = scan1 :-- (fn _ => scan2);
   112 fun (scan1 |-- scan2) = scan1 -- scan2 >> #2;
   113 fun (scan1 --| scan2) = scan1 -- scan2 >> #1;
   114 fun (scan1 ^^ scan2) = scan1 -- scan2 >> op ^;
   115 
   116 
   117 (* generic scanners *)
   118 
   119 fun fail _ = raise FAIL NONE;
   120 fun fail_with msg_of xs = raise FAIL (SOME (msg_of xs));
   121 fun succeed y xs = (y, xs);
   122 
   123 fun some _ [] = raise MORE NONE
   124   | some f (x :: xs) =
   125       (case f x of SOME y => (y, xs) | _ => raise FAIL NONE);
   126 
   127 fun one _ [] = raise MORE NONE
   128   | one pred (x :: xs) =
   129       if pred x then (x, xs) else raise FAIL NONE;
   130 
   131 fun $$ a = one (fn s: string => s = a);
   132 fun ~$$ a = one (fn s: string => s <> a);
   133 
   134 fun this ys xs =
   135   let
   136     fun drop_prefix [] xs = xs
   137       | drop_prefix (_ :: _) [] = raise MORE NONE
   138       | drop_prefix (y :: ys) (x :: xs) =
   139           if (y: string) = x then drop_prefix ys xs else raise FAIL NONE;
   140   in (ys, drop_prefix ys xs) end;
   141 
   142 fun this_string s = this (explode s) >> K s;  (*primitive string -- no symbols here!*)
   143 
   144 fun many _ [] = raise MORE NONE
   145   | many pred (lst as x :: xs) =
   146       if pred x then apfst (cons x) (many pred xs)
   147       else ([], lst);
   148 
   149 fun many1 pred = one pred -- many pred >> op ::;
   150 
   151 fun optional scan def = scan || succeed def;
   152 fun option scan = (scan >> SOME) || succeed NONE;
   153 
   154 fun repeat scan =
   155   let
   156     fun rep ys xs =
   157       (case (SOME (scan xs) handle FAIL _ => NONE) of
   158         NONE => (rev ys, xs)
   159       | SOME (y, xs') => rep (y :: ys) xs');
   160   in rep [] end;
   161 
   162 fun repeat1 scan = scan -- repeat scan >> op ::;
   163 
   164 fun max leq scan1 scan2 xs =
   165   (case (option scan1 xs, option scan2 xs) of
   166     ((NONE, _), (NONE, _)) => raise FAIL NONE           (*looses FAIL msg!*)
   167   | ((SOME tok1, xs'), (NONE, _)) => (tok1, xs')
   168   | ((NONE, _), (SOME tok2, xs')) => (tok2, xs')
   169   | ((SOME tok1, xs1'), (SOME tok2, xs2')) =>
   170       if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2'));
   171 
   172 fun ahead scan xs = (fst (scan xs), xs);
   173 
   174 fun unless test scan =
   175   ahead (option test) :-- (fn NONE => scan | _ => fail) >> #2;
   176 
   177 fun first [] = fail
   178   | first (scan :: scans) = scan || first scans;
   179 
   180 
   181 (* state based scanners *)
   182 
   183 fun state (st, xs) = (st, (st, xs));
   184 
   185 fun depend scan (st, xs) =
   186   let val ((st', y), xs') = scan st xs
   187   in (y, (st', xs')) end;
   188 
   189 fun peek scan = depend (fn st => scan st >> pair st);
   190 
   191 fun pass st scan xs =
   192   let val (y, (_, xs')) = scan (st, xs)
   193   in (y, xs') end;
   194 
   195 fun lift scan (st, xs) =
   196   let val (y, xs') = scan xs
   197   in (y, (st, xs')) end;
   198 
   199 fun unlift scan = pass () scan;
   200 
   201 
   202 (* trace input *)
   203 
   204 fun trace' scan (st, xs) =
   205   let val (y, (st', xs')) = scan (st, xs)
   206   in ((y, Library.take (length xs - length xs', xs)), (st', xs')) end;
   207 
   208 fun trace scan = unlift (trace' (lift scan));
   209 
   210 
   211 (* exception handling *)
   212 
   213 fun !! err scan xs = scan xs handle FAIL msg => raise ABORT (err (xs, msg));
   214 fun permissive scan xs = scan xs handle MORE _ => raise FAIL NONE | ABORT _ => raise FAIL NONE;
   215 fun strict scan xs = scan xs handle MORE _ => raise FAIL NONE;
   216 fun prompt str scan xs = scan xs handle MORE NONE => raise MORE (SOME str);
   217 fun catch scan xs = scan xs handle ABORT msg => raise FAIL (SOME msg);
   218 fun error scan xs = scan xs handle ABORT msg => Library.error msg;
   219 
   220 
   221 (* finite scans *)
   222 
   223 fun finite' (stopper, is_stopper) scan (state, input) =
   224   let
   225     fun lost () = raise ABORT "Scanner bug: lost stopper of finite scan!";
   226 
   227     fun stop [] = lost ()
   228       | stop lst =
   229           let val (xs, x) = split_last lst
   230           in if is_stopper x then ((), xs) else lost () end;
   231   in
   232     if exists is_stopper input then
   233       raise ABORT "Stopper may not occur in input of finite scan!"
   234     else (strict scan --| lift stop) (state, input @ [stopper])
   235   end;
   236 
   237 fun finite stopper scan = unlift (finite' stopper (lift scan));
   238 
   239 fun read stopper scan xs =
   240   (case error (finite stopper (option scan)) xs of
   241     (y as SOME _, []) => y
   242   | _ => NONE);
   243 
   244 
   245 (* infinite scans -- draining state-based source *)
   246 
   247 fun drain def_prmpt get stopper scan ((state, xs), src) =
   248   (scan (state, xs), src) handle MORE prmpt =>
   249     (case get (the_default def_prmpt prmpt) src of
   250       ([], _) => (finite' stopper scan (state, xs), src)
   251     | (xs', src') => drain def_prmpt get stopper scan ((state, xs @ xs'), src'));
   252 
   253 fun source' def_prmpt get unget stopper scanner opt_recover (state, src) =
   254   let
   255     val draining = drain def_prmpt get stopper;
   256     val (xs, s) = get def_prmpt src;
   257     val inp = ((state, xs), s);
   258     val ((ys, (state', xs')), src') =
   259       if null xs then (([], (state, [])), s)
   260       else
   261         (case opt_recover of
   262           NONE => draining (error scanner) inp
   263         | SOME (interactive, recover) =>
   264             (draining (catch scanner) inp handle FAIL msg =>
   265               let val err = the_default "Syntax error." msg in
   266                 if interactive then Output.error_msg err else ();
   267                 draining (unless (lift (one (#2 stopper))) (recover err)) inp
   268               end));
   269   in (ys, (state', unget (xs', src'))) end;
   270 
   271 fun source def_prmpt get unget stopper scan opt_recover =
   272   unlift (source' def_prmpt get unget stopper (lift scan)
   273     (Option.map (fn (int, r) => (int, lift o r)) opt_recover));
   274 
   275 fun single scan = scan >> (fn x => [x]);
   276 fun bulk scan = scan -- repeat (permissive scan) >> (op ::);
   277 
   278 
   279 
   280 (** datatype lexicon **)
   281 
   282 datatype lexicon =
   283   Empty |
   284   Branch of string * string list * lexicon * lexicon * lexicon;
   285 
   286 val no_literal = [];
   287 
   288 
   289 (* dest_lexicon *)
   290 
   291 fun dest_lex Empty = []
   292   | dest_lex (Branch (_, [], lt, eq, gt)) =
   293       dest_lex lt @ dest_lex eq @ dest_lex gt
   294   | dest_lex (Branch (_, cs, lt, eq, gt)) =
   295       dest_lex lt @ [cs] @ dest_lex eq @ dest_lex gt;
   296 
   297 val dest_lexicon = map implode o dest_lex;
   298 
   299 
   300 (* empty, extend, make, merge lexicons *)
   301 
   302 val empty_lexicon = Empty;
   303 
   304 fun extend_lexicon [] lexicon = lexicon
   305   | extend_lexicon chrss lexicon =
   306       let
   307         fun ext chrs lex =
   308           let
   309             fun add (chs as c :: cs) (Branch (d, a, lt, eq, gt)) =
   310                   (case fast_string_ord (c, d) of
   311                     LESS => Branch (d, a, add chs lt, eq, gt)
   312                   | EQUAL => Branch (d, if null cs then chrs else a, lt, add cs eq, gt)
   313                   | GREATER => Branch (d, a, lt, eq, add chs gt))
   314               | add [c] Empty = Branch (c, chrs, Empty, Empty, Empty)
   315               | add (c :: cs) Empty = Branch (c, no_literal, Empty, add cs Empty, Empty)
   316               | add [] lex = lex;
   317           in add chrs lex end;
   318       in lexicon |> fold ext (chrss |> subtract (op =) (dest_lex lexicon)) end;
   319 
   320 fun make_lexicon chrss = extend_lexicon chrss empty_lexicon;
   321 
   322 fun merge_lexicons lex1 lex2 =
   323   let
   324     val chss1 = dest_lex lex1;
   325     val chss2 = dest_lex lex2;
   326   in
   327     if chss2 subset chss1 then lex1
   328     else if chss1 subset chss2 then lex2
   329     else extend_lexicon chss2 lex1
   330   end;
   331 
   332 
   333 (* is_literal *)
   334 
   335 fun is_literal Empty _ = false
   336   | is_literal _ [] = false
   337   | is_literal (Branch (d, a, lt, eq, gt)) (chs as c :: cs) =
   338       (case fast_string_ord (c, d) of
   339         LESS => is_literal lt chs
   340       | EQUAL => a <> no_literal andalso null cs orelse is_literal eq cs
   341       | GREATER => is_literal gt chs);
   342 
   343 
   344 (* scan literal *)
   345 
   346 fun literal lex chrs =
   347   let
   348     fun lit Empty res _ = res
   349       | lit (Branch _) _ [] = raise MORE NONE
   350       | lit (Branch (d, a, lt, eq, gt)) res (chs as c :: cs) =
   351           (case fast_string_ord (c, d) of
   352             LESS => lit lt res chs
   353           | EQUAL => lit eq (if a = no_literal then res else SOME (a, cs)) cs
   354           | GREATER => lit gt res chs);
   355   in
   356     (case lit lex NONE chrs of
   357       NONE => raise FAIL NONE
   358     | SOME res => res)
   359   end;
   360 
   361 end;
   362 
(*restricted view of Scan: only the items of BASIC_SCAN are opened at
  top level, everything else needs to be qualified via Scan*)
structure BasicScan: BASIC_SCAN = Scan;
open BasicScan;