src/Pure/General/scan.ML
author berghofe
Wed Jan 29 17:35:11 2003 +0100 (2003-01-29)
changeset 13795 cfa3441c5238
parent 11523 9a658fe20107
child 14078 cddad2aa025b
permissions -rw-r--r--
Some tuning:
- finite now uses rev_append (tail recursive!) to append stopper, because @
needs too much stack space for large strings
- repeat is now tail recursive
     1 (*  Title:      Pure/General/scan.ML
     2     ID:         $Id$
     3     Author:     Markus Wenzel and Tobias Nipkow, TU Muenchen
     4     License:    GPL (GNU GENERAL PUBLIC LICENSE)
     5 
     6 Generic scanners (for potentially infinite input).
     7 *)
     8 
     (*fixity of the scanner combinators: the sequencing operators (level 5)
       bind tighter than result mapping >> (level 3), which in turn binds
       tighter than the alternative || (level 0)*)
     infix 5 -- :-- |-- --| ^^;
     infix 3 >>;
     infix 0 ||;
    12 
    (*basic scanner combinators; structure BasicScan at the end of this file
      is opened, which makes them available unqualified*)
    signature BASIC_SCAN =
    sig
      (*dead end: FAIL of the scanner becomes ABORT, with a message computed
        from the input and the reason of failure*)
      val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b
      (*apply a function to the result of a scanner*)
      val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
      (*alternative: the second scanner is tried if the first raises FAIL*)
      val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
      (*sequential composition, pairing both results*)
      val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
      (*dependent pairing: the second scanner receives the first result*)
      val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
      (*sequential composition, keeping only the second result*)
      val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
      (*sequential composition, keeping only the first result*)
      val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
      (*sequential composition, concatenating both string results*)
      val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
      (*scan exactly the given item*)
      val $$ : ''a -> ''a list -> ''a * ''a list
    end;
    25 
    (*full scanner library: the basic combinators plus generic, state based,
      finite and source-draining scanners, and scanning of lexicon literals*)
    signature SCAN =
    sig
      include BASIC_SCAN
      (*fail raises FAIL without message, fail_with raises FAIL with a message
        computed from the input, succeed returns a value and consumes nothing*)
      val fail: 'a -> 'b
      val fail_with: ('a -> string) -> 'a -> 'b
      val succeed: 'a -> 'b -> 'a * 'b
      (*one item / a possibly empty prefix / a non-empty prefix of items
        satisfying the predicate; all of them raise MORE on exhausted input*)
      val one: ('a -> bool) -> 'a list -> 'a * 'a list
      val any: ('a -> bool) -> 'a list -> 'a list * 'a list
      val any1: ('a -> bool) -> 'a list -> 'a list * 'a list
      (*optional yields the given default if the scanner raises FAIL, option
        wraps the result as Some / None instead*)
      val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
      val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
      (*apply a scanner repeatedly until it raises FAIL, collecting results;
        repeat1 demands at least one successful application*)
      val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
      val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
      (*run both scanners on the same input; if both succeed, the first
        result is chosen whenever the given relation holds for (second, first)*)
      val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
      (*scan without consuming input*)
      val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
      (*apply the second scanner only if the first raises FAIL on the input*)
      val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
      (*try a list of alternatives from left to right*)
      val first: ('a -> 'b) list -> 'a -> 'b
      (*state based scanners work on pairs of state and input: state returns
        the current state, depend runs a scanner that is given the state and
        produces a new one, lift runs a plain scanner keeping the state, pass
        runs a state based scanner on plain input with an initial state*)
      val state: 'a * 'b -> 'a * ('a * 'b)
      val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
      val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
      val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
      (*try turns MORE and ABORT into FAIL, force turns MORE into FAIL,
        prompt attaches a prompt string to MORE exceptions that carry none*)
      val try: ('a -> 'b) -> 'a -> 'b
      val force: ('a -> 'b) -> 'a -> 'b
      val prompt: string -> ('a -> 'b) -> 'a -> 'b
      (*finite scans: the first argument is a stopper item together with its
        recognizer; the stopper is appended to the input before scanning and
        removed from the remaining input afterwards*)
      val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list))
        -> 'b * 'a list -> 'c * ('d * 'a list)
      val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
      (*finite scan that yields Some result only if the scanner succeeds and
        no input remains*)
      val read: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
      (*catch turns ABORT into FAIL (keeping the message), error turns ABORT
        into a call of Library.error*)
      val catch: ('a -> 'b) -> 'a -> 'b
      val error: ('a -> 'b) -> 'a -> 'b
      (*infinite scans, draining a source: the arguments are the default
        prompt, functions to get input from and return input to the source,
        the stopper, the scanner and an optional recovery scanner*)
      val source': string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
        'b * ('b -> bool) -> ('d * 'b list -> 'e list * ('d * 'b list)) ->
        ('d * 'b list -> 'e list * ('d * 'b list)) option -> 'd * 'a -> 'e list * ('d * 'c)
      val source: string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
        'b * ('b -> bool) -> ('b list -> 'd list * 'b list) ->
        ('b list -> 'd list * 'b list) option -> 'a -> 'd list * 'c
      (*single wraps one result into a singleton list; bulk scans once and
        then keeps scanning for as long as the scanner succeeds without
        raising MORE, ABORT or FAIL*)
      val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
      val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
      (*lexicons store literals, each given as a list of strings; dest_lexicon
        returns the stored literals imploded to strings, literal scans the
        longest stored literal that matches the head of the input*)
      type lexicon
      val dest_lexicon: lexicon -> string list
      val make_lexicon: string list list -> lexicon
      val empty_lexicon: lexicon
      val extend_lexicon: lexicon -> string list list -> lexicon
      val merge_lexicons: lexicon -> lexicon -> lexicon
      val literal: lexicon -> string list -> string list * string list
    end;
    72 
    73 structure Scan: SCAN =
    74 struct
    75 
    76 
    77 (** scanners **)
    78 
    (*scanners signal their outcome via exceptions: MORE is raised by the
      elementary scanners (e.g. one, $$, any) when the input is exhausted,
      FAIL is handled by || in order to try alternatives, and ABORT is raised
      by !! when a scanner must not fail*)
    exception MORE of string option;        (*need more input (prompt)*)
    exception FAIL of string option;        (*try alternatives (reason of failure)*)
    exception ABORT of string;              (*dead end*)
    82 
    83 
    84 (* scanner combinators *)
    85 
    (*map the result of a scanner, leaving the remaining input untouched*)
    fun (scan >> f) xs =
      let val (x, rest) = scan xs
      in (f x, rest) end;

    (*alternative: fall back to scan2 on the same input if scan1 raises FAIL*)
    fun (scan1 || scan2) =
      fn xs => scan1 xs handle FAIL _ => scan2 xs;
    89 
    (*dependent pairing: the second scanner is selected by the result of the
      first one and continues on the input the first one left over*)
    fun (scan1 :-- scan2) xs =
      let val (x, ys) = scan1 xs
      in apfst (fn y => (x, y)) (scan2 x ys) end;
    96 
    (*sequential composition, pairing both results*)
    fun (scan1 -- scan2) xs =
      let
        val (x, ys) = scan1 xs;
        val (y, zs) = scan2 ys;
      in ((x, y), zs) end;

    (*sequential composition, keeping only the second result*)
    fun (scan1 |-- scan2) = scan1 -- scan2 >> (fn (_, y) => y);

    (*sequential composition, keeping only the first result*)
    fun (scan1 --| scan2) = scan1 -- scan2 >> (fn (x, _) => x);

    (*sequential composition, concatenating both string results*)
    fun (scan1 ^^ scan2) = scan1 -- scan2 >> (fn (s1, s2) => s1 ^ s2);
   101 
   102 
   103 (* generic scanners *)
   104 
   (*always fail, without giving a reason*)
   fun fail _ = raise FAIL None;

   (*always fail, the reason being computed from the current input*)
   fun fail_with msg_of xs =
     let val msg = msg_of xs
     in raise FAIL (Some msg) end;

   (*always succeed with the given result, consuming no input*)
   fun succeed y = fn xs => (y, xs);
   108 
   (*scan a single item satisfying pred; MORE on exhausted input, FAIL if the
     next item does not satisfy pred*)
   fun one pred xs =
     (case xs of
       [] => raise MORE None
     | x :: rest => if pred x then (x, rest) else raise FAIL None);

   (*scan exactly the item a*)
   fun $$ a = one (fn x => a = x);
   116 
   (*scan the longest prefix of items satisfying pred (possibly empty),
     returning it together with the remaining input, which starts with the
     first item that does not satisfy pred.  MORE is raised if the input ends
     before such an item is seen.

     The prefix is accumulated in reverse by a tail recursive loop and
     reversed once at the end, so that long runs of matching items do not
     consume stack space proportional to their length.*)
   fun any pred =
     let
       fun collect _ [] = raise MORE None
         | collect acc (lst as x :: xs) =
             if pred x then collect (x :: acc) xs
             else (rev acc, lst);
     in collect [] end;
   121 
   (*non-empty prefix of items satisfying pred*)
   fun any1 pred = one pred -- any pred >> (fn (x, xs) => x :: xs);

   (*result of scan, or def (consuming nothing) if scan raises FAIL*)
   fun optional scan def xs = scan xs handle FAIL _ => (def, xs);

   (*Some result of scan, or None (consuming nothing) if scan raises FAIL*)
   fun option scan xs = apfst Some (scan xs) handle FAIL _ => (None, xs);
   126 
   (*apply scan as often as possible, i.e. until it raises FAIL; the results
     are accumulated in reverse by a tail recursive loop*)
   fun repeat scan =
     let
       fun attempt xs = Some (scan xs) handle FAIL _ => None;

       fun collect acc xs =
         (case attempt xs of
           Some (y, rest) => collect (y :: acc) rest
         | None => (rev acc, xs));
     in collect [] end;

   (*like repeat, but the first application of scan has to succeed*)
   fun repeat1 scan = scan -- repeat scan >> (fn (y, ys) => y :: ys);
   133 
   (*run both scanners on the same input; if only one succeeds its result is
     taken, if both succeed the first result wins whenever leq (tok2, tok1)*)
   fun max leq scan1 scan2 xs =
     let
       val res1 = option scan1 xs;
       val res2 = option scan2 xs;
     in
       (case (res1, res2) of
         ((Some tok1, rest1), (Some tok2, rest2)) =>
           if leq (tok2, tok1) then (tok1, rest1) else (tok2, rest2)
       | ((Some tok1, rest1), _) => (tok1, rest1)
       | (_, (Some tok2, rest2)) => (tok2, rest2)
       | _ => raise FAIL None)           (*loses FAIL msg!*)
     end;
   141 
   (*result of scan, without consuming any input*)
   fun ahead scan xs =
     let val (y, _) = scan xs
     in (y, xs) end;

   (*apply scan only if test raises FAIL on the current input; otherwise FAIL*)
   fun unless test scan xs =
     (case option test xs of
       (None, _) => scan xs
     | _ => raise FAIL None);

   (*try the given scanners from left to right on the same input*)
   fun first scans xs =
     (case scans of
       [] => raise FAIL None
     | scan :: rest => (scan xs handle FAIL _ => first rest xs));
   149 
   150 
   151 (* state based scanners *)
   152 
   (*return the current state as result, leaving state and input unchanged*)
   fun state (inp as (st, _)) = (st, inp);
   154 
   (*run a scanner that is given the current state and returns the new state
     paired with its result*)
   fun depend scan (st, xs) =
     (case scan st xs of
       ((st', y), rest) => (y, (st', rest)));
   158 
   (*run a plain scanner on the input component, keeping the state as it is*)
   fun lift scan (st, xs) =
     (case scan xs of
       (y, rest) => (y, (st, rest)));
   162 
   (*run a state based scanner on plain input, starting from state st and
     discarding the final state*)
   fun pass st scan xs =
     (case scan (st, xs) of
       (y, (_, rest)) => (y, rest));
   166 
   167 
   168 (* exception handling *)
   169 
   (*dead end: FAIL of scan becomes ABORT, with a message built by err from
     the current input and the reason of failure*)
   fun !! err scan xs =
     scan xs handle FAIL reason => raise ABORT (err (xs, reason));

   (*MORE and ABORT of scan both become FAIL without message*)
   fun try scan xs =
     scan xs handle MORE _ => fail xs | ABORT _ => fail xs;

   (*MORE of scan becomes FAIL without message*)
   fun force scan xs =
     scan xs handle MORE _ => fail xs;

   (*supply the prompt str for MORE exceptions that carry none*)
   fun prompt str scan =
     fn xs => scan xs handle MORE None => raise MORE (Some str);

   (*ABORT of scan becomes FAIL with the same message*)
   fun catch scan xs =
     scan xs handle ABORT msg => fail_with (fn _ => msg) xs;

   (*ABORT of scan is reported via Library.error*)
   fun error scan =
     fn xs => scan xs handle ABORT msg => Library.error msg;
   176 
   177 
   178 (* finite scans *)
   179 
   (*finite scan with explicit state: the stopper is appended to the input
     before scanning (with MORE turned into FAIL), and has to be the last
     item of the remaining input afterwards, from which it is removed again*)
   fun finite' (stopper, is_stopper) scan (state, input) =
     let
       fun lost () = raise ABORT "Scanner bug: lost stopper of finite scan!";

       (*strip the trailing stopper from the unconsumed input*)
       fun stop rest =
         if null rest then lost ()
         else
           let val (front, final) = split_last rest
           in if is_stopper final then ((), front) else lost () end;
     in
       if exists is_stopper input then
         raise ABORT "Stopper may not occur in input of finite scan!"
       else
         let
           (*rev_append is tail recursive, unlike @*)
           val stopped = rev_append (rev input) [stopper];
           val (y, remaining) = force scan (state, stopped);
           val (_, remaining') = lift stop remaining;
         in (y, remaining') end
     end;
   193 
   (*finite scan without state: finite' run on a dummy unit state*)
   fun finite stopper scan xs =
     pass () (finite' stopper (lift scan)) xs;

   (*finite scan of the whole input: Some result only if scan succeeds and
     nothing is left over; ABORT is reported via error*)
   fun read stopper scan xs =
     let val (result, rest) = error (finite stopper (option scan)) xs
     in if null rest then result else None end;
   202 
   203 
   204 (* infinite scans -- draining state-based source *)
   205 
   (*run scan on the pending input; whenever it asks for MORE, fetch further
     input from the source (using the prompt of the exception, or def_prmpt)
     and retry.  Once the source delivers nothing, finish with a finite scan
     of what is pending.*)
   fun drain def_prmpt get stopper scan =
     let
       fun loop (inp as (state, xs), src) =
         (scan inp, src) handle MORE prmpt =>
           let val (more, src') = get (if_none prmpt def_prmpt) src in
             if null more then (finite' stopper scan inp, src)
             else loop ((state, xs @ more), src')
           end;
     in loop end;
   211 
   (*scan one portion of a state-based source: input is fetched via get,
     drained by the scanner, and the unconsumed rest is handed back via unget.
     Without recovery scanner ABORT is reported via error; with one, a failure
     is reported by error_msg and the recovery scanner (guarded against the
     stopper) is run on the same input instead.*)
   fun source' def_prmpt get unget stopper scanner opt_recover (state, src) =
     let
       fun run scan inp = drain def_prmpt get stopper scan inp;

       fun run_recovering recover inp =
         run (catch scanner) inp handle FAIL msg =>
           (error_msg (if_none msg "Syntax error."); run recover inp);

       val (xs, src1) = get def_prmpt src;

       val ((ys, (state', xs')), src') =
         if null xs then (([], (state, [])), src1)
         else
           (case opt_recover of
             None => run (error scanner) ((state, xs), src1)
           | Some r =>
               run_recovering (unless (lift (one (#2 stopper))) r) ((state, xs), src1));
     in (ys, (state', unget (xs', src'))) end;
   226 
   (*stateless version of source': scanner and recovery scanner are lifted
     and run on a dummy unit state, which is dropped from the result*)
   fun source def_prmpt get unget stopper scan opt_recover src =
     pass ()
       (source' def_prmpt get unget stopper (lift scan) (apsome lift opt_recover))
       src;
   231 
   (*wrap the result of scan into a singleton list*)
   fun single scan xs =
     let val (x, rest) = scan xs
     in ([x], rest) end;

   (*scan once, then continue for as long as scan succeeds without raising
     MORE, ABORT or FAIL*)
   fun bulk scan xs =
     let
       val (y, rest) = scan xs;
       val (ys, rest') = repeat (try scan) rest;
     in (y :: ys, rest') end;
   234 
   235 
   236 
   237 (** datatype lexicon **)
   238 
   (*search tree over the items of the stored literals: at a node
     Branch (d, a, lt, eq, gt) the next input item c is compared with d;
     lt and gt hold the alternatives for c < d and c > d, while eq continues
     with the item following c.  The component a is the complete literal that
     ends at this node, or no_literal if none ends here.*)
   datatype lexicon =
     Empty |
     Branch of string * string list * lexicon * lexicon * lexicon;

   (*marks a Branch at which no literal ends*)
   val no_literal = [];
   244 
   245 
   246 (* dest_lexicon *)
   247 
   (*all literals stored in a lexicon, in the order lt / node / eq / gt*)
   fun dest_lex Empty = []
     | dest_lex (Branch (_, cs, lt, eq, gt)) =
         let val here = if null cs then [] else [cs]
         in dest_lex lt @ here @ dest_lex eq @ dest_lex gt end;

   (*the stored literals, each imploded to a single string*)
   fun dest_lexicon lex = map implode (dest_lex lex);
   255 
   256 
   257 (* empty, extend, make, merge lexicons *)
   258 
   val empty_lexicon = Empty;

   (*add the literals chrss that are not yet present to the lexicon*)
   fun extend_lexicon lexicon chrss =
     let
       (*insert one literal; the complete literal is stored at the node of
         its final item, nodes created on the way carry no_literal*)
       fun insert (lex, lit_chrs) =
         let
           fun add node [] = node
             | add Empty [c] =
                 Branch (c, lit_chrs, Empty, Empty, Empty)
             | add Empty (c :: cs) =
                 Branch (c, no_literal, Empty, add Empty cs, Empty)
             | add (Branch (d, a, lt, eq, gt)) (chs as c :: cs) =
                 if c < d then Branch (d, a, add lt chs, eq, gt)
                 else if c > d then Branch (d, a, lt, eq, add gt chs)
                 else Branch (d, if null cs then lit_chrs else a, lt, add eq cs, gt);
         in add lex lit_chrs end;
     in
       if null chrss then lexicon
       else foldl insert (lexicon, chrss \\ dest_lex lexicon)
     end;

   (*build a lexicon from scratch*)
   fun make_lexicon chrss = extend_lexicon empty_lexicon chrss;
   279 
   (*merge two lexicons; if the literals of one are contained in the other,
     the larger lexicon is returned unchanged, otherwise the literals of
     lex2 are added to lex1*)
   fun merge_lexicons lex1 lex2 =
     let
       val lits1 = dest_lex lex1;
       val lits2 = dest_lex lex2;
       fun covers (big, small) = small subset big;
     in
       if covers (lits1, lits2) then lex1
       else if covers (lits2, lits1) then lex2
       else extend_lexicon lex1 lits2
     end;
   289 
   290 
   291 (* scan literal *)
   292 
   (*scan the longest literal of the lexicon that matches the head of the
     input.  MORE is raised if the input ends while the lexicon still offers
     continuations, FAIL if no literal matches at all.*)
   fun literal lex chrs =
     let
       (*descend the tree, remembering the last complete literal passed
         together with the input remaining after it*)
       fun walk Empty best _ = best
         | walk (Branch _) _ [] = raise MORE None
         | walk (Branch (d, a, lt, eq, gt)) best (input as c :: rest) =
             if c < d then walk lt best input
             else if c > d then walk gt best input
             else walk eq (if a = no_literal then best else Some (a, rest)) rest;
     in
       (case walk lex None chrs of
         Some found => found
       | None => raise FAIL None)
     end;
   306 
   307 
   308 end;
   309 
   310 
   (*restrict Scan to the basic combinators of BASIC_SCAN and open that view,
     so that they can be used unqualified in the code that follows*)
   structure BasicScan: BASIC_SCAN = Scan;
   open BasicScan;