src/Pure/General/scan.ML
author kleing
Mon Jun 21 10:25:57 2004 +0200 (2004-06-21)
changeset 14981 e73f8140af78
parent 14955 08ee855c1d94
child 15531 08c8dad8e399
permissions -rw-r--r--
Merged in license change from Isabelle2004
     1 (*  Title:      Pure/General/scan.ML
     2     ID:         $Id$
     3     Author:     Markus Wenzel and Tobias Nipkow, TU Muenchen
     4 
     5 Generic scanners (for potentially infinite input).
     6 *)
     7 
     8 infix 5 -- :-- |-- --| ^^;
     9 infix 3 >>;
    10 infix 0 ||;
    11 
    12 signature BASIC_SCAN =
    13 sig
    14   (*error msg handler*)
    15   val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b
    16   (*apply function*)
    17   val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
    18   (*alternative*)
    19   val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
    20   (*sequential pairing*)
    21   val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
    22   (*dependent pairing*)
    23   val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
    24   (*forget fst*)
    25   val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
    26   (*forget snd*)
    27   val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
    28   (*concatenation*)
    29   val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
    30   (*one element literal*)
    31   val $$ : ''a -> ''a list -> ''a * ''a list
    32 end;
    33 
    34 signature SCAN =
    35 sig
    36   include BASIC_SCAN
    37   val fail: 'a -> 'b
    38   val fail_with: ('a -> string) -> 'a -> 'b
    39   val succeed: 'a -> 'b -> 'a * 'b
    40   val this: ''a list -> ''a list -> ''a list * ''a list
    41   val this_string: string -> string list -> string * string list
    42   val one: ('a -> bool) -> 'a list -> 'a * 'a list
    43   val any: ('a -> bool) -> 'a list -> 'a list * 'a list
    44   val any1: ('a -> bool) -> 'a list -> 'a list * 'a list
    45   val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
    46   val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
    47   val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    48   val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    49   val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
    50   val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
    51   val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
    52   val first: ('a -> 'b) list -> 'a -> 'b
    53   val trace: ('a list -> 'b * 'c list) -> 'a list -> ('b * 'a list) * 'c list
    54   val state: 'a * 'b -> 'a * ('a * 'b)
    55   val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
    56   val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
    57   val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
    58   val try: ('a -> 'b) -> 'a -> 'b
    59   val force: ('a -> 'b) -> 'a -> 'b
    60   val prompt: string -> ('a -> 'b) -> 'a -> 'b
    61   val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list))
    62     -> 'b * 'a list -> 'c * ('d * 'a list)
    63   val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
    64   val read: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
    65   val catch: ('a -> 'b) -> 'a -> 'b
    66   val error: ('a -> 'b) -> 'a -> 'b
    67   val source': string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
    68     'b * ('b -> bool) -> ('d * 'b list -> 'e list * ('d * 'b list)) ->
    69     ('d * 'b list -> 'e list * ('d * 'b list)) option -> 'd * 'a -> 'e list * ('d * 'c)
    70   val source: string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
    71     'b * ('b -> bool) -> ('b list -> 'd list * 'b list) ->
    72     ('b list -> 'd list * 'b list) option -> 'a -> 'd list * 'c
    73   val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    74   val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
    75   type lexicon
    76   val dest_lexicon: lexicon -> string list
    77   val make_lexicon: string list list -> lexicon
    78   val empty_lexicon: lexicon
    79   val extend_lexicon: lexicon -> string list list -> lexicon
    80   val merge_lexicons: lexicon -> lexicon -> lexicon
    81   val is_literal: lexicon -> string list -> bool
    82   val literal: lexicon -> string list -> string list * string list
    83 end;
    84 
    85 structure Scan: SCAN =
    86 struct
    87 
    88 
    89 (** scanners **)
    90 
    91 exception MORE of string option;        (*need more input (prompt)*)
    92 exception FAIL of string option;        (*try alternatives (reason of failure)*)
    93 exception ABORT of string;              (*dead end*)
    94 
    95 
    96 (* scanner combinators *)
    97 
    98 (*dependent pairing*)
    99 fun (sc1 :-- sc2) toks =
   100   let
   101     val (x, toks2) = sc1 toks
   102     val (y, toks3) = sc2 x toks2
   103   in ((x, y), toks3) end;
   104 
   105 (*sequential pairing*)
   106 fun (sc1 -- sc2) toks =
   107   let
   108     val (x, toks2) = sc1 toks
   109     val (y, toks3) = sc2 toks2
   110   in ((x, y), toks3) end;
   111 
   112 (*application*)
   113 fun (sc >> f) toks =
   114   let val (x, toks2) = sc toks
   115   in (f x, toks2) end;
   116 
   117 (*forget snd*)
   118 fun (sc1 --| sc2) toks =
   119   let
   120     val (x, toks2) = sc1 toks
   121     val (_, toks3) = sc2 toks2
   122   in (x, toks3) end;
   123 
   124 (*forget fst*)
   125 fun (sc1 |-- sc2) toks =
   126   let val (_, toks2) = sc1 toks
   127   in sc2 toks2 end;
   128 
   129 (*concatenation*)
   130 fun (sc1 ^^ sc2) toks =
   131   let
   132     val (x, toks2) = sc1 toks
   133     val (y, toks3) = sc2 toks2
   134   in (x ^ y, toks3) end;
   135 
   136 (*alternative*)
   137 fun (scan1 || scan2) xs = scan1 xs handle FAIL _ => scan2 xs;
   138 
   139 
   140 (* generic scanners *)
   141 
   142 fun fail _ = raise FAIL None;
   143 fun fail_with msg_of xs = raise FAIL (Some (msg_of xs));
   144 fun succeed y xs = (y, xs);
   145 
   146 fun one _ [] = raise MORE None
   147   | one pred (x :: xs) =
   148       if pred x then (x, xs) else raise FAIL None;
   149 
   150 fun $$ _ [] = raise MORE None
   151   | $$ a (x :: xs) =
   152       if a = x then (x, xs) else raise FAIL None;
   153 
   154 fun this ys xs =
   155   let
   156     fun drop_prefix [] xs = xs
   157       | drop_prefix (_ :: _) [] = raise MORE None
   158       | drop_prefix (y :: ys) (x :: xs) =
   159           if y = x then drop_prefix ys xs else raise FAIL None;
   160   in (ys, drop_prefix ys xs) end;
   161 
   162 fun this_string s = this (explode s) >> K s;  (*primitive string -- no symbols yet!*)
   163 
   164 fun any _ [] = raise MORE None
   165   | any pred (lst as x :: xs) =
   166       if pred x then apfst (cons x) (any pred xs)
   167       else ([], lst);
   168 
   169 fun any1 p toks =
   170   let
   171     val (x, toks2) = one p toks
   172     val (xs,toks3) = any p toks2
   173   in (x :: xs, toks3) end;
   174 
   175 fun optional scan def =  scan || succeed def
   176 fun option scan = (scan >> Some) || succeed None
   177 
   178 fun repeat scan =
   179   let fun rep ys xs = (case (Some (scan xs) handle FAIL _ => None) of
   180     None => (rev ys, xs) | Some (y, xs') => rep (y :: ys) xs')
   181   in rep [] end;
   182 
   183 fun repeat1 scan toks =
   184   let
   185     val (x, toks2) = scan toks
   186     val (xs, toks3) = repeat scan toks2
   187   in (x :: xs, toks3) end;
   188 
   189 fun max leq scan1 scan2 xs =
   190   (case (option scan1 xs, option scan2 xs) of
   191     ((None, _), (None, _)) => raise FAIL None           (*looses FAIL msg!*)
   192   | ((Some tok1, xs'), (None, _)) => (tok1, xs')
   193   | ((None, _), (Some tok2, xs')) => (tok2, xs')
   194   | ((Some tok1, xs1'), (Some tok2, xs2')) =>
   195       if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2'));
   196 
   197 fun ahead scan xs = (fst (scan xs), xs);
   198 
   199 fun unless test scan =
   200   ahead (option test) :-- (fn None => scan | _ => fail) >> #2;
   201 
   202 fun first [] = fail
   203   | first (scan :: scans) = scan || first scans;
   204 
   205 fun trace scan toks =
   206   let val (x, toks') = scan toks
   207   in ((x, take (length toks - length toks', toks)), toks') end;
   208 
   209 
   210 (* state based scanners *)
   211 
   212 fun state (st, xs) = (st, (st, xs));
   213 
   214 fun depend scan (st, xs) =
   215   let val ((st', y), xs') = scan st xs
   216   in (y, (st', xs')) end;
   217 
   218 fun lift scan (st, xs) =
   219   let val (y, xs') = scan xs
   220   in (y, (st, xs')) end;
   221 
   222 fun pass st scan xs =
   223   let val (y, (_, xs')) = scan (st, xs)
   224   in (y, xs') end;
   225 
   226 
   227 (* exception handling *)
   228 
   229 fun !! err scan xs = scan xs handle FAIL msg => raise ABORT (err (xs, msg));
   230 fun try scan xs = scan xs handle MORE _ => raise FAIL None | ABORT _ => raise FAIL None;
   231 fun force scan xs = scan xs handle MORE _ => raise FAIL None;
   232 fun prompt str scan xs = scan xs handle MORE None => raise MORE (Some str);
   233 fun catch scan xs = scan xs handle ABORT msg => raise FAIL (Some msg);
   234 fun error scan xs = scan xs handle ABORT msg => Output.error msg;
   235 
   236 
   237 (* finite scans *)
   238 
   239 fun finite' (stopper, is_stopper) scan (state, input) =
   240   let
   241     fun lost () = raise ABORT "Scanner bug: lost stopper of finite scan!";
   242 
   243     fun stop [] = lost ()
   244       | stop lst =
   245           let val (xs, x) = split_last lst
   246           in if is_stopper x then ((), xs) else lost () end;
   247   in
   248     if exists is_stopper input then
   249       raise ABORT "Stopper may not occur in input of finite scan!"
   250     else (force scan --| lift stop) (state, rev_append (rev input) [stopper])
   251   end;
   252 
   253 fun finite stopper scan xs =
   254   let val (y, ((), xs')) = finite' stopper (lift scan) ((), xs)
   255   in (y, xs') end;
   256 
   257 fun read stopper scan xs =
   258   (case error (finite stopper (option scan)) xs of
   259     (y as Some _, []) => y
   260   | _ => None);
   261 
   262 
   263 (* infinite scans -- draining state-based source *)
   264 
   265 fun drain def_prmpt get stopper scan ((state, xs), src) =
   266   (scan (state, xs), src) handle MORE prmpt =>
   267     (case get (if_none prmpt def_prmpt) src of
   268       ([], _) => (finite' stopper scan (state, xs), src)
   269     | (xs', src') => drain def_prmpt get stopper scan ((state, xs @ xs'), src'));
   270 
   271 fun source' def_prmpt get unget stopper scanner opt_recover (state, src) =
   272   let
   273     val drain_with = drain def_prmpt get stopper;
   274 
   275     fun drain_loop recover inp =
   276       drain_with (catch scanner) inp handle FAIL msg =>
   277         (error_msg (if_none msg "Syntax error."); drain_with recover inp);
   278 
   279     val ((ys, (state', xs')), src') =
   280       (case (get def_prmpt src, opt_recover) of
   281         (([], s), _) => (([], (state, [])), s)
   282       | ((xs, s), None) => drain_with (error scanner) ((state, xs), s)
   283       | ((xs, s), Some r) => drain_loop (unless (lift (one (#2 stopper))) r) ((state, xs), s));
   284   in (ys, (state', unget (xs', src'))) end;
   285 
   286 fun source def_prmpt get unget stopper scan opt_recover src =
   287   let val (ys, ((), src')) =
   288     source' def_prmpt get unget stopper (lift scan) (apsome lift opt_recover) ((), src)
   289   in (ys, src') end;
   290 
   291 fun single scan = scan >> (fn x => [x]);
   292 fun bulk scan = scan -- repeat (try scan) >> (op ::);
   293 
   294 
   295 
   296 (** datatype lexicon **)
   297 
   298 datatype lexicon =
   299   Empty |
   300   Branch of string * string list * lexicon * lexicon * lexicon;
   301 
   302 val no_literal = [];
   303 
   304 
   305 (* dest_lexicon *)
   306 
   307 fun dest_lex Empty = []
   308   | dest_lex (Branch (_, [], lt, eq, gt)) =
   309       dest_lex lt @ dest_lex eq @ dest_lex gt
   310   | dest_lex (Branch (_, cs, lt, eq, gt)) =
   311       dest_lex lt @ [cs] @ dest_lex eq @ dest_lex gt;
   312 
   313 val dest_lexicon = map implode o dest_lex;
   314 
   315 
   316 (* empty, extend, make, merge lexicons *)
   317 
   318 val empty_lexicon = Empty;
   319 
   320 fun extend_lexicon lexicon [] = lexicon
   321   | extend_lexicon lexicon chrss =
   322       let
   323         fun ext (lex, chrs) =
   324           let
   325             fun add (Branch (d, a, lt, eq, gt)) (chs as c :: cs) =
   326                   (case String.compare (c, d) of
   327                     LESS => Branch (d, a, add lt chs, eq, gt)
   328                   | EQUAL => Branch (d, if null cs then chrs else a, lt, add eq cs, gt)
   329                   | GREATER => Branch (d, a, lt, eq, add gt chs))
   330               | add Empty [c] =
   331                   Branch (c, chrs, Empty, Empty, Empty)
   332               | add Empty (c :: cs) =
   333                   Branch (c, no_literal, Empty, add Empty cs, Empty)
   334               | add lex [] = lex;
   335           in add lex chrs end;
   336       in foldl ext (lexicon, chrss \\ dest_lex lexicon) end;
   337 
   338 val make_lexicon = extend_lexicon empty_lexicon;
   339 
   340 fun merge_lexicons lex1 lex2 =
   341   let
   342     val chss1 = dest_lex lex1;
   343     val chss2 = dest_lex lex2;
   344   in
   345     if chss2 subset chss1 then lex1
   346     else if chss1 subset chss2 then lex2
   347     else extend_lexicon lex1 chss2
   348   end;
   349 
   350 
   351 (* is_literal *)
   352 
   353 fun is_literal Empty _ = false
   354   | is_literal _ [] = false
   355   | is_literal (Branch (d, a, lt, eq, gt)) (chs as c :: cs) =
   356       (case String.compare (c, d) of
   357         LESS => is_literal lt chs
   358       | EQUAL => a <> no_literal andalso null cs orelse is_literal eq cs
   359       | GREATER => is_literal gt chs);
   360 
   361 
   362 (* scan literal *)
   363 
   364 fun literal lex chrs =
   365   let
   366     fun lit Empty res _ = res
   367       | lit (Branch _) _ [] = raise MORE None
   368       | lit (Branch (d, a, lt, eq, gt)) res (chs as c :: cs) =
   369           (case String.compare (c, d) of
   370             LESS => lit lt res chs
   371           | EQUAL => lit eq (if a = no_literal then res else Some (a, cs)) cs
   372           | GREATER => lit gt res chs);
   373   in
   374     (case lit lex None chrs of
   375       None => raise FAIL None
   376     | Some res => res)
   377   end;
   378 
   379 end;
   380 
   381 structure BasicScan: BASIC_SCAN = Scan;
   382 open BasicScan;