1 (* Title: Pure/General/scan.ML
3 Author: Markus Wenzel and Tobias Nipkow, TU Muenchen
4 License: GPL (GNU GENERAL PUBLIC LICENSE)
6 Generic scanners (for potentially infinite input).
9 infix 5 -- :-- |-- --| ^^;
13 signature BASIC_SCAN =
15 val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b
16 val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c
17 val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b
18 val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
19 val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e
20 val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e
21 val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e
22 val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c
23 val $$ : ''a -> ''a list -> ''a * ''a list
30 val fail_with: ('a -> string) -> 'a -> 'b
31 val succeed: 'a -> 'b -> 'a * 'b
32 val one: ('a -> bool) -> 'a list -> 'a * 'a list
33 val any: ('a -> bool) -> 'a list -> 'a list * 'a list
34 val any1: ('a -> bool) -> 'a list -> 'a list * 'a list
35 val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a
36 val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a
37 val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
38 val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
39 val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b
40 val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a
41 val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd
42 val first: ('a -> 'b) list -> 'a -> 'b
43 val state: 'a * 'b -> 'a * ('a * 'b)
44 val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e)
45 val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c)
46 val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e
47 val try: ('a -> 'b) -> 'a -> 'b
48 val force: ('a -> 'b) -> 'a -> 'b
49 val prompt: string -> ('a -> 'b) -> 'a -> 'b
50 val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list))
51 -> 'b * 'a list -> 'c * ('d * 'a list)
52 val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list
53 val read: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b option
54 val catch: ('a -> 'b) -> 'a -> 'b
55 val error: ('a -> 'b) -> 'a -> 'b
56 val source': string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
57 'b * ('b -> bool) -> ('d * 'b list -> 'e list * ('d * 'b list)) ->
58 ('d * 'b list -> 'e list * ('d * 'b list)) option -> 'd * 'a -> 'e list * ('d * 'c)
59 val source: string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) ->
60 'b * ('b -> bool) -> ('b list -> 'd list * 'b list) ->
61 ('b list -> 'd list * 'b list) option -> 'a -> 'd list * 'c
62 val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
63 val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a
65 val dest_lexicon: lexicon -> string list
66 val make_lexicon: string list list -> lexicon
67 val empty_lexicon: lexicon
68 val extend_lexicon: lexicon -> string list list -> lexicon
69 val merge_lexicons: lexicon -> lexicon -> lexicon
70 val literal: lexicon -> string list -> string list * string list
73 structure Scan: SCAN =
79 exception MORE of string option; (*need more input (prompt)*)
80 exception FAIL of string option; (*try alternatives (reason of failure)*)
81 exception ABORT of string; (*dead end*)
84 (* scanner combinators *)
86 fun (scan >> f) xs = apfst f (scan xs);
88 fun (scan1 || scan2) xs = scan1 xs handle FAIL _ => scan2 xs;
91 fun (scan1 :-- scan2) xs =
93 val (x, ys) = scan1 xs;
94 val (y, zs) = scan2 x ys;
97 fun (scan1 -- scan2) = scan1 :-- (fn _ => scan2);
98 fun (scan1 |-- scan2) = scan1 -- scan2 >> #2;
99 fun (scan1 --| scan2) = scan1 -- scan2 >> #1;
100 fun (scan1 ^^ scan2) = scan1 -- scan2 >> op ^;
103 (* generic scanners *)
105 fun fail _ = raise FAIL None;
106 fun fail_with msg_of xs = raise FAIL (Some (msg_of xs));
107 fun succeed y xs = (y, xs);
109 fun one _ [] = raise MORE None
110 | one pred (x :: xs) =
111 if pred x then (x, xs) else raise FAIL None;
113 fun $$ _ [] = raise MORE None
115 if a = x then (x, xs) else raise FAIL None;
117 fun any _ [] = raise MORE None
118 | any pred (lst as x :: xs) =
119 if pred x then apfst (cons x) (any pred xs)
122 fun any1 pred = one pred -- any pred >> op ::;
124 fun optional scan def = scan || succeed def;
125 fun option scan = optional (scan >> Some) None;
127 fun repeat scan xs = (scan -- repeat scan >> op :: || succeed []) xs;
128 fun repeat1 scan = scan -- repeat scan >> op ::;
130 fun max leq scan1 scan2 xs =
131 (case (option scan1 xs, option scan2 xs) of
132 ((None, _), (None, _)) => raise FAIL None (*looses FAIL msg!*)
133 | ((Some tok1, xs'), (None, _)) => (tok1, xs')
134 | ((None, _), (Some tok2, xs')) => (tok2, xs')
135 | ((Some tok1, xs1'), (Some tok2, xs2')) =>
136 if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2'));
138 fun ahead scan xs = (fst (scan xs), xs);
140 fun unless test scan =
141 ahead (option test) :-- (fn None => scan | _ => fail) >> #2;
144 | first (scan :: scans) = scan || first scans;
147 (* state based scanners *)
149 fun state (st, xs) = (st, (st, xs));
151 fun depend scan (st, xs) =
152 let val ((st', y), xs') = scan st xs
153 in (y, (st', xs')) end;
155 fun lift scan (st, xs) =
156 let val (y, xs') = scan xs
157 in (y, (st, xs')) end;
159 fun pass st scan xs =
160 let val (y, (_, xs')) = scan (st, xs)
164 (* exception handling *)
166 fun !! err scan xs = scan xs handle FAIL msg => raise ABORT (err (xs, msg));
167 fun try scan xs = scan xs handle MORE _ => raise FAIL None | ABORT _ => raise FAIL None;
168 fun force scan xs = scan xs handle MORE _ => raise FAIL None;
169 fun prompt str scan xs = scan xs handle MORE None => raise MORE (Some str);
170 fun catch scan xs = scan xs handle ABORT msg => raise FAIL (Some msg);
171 fun error scan xs = scan xs handle ABORT msg => Library.error msg;
176 fun finite' (stopper, is_stopper) scan (state, input) =
178 fun lost () = raise ABORT "Scanner bug: lost stopper of finite scan!";
180 fun stop [] = lost ()
182 let val (xs, x) = split_last lst
183 in if is_stopper x then ((), xs) else lost () end;
185 if exists is_stopper input then
186 raise ABORT "Stopper may not occur in input of finite scan!"
187 else (force scan --| lift stop) (state, input @ [stopper])
190 fun finite stopper scan xs =
191 let val (y, ((), xs')) = finite' stopper (lift scan) ((), xs)
194 fun read stopper scan xs =
195 (case error (finite stopper (option scan)) xs of
196 (y as Some _, []) => y
200 (* infinite scans -- draining state-based source *)
202 fun drain def_prmpt get stopper scan ((state, xs), src) =
203 (scan (state, xs), src) handle MORE prmpt =>
204 (case get (if_none prmpt def_prmpt) src of
205 ([], _) => (finite' stopper scan (state, xs), src)
206 | (xs', src') => drain def_prmpt get stopper scan ((state, xs @ xs'), src'));
208 fun source' def_prmpt get unget stopper scanner opt_recover (state, src) =
210 val drain_with = drain def_prmpt get stopper;
212 fun drain_loop recover inp =
213 drain_with (catch scanner) inp handle FAIL msg =>
214 (error_msg (if_none msg "Syntax error."); drain_with recover inp);
216 val ((ys, (state', xs')), src') =
217 (case (get def_prmpt src, opt_recover) of
218 (([], s), _) => (([], (state, [])), s)
219 | ((xs, s), None) => drain_with (error scanner) ((state, xs), s)
220 | ((xs, s), Some r) => drain_loop (unless (lift (one (#2 stopper))) r) ((state, xs), s));
221 in (ys, (state', unget (xs', src'))) end;
223 fun source def_prmpt get unget stopper scan opt_recover src =
224 let val (ys, ((), src')) =
225 source' def_prmpt get unget stopper (lift scan) (apsome lift opt_recover) ((), src)
228 fun single scan = scan >> (fn x => [x]);
229 fun bulk scan = scan -- repeat (try scan) >> (op ::);
233 (** datatype lexicon **)
237 Branch of string * string list * lexicon * lexicon * lexicon;
244 fun dest_lex Empty = []
245 | dest_lex (Branch (_, [], lt, eq, gt)) =
246 dest_lex lt @ dest_lex eq @ dest_lex gt
247 | dest_lex (Branch (_, cs, lt, eq, gt)) =
248 dest_lex lt @ [cs] @ dest_lex eq @ dest_lex gt;
250 val dest_lexicon = map implode o dest_lex;
253 (* empty, extend, make, merge lexicons *)
255 val empty_lexicon = Empty;
257 fun extend_lexicon lexicon [] = lexicon
258 | extend_lexicon lexicon chrss =
260 fun ext (lex, chrs) =
262 fun add (Branch (d, a, lt, eq, gt)) (chs as c :: cs) =
263 if c < d then Branch (d, a, add lt chs, eq, gt)
264 else if c > d then Branch (d, a, lt, eq, add gt chs)
265 else Branch (d, if null cs then chrs else a, lt, add eq cs, gt)
267 Branch (c, chrs, Empty, Empty, Empty)
268 | add Empty (c :: cs) =
269 Branch (c, no_literal, Empty, add Empty cs, Empty)
272 in foldl ext (lexicon, chrss \\ dest_lex lexicon) end;
274 val make_lexicon = extend_lexicon empty_lexicon;
276 fun merge_lexicons lex1 lex2 =
278 val chss1 = dest_lex lex1;
279 val chss2 = dest_lex lex2;
281 if chss2 subset chss1 then lex1
282 else if chss1 subset chss2 then lex2
283 else extend_lexicon lex1 chss2
289 fun literal lex chrs =
291 fun lit Empty res _ = res
292 | lit (Branch _) _ [] = raise MORE None
293 | lit (Branch (d, a, lt, eq, gt)) res (chs as c :: cs) =
294 if c < d then lit lt res chs
295 else if c > d then lit gt res chs
296 else lit eq (if a = no_literal then res else Some (a, cs)) cs;
298 (case lit lex None chrs of
299 None => raise FAIL None
307 structure BasicScan: BASIC_SCAN = Scan;