|
1 (* Title: Pure/Syntax/scan.ML |
|
2 ID: $Id$ |
|
3 Author: Markus Wenzel and Tobias Nipkow, TU Muenchen |
|
4 |
|
5 Generic scanners (for potentially infinite input). |
|
6 *) |
|
7 |
|
8 infix 5 -- :-- |-- --| ^^; |
|
9 infix 3 >>; |
|
10 infix 0 ||; |
|
11 |
|
12 signature BASIC_SCAN = |
|
13 sig |
|
14 val !! : ('a * string option -> string) -> ('a -> 'b) -> 'a -> 'b |
|
15 val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c |
|
16 val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b |
|
17 val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e |
|
18 val :-- : ('a -> 'b * 'c) * ('b -> 'c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e |
|
19 val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e |
|
20 val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e |
|
21 val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c |
|
22 val $$ : ''a -> ''a list -> ''a * ''a list |
|
23 end; |
|
24 |
|
25 signature SCAN = |
|
26 sig |
|
27 include BASIC_SCAN |
|
28 val fail: 'a -> 'b |
|
29 val fail_with: ('a -> string) -> 'a -> 'b |
|
30 val succeed: 'a -> 'b -> 'a * 'b |
|
31 val one: ('a -> bool) -> 'a list -> 'a * 'a list |
|
32 val any: ('a -> bool) -> 'a list -> 'a list * 'a list |
|
33 val any1: ('a -> bool) -> 'a list -> 'a list * 'a list |
|
34 val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a |
|
35 val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a |
|
36 val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a |
|
37 val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a |
|
38 val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b |
|
39 val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a |
|
40 val unless: ('a -> 'b * 'a) -> ('a -> 'c * 'd) -> 'a -> 'c * 'd |
|
41 val first: ('a -> 'b) list -> 'a -> 'b |
|
42 val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e) |
|
43 val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c) |
|
44 val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e |
|
45 val try: ('a -> 'b) -> 'a -> 'b |
|
46 val force: ('a -> 'b) -> 'a -> 'b |
|
47 val prompt: string -> ('a -> 'b) -> 'a -> 'b |
|
48 val finite': 'a * ('a -> bool) -> ('b * 'a list -> 'c * ('d * 'a list)) |
|
49 -> 'b * 'a list -> 'c * ('d * 'a list) |
|
50 val finite: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b * 'a list |
|
51 val read: 'a * ('a -> bool) -> ('a list -> 'b * 'a list) -> 'a list -> 'b option |
|
52 val catch: ('a -> 'b) -> 'a -> 'b |
|
53 val error: ('a -> 'b) -> 'a -> 'b |
|
54 val source': string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) -> |
|
55 'b * ('b -> bool) -> ('d * 'b list -> 'e list * ('d * 'b list)) -> |
|
56 ('d * 'b list -> 'f * ('d * 'b list)) option -> 'd * 'a -> 'e list * ('d * 'c) |
|
57 val source: string -> (string -> 'a -> 'b list * 'a) -> ('b list * 'a -> 'c) -> |
|
58 'b * ('b -> bool) -> ('b list -> 'd list * 'b list) -> |
|
59 ('b list -> 'e * 'b list) option -> 'a -> 'd list * 'c |
|
60 val single: ('a -> 'b * 'a) -> 'a -> 'b list * 'a |
|
61 val bulk: ('a -> 'b * 'a) -> 'a -> 'b list * 'a |
|
62 type lexicon |
|
63 val dest_lexicon: lexicon -> string list list |
|
64 val make_lexicon: string list list -> lexicon |
|
65 val empty_lexicon: lexicon |
|
66 val extend_lexicon: lexicon -> string list list -> lexicon |
|
67 val merge_lexicons: lexicon -> lexicon -> lexicon |
|
68 val literal: lexicon -> string list -> string list * string list |
|
69 end; |
|
70 |
|
71 structure Scan: SCAN = |
|
72 struct |
|
73 |
|
74 |
|
75 (** scanners **) |
|
76 |
|
77 exception MORE of string option; (*need more input (prompt)*) |
|
78 exception FAIL of string option; (*try alternatives (reason of failure)*) |
|
79 exception ABORT of string; (*dead end*) |
|
80 |
|
81 |
|
82 (* scanner combinators *) |
|
83 |
|
84 fun (scan >> f) xs = apfst f (scan xs); |
|
85 |
|
86 fun (scan1 || scan2) xs = scan1 xs handle FAIL _ => scan2 xs; |
|
87 |
|
88 (*dependent pairing*) |
|
89 fun (scan1 :-- scan2) xs = |
|
90 let |
|
91 val (x, ys) = scan1 xs; |
|
92 val (y, zs) = scan2 x ys; |
|
93 in ((x, y), zs) end; |
|
94 |
|
95 fun (scan1 -- scan2) = scan1 :-- (fn _ => scan2); |
|
96 fun (scan1 |-- scan2) = scan1 -- scan2 >> #2; |
|
97 fun (scan1 --| scan2) = scan1 -- scan2 >> #1; |
|
98 fun (scan1 ^^ scan2) = scan1 -- scan2 >> op ^; |
|
99 |
|
100 |
|
101 (* generic scanners *) |
|
102 |
|
103 fun fail _ = raise FAIL None; |
|
104 fun fail_with msg_of xs = raise FAIL (Some (msg_of xs)); |
|
105 fun succeed y xs = (y, xs); |
|
106 |
|
107 fun one _ [] = raise MORE None |
|
108 | one pred (x :: xs) = |
|
109 if pred x then (x, xs) else raise FAIL None; |
|
110 |
|
111 fun $$ _ [] = raise MORE None |
|
112 | $$ a (x :: xs) = |
|
113 if a = x then (x, xs) else raise FAIL None; |
|
114 |
|
115 fun any _ [] = raise MORE None |
|
116 | any pred (lst as x :: xs) = |
|
117 if pred x then apfst (cons x) (any pred xs) |
|
118 else ([], lst); |
|
119 |
|
120 fun any1 pred = one pred -- any pred >> op ::; |
|
121 |
|
122 fun optional scan def = scan || succeed def; |
|
123 fun option scan = optional (scan >> Some) None; |
|
124 |
|
125 fun repeat scan xs = (scan -- repeat scan >> op :: || succeed []) xs; |
|
126 fun repeat1 scan = scan -- repeat scan >> op ::; |
|
127 |
|
128 fun max leq scan1 scan2 xs = |
|
129 (case (option scan1 xs, option scan2 xs) of |
|
130 ((None, _), (None, _)) => raise FAIL None (*looses FAIL msg!*) |
|
131 | ((Some tok1, xs'), (None, _)) => (tok1, xs') |
|
132 | ((None, _), (Some tok2, xs')) => (tok2, xs') |
|
133 | ((Some tok1, xs1'), (Some tok2, xs2')) => |
|
134 if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2')); |
|
135 |
|
136 fun ahead scan xs = (fst (scan xs), xs); |
|
137 |
|
138 fun unless test scan = |
|
139 ahead (option test) :-- (fn None => scan | _ => fail) >> #2; |
|
140 |
|
141 fun first [] = fail |
|
142 | first (scan :: scans) = scan || first scans; |
|
143 |
|
144 |
|
145 (* state based scanners *) |
|
146 |
|
147 fun depend scan (st, xs) = |
|
148 let val ((st', y), xs') = scan st xs |
|
149 in (y, (st', xs')) end; |
|
150 |
|
151 fun lift scan (st, xs) = |
|
152 let val (y, xs') = scan xs |
|
153 in (y, (st, xs')) end; |
|
154 |
|
155 fun pass st scan xs = |
|
156 let val (y, (_, xs')) = scan (st, xs) |
|
157 in (y, xs') end; |
|
158 |
|
159 |
|
160 (* exception handling *) |
|
161 |
|
162 fun !! err scan xs = scan xs handle FAIL msg => raise ABORT (err (xs, msg)); |
|
163 fun try scan xs = scan xs handle MORE _ => raise FAIL None | ABORT _ => raise FAIL None; |
|
164 fun force scan xs = scan xs handle MORE _ => raise FAIL None; |
|
165 fun prompt str scan xs = scan xs handle MORE None => raise MORE (Some str); |
|
166 fun catch scan xs = scan xs handle ABORT msg => raise FAIL (Some msg); |
|
167 fun error scan xs = scan xs handle ABORT msg => Library.error msg; |
|
168 |
|
169 |
|
170 (* finite scans *) |
|
171 |
|
172 fun finite' (stopper, is_stopper) scan (state, input) = |
|
173 let |
|
174 fun lost () = raise ABORT "Scanner bug: lost stopper of finite scan!"; |
|
175 |
|
176 fun stop [] = lost () |
|
177 | stop lst = |
|
178 let val (xs, x) = split_last lst |
|
179 in if is_stopper x then ((), xs) else lost () end; |
|
180 in |
|
181 if exists is_stopper input then |
|
182 raise ABORT "Stopper may not occur in input of finite scan!" |
|
183 else (force scan --| lift stop) (state, input @ [stopper]) |
|
184 end; |
|
185 |
|
186 fun finite stopper scan xs = |
|
187 let val (y, ((), xs')) = finite' stopper (lift scan) ((), xs) |
|
188 in (y, xs') end; |
|
189 |
|
190 fun read stopper scan xs = |
|
191 (case error (finite stopper (option scan)) xs of |
|
192 (y as Some _, []) => y |
|
193 | _ => None); |
|
194 |
|
195 |
|
196 |
|
197 (* infinite scans -- draining state-based source *) |
|
198 |
|
199 fun drain def_prmpt get stopper scan ((state, xs), src) = |
|
200 (scan (state, xs), src) handle MORE prmpt => |
|
201 (case get (if_none prmpt def_prmpt) src of |
|
202 ([], _) => (finite' stopper scan (state, xs), src) |
|
203 | (xs', src') => drain def_prmpt get stopper scan ((state, xs @ xs'), src')); |
|
204 |
|
205 fun source' def_prmpt get unget stopper scanner opt_recover (state, src) = |
|
206 let |
|
207 fun drain_with scan = drain def_prmpt get stopper scan; |
|
208 |
|
209 fun drain_loop recover inp = |
|
210 drain_with (catch scanner) inp handle FAIL msg => |
|
211 (error_msg (if_none msg "Syntax error."); |
|
212 drain_loop recover (apfst snd (drain_with recover inp))); |
|
213 |
|
214 val ((ys, (state', xs')), src') = |
|
215 (case (get def_prmpt src, opt_recover) of |
|
216 (([], s), _) => (([], (state, [])), s) |
|
217 | ((xs, s), None) => drain_with (error scanner) ((state, xs), s) |
|
218 | ((xs, s), Some scan) => drain_loop scan ((state, xs), s)); |
|
219 in |
|
220 (ys, (state', unget (xs', src'))) |
|
221 end; |
|
222 |
|
223 fun source def_prmpt get unget stopper scan opt_recover src = |
|
224 let val (ys, ((), src')) = |
|
225 source' def_prmpt get unget stopper (lift scan) (apsome lift opt_recover) ((), src) |
|
226 in (ys, src') end; |
|
227 |
|
228 fun single scan = scan >> (fn x => [x]); |
|
229 fun bulk scan = scan -- repeat (try scan) >> (op ::); |
|
230 |
|
231 |
|
232 |
|
233 (** datatype lexicon **) |
|
234 |
|
235 datatype lexicon = |
|
236 Empty | |
|
237 Branch of string * string list * lexicon * lexicon * lexicon; |
|
238 |
|
239 val no_literal = []; |
|
240 |
|
241 |
|
242 (* dest_lexicon *) |
|
243 |
|
244 fun dest_lexicon Empty = [] |
|
245 | dest_lexicon (Branch (_, [], lt, eq, gt)) = |
|
246 dest_lexicon lt @ dest_lexicon eq @ dest_lexicon gt |
|
247 | dest_lexicon (Branch (_, cs, lt, eq, gt)) = |
|
248 dest_lexicon lt @ [cs] @ dest_lexicon eq @ dest_lexicon gt; |
|
249 |
|
250 |
|
251 (* empty, extend, make, merge lexicons *) |
|
252 |
|
253 val empty_lexicon = Empty; |
|
254 |
|
255 fun extend_lexicon lexicon chrss = |
|
256 let |
|
257 fun ext (lex, chrs) = |
|
258 let |
|
259 fun add (Branch (d, a, lt, eq, gt)) (chs as c :: cs) = |
|
260 if c < d then Branch (d, a, add lt chs, eq, gt) |
|
261 else if c > d then Branch (d, a, lt, eq, add gt chs) |
|
262 else Branch (d, if null cs then chrs else a, lt, add eq cs, gt) |
|
263 | add Empty [c] = |
|
264 Branch (c, chrs, Empty, Empty, Empty) |
|
265 | add Empty (c :: cs) = |
|
266 Branch (c, no_literal, Empty, add Empty cs, Empty) |
|
267 | add lex [] = lex; |
|
268 in add lex chrs end; |
|
269 in foldl ext (lexicon, chrss \\ dest_lexicon lexicon) end; |
|
270 |
|
271 val make_lexicon = extend_lexicon empty_lexicon; |
|
272 |
|
273 fun merge_lexicons lex1 lex2 = |
|
274 let |
|
275 val chss1 = dest_lexicon lex1; |
|
276 val chss2 = dest_lexicon lex2; |
|
277 in |
|
278 if chss2 subset chss1 then lex1 |
|
279 else if chss1 subset chss2 then lex2 |
|
280 else extend_lexicon lex1 chss2 |
|
281 end; |
|
282 |
|
283 |
|
284 (* scan literal *) |
|
285 |
|
286 fun literal lex chrs = |
|
287 let |
|
288 fun lit Empty res _ = res |
|
289 | lit (Branch _) _ [] = raise MORE None |
|
290 | lit (Branch (d, a, lt, eq, gt)) res (chs as c :: cs) = |
|
291 if c < d then lit lt res chs |
|
292 else if c > d then lit gt res chs |
|
293 else lit eq (if a = no_literal then res else Some (a, cs)) cs; |
|
294 in |
|
295 (case lit lex None chrs of |
|
296 None => raise FAIL None |
|
297 | Some res => res) |
|
298 end; |
|
299 |
|
300 |
|
301 end; |
|
302 |
|
303 |
|
304 structure BasicScan: BASIC_SCAN = Scan; |
|
305 open BasicScan; |