author | wenzelm |
Mon, 09 Mar 1998 16:12:19 +0100 | |
changeset 4702 | ffbaf431665d |
child 4756 | 329c09e15991 |
permissions | -rw-r--r-- |
4702
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
1 |
(* Title: Pure/Syntax/scan.ML |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
2 |
ID: $Id$ |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
3 |
Author: Markus Wenzel and Tobias Nipkow, TU Muenchen |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
4 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
5 |
Generic scanners (for potentially infinite input). |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
6 |
*) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
7 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
8 |
infix 5 -- |-- --| ^^; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
9 |
infix 3 >>; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
10 |
infix 0 ||; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
11 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
12 |
signature BASIC_SCAN = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
13 |
sig |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
14 |
val !! : ('a -> string) -> ('a -> 'b) -> 'a -> 'b |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
15 |
val >> : ('a -> 'b * 'c) * ('b -> 'd) -> 'a -> 'd * 'c |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
16 |
val || : ('a -> 'b) * ('a -> 'b) -> 'a -> 'b |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
17 |
val -- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
18 |
val |-- : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'd * 'e |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
19 |
val --| : ('a -> 'b * 'c) * ('c -> 'd * 'e) -> 'a -> 'b * 'e |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
20 |
val ^^ : ('a -> string * 'b) * ('b -> string * 'c) -> 'a -> string * 'c |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
21 |
val $$ : ''a -> ''a list -> ''a * ''a list |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
22 |
end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
23 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
24 |
signature SCAN = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
25 |
sig |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
26 |
include BASIC_SCAN |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
27 |
val fail: 'a -> 'b |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
28 |
val succeed: 'a -> 'b -> 'a * 'b |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
29 |
val one: ('a -> bool) -> 'a list -> 'a * 'a list |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
30 |
val any: ('a -> bool) -> 'a list -> 'a list * 'a list |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
31 |
val any1: ('a -> bool) -> 'a list -> 'a list * 'a list |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
32 |
val optional: ('a -> 'b * 'a) -> 'b -> 'a -> 'b * 'a |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
33 |
val option: ('a -> 'b * 'a) -> 'a -> 'b option * 'a |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
34 |
val repeat: ('a -> 'b * 'a) -> 'a -> 'b list * 'a |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
35 |
val repeat1: ('a -> 'b * 'a) -> 'a -> 'b list * 'a |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
36 |
val max: ('a * 'a -> bool) -> ('b -> 'a * 'b) -> ('b -> 'a * 'b) -> 'b -> 'a * 'b |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
37 |
val ahead: ('a -> 'b * 'c) -> 'a -> 'b * 'a |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
38 |
val depend: ('a -> 'b -> ('c * 'd) * 'e) -> 'a * 'b -> 'd * ('c * 'e) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
39 |
val lift: ('a -> 'b * 'c) -> 'd * 'a -> 'b * ('d * 'c) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
40 |
val pass: 'a -> ('a * 'b -> 'c * ('d * 'e)) -> 'b -> 'c * 'e |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
41 |
val force: ('a -> 'b) -> 'a -> 'b |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
42 |
val finite': ''a -> ('b * ''a list -> 'c * ('d * ''a list)) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
43 |
-> 'b * ''a list -> 'c * ('d * ''a list) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
44 |
val finite: ''a -> (''a list -> 'b * ''a list) -> ''a list -> 'b * ''a list |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
45 |
val error: ('a -> 'b) -> 'a -> 'b |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
46 |
val source: ('a -> 'b list * 'a) -> ('b list * 'a -> 'c) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
47 |
-> ('b list -> 'd * 'b list) -> 'a -> 'd list * 'c |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
48 |
type lexicon |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
49 |
val dest_lexicon: lexicon -> string list list |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
50 |
val make_lexicon: string list list -> lexicon |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
51 |
val empty_lexicon: lexicon |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
52 |
val extend_lexicon: lexicon -> string list list -> lexicon |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
53 |
val merge_lexicons: lexicon -> lexicon -> lexicon |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
54 |
val literal: lexicon -> string list -> string list * string list |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
55 |
end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
56 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
57 |
structure Scan: SCAN = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
58 |
struct |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
59 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
60 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
61 |
(** scanners **) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
62 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
63 |
exception MORE; (*need more input*) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
64 |
exception FAIL; (*try alternatives*) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
65 |
exception ABORT of string; (*dead end*) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
66 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
67 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
68 |
(* scanner combinators *) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
69 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
70 |
fun (scan >> f) xs = apfst f (scan xs); |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
71 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
72 |
fun (scan1 || scan2) xs = scan1 xs handle FAIL => scan2 xs; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
73 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
74 |
fun (scan1 -- scan2) xs = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
75 |
let |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
76 |
val (x, ys) = scan1 xs; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
77 |
val (y, zs) = scan2 ys; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
78 |
in ((x, y), zs) end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
79 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
80 |
fun (scan1 |-- scan2) = scan1 -- scan2 >> #2; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
81 |
fun (scan1 --| scan2) = scan1 -- scan2 >> #1; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
82 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
83 |
fun (scan1 ^^ scan2) = scan1 -- scan2 >> op ^; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
84 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
85 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
86 |
(* generic scanners *) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
87 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
88 |
fun fail _ = raise FAIL; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
89 |
fun succeed y xs = (y, xs); |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
90 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
91 |
fun one _ [] = raise MORE |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
92 |
| one pred (x :: xs) = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
93 |
if pred x then (x, xs) else raise FAIL; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
94 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
95 |
fun $$ _ [] = raise MORE |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
96 |
| $$ a (x :: xs) = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
97 |
if a = x then (x, xs) else raise FAIL; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
98 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
99 |
fun any _ [] = raise MORE |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
100 |
| any pred (lst as x :: xs) = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
101 |
if pred x then apfst (cons x) (any pred xs) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
102 |
else ([], lst); |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
103 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
104 |
fun any1 pred = one pred -- any pred >> op ::; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
105 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
106 |
fun optional scan def = scan || succeed def; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
107 |
fun option scan = optional (scan >> Some) None; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
108 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
109 |
fun repeat scan xs = (scan -- repeat scan >> op :: || succeed []) xs; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
110 |
fun repeat1 scan = scan -- repeat scan >> op ::; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
111 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
112 |
fun max leq scan1 scan2 xs = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
113 |
(case (option scan1 xs, option scan2 xs) of |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
114 |
((None, _), (None, _)) => raise FAIL |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
115 |
| ((Some tok1, xs'), (None, _)) => (tok1, xs') |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
116 |
| ((None, _), (Some tok2, xs')) => (tok2, xs') |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
117 |
| ((Some tok1, xs1'), (Some tok2, xs2')) => |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
118 |
if leq (tok2, tok1) then (tok1, xs1') else (tok2, xs2')); |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
119 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
120 |
fun ahead scan xs = (fst (scan xs), xs); |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
121 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
122 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
123 |
(* state based scanners *) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
124 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
125 |
fun depend scan (st, xs) = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
126 |
let val ((st', y), xs') = scan st xs |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
127 |
in (y, (st', xs')) end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
128 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
129 |
fun lift scan (st, xs) = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
130 |
let val (y, xs') = scan xs |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
131 |
in (y, (st, xs')) end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
132 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
133 |
fun pass st scan xs = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
134 |
let val (y, (_, xs')) = scan (st, xs) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
135 |
in (y, xs') end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
136 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
137 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
138 |
(* exception handling *) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
139 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
140 |
fun !! err scan xs = scan xs handle FAIL => raise ABORT (err xs); |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
141 |
fun force scan xs = scan xs handle MORE => raise FAIL; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
142 |
fun error scan xs = scan xs handle ABORT msg => Library.error msg; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
143 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
144 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
145 |
(* finite scans *) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
146 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
147 |
fun finite' stopper scan (state, input) = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
148 |
let |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
149 |
fun lost () = raise ABORT "Scanner bug: lost stopper of finite scan!"; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
150 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
151 |
fun stop [] = lost () |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
152 |
| stop lst = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
153 |
let val (xs, x) = split_last lst |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
154 |
in if x = stopper then ((), xs) else lost () end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
155 |
in |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
156 |
if exists (equal stopper) input then |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
157 |
raise ABORT "Stopper may not occur in input of finite scan!" |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
158 |
else (force scan --| lift stop) (state, input @ [stopper]) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
159 |
end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
160 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
161 |
fun finite stopper scan xs = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
162 |
let val (y, ((), xs')) = finite' stopper (lift scan) ((), xs) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
163 |
in (y, xs') end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
164 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
165 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
166 |
(* infinite scans *) (* FIXME state based!? *) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
167 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
168 |
fun source get unget scan src = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
169 |
let |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
170 |
fun try_more x = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
171 |
scan x handle MORE => raise FAIL | ABORT _ => raise FAIL; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
172 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
173 |
fun drain (xs, s) = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
174 |
(((scan -- repeat try_more) >> op ::) xs, s) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
175 |
handle MORE => |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
176 |
let val (more_xs, s') = get s in |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
177 |
if null more_xs then raise ABORT "Input source exhausted" |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
178 |
else drain (xs @ more_xs, s') |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
179 |
end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
180 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
181 |
val ((ys, xs'), src') = drain (get src); |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
182 |
in (ys, unget (xs', src')) end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
183 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
184 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
185 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
186 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
187 |
(** datatype lexicon **) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
188 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
189 |
datatype lexicon = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
190 |
Empty | |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
191 |
Branch of string * string list * lexicon * lexicon * lexicon; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
192 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
193 |
val no_literal = []; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
194 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
195 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
196 |
(* dest_lexicon *) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
197 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
198 |
fun dest_lexicon Empty = [] |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
199 |
| dest_lexicon (Branch (_, [], lt, eq, gt)) = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
200 |
dest_lexicon lt @ dest_lexicon eq @ dest_lexicon gt |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
201 |
| dest_lexicon (Branch (_, cs, lt, eq, gt)) = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
202 |
dest_lexicon lt @ [cs] @ dest_lexicon eq @ dest_lexicon gt; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
203 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
204 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
205 |
(* empty, extend, make, merge lexicons *) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
206 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
207 |
val empty_lexicon = Empty; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
208 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
209 |
fun extend_lexicon lexicon chrss = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
210 |
let |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
211 |
fun ext (lex, chrs) = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
212 |
let |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
213 |
fun add (Branch (d, a, lt, eq, gt)) (chs as c :: cs) = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
214 |
if c < d then Branch (d, a, add lt chs, eq, gt) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
215 |
else if c > d then Branch (d, a, lt, eq, add gt chs) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
216 |
else Branch (d, if null cs then chrs else a, lt, add eq cs, gt) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
217 |
| add Empty [c] = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
218 |
Branch (c, chrs, Empty, Empty, Empty) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
219 |
| add Empty (c :: cs) = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
220 |
Branch (c, no_literal, Empty, add Empty cs, Empty) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
221 |
| add lex [] = lex; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
222 |
in add lex chrs end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
223 |
in foldl ext (lexicon, chrss \\ dest_lexicon lexicon) end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
224 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
225 |
val make_lexicon = extend_lexicon empty_lexicon; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
226 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
227 |
fun merge_lexicons lex1 lex2 = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
228 |
let |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
229 |
val chss1 = dest_lexicon lex1; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
230 |
val chss2 = dest_lexicon lex2; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
231 |
in |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
232 |
if chss2 subset chss1 then lex1 |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
233 |
else if chss1 subset chss2 then lex2 |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
234 |
else extend_lexicon lex1 chss2 |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
235 |
end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
236 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
237 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
238 |
(* scan literal *) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
239 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
240 |
fun literal lex chrs = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
241 |
let |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
242 |
fun lit Empty res _ = res |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
243 |
| lit (Branch _) _ [] = raise MORE |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
244 |
| lit (Branch (d, a, lt, eq, gt)) res (chs as c :: cs) = |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
245 |
if c < d then lit lt res chs |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
246 |
else if c > d then lit gt res chs |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
247 |
else lit eq (if a = no_literal then res else Some (a, cs)) cs; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
248 |
in |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
249 |
(case lit lex None chrs of |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
250 |
None => raise FAIL |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
251 |
| Some res => res) |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
252 |
end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
253 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
254 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
255 |
end; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
256 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
257 |
|
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
258 |
structure BasicScan: BASIC_SCAN = Scan; |
ffbaf431665d
Generic scanners (for potentially infinite input) -- replaces Scanner;
wenzelm
parents:
diff
changeset
|
259 |
open BasicScan; |