|
1 (* Title: Lexicon |
|
2 ID: $Id$ |
|
3 Author: Tobias Nipkow, TU Muenchen |
|
4 Copyright 1993 TU Muenchen |
|
5 |
|
6 The Isabelle Lexer |
|
7 |
|
8 Changes: |
|
9 TODO: |
|
10 Lexicon -> lexicon, Token -> token |
|
11 end_token -> EndToken ? |
|
12 *) |
|
13 |
|
14 signature LEXICON0 = |
|
15 sig |
|
16 val is_identifier: string -> bool |
|
17 val scan_varname: string list -> indexname * string list |
|
18 val string_of_vname: indexname -> string |
|
19 end; |
|
20 |
|
21 signature LEXICON = |
|
22 sig |
|
23 type Lexicon |
|
24 datatype Token = Token of string |
|
25 | IdentSy of string |
|
26 | VarSy of string * int |
|
27 | TFreeSy of string |
|
28 | TVarSy of string * int |
|
29 | end_token; |
|
30 val empty: Lexicon |
|
31 val extend: Lexicon * string list -> Lexicon |
|
32 val matching_tokens: Token * Token -> bool |
|
33 val mk_lexicon: string list -> Lexicon |
|
34 val name_of_token: Token -> string |
|
35 val predef_term: string -> Token |
|
36 val is_terminal: string -> bool |
|
37 val tokenize: Lexicon -> string -> Token list |
|
38 val token_to_string: Token -> string |
|
39 val valued_token: Token -> bool |
|
40 type 'b TokenMap |
|
41 val mkTokenMap: ('b * Token list) list -> 'b TokenMap |
|
42 val applyTokenMap: 'b TokenMap * Token -> 'b list |
|
43 include LEXICON0 |
|
44 end; |
|
45 |
|
46 functor LexiconFun(Extension: EXTENSION): LEXICON = |
|
47 struct |
|
48 |
|
49 open Extension; |
|
50 |
|
51 |
|
52 datatype Token = Token of string |
|
53 | IdentSy of string |
|
54 | VarSy of string * int |
|
55 | TFreeSy of string |
|
56 | TVarSy of string * int |
|
57 | end_token; |
|
58 val no_token = ""; |
|
59 |
|
60 datatype dfa = Tip | Branch of string * string * dfa * dfa * dfa; |
|
61 |
|
62 type Lexicon = dfa; |
|
63 |
|
64 fun is_id(c::cs) = is_letter(c) andalso forall is_letdig cs |
|
65 | is_id([]) = false; |
|
66 |
|
67 fun is_identifier s = is_id(explode s); |
|
68 |
|
69 val empty = Tip; |
|
70 |
|
71 fun extend1(dfa, s) = |
|
72 let fun add(Branch(c, a, less, eq, gr), cs as (d::ds)) = |
|
73 if c>d then Branch(c, a, add(less, cs), eq, gr) else |
|
74 if c<d then Branch(c, a, less, eq, add(gr, cs)) else |
|
75 Branch(c, if null ds then s else a, less, add(eq, ds), gr) |
|
76 | add(Tip, c::cs) = |
|
77 Branch(c, if null cs then s else no_token, Tip, add(Tip, cs), Tip) |
|
78 | add(dfa, []) = dfa |
|
79 |
|
80 in add(dfa, explode s) end; |
|
81 |
|
82 val extend = foldl extend1; |
|
83 |
|
84 fun mk_lexicon ss = extend(empty, ss); |
|
85 |
|
86 fun next_dfa (Tip, _) = None |
|
87 | next_dfa (Branch(d, a, less, eq, gr), c) = |
|
88 if c<d then next_dfa(less, c) else |
|
89 if c>d then next_dfa(gr, c) else Some(a, eq); |
|
90 |
|
91 exception ID of string * string list; |
|
92 |
|
93 val eof_id = "End of input - identifier expected.\n"; |
|
94 |
|
95 (*A string of letters, digits, or ' _ *) |
|
96 fun xscan_ident exn = |
|
97 let fun scan [] = raise exn(eof_id, []) |
|
98 | scan(c::cs) = |
|
99 if is_letter c |
|
100 then let val (ds, tail) = take_prefix is_letdig cs |
|
101 in (implode(c::ds), tail) end |
|
102 else raise exn("Identifier expected: ", c::cs) |
|
103 in scan end; |
|
104 |
|
105 (*Scan the offset of a Var, if present; otherwise ~1 *) |
|
106 fun scan_offset cs = case cs of |
|
107 ("."::[]) => (~1, cs) |
|
108 | ("."::(ds as c::cs')) => if is_digit c then scan_int ds else (~1, cs) |
|
109 | _ => (~1, cs); |
|
110 |
|
111 fun split_varname s = |
|
112 let val (rpost, rpref) = take_prefix is_digit (rev(explode s)) |
|
113 val (i, _) = scan_int(rev rpost) |
|
114 in (implode(rev rpref), i) end; |
|
115 |
|
116 fun xscan_varname exn cs : (string*int) * string list = |
|
117 let val (a, ds) = xscan_ident exn cs; |
|
118 val (i, es) = scan_offset ds |
|
119 in if i = ~1 then (split_varname a, es) else ((a, i), es) end; |
|
120 |
|
121 fun scan_varname s = xscan_varname ID s |
|
122 handle ID(err, cs) => error(err^(implode cs)); |
|
123 |
|
124 fun tokenize (dfa) (s:string) : Token list = |
|
125 let exception LEX_ERR; |
|
126 exception FAIL of string * string list; |
|
127 val lexerr = "Lexical error: "; |
|
128 |
|
129 fun tokenize1 (_:dfa, []:string list) : Token * string list = |
|
130 raise LEX_ERR |
|
131 | tokenize1(dfa, c::cs) = |
|
132 case next_dfa(dfa, c) of |
|
133 None => raise LEX_ERR |
|
134 | Some(a, dfa') => |
|
135 if a=no_token then tokenize1(dfa', cs) |
|
136 else (tokenize1(dfa', cs) handle LEX_ERR => |
|
137 if is_identifier a andalso not(null cs) andalso |
|
138 is_letdig(hd cs) |
|
139 then raise LEX_ERR else (Token(a), cs)); |
|
140 |
|
141 fun token(cs) = tokenize1(dfa, cs) handle LEX_ERR => raise FAIL(lexerr, cs); |
|
142 |
|
143 fun id([]) = raise FAIL(eof_id, []) |
|
144 | id(cs as c::cs') = |
|
145 if is_letter(c) |
|
146 then let val (id, cs'') = xscan_ident FAIL cs |
|
147 in (IdentSy(id), cs'') end |
|
148 else |
|
149 if c = "?" |
|
150 then case cs' of |
|
151 "'"::xs => let val ((a, i), ys) = xscan_varname FAIL xs |
|
152 in (TVarSy("'"^a, i), ys) end |
|
153 | _ => let val ((a, i), ys) = xscan_varname FAIL cs' |
|
154 in (VarSy(a, i), ys) end |
|
155 else |
|
156 if c = "'" |
|
157 then let val (a, cs'') = xscan_ident FAIL cs' |
|
158 in (TFreeSy("'" ^ a), cs'') end |
|
159 else raise FAIL(lexerr, cs); |
|
160 |
|
161 fun tknize([], ts) = rev(ts) |
|
162 | tknize(cs as c::cs', ts) = |
|
163 if is_blank(c) then tknize(cs', ts) else |
|
164 let val (t, cs'') = |
|
165 if c="?" then id(cs) handle FAIL _ => token(cs) |
|
166 else (token(cs) handle FAIL _ => id(cs)) |
|
167 in tknize(cs'', t::ts) end |
|
168 |
|
169 in tknize(explode s, []) handle FAIL(s, cs) => error(s^(implode cs)) end; |
|
170 |
|
171 (*Variables have the form ?nameidx, or ?name.idx if "name" ends with a digit*) |
|
172 fun string_of_vname (a, idx) = case rev(explode a) of |
|
173 [] => "?NULL_VARIABLE_NAME" |
|
174 | c::_ => "?" ^ |
|
175 (if is_digit c then a ^ "." ^ string_of_int idx |
|
176 else if idx = 0 then a |
|
177 else a ^ string_of_int idx); |
|
178 |
|
179 fun token_to_string (Token(s)) = s |
|
180 | token_to_string (IdentSy(s)) = s |
|
181 | token_to_string (VarSy v) = string_of_vname v |
|
182 | token_to_string (TFreeSy a) = a |
|
183 | token_to_string (TVarSy v) = string_of_vname v |
|
184 | token_to_string end_token = "\n"; |
|
185 |
|
186 (* MMW *) |
|
187 fun name_of_token (Token s) = quote s |
|
188 | name_of_token (IdentSy _) = id |
|
189 | name_of_token (VarSy _) = var |
|
190 | name_of_token (TFreeSy _) = tfree |
|
191 | name_of_token (TVarSy _) = tvar; |
|
192 |
|
193 fun matching_tokens(Token(i), Token(j)) = (i=j) | |
|
194 matching_tokens(IdentSy(_), IdentSy(_)) = true | |
|
195 matching_tokens(VarSy _, VarSy _) = true | |
|
196 matching_tokens(TFreeSy _, TFreeSy _) = true | |
|
197 matching_tokens(TVarSy _, TVarSy _) = true | |
|
198 matching_tokens(end_token, end_token) = true | |
|
199 matching_tokens(_, _) = false; |
|
200 |
|
201 fun valued_token(end_token) = false |
|
202 | valued_token(Token _) = false |
|
203 | valued_token(IdentSy _) = true |
|
204 | valued_token(VarSy _) = true |
|
205 | valued_token(TFreeSy _) = true |
|
206 | valued_token(TVarSy _) = true; |
|
207 |
|
208 (* MMW *) |
|
209 fun predef_term name = |
|
210 if name = id then IdentSy name |
|
211 else if name = var then VarSy (name, 0) |
|
212 else if name = tfree then TFreeSy name |
|
213 else if name = tvar then TVarSy (name, 0) |
|
214 else end_token; |
|
215 |
|
216 (* MMW *) |
|
217 fun is_terminal s = s mem [id, var, tfree, tvar]; |
|
218 |
|
219 |
|
220 |
|
221 type 'b TokenMap = (Token * 'b list) list * 'b list; |
|
222 val first_token = 0; |
|
223 |
|
224 fun int_of_token(Token(tk)) = first_token | |
|
225 int_of_token(IdentSy _) = first_token - 1 | |
|
226 int_of_token(VarSy _) = first_token - 2 | |
|
227 int_of_token(TFreeSy _) = first_token - 3 | |
|
228 int_of_token(TVarSy _) = first_token - 4 | |
|
229 int_of_token(end_token) = first_token - 5; |
|
230 |
|
231 fun lesstk(s, t) = int_of_token(s) < int_of_token(t) orelse |
|
232 (case (s, t) of (Token(a), Token(b)) => a<b | _ => false); |
|
233 |
|
234 fun mkTokenMap(atll) = |
|
235 let val aill = atll; |
|
236 val dom = sort lesstk (distinct(flat(map snd aill))); |
|
237 val mt = map fst (filter (null o snd) atll); |
|
238 fun mktm(i) = |
|
239 let fun add(l, (a, il)) = if i mem il then a::l else l |
|
240 in (i, foldl add ([], aill)) end; |
|
241 in (map mktm dom, mt) end; |
|
242 |
|
243 fun find_al (i) = |
|
244 let fun find((j, al)::l) = if lesstk(i, j) then [] else |
|
245 if matching_tokens(i, j) then al else find l | |
|
246 find [] = []; |
|
247 in find end; |
|
248 fun applyTokenMap((l, mt), tk:Token) = mt@(find_al tk l); |
|
249 |
|
250 |
|
251 end; |
|
252 |