0
|
1 |
(* Title: Lexicon
|
|
2 |
ID: $Id$
|
|
3 |
Author: Tobias Nipkow, TU Muenchen
|
|
4 |
Copyright 1993 TU Muenchen
|
|
5 |
|
|
6 |
The Isabelle Lexer
|
|
7 |
|
|
8 |
Changes:
|
|
9 |
TODO:
|
|
10 |
Lexicon -> lexicon, Token -> token
|
|
11 |
end_token -> EndToken ?
|
|
12 |
*)
|
|
13 |
|
|
14 |
signature LEXICON0 =
|
|
15 |
sig
|
|
16 |
val is_identifier: string -> bool
|
|
17 |
val scan_varname: string list -> indexname * string list
|
|
18 |
val string_of_vname: indexname -> string
|
|
19 |
end;
|
|
20 |
|
|
21 |
signature LEXICON =
|
|
22 |
sig
|
|
23 |
type Lexicon
|
|
24 |
datatype Token = Token of string
|
|
25 |
| IdentSy of string
|
|
26 |
| VarSy of string * int
|
|
27 |
| TFreeSy of string
|
|
28 |
| TVarSy of string * int
|
|
29 |
| end_token;
|
|
30 |
val empty: Lexicon
|
|
31 |
val extend: Lexicon * string list -> Lexicon
|
|
32 |
val matching_tokens: Token * Token -> bool
|
|
33 |
val mk_lexicon: string list -> Lexicon
|
|
34 |
val name_of_token: Token -> string
|
|
35 |
val predef_term: string -> Token
|
|
36 |
val is_terminal: string -> bool
|
|
37 |
val tokenize: Lexicon -> string -> Token list
|
|
38 |
val token_to_string: Token -> string
|
|
39 |
val valued_token: Token -> bool
|
|
40 |
type 'b TokenMap
|
|
41 |
val mkTokenMap: ('b * Token list) list -> 'b TokenMap
|
|
42 |
val applyTokenMap: 'b TokenMap * Token -> 'b list
|
|
43 |
include LEXICON0
|
|
44 |
end;
|
|
45 |
|
|
46 |
functor LexiconFun(Extension: EXTENSION): LEXICON =
|
|
47 |
struct
|
|
48 |
|
|
49 |
open Extension;
|
|
50 |
|
|
51 |
|
|
52 |
datatype Token = Token of string
|
|
53 |
| IdentSy of string
|
|
54 |
| VarSy of string * int
|
|
55 |
| TFreeSy of string
|
|
56 |
| TVarSy of string * int
|
|
57 |
| end_token;
|
|
58 |
val no_token = "";
|
|
59 |
|
|
60 |
datatype dfa = Tip | Branch of string * string * dfa * dfa * dfa;
|
|
61 |
|
|
62 |
type Lexicon = dfa;
|
|
63 |
|
|
64 |
fun is_id(c::cs) = is_letter(c) andalso forall is_letdig cs
|
|
65 |
| is_id([]) = false;
|
|
66 |
|
|
67 |
fun is_identifier s = is_id(explode s);
|
|
68 |
|
|
69 |
val empty = Tip;
|
|
70 |
|
|
71 |
fun extend1(dfa, s) =
|
|
72 |
let fun add(Branch(c, a, less, eq, gr), cs as (d::ds)) =
|
|
73 |
if c>d then Branch(c, a, add(less, cs), eq, gr) else
|
|
74 |
if c<d then Branch(c, a, less, eq, add(gr, cs)) else
|
|
75 |
Branch(c, if null ds then s else a, less, add(eq, ds), gr)
|
|
76 |
| add(Tip, c::cs) =
|
|
77 |
Branch(c, if null cs then s else no_token, Tip, add(Tip, cs), Tip)
|
|
78 |
| add(dfa, []) = dfa
|
|
79 |
|
|
80 |
in add(dfa, explode s) end;
|
|
81 |
|
|
82 |
val extend = foldl extend1;
|
|
83 |
|
|
84 |
fun mk_lexicon ss = extend(empty, ss);
|
|
85 |
|
|
86 |
fun next_dfa (Tip, _) = None
|
|
87 |
| next_dfa (Branch(d, a, less, eq, gr), c) =
|
|
88 |
if c<d then next_dfa(less, c) else
|
|
89 |
if c>d then next_dfa(gr, c) else Some(a, eq);
|
|
90 |
|
|
91 |
exception ID of string * string list;
|
|
92 |
|
|
93 |
val eof_id = "End of input - identifier expected.\n";
|
|
94 |
|
|
95 |
(*A string of letters, digits, or ' _ *)
|
|
96 |
fun xscan_ident exn =
|
|
97 |
let fun scan [] = raise exn(eof_id, [])
|
|
98 |
| scan(c::cs) =
|
|
99 |
if is_letter c
|
|
100 |
then let val (ds, tail) = take_prefix is_letdig cs
|
|
101 |
in (implode(c::ds), tail) end
|
|
102 |
else raise exn("Identifier expected: ", c::cs)
|
|
103 |
in scan end;
|
|
104 |
|
|
105 |
(*Scan the offset of a Var, if present; otherwise ~1 *)
|
|
106 |
fun scan_offset cs = case cs of
|
|
107 |
("."::[]) => (~1, cs)
|
|
108 |
| ("."::(ds as c::cs')) => if is_digit c then scan_int ds else (~1, cs)
|
|
109 |
| _ => (~1, cs);
|
|
110 |
|
|
111 |
fun split_varname s =
|
|
112 |
let val (rpost, rpref) = take_prefix is_digit (rev(explode s))
|
|
113 |
val (i, _) = scan_int(rev rpost)
|
|
114 |
in (implode(rev rpref), i) end;
|
|
115 |
|
|
116 |
fun xscan_varname exn cs : (string*int) * string list =
|
|
117 |
let val (a, ds) = xscan_ident exn cs;
|
|
118 |
val (i, es) = scan_offset ds
|
|
119 |
in if i = ~1 then (split_varname a, es) else ((a, i), es) end;
|
|
120 |
|
|
121 |
fun scan_varname s = xscan_varname ID s
|
|
122 |
handle ID(err, cs) => error(err^(implode cs));
|
|
123 |
|
|
124 |
fun tokenize (dfa) (s:string) : Token list =
|
|
125 |
let exception LEX_ERR;
|
|
126 |
exception FAIL of string * string list;
|
|
127 |
val lexerr = "Lexical error: ";
|
|
128 |
|
|
129 |
fun tokenize1 (_:dfa, []:string list) : Token * string list =
|
|
130 |
raise LEX_ERR
|
|
131 |
| tokenize1(dfa, c::cs) =
|
|
132 |
case next_dfa(dfa, c) of
|
|
133 |
None => raise LEX_ERR
|
|
134 |
| Some(a, dfa') =>
|
|
135 |
if a=no_token then tokenize1(dfa', cs)
|
|
136 |
else (tokenize1(dfa', cs) handle LEX_ERR =>
|
|
137 |
if is_identifier a andalso not(null cs) andalso
|
|
138 |
is_letdig(hd cs)
|
|
139 |
then raise LEX_ERR else (Token(a), cs));
|
|
140 |
|
|
141 |
fun token(cs) = tokenize1(dfa, cs) handle LEX_ERR => raise FAIL(lexerr, cs);
|
|
142 |
|
|
143 |
fun id([]) = raise FAIL(eof_id, [])
|
|
144 |
| id(cs as c::cs') =
|
|
145 |
if is_letter(c)
|
|
146 |
then let val (id, cs'') = xscan_ident FAIL cs
|
|
147 |
in (IdentSy(id), cs'') end
|
|
148 |
else
|
|
149 |
if c = "?"
|
|
150 |
then case cs' of
|
|
151 |
"'"::xs => let val ((a, i), ys) = xscan_varname FAIL xs
|
|
152 |
in (TVarSy("'"^a, i), ys) end
|
|
153 |
| _ => let val ((a, i), ys) = xscan_varname FAIL cs'
|
|
154 |
in (VarSy(a, i), ys) end
|
|
155 |
else
|
|
156 |
if c = "'"
|
|
157 |
then let val (a, cs'') = xscan_ident FAIL cs'
|
|
158 |
in (TFreeSy("'" ^ a), cs'') end
|
|
159 |
else raise FAIL(lexerr, cs);
|
|
160 |
|
|
161 |
fun tknize([], ts) = rev(ts)
|
|
162 |
| tknize(cs as c::cs', ts) =
|
|
163 |
if is_blank(c) then tknize(cs', ts) else
|
|
164 |
let val (t, cs'') =
|
|
165 |
if c="?" then id(cs) handle FAIL _ => token(cs)
|
|
166 |
else (token(cs) handle FAIL _ => id(cs))
|
|
167 |
in tknize(cs'', t::ts) end
|
|
168 |
|
|
169 |
in tknize(explode s, []) handle FAIL(s, cs) => error(s^(implode cs)) end;
|
|
170 |
|
|
171 |
(*Variables have the form ?nameidx, or ?name.idx if "name" ends with a digit*)
|
|
172 |
fun string_of_vname (a, idx) = case rev(explode a) of
|
|
173 |
[] => "?NULL_VARIABLE_NAME"
|
|
174 |
| c::_ => "?" ^
|
|
175 |
(if is_digit c then a ^ "." ^ string_of_int idx
|
|
176 |
else if idx = 0 then a
|
|
177 |
else a ^ string_of_int idx);
|
|
178 |
|
|
179 |
fun token_to_string (Token(s)) = s
|
|
180 |
| token_to_string (IdentSy(s)) = s
|
|
181 |
| token_to_string (VarSy v) = string_of_vname v
|
|
182 |
| token_to_string (TFreeSy a) = a
|
|
183 |
| token_to_string (TVarSy v) = string_of_vname v
|
|
184 |
| token_to_string end_token = "\n";
|
|
185 |
|
|
186 |
(* MMW *)
|
|
187 |
fun name_of_token (Token s) = quote s
|
|
188 |
| name_of_token (IdentSy _) = id
|
|
189 |
| name_of_token (VarSy _) = var
|
|
190 |
| name_of_token (TFreeSy _) = tfree
|
|
191 |
| name_of_token (TVarSy _) = tvar;
|
|
192 |
|
|
193 |
fun matching_tokens(Token(i), Token(j)) = (i=j) |
|
|
194 |
matching_tokens(IdentSy(_), IdentSy(_)) = true |
|
|
195 |
matching_tokens(VarSy _, VarSy _) = true |
|
|
196 |
matching_tokens(TFreeSy _, TFreeSy _) = true |
|
|
197 |
matching_tokens(TVarSy _, TVarSy _) = true |
|
|
198 |
matching_tokens(end_token, end_token) = true |
|
|
199 |
matching_tokens(_, _) = false;
|
|
200 |
|
|
201 |
fun valued_token(end_token) = false
|
|
202 |
| valued_token(Token _) = false
|
|
203 |
| valued_token(IdentSy _) = true
|
|
204 |
| valued_token(VarSy _) = true
|
|
205 |
| valued_token(TFreeSy _) = true
|
|
206 |
| valued_token(TVarSy _) = true;
|
|
207 |
|
|
208 |
(* MMW *)
|
|
209 |
fun predef_term name =
|
|
210 |
if name = id then IdentSy name
|
|
211 |
else if name = var then VarSy (name, 0)
|
|
212 |
else if name = tfree then TFreeSy name
|
|
213 |
else if name = tvar then TVarSy (name, 0)
|
|
214 |
else end_token;
|
|
215 |
|
|
216 |
(* MMW *)
|
|
217 |
fun is_terminal s = s mem [id, var, tfree, tvar];
|
|
218 |
|
|
219 |
|
|
220 |
|
|
221 |
type 'b TokenMap = (Token * 'b list) list * 'b list;
|
|
222 |
val first_token = 0;
|
|
223 |
|
|
224 |
fun int_of_token(Token(tk)) = first_token |
|
|
225 |
int_of_token(IdentSy _) = first_token - 1 |
|
|
226 |
int_of_token(VarSy _) = first_token - 2 |
|
|
227 |
int_of_token(TFreeSy _) = first_token - 3 |
|
|
228 |
int_of_token(TVarSy _) = first_token - 4 |
|
|
229 |
int_of_token(end_token) = first_token - 5;
|
|
230 |
|
|
231 |
fun lesstk(s, t) = int_of_token(s) < int_of_token(t) orelse
|
|
232 |
(case (s, t) of (Token(a), Token(b)) => a<b | _ => false);
|
|
233 |
|
|
234 |
fun mkTokenMap(atll) =
|
|
235 |
let val aill = atll;
|
|
236 |
val dom = sort lesstk (distinct(flat(map snd aill)));
|
|
237 |
val mt = map fst (filter (null o snd) atll);
|
|
238 |
fun mktm(i) =
|
|
239 |
let fun add(l, (a, il)) = if i mem il then a::l else l
|
|
240 |
in (i, foldl add ([], aill)) end;
|
|
241 |
in (map mktm dom, mt) end;
|
|
242 |
|
|
243 |
fun find_al (i) =
|
|
244 |
let fun find((j, al)::l) = if lesstk(i, j) then [] else
|
|
245 |
if matching_tokens(i, j) then al else find l |
|
|
246 |
find [] = [];
|
|
247 |
in find end;
|
|
248 |
fun applyTokenMap((l, mt), tk:Token) = mt@(find_al tk l);
|
|
249 |
|
|
250 |
|
|
251 |
end;
|
|
252 |
|