1 (* Title: Pure/General/symbol.ML |
1 (* Title: Pure/General/symbol.ML |
2 ID: $Id$ |
2 ID: $Id$ |
3 Author: Markus Wenzel, TU Muenchen |
3 Author: Markus Wenzel, TU Muenchen |
4 License: GPL (GNU GENERAL PUBLIC LICENSE) |
4 License: GPL (GNU GENERAL PUBLIC LICENSE) |
5 |
5 |
6 Generalized characters with infinitely many named symbols. |
6 Generalized characters with and infinite amount of named symbols. |
7 *) |
7 *) |
8 |
8 |
9 signature SYMBOL = |
9 signature SYMBOL = |
10 sig |
10 sig |
11 type symbol |
11 type symbol |
12 val space: symbol |
12 val space: symbol |
13 val spaces: int -> symbol |
13 val spaces: int -> symbol |
|
14 val is_char: symbol -> bool |
|
15 val is_symbolic: symbol -> bool |
|
16 val is_printable: symbol -> bool |
|
17 val eof: symbol |
|
18 val is_eof: symbol -> bool |
|
19 val not_eof: symbol -> bool |
|
20 val stopper: symbol * (symbol -> bool) |
14 val sync: symbol |
21 val sync: symbol |
15 val is_sync: symbol -> bool |
22 val is_sync: symbol -> bool |
16 val not_sync: symbol -> bool |
23 val not_sync: symbol -> bool |
17 val malformed: symbol |
24 val malformed: symbol |
18 val eof: symbol |
|
19 val is_eof: symbol -> bool |
|
20 val not_eof: symbol -> bool |
|
21 val stopper: symbol * (symbol -> bool) |
|
22 val is_ascii: symbol -> bool |
25 val is_ascii: symbol -> bool |
|
26 val is_ascii_letter: symbol -> bool |
|
27 val is_ascii_digit: symbol -> bool |
|
28 val is_ascii_quasi: symbol -> bool |
|
29 val is_ascii_blank: symbol -> bool |
|
30 datatype kind = Letter | Digit | Quasi | Blank | Other |
|
31 val kind: symbol -> kind |
23 val is_letter: symbol -> bool |
32 val is_letter: symbol -> bool |
24 val is_digit: symbol -> bool |
33 val is_digit: symbol -> bool |
25 val is_quasi: symbol -> bool |
34 val is_quasi: symbol -> bool |
|
35 val is_blank: symbol -> bool |
26 val is_quasi_letter: symbol -> bool |
36 val is_quasi_letter: symbol -> bool |
27 val is_letdig: symbol -> bool |
37 val is_letdig: symbol -> bool |
28 val is_blank: symbol -> bool |
|
29 val is_identifier: symbol -> bool |
|
30 val is_symbolic: symbol -> bool |
|
31 val is_printable: symbol -> bool |
|
32 val length: symbol list -> int |
|
33 val strip_blanks: string -> string |
|
34 val beginning: symbol list -> string |
38 val beginning: symbol list -> string |
|
39 val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a |
35 val scan_id: string list -> string * string list |
40 val scan_id: string list -> string * string list |
36 val scan: string list -> symbol * string list |
41 val scan: string list -> symbol * string list |
37 val scanner: string -> (symbol list -> 'a * symbol list) -> symbol list -> 'a |
|
38 val source: bool -> (string, 'a) Source.source -> |
42 val source: bool -> (string, 'a) Source.source -> |
39 (symbol, (string, 'a) Source.source) Source.source |
43 (symbol, (string, 'a) Source.source) Source.source |
40 val escape: string -> string |
|
41 val explode: string -> symbol list |
44 val explode: string -> symbol list |
|
45 val strip_blanks: string -> string |
|
46 val bump_init: string -> string |
42 val bump_string: string -> string |
47 val bump_string: string -> string |
|
48 val length: symbol list -> int |
43 val default_indent: string * int -> string |
49 val default_indent: string * int -> string |
44 val add_mode: string -> (string -> string * real) * (string * int -> string) -> unit |
50 val add_mode: string -> (string -> string * real) * (string * int -> string) -> unit |
45 val symbolsN: string |
51 val symbolsN: string |
46 val xsymbolsN: string |
52 val xsymbolsN: string |
47 val plain_output: string -> string |
53 val plain_output: string -> string |
48 val output: string -> string |
54 val output: string -> string |
49 val output_width: string -> string * real |
55 val output_width: string -> string * real |
50 val indent: string * int -> string |
56 val indent: string * int -> string |
51 val quote: string -> string |
|
52 val commas_quote: string list -> string |
|
53 end; |
57 end; |
54 |
58 |
55 structure Symbol: SYMBOL = |
59 structure Symbol: SYMBOL = |
56 struct |
60 struct |
57 |
61 |
58 |
62 (** type symbol **) |
59 (** generalized characters **) |
63 |
60 |
64 (*Symbols, which are considered the smallest entities of any Isabelle |
61 (*symbols, which are considered the smallest entities of any Isabelle |
|
62 string, may be of the following form: |
65 string, may be of the following form: |
|
66 |
63 (a) ASCII symbols: a |
67 (a) ASCII symbols: a |
64 (b) printable symbols: \<ident> |
68 (b) printable symbols: \<ident> |
65 (c) control symbols: \<^ident> |
69 (c) control symbols: \<^ident> |
66 (d) raw control symbols: \<^raw:...>, where "..." may be any printable |
70 (d) raw control symbols: \<^raw:...>, where "..." may be any printable |
67 character excluding ">" |
71 character excluding ">" |
68 |
72 |
69 output is subject to the print_mode variable (default: verbatim), |
73 Output is subject to the print_mode variable (default: verbatim), |
70 actual interpretation in display is up to front-end tools; |
74 actual interpretation in display is up to front-end tools. |
71 |
75 |
72 Symbols (b),(c) and (d) may optionally start with "\\" instead of just "\" |
76 Symbols (b),(c) and (d) may optionally start with "\\" instead of |
73 for compatibility with ML-strings of old style theory and ML-files and |
77 just "\" for compatibility with ML string literals (e.g. used in |
74 isa-ProofGeneral. The default output of these symbols will also start with "\\". |
78 old-style theory files and ML proof scripts). To be on the safe |
75 This is used by the Isar ML-command to convert Isabelle-strings to ML-strings, |
79 side, the default output of these symbols will also start with the |
76 before evaluation. |
80 double "\\". |
77 *) |
81 *) |
78 |
82 |
79 type symbol = string; |
83 type symbol = string; |
80 |
84 |
81 val space = " "; |
85 val space = " "; |
82 fun spaces k = Library.replicate_string k space; |
86 fun spaces k = Library.replicate_string k space; |
83 val sync = "\\<^sync>"; |
87 |
84 val malformed = "\\<^malformed>"; |
88 fun is_char s = size s = 1; |
|
89 |
|
90 fun is_symbolic s = |
|
91 String.isPrefix "\\<" s andalso not (String.isPrefix "\\<^" s); |
|
92 |
|
93 fun is_printable s = |
|
94 if is_char s then ord space <= ord s andalso ord s <= ord "~" |
|
95 else not (String.isPrefix "\\<^" s); |
|
96 |
|
97 |
|
98 (* input source control *) |
|
99 |
85 val eof = ""; |
100 val eof = ""; |
86 |
|
87 |
|
88 (* kinds *) |
|
89 |
|
90 fun is_sync s = s = sync; |
|
91 fun not_sync s = s <> sync; |
|
92 |
|
93 fun is_eof s = s = eof; |
101 fun is_eof s = s = eof; |
94 fun not_eof s = s <> eof; |
102 fun not_eof s = s <> eof; |
95 val stopper = (eof, is_eof); |
103 val stopper = (eof, is_eof); |
96 |
104 |
97 fun is_ascii s = size s = 1 andalso ord s < 128; |
105 val sync = "\\<^sync>"; |
|
106 fun is_sync s = s = sync; |
|
107 fun not_sync s = s <> sync; |
|
108 |
|
109 val malformed = "\\<^malformed>"; |
|
110 |
|
111 |
|
112 (* ascii symbols *) |
|
113 |
|
114 fun is_ascii s = is_char s andalso ord s < 128; |
|
115 |
|
116 fun is_ascii_letter s = |
|
117 is_char s andalso |
|
118 (ord "A" <= ord s andalso ord s <= ord "Z" orelse |
|
119 ord "a" <= ord s andalso ord s <= ord "z"); |
|
120 |
|
121 fun is_ascii_digit s = |
|
122 is_char s andalso ord "0" <= ord s andalso ord s <= ord "9"; |
|
123 |
|
124 fun is_ascii_quasi "_" = true |
|
125 | is_ascii_quasi "'" = true |
|
126 | is_ascii_quasi _ = false; |
|
127 |
|
128 val is_ascii_blank = |
|
129 fn " " => true | "\t" => true | "\r" => true | "\n" => true | "\^L" => true |
|
130 | _ => false; |
|
131 |
|
132 |
|
133 (* standard symbol kinds *) |
|
134 |
|
135 datatype kind = Letter | Digit | Quasi | Blank | Other; |
98 |
136 |
99 local |
137 local |
100 fun wrap s = "\\<" ^ s ^ ">" |
138 val symbol_kinds = Symtab.make |
101 |
139 [("\\<A>", Letter), |
102 val cal_letters = |
140 ("\\<B>", Letter), |
103 ["A","B","C","D","E","F","G","H","I","J","K","L","M", |
141 ("\\<C>", Letter), |
104 "N","O","P","Q","R","S","T","U","V","W","X","Y","Z"] |
142 ("\\<D>", Letter), |
105 |
143 ("\\<E>", Letter), |
106 val small_letters = |
144 ("\\<F>", Letter), |
107 ["a","b","c","d","e","f","g","h","i","j","k","l","m", |
145 ("\\<G>", Letter), |
108 "n","o","p","q","r","s","t","u","v","w","x","y","z"] |
146 ("\\<H>", Letter), |
109 |
147 ("\\<I>", Letter), |
110 val goth_letters = |
148 ("\\<J>", Letter), |
111 ["AA","BB","CC","DD","EE","FF","GG","HH","II","JJ","KK","LL","MM", |
149 ("\\<K>", Letter), |
112 "NN","OO","PP","QQ","RR","SS","TT","UU","VV","WW","XX","YY","ZZ", |
150 ("\\<L>", Letter), |
113 "aa","bb","cc","dd","ee","ff","gg","hh","ii","jj","kk","ll","mm", |
151 ("\\<M>", Letter), |
114 "nn","oo","pp","qq","rr","ss","tt","uu","vv","ww","xx","yy","zz"] |
152 ("\\<N>", Letter), |
115 |
153 ("\\<O>", Letter), |
116 val greek_letters = |
154 ("\\<P>", Letter), |
117 ["alpha","beta","gamma","delta","epsilon","zeta","eta","theta", |
155 ("\\<Q>", Letter), |
118 "iota","kappa",(*"lambda",*)"mu","nu","xi","pi","rho","sigma","tau", |
156 ("\\<R>", Letter), |
119 "upsilon","phi","psi","omega","Gamma","Delta","Theta","Lambda", |
157 ("\\<S>", Letter), |
120 "Xi","Pi","Sigma","Upsilon","Phi","Psi","Omega"] |
158 ("\\<T>", Letter), |
121 |
159 ("\\<U>", Letter), |
122 val bbb_letters = ["bool","complex","nat","rat","real","int"] |
160 ("\\<V>", Letter), |
123 |
161 ("\\<W>", Letter), |
124 val control_letters = ["^isub", "^isup"] |
162 ("\\<X>", Letter), |
125 |
163 ("\\<Y>", Letter), |
126 val pre_letters = |
164 ("\\<Z>", Letter), |
127 cal_letters @ |
165 ("\\<a>", Letter), |
128 small_letters @ |
166 ("\\<b>", Letter), |
129 goth_letters @ |
167 ("\\<c>", Letter), |
130 greek_letters @ |
168 ("\\<d>", Letter), |
131 control_letters |
169 ("\\<e>", Letter), |
132 |
170 ("\\<f>", Letter), |
|
171 ("\\<g>", Letter), |
|
172 ("\\<h>", Letter), |
|
173 ("\\<i>", Letter), |
|
174 ("\\<j>", Letter), |
|
175 ("\\<k>", Letter), |
|
176 ("\\<l>", Letter), |
|
177 ("\\<m>", Letter), |
|
178 ("\\<n>", Letter), |
|
179 ("\\<o>", Letter), |
|
180 ("\\<p>", Letter), |
|
181 ("\\<q>", Letter), |
|
182 ("\\<r>", Letter), |
|
183 ("\\<s>", Letter), |
|
184 ("\\<t>", Letter), |
|
185 ("\\<u>", Letter), |
|
186 ("\\<v>", Letter), |
|
187 ("\\<w>", Letter), |
|
188 ("\\<x>", Letter), |
|
189 ("\\<y>", Letter), |
|
190 ("\\<z>", Letter), |
|
191 ("\\<AA>", Letter), |
|
192 ("\\<BB>", Letter), |
|
193 ("\\<CC>", Letter), |
|
194 ("\\<DD>", Letter), |
|
195 ("\\<EE>", Letter), |
|
196 ("\\<FF>", Letter), |
|
197 ("\\<GG>", Letter), |
|
198 ("\\<HH>", Letter), |
|
199 ("\\<II>", Letter), |
|
200 ("\\<JJ>", Letter), |
|
201 ("\\<KK>", Letter), |
|
202 ("\\<LL>", Letter), |
|
203 ("\\<MM>", Letter), |
|
204 ("\\<NN>", Letter), |
|
205 ("\\<OO>", Letter), |
|
206 ("\\<PP>", Letter), |
|
207 ("\\<QQ>", Letter), |
|
208 ("\\<RR>", Letter), |
|
209 ("\\<SS>", Letter), |
|
210 ("\\<TT>", Letter), |
|
211 ("\\<UU>", Letter), |
|
212 ("\\<VV>", Letter), |
|
213 ("\\<WW>", Letter), |
|
214 ("\\<XX>", Letter), |
|
215 ("\\<YY>", Letter), |
|
216 ("\\<ZZ>", Letter), |
|
217 ("\\<aa>", Letter), |
|
218 ("\\<bb>", Letter), |
|
219 ("\\<cc>", Letter), |
|
220 ("\\<dd>", Letter), |
|
221 ("\\<ee>", Letter), |
|
222 ("\\<ff>", Letter), |
|
223 ("\\<gg>", Letter), |
|
224 ("\\<hh>", Letter), |
|
225 ("\\<ii>", Letter), |
|
226 ("\\<jj>", Letter), |
|
227 ("\\<kk>", Letter), |
|
228 ("\\<ll>", Letter), |
|
229 ("\\<mm>", Letter), |
|
230 ("\\<nn>", Letter), |
|
231 ("\\<oo>", Letter), |
|
232 ("\\<pp>", Letter), |
|
233 ("\\<qq>", Letter), |
|
234 ("\\<rr>", Letter), |
|
235 ("\\<ss>", Letter), |
|
236 ("\\<tt>", Letter), |
|
237 ("\\<uu>", Letter), |
|
238 ("\\<vv>", Letter), |
|
239 ("\\<ww>", Letter), |
|
240 ("\\<xx>", Letter), |
|
241 ("\\<yy>", Letter), |
|
242 ("\\<zz>", Letter), |
|
243 ("\\<alpha>", Letter), |
|
244 ("\\<beta>", Letter), |
|
245 ("\\<gamma>", Letter), |
|
246 ("\\<delta>", Letter), |
|
247 ("\\<epsilon>", Letter), |
|
248 ("\\<zeta>", Letter), |
|
249 ("\\<eta>", Letter), |
|
250 ("\\<theta>", Letter), |
|
251 ("\\<iota>", Letter), |
|
252 ("\\<kappa>", Letter), |
|
253 ("\\<lambda>", Other), (*sic!*) |
|
254 ("\\<mu>", Letter), |
|
255 ("\\<nu>", Letter), |
|
256 ("\\<xi>", Letter), |
|
257 ("\\<pi>", Letter), |
|
258 ("\\<rho>", Letter), |
|
259 ("\\<sigma>", Letter), |
|
260 ("\\<tau>", Letter), |
|
261 ("\\<upsilon>", Letter), |
|
262 ("\\<phi>", Letter), |
|
263 ("\\<psi>", Letter), |
|
264 ("\\<omega>", Letter), |
|
265 ("\\<Gamma>", Letter), |
|
266 ("\\<Delta>", Letter), |
|
267 ("\\<Theta>", Letter), |
|
268 ("\\<Lambda>", Letter), |
|
269 ("\\<Xi>", Letter), |
|
270 ("\\<Pi>", Letter), |
|
271 ("\\<Sigma>", Letter), |
|
272 ("\\<Upsilon>", Letter), |
|
273 ("\\<Phi>", Letter), |
|
274 ("\\<Psi>", Letter), |
|
275 ("\\<Omega>", Letter), |
|
276 ("\\<^isub>", Quasi), |
|
277 ("\\<^isup>", Quasi), |
|
278 ("\\<spacespace>", Blank)]; |
133 in |
279 in |
134 val ext_letters = map wrap pre_letters |
280 fun kind s = |
135 |
281 if is_ascii_letter s then Letter |
136 fun is_ext_letter s = String.isPrefix "\\<" s andalso s mem_string ext_letters |
282 else if is_ascii_digit s then Digit |
137 end |
283 else if is_ascii_quasi s then Quasi |
138 |
284 else if is_ascii_blank s then Blank |
139 fun is_letter s = |
285 else if is_char s then Other |
140 (size s = 1 andalso |
286 else if_none (Symtab.lookup (symbol_kinds, s)) Other; |
141 (ord "A" <= ord s andalso ord s <= ord "Z" orelse |
287 end; |
142 ord "a" <= ord s andalso ord s <= ord "z")) |
288 |
143 orelse is_ext_letter s |
289 fun is_letter s = kind s = Letter; |
144 |
290 fun is_digit s = kind s = Digit; |
145 fun is_digit s = |
291 fun is_quasi s = kind s = Quasi; |
146 size s = 1 andalso ord "0" <= ord s andalso ord s <= ord "9" |
292 fun is_blank s = kind s = Blank; |
147 |
293 |
148 fun is_quasi "_" = true |
294 fun is_quasi_letter s = let val k = kind s in k = Letter orelse k = Quasi end; |
149 | is_quasi "'" = true |
295 fun is_letdig s = let val k = kind s in k = Letter orelse k = Digit orelse k = Quasi end; |
150 | is_quasi _ = false; |
296 |
151 |
297 |
152 fun is_quasi_letter s = is_quasi s orelse is_letter s; |
298 |
153 |
299 (** symbol input **) |
154 val is_blank = |
300 |
155 fn " " => true | "\t" => true | "\r" => true | "\n" => true | "\^L" => true |
301 (* scanning through symbols *) |
156 | "\160" => true | "\\<spacespace>" => true |
|
157 | _ => false; |
|
158 |
|
159 fun is_letdig s = is_quasi_letter s orelse is_digit s; |
|
160 |
|
161 fun is_symbolic s = |
|
162 size s > 2 andalso nth_elem_string (2, s) <> "^" |
|
163 andalso not (is_ext_letter s); |
|
164 |
|
165 fun is_printable s = |
|
166 size s = 1 andalso ord space <= ord s andalso ord s <= ord "~" orelse |
|
167 is_ext_letter s orelse |
|
168 is_symbolic s; |
|
169 |
|
170 fun is_ctrl_letter s = |
|
171 size s = 1 andalso ord space <= ord s andalso ord s <= ord "~" andalso s <> ">"; |
|
172 |
|
173 fun is_identifier s = |
|
174 case (explode s) of |
|
175 [] => false |
|
176 | c::cs => is_letter c andalso forall is_letdig cs; |
|
177 |
|
178 fun sym_length ss = foldl (fn (n, s) => |
|
179 (if not (is_printable s) then 0 else |
|
180 (case Library.try String.substring (s, 2, 4) of |
|
181 Some s' => if s' = "long" orelse s' = "Long" then 2 else 1 |
|
182 | None => 1)) + n) (0, ss); |
|
183 |
|
184 fun strip_blanks s = |
|
185 implode (#1 (Library.take_suffix is_blank (#2 (Library.take_prefix is_blank (explode s))))); |
|
186 |
|
187 |
|
188 (* beginning *) |
|
189 |
|
190 val smash_blanks = map (fn s => if is_blank s then space else s); |
|
191 |
302 |
192 fun beginning raw_ss = |
303 fun beginning raw_ss = |
193 let |
304 let |
194 val (all_ss, _) = take_suffix is_blank raw_ss; |
305 val (all_ss, _) = Library.take_suffix is_blank raw_ss; |
195 val dots = if length all_ss > 10 then " ..." else ""; |
306 val dots = if length all_ss > 10 then " ..." else ""; |
196 val (ss, _) = take_suffix is_blank (take (10, all_ss)); |
307 val (ss, _) = Library.take_suffix is_blank (Library.take (10, all_ss)); |
197 in implode (smash_blanks ss) ^ dots end; |
308 in implode (map (fn s => if is_blank s then space else s) ss) ^ dots end; |
198 |
|
199 |
|
200 |
|
201 (** scanning through symbols **) |
|
202 |
309 |
203 fun scanner msg scan chs = |
310 fun scanner msg scan chs = |
204 let |
311 let |
205 fun err_msg cs = msg ^ ": " ^ beginning cs; |
312 fun err_msg cs = msg ^ ": " ^ beginning cs; |
206 val fin_scan = Scan.error (Scan.finite stopper (!! (fn (cs, _) => err_msg cs) scan)); |
313 val fin_scan = Scan.error (Scan.finite stopper (!! (fn (cs, _) => err_msg cs) scan)); |