author | obua |
Sun, 09 May 2004 23:04:36 +0200 | |
changeset 14722 | 8e739a6eaf11 |
parent 14678 | 662b181cae05 |
child 14728 | df34201f1a15 |
permissions | -rw-r--r-- |
6118 | 1 |
(* Title: Pure/General/symbol.ML |
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
2 |
ID: $Id$ |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
3 |
Author: Markus Wenzel, TU Muenchen |
8806 | 4 |
License: GPL (GNU GENERAL PUBLIC LICENSE) |
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
5 |
|
14678 | 6 |
Generalized characters with an infinite amount of named symbols. |
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
7 |
*) |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
8 |
|
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
9 |
signature SYMBOL = |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
10 |
sig |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
11 |
type symbol |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
12 |
val space: symbol |
10953 | 13 |
val spaces: int -> symbol |
14678 | 14 |
val is_char: symbol -> bool |
15 |
val is_symbolic: symbol -> bool |
|
16 |
val is_printable: symbol -> bool |
|
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
17 |
val eof: symbol |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
18 |
val is_eof: symbol -> bool |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
19 |
val not_eof: symbol -> bool |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
20 |
val stopper: symbol * (symbol -> bool) |
14678 | 21 |
val sync: symbol |
22 |
val is_sync: symbol -> bool |
|
23 |
val not_sync: symbol -> bool |
|
24 |
val malformed: symbol |
|
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
25 |
val is_ascii: symbol -> bool |
14678 | 26 |
val is_ascii_letter: symbol -> bool |
27 |
val is_ascii_digit: symbol -> bool |
|
28 |
val is_ascii_quasi: symbol -> bool |
|
29 |
val is_ascii_blank: symbol -> bool |
|
30 |
datatype kind = Letter | Digit | Quasi | Blank | Other |
|
31 |
val kind: symbol -> kind |
|
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
32 |
val is_letter: symbol -> bool |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
33 |
val is_digit: symbol -> bool |
12904 | 34 |
val is_quasi: symbol -> bool |
14678 | 35 |
val is_blank: symbol -> bool |
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
36 |
val is_quasi_letter: symbol -> bool |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
37 |
val is_letdig: symbol -> bool |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
38 |
val beginning: symbol list -> string |
14678 | 39 |
val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a |
13730 | 40 |
val scan_id: string list -> string * string list |
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
41 |
val scan: string list -> symbol * string list |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
42 |
val source: bool -> (string, 'a) Source.source -> |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
43 |
(symbol, (string, 'a) Source.source) Source.source |
6272 | 44 |
val explode: string -> symbol list |
14678 | 45 |
val strip_blanks: string -> string |
46 |
val bump_init: string -> string |
|
12904 | 47 |
val bump_string: string -> string |
14678 | 48 |
val length: symbol list -> int |
10953 | 49 |
val default_indent: string * int -> string |
50 |
val add_mode: string -> (string -> string * real) * (string * int -> string) -> unit |
|
6692 | 51 |
val symbolsN: string |
52 |
val xsymbolsN: string |
|
10923 | 53 |
val plain_output: string -> string |
6272 | 54 |
val output: string -> string |
55 |
val output_width: string -> string * real |
|
10953 | 56 |
val indent: string * int -> string |
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
57 |
end; |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
58 |
|
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
59 |
structure Symbol: SYMBOL = |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
60 |
struct |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
61 |
|
14678 | 62 |
(** type symbol **) |
6272 | 63 |
|
14678 | 64 |
(*Symbols, which are considered the smallest entities of any Isabelle |
6272 | 65 |
string, may be of the following form: |
14678 | 66 |
|
6272 | 67 |
(a) ASCII symbols: a |
68 |
(b) printable symbols: \<ident> |
|
14557
31ae4a47267c
* cleaner distinction between control symbols "\<^...>" and "\<^raw...>" in
schirmer
parents:
14361
diff
changeset
|
69 |
(c) control symbols: \<^ident> |
14561
c53396af770e
* raw control symbols are of the form \<^raw:...> now.
schirmer
parents:
14559
diff
changeset
|
70 |
(d) raw control symbols: \<^raw:...>, where "..." may be any printable |
14557
31ae4a47267c
* cleaner distinction between control symbols "\<^...>" and "\<^raw...>" in
schirmer
parents:
14361
diff
changeset
|
71 |
character excluding ">" |
6272 | 72 |
|
14678 | 73 |
Output is subject to the print_mode variable (default: verbatim), |
74 |
actual interpretation in display is up to front-end tools. |
|
14561
c53396af770e
* raw control symbols are of the form \<^raw:...> now.
schirmer
parents:
14559
diff
changeset
|
75 |
|
14678 | 76 |
Symbols (b),(c) and (d) may optionally start with "\\" instead of |
77 |
just "\" for compatibility with ML string literals (e.g. used in |
|
78 |
old-style theory files and ML proof scripts). To be on the safe |
|
79 |
side, the default output of these symbols will also start with the |
|
80 |
double "\\". |
|
6272 | 81 |
*) |
82 |
||
83 |
type symbol = string; |
|
84 |
||
85 |
val space = " "; |
|
10953 | 86 |
fun spaces k = Library.replicate_string k space; |
14678 | 87 |
|
88 |
(*a symbol counts as a plain character iff its string has size 1*)
fun is_char s = (case size s of 1 => true | _ => false);
|
89 |
||
90 |
(*symbolic means: starts with "\<" but is not a control symbol starting with "\<^"*)
fun is_symbolic s =
  if String.isPrefix "\\<^" s then false
  else String.isPrefix "\\<" s;
|
92 |
||
93 |
(*single characters are printable within the range from space to "~";
  any other symbol is printable unless it starts with "\<^"*)
fun is_printable s =
  (case is_char s of
    false => not (String.isPrefix "\\<^" s)
  | true =>
      let val code = ord s
      in ord space <= code andalso code <= ord "~" end);
|
6272 | 96 |
|
97 |
||
14678 | 98 |
(* input source control *) |
6272 | 99 |
|
14678 | 100 |
(*the empty string marks the end of input; stopper pairs it with its test*)
val eof = "";
fun is_eof s = (eof = s);
fun not_eof s = not (is_eof s);
val stopper = (eof, is_eof);
|
104 |
||
14678 | 105 |
(*the dedicated control symbol "\<^sync>" together with its tests*)
val sync = "\\<^sync>";
fun is_sync s = (sync = s);
fun not_sync s = not (is_sync s);
|
108 |
||
109 |
val malformed = "\\<^malformed>"; |
|
110 |
||
111 |
||
112 |
(* ascii symbols *) |
|
113 |
||
114 |
(*single character with code below 128*)
fun is_ascii s = if is_char s then 128 > ord s else false;
|
115 |
||
116 |
(*single character within "A".."Z" or "a".."z"*)
fun is_ascii_letter s =
  let
    fun between (lo, hi) c = ord lo <= c andalso c <= ord hi;
  in
    is_char s andalso
      let val c = ord s
      in between ("A", "Z") c orelse between ("a", "z") c end
  end;
|
120 |
||
121 |
(*single character within "0".."9"*)
fun is_ascii_digit s =
  if is_char s then
    let val c = ord s
    in ord "0" <= c andalso c <= ord "9" end
  else false;
|
123 |
||
124 |
(*the two ASCII quasi-letters: underscore and prime*)
fun is_ascii_quasi s = (s = "_" orelse s = "'");
|
127 |
||
128 |
(*ASCII blanks: space, tab, carriage return, newline and control-L*)
fun is_ascii_blank s =
  s = " " orelse s = "\t" orelse s = "\r" orelse s = "\n" orelse s = "\^L";
|
131 |
||
132 |
||
133 |
(* standard symbol kinds *) |
|
134 |
||
135 |
datatype kind = Letter | Digit | Quasi | Blank | Other; |
|
6272 | 136 |
|
14171
0cab06e3bbd0
Extended the notion of letter and digit, such that now one may use greek,
skalberg
parents:
13730
diff
changeset
|
137 |
(*Classification of symbols into standard kinds.

  ASCII characters are classified by the is_ascii_* predicates; every other
  single character is Other.  Named symbols are looked up in symbol_kinds and
  default to Other when absent.

  The table is assembled from plain name lists instead of being spelled out
  entry by entry.  It also registers \<chi>, which was missing from the
  lowercase Greek letters and therefore ended up as Other.*)
local
  (*wrap a bare name as the symbol "\<name>" and register it as a letter*)
  fun letter name = ("\\<" ^ name ^ ">", Letter);
  fun double c = c ^ c;

  val latin_upper =
    ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
     "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"];
  val latin_lower =
    ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
     "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"];

  (*lambda is deliberately not listed here: it is registered as Other below*)
  val greek_lower =
    ["alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta",
     "iota", "kappa", "mu", "nu", "xi", "pi", "rho", "sigma", "tau",
     "upsilon", "phi", "chi", "psi", "omega"];
  val greek_upper =
    ["Gamma", "Delta", "Theta", "Lambda", "Xi", "Pi", "Sigma", "Upsilon",
     "Phi", "Psi", "Omega"];

  val symbol_kinds = Symtab.make
    (map letter
      (latin_upper @ latin_lower @
        map double latin_upper @ map double latin_lower @
        greek_lower @ greek_upper) @
     [("\\<lambda>", Other),  (*sic!*)
      ("\\<^isub>", Quasi),
      ("\\<^isup>", Quasi),
      ("\\<spacespace>", Blank)]);
in
  fun kind s =
    if is_ascii_letter s then Letter
    else if is_ascii_digit s then Digit
    else if is_ascii_quasi s then Quasi
    else if is_ascii_blank s then Blank
    else if is_char s then Other
    else if_none (Symtab.lookup (symbol_kinds, s)) Other;
end;
|
14173 | 288 |
|
14678 | 289 |
(*kind predicates: each compares the result of kind with one constructor*)
local
  fun of_kind k s = (kind s = k);
in
  fun is_letter s = of_kind Letter s;
  fun is_digit s = of_kind Digit s;
  fun is_quasi s = of_kind Quasi s;
  fun is_blank s = of_kind Blank s;
end;
|
6272 | 293 |
|
14678 | 294 |
(*letters and quasi-letters*)
fun is_quasi_letter s =
  (case kind s of
    Letter => true
  | Quasi => true
  | _ => false);

(*letters, digits and quasi-letters*)
fun is_letdig s =
  (case kind s of
    Letter => true
  | Digit => true
  | Quasi => true
  | _ => false);
|
11010 | 296 |
|
6272 | 297 |
|
298 |
||
14678 | 299 |
(** symbol input **) |
300 |
||
301 |
(* scanning through symbols *) |
|
6272 | 302 |
|
303 |
(*Render the start of a symbol list for messages: trailing blanks are dropped,
  at most 10 symbols are shown (again without trailing blanks), every blank is
  shown as a space, and " ..." is appended if more than 10 symbols remained
  after the first trimming.*)
fun beginning raw_ss =
  let
    val trimmed = #1 (Library.take_suffix is_blank raw_ss);
    val shown = #1 (Library.take_suffix is_blank (Library.take (10, trimmed)));
    fun normalize s = if is_blank s then space else s;
    val text = implode (map normalize shown);
  in
    if length trimmed > 10 then text ^ " ..." else text
  end;
|
6640 | 309 |
|
310 |
(*Run scan over the complete input chs.  Messages are built from msg followed by
  the beginning of the offending input; input left over after the scan is
  reported via error.*)
fun scanner msg scan chs =
  let
    fun complain cs = msg ^ ": " ^ beginning cs;
    val checked = !! (fn (cs, _) => complain cs) scan;
    val (result, rest) = Scan.error (Scan.finite stopper checked) chs;
  in
    if null rest then result
    else error (complain rest)
  end;
|
319 |
||
320 |
||
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
321 |
(* scan *) |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
322 |
|
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
323 |
val scan_id = Scan.one is_letter ^^ (Scan.any is_letdig >> implode); |
14678 | 324 |
|
325 |
local |
|
14561
c53396af770e
* raw control symbols are of the form \<^raw:...> now.
schirmer
parents:
14559
diff
changeset
|
326 |
|
14678 | 327 |
(*Scan an encoded newline and map it to "\n": CR LF, a lone CR, or the symbol
  \<^newline> written with one or two leading backslashes.

  The mandatory backslash has to be scanned first.  With Scan.optional in front,
  the short form loses its only backslash to the optional scanner and the
  subsequent mandatory one can never match -- assuming "--" does not backtrack
  into its left operand (TODO confirm against scan.ML).  The prefix used by
  scan below follows the same mandatory-then-optional order.*)
val scan_encoded_newline =
  $$ "\r" -- $$ "\n" >> K "\n" ||
  $$ "\r" >> K "\n" ||
  $$ "\\" -- Scan.optional ($$ "\\") "" -- $$ "<" -- $$ "^" -- $$ "n"
    -- $$ "e" -- $$ "w" -- $$ "l" -- $$ "i" -- $$ "n" -- $$ "e" -- $$ ">" >> K "\n";
|
332 |
||
333 |
(*Characters admitted inside a raw control symbol: the range from space to "~",
  except ">".  The is_char guard mirrors the is_ascii_* predicates and keeps ord
  away from the empty eof stopper (presumably ord is only defined for non-empty
  strings -- TODO confirm).*)
fun raw_body c =
  is_char c andalso ord space <= ord c andalso ord c <= ord "~" andalso c <> ">";
|
334 |
val scan_raw = $$ "r" ^^ $$ "a" ^^ $$ "w" ^^ $$ ":" ^^ (Scan.any raw_body >> implode); |
|
335 |
||
336 |
in |
|
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
337 |
|
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
338 |
(*Scan one symbol.  Alternatives, tried in this order: an encoded newline;
  a symbol specification introduced by one or two backslashes and "<";
  any single input element that is not eof.*)
val scan =
  let
    (*a backslash, optionally followed by a second one*)
    val backslash = $$ "\\" --| Scan.optional ($$ "\\") "";
    (*"^" with a raw body or an identifier, or just an identifier; then ">"*)
    val spec = ($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ $$ ">";
    fun malformed_msg (cs, _) =
      "Malformed symbolic character specification: \\" ^ "<" ^ beginning cs;
  in
    scan_encoded_newline ||
    backslash ^^ $$ "<" ^^ !! malformed_msg spec ||
    Scan.one not_eof
  end;
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
344 |
|
14678 | 345 |
end; |
346 |
||
14562
980da32f4617
proper handling of lines terminated by CRLF or CR;
wenzelm
parents:
14561
diff
changeset
|
347 |
|
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
348 |
(* source *) |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
349 |
|
10747 | 350 |
val recover = Scan.any ((not o is_blank) andf not_eof) >> K [malformed]; |
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
351 |
|
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
352 |
(*Symbol source on top of src; the recover scanner is supplied only if
  do_recover is set.*)
fun source do_recover src =
  let
    val recovery = if do_recover then Some recover else None;
  in
    Source.source stopper (Scan.bulk scan) recovery src
  end;
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
354 |
|
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
355 |
|
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
356 |
(* explode *) |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
357 |
|
14562
980da32f4617
proper handling of lines terminated by CRLF or CR;
wenzelm
parents:
14561
diff
changeset
|
358 |
(*true iff the character list contains neither a carriage return nor a
  backslash immediately followed by "<"*)
fun no_explode chs =
  (case chs of
    [] => true
  | "\r" :: _ => false
  | "\\" :: "<" :: _ => false
  | _ :: rest => no_explode rest);
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
362 |
|
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
363 |
(*Explode a string into symbols.  The plain character list is returned as is
  when no_explode holds for it; otherwise it is re-read with scan.*)
fun sym_explode str =
  let
    val chs = explode str;
    fun decode () = the (Scan.read stopper (Scan.repeat scan) chs);
  in
    if no_explode chs then chs else decode ()
  end;
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
368 |
|
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
369 |
|
14678 | 370 |
(* blanks *) |
371 |
||
372 |
(*remove leading and trailing blank symbols from a string*)
fun strip_blanks s =
  let
    val (_, without_lead) = Library.take_prefix is_blank (sym_explode s);
    val (core, _) = Library.take_suffix is_blank without_lead;
  in implode core end;
|
377 |
||
378 |
||
379 |
(* bump string -- treat as base 26 or base 1 numbers *) |
|
380 |
||
381 |
(*Inspect the first two elements of a symbol list: true if the second one is
  "\<^isub>" or "\<^isup>", otherwise whether the first one is symbolic.
  Callers in this file pass the reversed symbol list.*)
fun ends_symbolic [] = false
  | ends_symbolic (s :: rest) =
      (case rest of
        "\\<^isub>" :: _ => true
      | "\\<^isup>" :: _ => true
      | _ => is_symbolic s);
|
385 |
||
386 |
(*append a prime if the string ends symbolically, and "a" otherwise*)
fun bump_init str =
  let
    val suffix = if ends_symbolic (rev (sym_explode str)) then "'" else "a";
  in str ^ suffix end;
|
12904 | 389 |
|
390 |
(*Bump the part of a string that precedes its trailing quasi-letters.  If that
  part ends symbolically, a prime is appended to it.  Otherwise it is
  incremented from its last symbol: "z" wraps around to "a" and carries on to
  the symbol before it, "a".."y" advance by one, and anything else gets "a"
  appended.*)
fun bump_string str =
  let
    fun in_a_to_y s = is_char s andalso ord "a" <= ord s andalso ord s < ord "z";

    (*works on the reversed symbol list, i.e. last symbol first*)
    fun incr (s :: rest) =
          if s = "z" then "a" :: incr rest
          else if in_a_to_y s then chr (ord s + 1) :: rest
          else "a" :: s :: rest
      | incr [] = ["a"];

    val (body, quasis) = Library.take_suffix is_quasi (sym_explode str);
    val rev_body = rev body;
    val rev_bumped =
      if ends_symbolic rev_body then "'" :: rev_body else incr rev_body;
  in implode (rev rev_bumped @ quasis) end;
|
402 |
||
12904 | 403 |
|
6272 | 404 |
|
405 |
(** symbol output **) |
|
406 |
||
14678 | 407 |
(*width of one symbol: 0 if not printable; 2 for symbols starting with
  "\<long" or "\<Long" and for "\<spacespace>"; 1 otherwise*)
fun sym_len s =
  if is_printable s then
    if String.isPrefix "\\<long" s orelse
      String.isPrefix "\\<Long" s orelse
      s = "\\<spacespace>"
    then 2
    else 1
  else 0;
|
413 |
||
414 |
(*sum of sym_len over a symbol list, accumulated from left to right*)
fun sym_length ss =
  let
    fun count (n, []) = n
      | count (n, s :: rest) = count (sym_len s + n, rest);
  in count (0, ss) end;
|
415 |
||
416 |
||
417 |
(* default output *) |
|
6272 | 418 |
|
419 |
(*pair a string with its size as a real number*)
fun string_size s =
  let val width = real (size s)
  in (s, width) end;
|
420 |
||
14678 | 421 |
(*single characters are kept as they are; any other symbol gets one more
  leading backslash*)
fun sym_escape s = (if is_char s then "" else "\\") ^ s;
13730 | 422 |
|
14561
c53396af770e
* raw control symbols are of the form \<^raw:...> now.
schirmer
parents:
14559
diff
changeset
|
423 |
(*Default output with width: strings without any backslash are passed through
  unchanged; otherwise the string is exploded into symbols and each symbol is
  escaped via sym_escape.*)
fun default_output s =
  let
    val plain = not (exists_string (equal "\\") s);
    fun escaped () = implode (map sym_escape (sym_explode s));
  in
    string_size (if plain then s else escaped ())
  end;
6272 | 426 |
|
10953 | 427 |
fun default_indent (_: string, k) = spaces k; |
6272 | 428 |
|
10953 | 429 |
|
14678 | 430 |
(* print modes *) |
6272 | 431 |
|
6692 | 432 |
val symbolsN = "symbols"; |
433 |
val xsymbolsN = "xsymbols"; |
|
434 |
||
12116 | 435 |
val modes = |
436 |
ref (Symtab.empty: ((string -> string * real) * (string * int -> string)) Symtab.table); |
|
6272 | 437 |
|
438 |
fun lookup_mode name = Symtab.lookup (! modes, name); |
|
439 |
||
10953 | 440 |
(*register print mode functions under name; an already registered name is
  overwritten after a warning*)
fun add_mode name m =
  let
    val _ =
      (case lookup_mode name of
        None => ()
      | Some _ => warning ("Redeclaration of symbol print mode " ^ quote name));
  in
    modes := Symtab.update ((name, m), ! modes)
  end;
444 |
||
445 |
(*functions of the first registered mode found in print_mode, falling back to
  the default output and indent functions*)
fun get_mode () =
  (case get_first lookup_mode (! print_mode) of
    Some mode => mode
  | None => (default_output, default_indent));
|
6272 | 447 |
|
448 |
||
449 |
(* mode output *) |
|
450 |
||
10953 | 451 |
(*output and indentation according to the current print mode; the mode is
  looked up afresh on every call*)
fun output_width x =
  let val (out, _) = get_mode ()
  in out x end;

fun output s = #1 (output_width s);

(*output via default_output, regardless of the print mode*)
fun plain_output s = #1 (default_output s);

fun indent x =
  let val (_, ind) = get_mode ()
  in ind x end;
456 |
||
6272 | 457 |
|
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
458 |
(*final declarations of this structure!*) |
6272 | 459 |
val length = sym_length; |
6116
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
460 |
val explode = sym_explode; |
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
461 |
|
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
wenzelm
parents:
diff
changeset
|
462 |
end; |