| author | wenzelm | 
| Sat, 10 Aug 2024 20:20:59 +0200 | |
| changeset 80684 | 5b8fccf0a48a | 
| parent 77847 | 3bb6468d202e | 
| child 80797 | 5d09ceca0924 | 
| permissions | -rw-r--r-- | 
| 6118 | 1  | 
(* Title: Pure/General/symbol.ML  | 
| 63936 | 2  | 
Author: Makarius  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
3  | 
|
| 69490 | 4  | 
Isabelle text symbols.  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
5  | 
*)  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
6  | 
|
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
7  | 
signature SYMBOL =  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
8  | 
sig  | 
| 40509 | 9  | 
type symbol = string  | 
| 63936 | 10  | 
val explode: string -> symbol list  | 
| 61865 | 11  | 
val spaces: int -> string  | 
| 26524 | 12  | 
val STX: symbol  | 
13  | 
val DEL: symbol  | 
|
| 62210 | 14  | 
val open_: symbol  | 
15  | 
val close: symbol  | 
|
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
16  | 
val space: symbol  | 
| 62804 | 17  | 
val is_space: symbol -> bool  | 
| 61579 | 18  | 
val comment: symbol  | 
| 67413 | 19  | 
val cancel: symbol  | 
| 67428 | 20  | 
val latex: symbol  | 
| 
69891
 
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
 
wenzelm 
parents: 
69592 
diff
changeset
 | 
21  | 
val marker: symbol  | 
| 14678 | 22  | 
val is_char: symbol -> bool  | 
| 
37533
 
d775bd70f571
explicit treatment of UTF8 character sequences as Isabelle symbols;
 
wenzelm 
parents: 
34095 
diff
changeset
 | 
23  | 
val is_utf8: symbol -> bool  | 
| 14678 | 24  | 
val is_symbolic: symbol -> bool  | 
| 55033 | 25  | 
val is_symbolic_char: symbol -> bool  | 
| 14678 | 26  | 
val is_printable: symbol -> bool  | 
| 
69592
 
a80d8ec6c998
support for isabelle update -u control_cartouches;
 
wenzelm 
parents: 
69490 
diff
changeset
 | 
27  | 
val control_prefix: string  | 
| 
 
a80d8ec6c998
support for isabelle update -u control_cartouches;
 
wenzelm 
parents: 
69490 
diff
changeset
 | 
28  | 
val control_suffix: string  | 
| 61470 | 29  | 
val is_control: symbol -> bool  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
30  | 
val eof: symbol  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
31  | 
val is_eof: symbol -> bool  | 
| 27766 | 32  | 
val not_eof: symbol -> bool  | 
| 27732 | 33  | 
val stopper: symbol Scan.stopper  | 
| 
40523
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40509 
diff
changeset
 | 
34  | 
val is_malformed: symbol -> bool  | 
| 
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40509 
diff
changeset
 | 
35  | 
val malformed_msg: symbol -> string  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
36  | 
val is_ascii: symbol -> bool  | 
| 14678 | 37  | 
val is_ascii_letter: symbol -> bool  | 
38  | 
val is_ascii_digit: symbol -> bool  | 
|
| 
24580
 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 
wenzelm 
parents: 
23784 
diff
changeset
 | 
39  | 
val is_ascii_hex: symbol -> bool  | 
| 14678 | 40  | 
val is_ascii_quasi: symbol -> bool  | 
41  | 
val is_ascii_blank: symbol -> bool  | 
|
| 69452 | 42  | 
val is_ascii_line_terminator: symbol -> bool  | 
| 
34095
 
c2f176a38448
robust representation of low ASCII control characters within XML/YXML text;
 
wenzelm 
parents: 
33955 
diff
changeset
 | 
43  | 
val is_ascii_control: symbol -> bool  | 
| 50236 | 44  | 
val is_ascii_letdig: symbol -> bool  | 
| 20200 | 45  | 
val is_ascii_lower: symbol -> bool  | 
46  | 
val is_ascii_upper: symbol -> bool  | 
|
47  | 
val to_ascii_lower: symbol -> symbol  | 
|
48  | 
val to_ascii_upper: symbol -> symbol  | 
|
| 
50238
 
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
 
wenzelm 
parents: 
50237 
diff
changeset
 | 
49  | 
val is_ascii_identifier: string -> bool  | 
| 50236 | 50  | 
val scan_ascii_id: string list -> string * string list  | 
| 
40523
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40509 
diff
changeset
 | 
51  | 
datatype sym =  | 
| 63936 | 52  | 
Char of string | UTF8 of string | Sym of string | Control of string | Malformed of string | EOF  | 
| 14873 | 53  | 
val decode: symbol -> sym  | 
| 67373 | 54  | 
val encode: sym -> symbol  | 
| 14678 | 55  | 
datatype kind = Letter | Digit | Quasi | Blank | Other  | 
56  | 
val kind: symbol -> kind  | 
|
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
57  | 
val is_letter: symbol -> bool  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
58  | 
val is_digit: symbol -> bool  | 
| 12904 | 59  | 
val is_quasi: symbol -> bool  | 
| 14678 | 60  | 
val is_blank: symbol -> bool  | 
| 
73198
 
a9eaf8c3b728
tuned signature (e.g. see HTML.control_block in Isabelle/Scala);
 
wenzelm 
parents: 
73163 
diff
changeset
 | 
61  | 
val is_control_block: symbol -> bool  | 
| 
 
a9eaf8c3b728
tuned signature (e.g. see HTML.control_block in Isabelle/Scala);
 
wenzelm 
parents: 
73163 
diff
changeset
 | 
62  | 
val has_control_block: symbol list -> bool  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
63  | 
val is_quasi_letter: symbol -> bool  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
64  | 
val is_letdig: symbol -> bool  | 
| 14728 | 65  | 
val beginning: int -> symbol list -> string  | 
| 50237 | 66  | 
val esc: symbol -> string  | 
67  | 
val escape: string -> string  | 
|
68  | 
val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a  | 
|
| 50162 | 69  | 
val split_words: symbol list -> string list  | 
70  | 
val explode_words: string -> string list  | 
|
| 61435 | 71  | 
val trim_blanks: string -> string  | 
| 14678 | 72  | 
val bump_init: string -> string  | 
| 12904 | 73  | 
val bump_string: string -> string  | 
| 67512 | 74  | 
val sub: symbol  | 
75  | 
val sup: symbol  | 
|
76  | 
val bold: symbol  | 
|
77  | 
val make_sub: string -> string  | 
|
78  | 
val make_sup: string -> string  | 
|
79  | 
val make_bold: string -> string  | 
|
| 14678 | 80  | 
val length: symbol list -> int  | 
| 
40131
 
7cbebd636e79
explicitly qualify type Output.output, which is a slightly odd internal feature;
 
wenzelm 
parents: 
37728 
diff
changeset
 | 
81  | 
val output: string -> Output.output * int  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
82  | 
end;  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
83  | 
|
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
84  | 
structure Symbol: SYMBOL =  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
85  | 
struct  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
86  | 
|
| 14678 | 87  | 
(** type symbol **)  | 
| 6272 | 88  | 
|
| 14678 | 89  | 
(*Symbols, which are considered the smallest entities of any Isabelle  | 
| 6272 | 90  | 
string, may be of the following form:  | 
| 14678 | 91  | 
|
| 63936 | 92  | 
(1) ASCII: a  | 
93  | 
(2) UTF-8: ä  | 
|
| 17823 | 94  | 
(2) regular symbols: \<ident>  | 
| 14834 | 95  | 
(3) control symbols: \<^ident>  | 
| 6272 | 96  | 
|
| 14678 | 97  | 
Output is subject to the print_mode variable (default: verbatim),  | 
98  | 
actual interpretation in display is up to front-end tools.  | 
|
| 6272 | 99  | 
*)  | 
100  | 
||
101  | 
type symbol = string;  | 
|
102  | 
||
| 26524 | 103  | 
val STX = chr 2;  | 
104  | 
val DEL = chr 127;  | 
|
105  | 
||
| 
62529
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
106  | 
val open_ = "\<open>";  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
107  | 
val close = "\<close>";  | 
| 62210 | 108  | 
|
| 26524 | 109  | 
val space = chr 32;  | 
| 62804 | 110  | 
fun is_space s = s = space;  | 
| 17063 | 111  | 
|
| 61865 | 112  | 
local  | 
| 
77846
 
5ba68d3bd741
more operations, following Isabelle/ML conventions;
 
wenzelm 
parents: 
77723 
diff
changeset
 | 
113  | 
val spaces0 = Vector.tabulate (65, fn i => replicate_string i space);  | 
| 61865 | 114  | 
in  | 
115  | 
fun spaces n =  | 
|
| 
77846
 
5ba68d3bd741
more operations, following Isabelle/ML conventions;
 
wenzelm 
parents: 
77723 
diff
changeset
 | 
116  | 
if n < 64 then Vector.nth spaces0 n  | 
| 
 
5ba68d3bd741
more operations, following Isabelle/ML conventions;
 
wenzelm 
parents: 
77723 
diff
changeset
 | 
117  | 
else replicate_string (n div 64) (Vector.nth spaces0 64) ^ Vector.nth spaces0 (n mod 64);  | 
| 61865 | 118  | 
end;  | 
119  | 
||
| 
62529
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
120  | 
val comment = "\<comment>";  | 
| 67413 | 121  | 
val cancel = "\<^cancel>";  | 
| 67428 | 122  | 
val latex = "\<^latex>";  | 
| 
69891
 
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
 
wenzelm 
parents: 
69592 
diff
changeset
 | 
123  | 
val marker = "\<^marker>";  | 
| 61579 | 124  | 
|
| 14678 | 125  | 
fun is_char s = size s = 1;  | 
126  | 
||
| 
77847
 
3bb6468d202e
more operations, following Isabelle/ML conventions;
 
wenzelm 
parents: 
77846 
diff
changeset
 | 
127  | 
fun is_utf8 s = size s > 0 andalso String.forall (fn c => Char.ord c >= 128) s;  | 
| 
37533
 
d775bd70f571
explicit treatment of UTF8 character sequences as Isabelle symbols;
 
wenzelm 
parents: 
34095 
diff
changeset
 | 
128  | 
|
| 55033 | 129  | 
fun raw_symbolic s =  | 
| 
62877
 
741560a5283b
avoid malformed Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62804 
diff
changeset
 | 
130  | 
String.isPrefix "\092<" s andalso String.isSuffix ">" s andalso not (String.isPrefix "\092<^" s);  | 
| 55033 | 131  | 
|
| 14678 | 132  | 
fun is_symbolic s =  | 
| 62210 | 133  | 
s <> open_ andalso s <> close andalso raw_symbolic s;  | 
| 55033 | 134  | 
|
135  | 
val is_symbolic_char = member (op =) (raw_explode "!#$%&*+-/<=>?@^_|~");  | 
|
| 14678 | 136  | 
|
137  | 
fun is_printable s =  | 
|
| 
64275
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
63936 
diff
changeset
 | 
138  | 
if is_char s then 32 <= ord s andalso ord s <= Char.ord #"~"  | 
| 55033 | 139  | 
else is_utf8 s orelse raw_symbolic s;  | 
| 26632 | 140  | 
|
| 
69592
 
a80d8ec6c998
support for isabelle update -u control_cartouches;
 
wenzelm 
parents: 
69490 
diff
changeset
 | 
141  | 
val control_prefix = "\092<^";  | 
| 
 
a80d8ec6c998
support for isabelle update -u control_cartouches;
 
wenzelm 
parents: 
69490 
diff
changeset
 | 
142  | 
val control_suffix = ">";  | 
| 
 
a80d8ec6c998
support for isabelle update -u control_cartouches;
 
wenzelm 
parents: 
69490 
diff
changeset
 | 
143  | 
|
| 61470 | 144  | 
fun is_control s =  | 
| 
69592
 
a80d8ec6c998
support for isabelle update -u control_cartouches;
 
wenzelm 
parents: 
69490 
diff
changeset
 | 
145  | 
String.isPrefix control_prefix s andalso String.isSuffix control_suffix s;  | 
| 61470 | 146  | 
|
| 6272 | 147  | 
|
| 14678 | 148  | 
(* input source control *)  | 
| 6272 | 149  | 
|
| 14678 | 150  | 
val eof = "";  | 
| 6272 | 151  | 
fun is_eof s = s = eof;  | 
152  | 
fun not_eof s = s <> eof;  | 
|
| 27732 | 153  | 
val stopper = Scan.stopper (K eof) is_eof;  | 
| 6272 | 154  | 
|
| 48774 | 155  | 
fun is_malformed s =  | 
| 
62877
 
741560a5283b
avoid malformed Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62804 
diff
changeset
 | 
156  | 
String.isPrefix "\092<" s andalso not (String.isSuffix ">" s)  | 
| 
 
741560a5283b
avoid malformed Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62804 
diff
changeset
 | 
157  | 
orelse s = "\092<>" orelse s = "\092<^>";  | 
| 48774 | 158  | 
|
| 
40523
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40509 
diff
changeset
 | 
159  | 
fun malformed_msg s = "Malformed symbolic character: " ^ quote s;  | 
| 14678 | 160  | 
|
161  | 
||
| 43418 | 162  | 
(* ASCII symbols *)  | 
| 14678 | 163  | 
|
164  | 
fun is_ascii s = is_char s andalso ord s < 128;  | 
|
165  | 
||
166  | 
fun is_ascii_letter s =  | 
|
167  | 
is_char s andalso  | 
|
| 
64275
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
63936 
diff
changeset
 | 
168  | 
(Char.ord #"A" <= ord s andalso ord s <= Char.ord #"Z" orelse  | 
| 
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
63936 
diff
changeset
 | 
169  | 
Char.ord #"a" <= ord s andalso ord s <= Char.ord #"z");  | 
| 14678 | 170  | 
|
171  | 
fun is_ascii_digit s =  | 
|
| 
64275
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
63936 
diff
changeset
 | 
172  | 
is_char s andalso Char.ord #"0" <= ord s andalso ord s <= Char.ord #"9";  | 
| 14678 | 173  | 
|
| 
24580
 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 
wenzelm 
parents: 
23784 
diff
changeset
 | 
174  | 
fun is_ascii_hex s =  | 
| 
 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 
wenzelm 
parents: 
23784 
diff
changeset
 | 
175  | 
is_char s andalso  | 
| 
64275
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
63936 
diff
changeset
 | 
176  | 
(Char.ord #"0" <= ord s andalso ord s <= Char.ord #"9" orelse  | 
| 
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
63936 
diff
changeset
 | 
177  | 
Char.ord #"A" <= ord s andalso ord s <= Char.ord #"F" orelse  | 
| 
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
63936 
diff
changeset
 | 
178  | 
Char.ord #"a" <= ord s andalso ord s <= Char.ord #"f");  | 
| 
24580
 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 
wenzelm 
parents: 
23784 
diff
changeset
 | 
179  | 
|
| 14678 | 180  | 
fun is_ascii_quasi "_" = true  | 
181  | 
| is_ascii_quasi "'" = true  | 
|
182  | 
| is_ascii_quasi _ = false;  | 
|
183  | 
||
184  | 
val is_ascii_blank =  | 
|
| 
43845
 
d89353d17f54
added File.fold_pages for streaming of large files;
 
wenzelm 
parents: 
43777 
diff
changeset
 | 
185  | 
fn " " => true | "\t" => true | "\n" => true | "\^K" => true | "\f" => true | "\^M" => true  | 
| 14678 | 186  | 
| _ => false;  | 
187  | 
||
| 69452 | 188  | 
val is_ascii_line_terminator =  | 
189  | 
fn "\r" => true | "\n" => true | _ => false;  | 
|
190  | 
||
| 
34095
 
c2f176a38448
robust representation of low ASCII control characters within XML/YXML text;
 
wenzelm 
parents: 
33955 
diff
changeset
 | 
191  | 
fun is_ascii_control s = is_char s andalso ord s < 32 andalso not (is_ascii_blank s);  | 
| 
 
c2f176a38448
robust representation of low ASCII control characters within XML/YXML text;
 
wenzelm 
parents: 
33955 
diff
changeset
 | 
192  | 
|
| 50236 | 193  | 
fun is_ascii_letdig s = is_ascii_letter s orelse is_ascii_digit s orelse is_ascii_quasi s;  | 
194  | 
||
| 
64275
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
63936 
diff
changeset
 | 
195  | 
fun is_ascii_lower s = is_char s andalso (Char.ord #"a" <= ord s andalso ord s <= Char.ord #"z");  | 
| 
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
63936 
diff
changeset
 | 
196  | 
fun is_ascii_upper s = is_char s andalso (Char.ord #"A" <= ord s andalso ord s <= Char.ord #"Z");  | 
| 20200 | 197  | 
|
| 
64275
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
63936 
diff
changeset
 | 
198  | 
fun to_ascii_lower s = if is_ascii_upper s then chr (ord s + Char.ord #"a" - Char.ord #"A") else s;  | 
| 
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
63936 
diff
changeset
 | 
199  | 
fun to_ascii_upper s = if is_ascii_lower s then chr (ord s + Char.ord #"A" - Char.ord #"a") else s;  | 
| 20200 | 200  | 
|
| 
50238
 
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
 
wenzelm 
parents: 
50237 
diff
changeset
 | 
201  | 
fun is_ascii_identifier s =  | 
| 
 
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
 
wenzelm 
parents: 
50237 
diff
changeset
 | 
202  | 
size s > 0 andalso is_ascii_letter (String.substring (s, 0, 1)) andalso  | 
| 
 
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
 
wenzelm 
parents: 
50237 
diff
changeset
 | 
203  | 
forall_string is_ascii_letdig s;  | 
| 
 
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
 
wenzelm 
parents: 
50237 
diff
changeset
 | 
204  | 
|
| 50236 | 205  | 
val scan_ascii_id = Scan.one is_ascii_letter ^^ (Scan.many is_ascii_letdig >> implode);  | 
206  | 
||
| 14678 | 207  | 
|
| 
14956
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
208  | 
(* diagnostics *)  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
209  | 
|
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
210  | 
fun beginning n cs =  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
211  | 
let  | 
| 67522 | 212  | 
val drop_blanks = drop_suffix is_ascii_blank;  | 
| 
14956
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
213  | 
val all_cs = drop_blanks cs;  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
214  | 
val dots = if length all_cs > n then " ..." else "";  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
215  | 
in  | 
| 33955 | 216  | 
(drop_blanks (take n all_cs)  | 
| 
14956
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
217  | 
|> map (fn c => if is_ascii_blank c then space else c)  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
218  | 
|> implode) ^ dots  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
219  | 
end;  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
220  | 
|
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
221  | 
|
| 14873 | 222  | 
(* symbol variants *)  | 
223  | 
||
| 
37533
 
d775bd70f571
explicit treatment of UTF8 character sequences as Isabelle symbols;
 
wenzelm 
parents: 
34095 
diff
changeset
 | 
224  | 
datatype sym =  | 
| 63936 | 225  | 
Char of string | UTF8 of string | Sym of string | Control of string | Malformed of string | EOF;  | 
| 14873 | 226  | 
|
227  | 
fun decode s =  | 
|
| 43485 | 228  | 
if s = "" then EOF  | 
229  | 
else if is_char s then Char s  | 
|
| 
37533
 
d775bd70f571
explicit treatment of UTF8 character sequences as Isabelle symbols;
 
wenzelm 
parents: 
34095 
diff
changeset
 | 
230  | 
else if is_utf8 s then UTF8 s  | 
| 
40523
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40509 
diff
changeset
 | 
231  | 
else if is_malformed s then Malformed s  | 
| 61475 | 232  | 
else if is_control s then Control (String.substring (s, 3, size s - 4))  | 
| 
40523
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40509 
diff
changeset
 | 
233  | 
else Sym (String.substring (s, 2, size s - 3));  | 
| 14873 | 234  | 
|
| 67373 | 235  | 
fun encode (Char s) = s  | 
236  | 
| encode (UTF8 s) = s  | 
|
237  | 
| encode (Sym s) = "\092<" ^ s ^ ">"  | 
|
238  | 
| encode (Control s) = "\092<^" ^ s ^ ">"  | 
|
239  | 
| encode (Malformed s) = s  | 
|
240  | 
| encode EOF = "";  | 
|
241  | 
||
| 14873 | 242  | 
|
| 14678 | 243  | 
(* standard symbol kinds *)  | 
244  | 
||
| 
14171
 
0cab06e3bbd0
Extended the notion of letter and digit, such that now one may use greek,
 
skalberg 
parents: 
13730 
diff
changeset
 | 
245  | 
local  | 
| 50235 | 246  | 
val letter_symbols =  | 
| 
77723
 
b761c91c2447
performance tuning: prefer functor Set() over Table();
 
wenzelm 
parents: 
73198 
diff
changeset
 | 
247  | 
Symset.make [  | 
| 
62529
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
248  | 
"\<A>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
249  | 
"\<B>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
250  | 
"\<C>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
251  | 
"\<D>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
252  | 
"\<E>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
253  | 
"\<F>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
254  | 
"\<G>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
255  | 
"\<H>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
256  | 
"\<I>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
257  | 
"\<J>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
258  | 
"\<K>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
259  | 
"\<L>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
260  | 
"\<M>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
261  | 
"\<N>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
262  | 
"\<O>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
263  | 
"\<P>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
264  | 
"\<Q>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
265  | 
"\<R>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
266  | 
"\<S>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
267  | 
"\<T>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
268  | 
"\<U>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
269  | 
"\<V>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
270  | 
"\<W>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
271  | 
"\<X>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
272  | 
"\<Y>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
273  | 
"\<Z>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
274  | 
"\<a>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
275  | 
"\<b>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
276  | 
"\<c>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
277  | 
"\<d>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
278  | 
"\<e>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
279  | 
"\<f>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
280  | 
"\<g>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
281  | 
"\<h>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
282  | 
"\<i>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
283  | 
"\<j>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
284  | 
"\<k>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
285  | 
"\<l>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
286  | 
"\<m>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
287  | 
"\<n>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
288  | 
"\<o>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
289  | 
"\<p>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
290  | 
"\<q>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
291  | 
"\<r>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
292  | 
"\<s>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
293  | 
"\<t>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
294  | 
"\<u>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
295  | 
"\<v>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
296  | 
"\<w>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
297  | 
"\<x>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
298  | 
"\<y>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
299  | 
"\<z>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
300  | 
"\<AA>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
301  | 
"\<BB>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
302  | 
"\<CC>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
303  | 
"\<DD>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
304  | 
"\<EE>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
305  | 
"\<FF>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
306  | 
"\<GG>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
307  | 
"\<HH>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
308  | 
"\<II>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
309  | 
"\<JJ>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
310  | 
"\<KK>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
311  | 
"\<LL>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
312  | 
"\<MM>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
313  | 
"\<NN>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
314  | 
"\<OO>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
315  | 
"\<PP>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
316  | 
"\<QQ>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
317  | 
"\<RR>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
318  | 
"\<SS>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
319  | 
"\<TT>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
320  | 
"\<UU>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
321  | 
"\<VV>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
322  | 
"\<WW>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
323  | 
"\<XX>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
324  | 
"\<YY>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
325  | 
"\<ZZ>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
326  | 
"\<aa>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
327  | 
"\<bb>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
328  | 
"\<cc>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
329  | 
"\<dd>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
330  | 
"\<ee>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
331  | 
"\<ff>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
332  | 
"\<gg>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
333  | 
"\<hh>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
334  | 
"\<ii>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
335  | 
"\<jj>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
336  | 
"\<kk>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
337  | 
"\<ll>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
338  | 
"\<mm>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
339  | 
"\<nn>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
340  | 
"\<oo>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
341  | 
"\<pp>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
342  | 
"\<qq>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
343  | 
"\<rr>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
344  | 
"\<ss>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
345  | 
"\<tt>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
346  | 
"\<uu>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
347  | 
"\<vv>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
348  | 
"\<ww>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
349  | 
"\<xx>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
350  | 
"\<yy>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
351  | 
"\<zz>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
352  | 
"\<alpha>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
353  | 
"\<beta>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
354  | 
"\<gamma>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
355  | 
"\<delta>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
356  | 
"\<epsilon>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
357  | 
"\<zeta>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
358  | 
"\<eta>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
359  | 
"\<theta>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
360  | 
"\<iota>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
361  | 
"\<kappa>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
362  | 
(*"\<lambda>", sic!*)  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
363  | 
"\<mu>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
364  | 
"\<nu>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
365  | 
"\<xi>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
366  | 
"\<pi>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
367  | 
"\<rho>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
368  | 
"\<sigma>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
369  | 
"\<tau>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
370  | 
"\<upsilon>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
371  | 
"\<phi>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
372  | 
"\<chi>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
373  | 
"\<psi>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
374  | 
"\<omega>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
375  | 
"\<Gamma>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
376  | 
"\<Delta>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
377  | 
"\<Theta>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
378  | 
"\<Lambda>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
379  | 
"\<Xi>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
380  | 
"\<Pi>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
381  | 
"\<Sigma>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
382  | 
"\<Upsilon>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
383  | 
"\<Phi>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
384  | 
"\<Psi>",  | 
| 
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
385  | 
"\<Omega>"  | 
| 50235 | 386  | 
];  | 
| 
14171
 
0cab06e3bbd0
Extended the notion of letter and digit, such that now one may use greek,
 
skalberg 
parents: 
13730 
diff
changeset
 | 
387  | 
in  | 
| 
50242
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
388  | 
|
| 
77723
 
b761c91c2447
performance tuning: prefer functor Set() over Table();
 
wenzelm 
parents: 
73198 
diff
changeset
 | 
389  | 
val is_letter_symbol = Symset.member letter_symbols;  | 
| 
50242
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
390  | 
|
| 14678 | 391  | 
end;  | 
| 14173 | 392  | 
|
| 
50242
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
393  | 
datatype kind = Letter | Digit | Quasi | Blank | Other;  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
394  | 
|
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
395  | 
fun kind s =  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
396  | 
if is_ascii_letter s then Letter  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
397  | 
else if is_ascii_digit s then Digit  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
398  | 
else if is_ascii_quasi s then Quasi  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
399  | 
else if is_ascii_blank s then Blank  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
400  | 
else if is_char s then Other  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
401  | 
else if is_letter_symbol s then Letter  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
402  | 
else Other;  | 
| 
 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 
wenzelm 
parents: 
50239 
diff
changeset
 | 
403  | 
|
| 14678 | 404  | 
fun is_letter s = kind s = Letter;  | 
405  | 
fun is_digit s = kind s = Digit;  | 
|
406  | 
fun is_quasi s = kind s = Quasi;  | 
|
407  | 
fun is_blank s = kind s = Blank;  | 
|
| 6272 | 408  | 
|
| 
73198
 
a9eaf8c3b728
tuned signature (e.g. see HTML.control_block in Isabelle/Scala);
 
wenzelm 
parents: 
73163 
diff
changeset
 | 
409  | 
val is_control_block = member (op =) ["\<^bsub>", "\<^esub>", "\<^bsup>", "\<^esup>"];  | 
| 
 
a9eaf8c3b728
tuned signature (e.g. see HTML.control_block in Isabelle/Scala);
 
wenzelm 
parents: 
73163 
diff
changeset
 | 
410  | 
val has_control_block = exists is_control_block;  | 
| 
47850
 
c638127b4653
avoid interference of markup for literal tokens, which may contain slightly odd \<^bsub> \<^esub> counted as pseudo-markup (especially relevant for HTML output, e.g. of thm power3_eq_cube);
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
411  | 
|
| 14678 | 412  | 
fun is_quasi_letter s = let val k = kind s in k = Letter orelse k = Quasi end;  | 
413  | 
fun is_letdig s = let val k = kind s in k = Letter orelse k = Digit orelse k = Quasi end;  | 
|
| 11010 | 414  | 
|
| 6272 | 415  | 
|
| 50237 | 416  | 
(* escape *)  | 
417  | 
||
418  | 
val esc = fn s =>  | 
|
419  | 
if is_char s then s  | 
|
420  | 
else if is_utf8 s then translate_string (fn c => "\\" ^ string_of_int (ord c)) s  | 
|
421  | 
else "\\" ^ s;  | 
|
422  | 
||
| 63936 | 423  | 
val escape = implode o map esc o Symbol.explode;  | 
| 50237 | 424  | 
|
425  | 
||
426  | 
||
427  | 
(** scanning through symbols **)  | 
|
428  | 
||
429  | 
(* scanner *)  | 
|
430  | 
||
431  | 
fun scanner msg scan syms =  | 
|
432  | 
let  | 
|
433  | 
fun message (ss, NONE) = (fn () => msg ^ ": " ^ quote (beginning 10 ss))  | 
|
434  | 
| message (ss, SOME msg') = (fn () => msg ^ ", " ^ msg' () ^ ": " ^ quote (beginning 10 ss));  | 
|
435  | 
val finite_scan = Scan.error (Scan.finite stopper (!! message scan));  | 
|
436  | 
in  | 
|
437  | 
(case finite_scan syms of  | 
|
438  | 
(result, []) => result  | 
|
439  | 
| (_, rest) => error (message (rest, NONE) ()))  | 
|
440  | 
end;  | 
|
441  | 
||
442  | 
||
| 50162 | 443  | 
(* space-separated words *)  | 
444  | 
||
445  | 
val scan_word =  | 
|
446  | 
Scan.many1 is_ascii_blank >> K NONE ||  | 
|
447  | 
Scan.many1 (fn s => not (is_ascii_blank s) andalso not_eof s) >> (SOME o implode);  | 
|
448  | 
||
449  | 
val split_words = scanner "Bad text" (Scan.repeat scan_word >> map_filter I);  | 
|
450  | 
||
| 63936 | 451  | 
val explode_words = split_words o Symbol.explode;  | 
| 50162 | 452  | 
|
453  | 
||
| 14678 | 454  | 
(* blanks *)  | 
455  | 
||
| 63936 | 456  | 
val trim_blanks = Symbol.explode #> trim is_blank #> implode;  | 
| 14678 | 457  | 
|
458  | 
||
459  | 
(* bump string -- treat as base 26 or base 1 numbers *)  | 
|
460  | 
||
| 
62529
 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 
wenzelm 
parents: 
62210 
diff
changeset
 | 
461  | 
fun symbolic_end (_ :: "\<^sub>" :: _) = true  | 
| 
53198
 
06b096cf89ca
ignore trailing primes, e.g. rename \<alpha>' to \<alpha>'' instead of \<alpha>'a;
 
wenzelm 
parents: 
53016 
diff
changeset
 | 
462  | 
  | symbolic_end ("'" :: ss) = symbolic_end ss
 | 
| 55033 | 463  | 
| symbolic_end (s :: _) = raw_symbolic s  | 
| 14908 | 464  | 
| symbolic_end [] = false;  | 
| 14678 | 465  | 
|
466  | 
fun bump_init str =  | 
|
| 63936 | 467  | 
if symbolic_end (rev (Symbol.explode str)) then str ^ "'"  | 
| 14678 | 468  | 
else str ^ "a";  | 
| 12904 | 469  | 
|
470  | 
fun bump_string str =  | 
|
471  | 
let  | 
|
472  | 
fun bump [] = ["a"]  | 
|
473  | 
      | bump ("z" :: ss) = "a" :: bump ss
 | 
|
474  | 
| bump (s :: ss) =  | 
|
| 
64275
 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 
wenzelm 
parents: 
63936 
diff
changeset
 | 
475  | 
if is_char s andalso Char.ord #"a" <= ord s andalso ord s < Char.ord #"z"  | 
| 12904 | 476  | 
then chr (ord s + 1) :: ss  | 
477  | 
else "a" :: s :: ss;  | 
|
| 14678 | 478  | 
|
| 67522 | 479  | 
val (ss, qs) = apfst rev (chop_suffix is_quasi (Symbol.explode str));  | 
| 14908 | 480  | 
val ss' = if symbolic_end ss then "'" :: ss else bump ss;  | 
| 14678 | 481  | 
in implode (rev ss' @ qs) end;  | 
482  | 
||
| 12904 | 483  | 
|
| 67512 | 484  | 
(* styles *)  | 
485  | 
||
486  | 
val sub = "\092<^sub>";  | 
|
487  | 
val sup = "\092<^sup>";  | 
|
488  | 
val bold = "\092<^bold>";  | 
|
489  | 
||
490  | 
fun make_style sym =  | 
|
491  | 
Symbol.explode #> maps (fn s => [sym, s]) #> implode;  | 
|
492  | 
||
493  | 
val make_sub = make_style sub;  | 
|
494  | 
val make_sup = make_style sup;  | 
|
495  | 
val make_bold = make_style bold;  | 
|
496  | 
||
497  | 
||
| 6272 | 498  | 
|
| 29324 | 499  | 
(** symbol output **)  | 
| 
14977
 
77d88064991a
added escape, export encode_raw, default mode now trivial, tuned;
 
wenzelm 
parents: 
14961 
diff
changeset
 | 
500  | 
|
| 29324 | 501  | 
(* length *)  | 
| 6272 | 502  | 
|
| 14678 | 503  | 
fun sym_len s =  | 
| 71649 | 504  | 
if String.isPrefix "\092<long" s orelse String.isPrefix "\092<Long" s then 2  | 
505  | 
else if is_printable s then 1  | 
|
506  | 
else 0;  | 
|
| 14678 | 507  | 
|
| 19473 | 508  | 
fun sym_length ss = fold (fn s => fn n => sym_len s + n) ss 0;  | 
| 14678 | 509  | 
|
| 63936 | 510  | 
fun output s = (s, sym_length (Symbol.explode s));  | 
| 29324 | 511  | 
|
512  | 
||
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
513  | 
(*final declarations of this structure!*)  | 
| 63936 | 514  | 
val explode = Symbol.explode;  | 
| 6272 | 515  | 
val length = sym_length;  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
516  | 
|
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
517  | 
end;  |