| author | wenzelm | 
| Sun, 10 Nov 2024 11:38:23 +0100 | |
| changeset 81415 | 1e3dfb722ee6 | 
| parent 81375 | ae5695161423 | 
| permissions | -rw-r--r-- | 
| 6118 | 1 | (* Title: Pure/General/symbol.ML | 
| 63936 | 2 | Author: Makarius | 
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 3 | |
| 69490 | 4 | Isabelle text symbols. | 
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 5 | *) | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 6 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 7 | signature SYMBOL = | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 8 | sig | 
| 40509 | 9 | type symbol = string | 
| 63936 | 10 | val explode: string -> symbol list | 
| 61865 | 11 | val spaces: int -> string | 
| 26524 | 12 | val STX: symbol | 
| 13 | val DEL: symbol | |
| 62210 | 14 | val open_: symbol | 
| 15 | val close: symbol | |
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 16 | val space: symbol | 
| 62804 | 17 | val is_space: symbol -> bool | 
| 61579 | 18 | val comment: symbol | 
| 67413 | 19 | val cancel: symbol | 
| 67428 | 20 | val latex: symbol | 
| 69891 
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
 wenzelm parents: 
69592diff
changeset | 21 | val marker: symbol | 
| 14678 | 22 | val is_char: symbol -> bool | 
| 37533 
d775bd70f571
explicit treatment of UTF8 character sequences as Isabelle symbols;
 wenzelm parents: 
34095diff
changeset | 23 | val is_utf8: symbol -> bool | 
| 14678 | 24 | val is_symbolic: symbol -> bool | 
| 55033 | 25 | val is_symbolic_char: symbol -> bool | 
| 14678 | 26 | val is_printable: symbol -> bool | 
| 69592 
a80d8ec6c998
support for isabelle update -u control_cartouches;
 wenzelm parents: 
69490diff
changeset | 27 | val control_prefix: string | 
| 
a80d8ec6c998
support for isabelle update -u control_cartouches;
 wenzelm parents: 
69490diff
changeset | 28 | val control_suffix: string | 
| 61470 | 29 | val is_control: symbol -> bool | 
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 30 | val eof: symbol | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 31 | val is_eof: symbol -> bool | 
| 27766 | 32 | val not_eof: symbol -> bool | 
| 27732 | 33 | val stopper: symbol Scan.stopper | 
| 40523 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 wenzelm parents: 
40509diff
changeset | 34 | val is_malformed: symbol -> bool | 
| 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 wenzelm parents: 
40509diff
changeset | 35 | val malformed_msg: symbol -> string | 
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 36 | val is_ascii: symbol -> bool | 
| 14678 | 37 | val is_ascii_letter: symbol -> bool | 
| 38 | val is_ascii_digit: symbol -> bool | |
| 24580 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 wenzelm parents: 
23784diff
changeset | 39 | val is_ascii_hex: symbol -> bool | 
| 14678 | 40 | val is_ascii_quasi: symbol -> bool | 
| 41 | val is_ascii_blank: symbol -> bool | |
| 69452 | 42 | val is_ascii_line_terminator: symbol -> bool | 
| 34095 
c2f176a38448
robust representation of low ASCII control characters within XML/YXML text;
 wenzelm parents: 
33955diff
changeset | 43 | val is_ascii_control: symbol -> bool | 
| 50236 | 44 | val is_ascii_letdig: symbol -> bool | 
| 20200 | 45 | val is_ascii_lower: symbol -> bool | 
| 46 | val is_ascii_upper: symbol -> bool | |
| 47 | val to_ascii_lower: symbol -> symbol | |
| 48 | val to_ascii_upper: symbol -> symbol | |
| 50238 
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
 wenzelm parents: 
50237diff
changeset | 49 | val is_ascii_identifier: string -> bool | 
| 50236 | 50 | val scan_ascii_id: string list -> string * string list | 
| 40523 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 wenzelm parents: 
40509diff
changeset | 51 | datatype sym = | 
| 63936 | 52 | Char of string | UTF8 of string | Sym of string | Control of string | Malformed of string | EOF | 
| 14873 | 53 | val decode: symbol -> sym | 
| 67373 | 54 | val encode: sym -> symbol | 
| 14678 | 55 | datatype kind = Letter | Digit | Quasi | Blank | Other | 
| 56 | val kind: symbol -> kind | |
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 57 | val is_letter: symbol -> bool | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 58 | val is_digit: symbol -> bool | 
| 12904 | 59 | val is_quasi: symbol -> bool | 
| 14678 | 60 | val is_blank: symbol -> bool | 
| 73198 
a9eaf8c3b728
tuned signature (e.g. see HTML.control_block in Isabelle/Scala);
 wenzelm parents: 
73163diff
changeset | 61 | val is_control_block: symbol -> bool | 
| 
a9eaf8c3b728
tuned signature (e.g. see HTML.control_block in Isabelle/Scala);
 wenzelm parents: 
73163diff
changeset | 62 | val has_control_block: symbol list -> bool | 
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 63 | val is_quasi_letter: symbol -> bool | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 64 | val is_letdig: symbol -> bool | 
| 14728 | 65 | val beginning: int -> symbol list -> string | 
| 50237 | 66 | val esc: symbol -> string | 
| 67 | val escape: string -> string | |
| 68 | val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a | |
| 50162 | 69 | val split_words: symbol list -> string list | 
| 70 | val explode_words: string -> string list | |
| 61435 | 71 | val trim_blanks: string -> string | 
| 14678 | 72 | val bump_init: string -> string | 
| 12904 | 73 | val bump_string: string -> string | 
| 67512 | 74 | val sub: symbol | 
| 75 | val sup: symbol | |
| 76 | val bold: symbol | |
| 77 | val make_sub: string -> string | |
| 78 | val make_sup: string -> string | |
| 79 | val make_bold: string -> string | |
| 14678 | 80 | val length: symbol list -> int | 
| 40131 
7cbebd636e79
explicitly qualify type Output.output, which is a slightly odd internal feature;
 wenzelm parents: 
37728diff
changeset | 81 | val output: string -> Output.output * int | 
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 82 | end; | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 83 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 84 | structure Symbol: SYMBOL = | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 85 | struct | 
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 86 | |
| 14678 | 87 | (** type symbol **) | 
| 6272 | 88 | |
| 14678 | 89 | (*Symbols, which are considered the smallest entities of any Isabelle | 
| 6272 | 90 | string, may be of the following form: | 
| 14678 | 91 | |
| 63936 | 92 | (1) ASCII: a | 
| 93 | (2) UTF-8: ä | |
| 17823 | 94 | (2) regular symbols: \<ident> | 
| 14834 | 95 | (3) control symbols: \<^ident> | 
| 6272 | 96 | |
| 14678 | 97 | Output is subject to the print_mode variable (default: verbatim), | 
| 98 | actual interpretation in display is up to front-end tools. | |
| 6272 | 99 | *) | 
| 100 | ||
| 101 | type symbol = string; | |
| 102 | ||
| 26524 | 103 | val STX = chr 2; | 
| 104 | val DEL = chr 127; | |
| 105 | ||
| 62529 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 106 | val open_ = "\<open>"; | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 107 | val close = "\<close>"; | 
| 62210 | 108 | |
| 26524 | 109 | val space = chr 32; | 
| 62804 | 110 | fun is_space s = s = space; | 
| 17063 | 111 | |
| 61865 | 112 | local | 
| 77846 
5ba68d3bd741
more operations, following Isabelle/ML conventions;
 wenzelm parents: 
77723diff
changeset | 113 | val spaces0 = Vector.tabulate (65, fn i => replicate_string i space); | 
| 61865 | 114 | in | 
| 115 | fun spaces n = | |
| 77846 
5ba68d3bd741
more operations, following Isabelle/ML conventions;
 wenzelm parents: 
77723diff
changeset | 116 | if n < 64 then Vector.nth spaces0 n | 
| 80797 | 117 | else implode (Vector.nth spaces0 (n mod 64) :: replicate (n div 64) (Vector.nth spaces0 64)); | 
| 61865 | 118 | end; | 
| 119 | ||
| 62529 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 120 | val comment = "\<comment>"; | 
| 67413 | 121 | val cancel = "\<^cancel>"; | 
| 67428 | 122 | val latex = "\<^latex>"; | 
| 69891 
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
 wenzelm parents: 
69592diff
changeset | 123 | val marker = "\<^marker>"; | 
| 61579 | 124 | |
| 14678 | 125 | fun is_char s = size s = 1; | 
| 126 | ||
| 77847 
3bb6468d202e
more operations, following Isabelle/ML conventions;
 wenzelm parents: 
77846diff
changeset | 127 | fun is_utf8 s = size s > 0 andalso String.forall (fn c => Char.ord c >= 128) s; | 
| 37533 
d775bd70f571
explicit treatment of UTF8 character sequences as Isabelle symbols;
 wenzelm parents: 
34095diff
changeset | 128 | |
| 55033 | 129 | fun raw_symbolic s = | 
| 62877 
741560a5283b
avoid malformed Isabelle symbols during bootstrap;
 wenzelm parents: 
62804diff
changeset | 130 | String.isPrefix "\092<" s andalso String.isSuffix ">" s andalso not (String.isPrefix "\092<^" s); | 
| 55033 | 131 | |
| 14678 | 132 | fun is_symbolic s = | 
| 62210 | 133 | s <> open_ andalso s <> close andalso raw_symbolic s; | 
| 55033 | 134 | |
| 135 | val is_symbolic_char = member (op =) (raw_explode "!#$%&*+-/<=>?@^_|~"); | |
| 14678 | 136 | |
| 137 | fun is_printable s = | |
| 64275 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
63936diff
changeset | 138 | if is_char s then 32 <= ord s andalso ord s <= Char.ord #"~" | 
| 55033 | 139 | else is_utf8 s orelse raw_symbolic s; | 
| 26632 | 140 | |
| 69592 
a80d8ec6c998
support for isabelle update -u control_cartouches;
 wenzelm parents: 
69490diff
changeset | 141 | val control_prefix = "\092<^"; | 
| 
a80d8ec6c998
support for isabelle update -u control_cartouches;
 wenzelm parents: 
69490diff
changeset | 142 | val control_suffix = ">"; | 
| 
a80d8ec6c998
support for isabelle update -u control_cartouches;
 wenzelm parents: 
69490diff
changeset | 143 | |
| 61470 | 144 | fun is_control s = | 
| 69592 
a80d8ec6c998
support for isabelle update -u control_cartouches;
 wenzelm parents: 
69490diff
changeset | 145 | String.isPrefix control_prefix s andalso String.isSuffix control_suffix s; | 
| 61470 | 146 | |
| 6272 | 147 | |
| 14678 | 148 | (* input source control *) | 
| 6272 | 149 | |
| 14678 | 150 | val eof = ""; | 
| 6272 | 151 | fun is_eof s = s = eof; | 
| 152 | fun not_eof s = s <> eof; | |
| 27732 | 153 | val stopper = Scan.stopper (K eof) is_eof; | 
| 6272 | 154 | |
| 48774 | 155 | fun is_malformed s = | 
| 62877 
741560a5283b
avoid malformed Isabelle symbols during bootstrap;
 wenzelm parents: 
62804diff
changeset | 156 | String.isPrefix "\092<" s andalso not (String.isSuffix ">" s) | 
| 
741560a5283b
avoid malformed Isabelle symbols during bootstrap;
 wenzelm parents: 
62804diff
changeset | 157 | orelse s = "\092<>" orelse s = "\092<^>"; | 
| 48774 | 158 | |
| 40523 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 wenzelm parents: 
40509diff
changeset | 159 | fun malformed_msg s = "Malformed symbolic character: " ^ quote s; | 
| 14678 | 160 | |
| 161 | ||
| 43418 | 162 | (* ASCII symbols *) | 
| 14678 | 163 | |
| 164 | fun is_ascii s = is_char s andalso ord s < 128; | |
| 165 | ||
| 166 | fun is_ascii_letter s = | |
| 167 | is_char s andalso | |
| 64275 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
63936diff
changeset | 168 | (Char.ord #"A" <= ord s andalso ord s <= Char.ord #"Z" orelse | 
| 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
63936diff
changeset | 169 | Char.ord #"a" <= ord s andalso ord s <= Char.ord #"z"); | 
| 14678 | 170 | |
| 171 | fun is_ascii_digit s = | |
| 64275 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
63936diff
changeset | 172 | is_char s andalso Char.ord #"0" <= ord s andalso ord s <= Char.ord #"9"; | 
| 14678 | 173 | |
| 24580 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 wenzelm parents: 
23784diff
changeset | 174 | fun is_ascii_hex s = | 
| 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 wenzelm parents: 
23784diff
changeset | 175 | is_char s andalso | 
| 64275 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
63936diff
changeset | 176 | (Char.ord #"0" <= ord s andalso ord s <= Char.ord #"9" orelse | 
| 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
63936diff
changeset | 177 | Char.ord #"A" <= ord s andalso ord s <= Char.ord #"F" orelse | 
| 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
63936diff
changeset | 178 | Char.ord #"a" <= ord s andalso ord s <= Char.ord #"f"); | 
| 24580 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 wenzelm parents: 
23784diff
changeset | 179 | |
| 14678 | 180 | fun is_ascii_quasi "_" = true | 
| 181 | | is_ascii_quasi "'" = true | |
| 182 | | is_ascii_quasi _ = false; | |
| 183 | ||
| 184 | val is_ascii_blank = | |
| 43845 
d89353d17f54
added File.fold_pages for streaming of large files;
 wenzelm parents: 
43777diff
changeset | 185 | fn " " => true | "\t" => true | "\n" => true | "\^K" => true | "\f" => true | "\^M" => true | 
| 14678 | 186 | | _ => false; | 
| 187 | ||
| 69452 | 188 | val is_ascii_line_terminator = | 
| 189 | fn "\r" => true | "\n" => true | _ => false; | |
| 190 | ||
| 34095 
c2f176a38448
robust representation of low ASCII control characters within XML/YXML text;
 wenzelm parents: 
33955diff
changeset | 191 | fun is_ascii_control s = is_char s andalso ord s < 32 andalso not (is_ascii_blank s); | 
| 
c2f176a38448
robust representation of low ASCII control characters within XML/YXML text;
 wenzelm parents: 
33955diff
changeset | 192 | |
| 50236 | 193 | fun is_ascii_letdig s = is_ascii_letter s orelse is_ascii_digit s orelse is_ascii_quasi s; | 
| 194 | ||
| 64275 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
63936diff
changeset | 195 | fun is_ascii_lower s = is_char s andalso (Char.ord #"a" <= ord s andalso ord s <= Char.ord #"z"); | 
| 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
63936diff
changeset | 196 | fun is_ascii_upper s = is_char s andalso (Char.ord #"A" <= ord s andalso ord s <= Char.ord #"Z"); | 
| 20200 | 197 | |
| 64275 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
63936diff
changeset | 198 | fun to_ascii_lower s = if is_ascii_upper s then chr (ord s + Char.ord #"a" - Char.ord #"A") else s; | 
| 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
63936diff
changeset | 199 | fun to_ascii_upper s = if is_ascii_lower s then chr (ord s + Char.ord #"A" - Char.ord #"a") else s; | 
| 20200 | 200 | |
| 50238 
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
 wenzelm parents: 
50237diff
changeset | 201 | fun is_ascii_identifier s = | 
| 
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
 wenzelm parents: 
50237diff
changeset | 202 | size s > 0 andalso is_ascii_letter (String.substring (s, 0, 1)) andalso | 
| 
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
 wenzelm parents: 
50237diff
changeset | 203 | forall_string is_ascii_letdig s; | 
| 
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
 wenzelm parents: 
50237diff
changeset | 204 | |
| 50236 | 205 | val scan_ascii_id = Scan.one is_ascii_letter ^^ (Scan.many is_ascii_letdig >> implode); | 
| 206 | ||
| 14678 | 207 | |
| 14956 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 wenzelm parents: 
14908diff
changeset | 208 | (* diagnostics *) | 
| 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 wenzelm parents: 
14908diff
changeset | 209 | |
| 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 wenzelm parents: 
14908diff
changeset | 210 | fun beginning n cs = | 
| 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 wenzelm parents: 
14908diff
changeset | 211 | let | 
| 67522 | 212 | val drop_blanks = drop_suffix is_ascii_blank; | 
| 14956 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 wenzelm parents: 
14908diff
changeset | 213 | val all_cs = drop_blanks cs; | 
| 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 wenzelm parents: 
14908diff
changeset | 214 | val dots = if length all_cs > n then " ..." else ""; | 
| 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 wenzelm parents: 
14908diff
changeset | 215 | in | 
| 33955 | 216 | (drop_blanks (take n all_cs) | 
| 14956 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 wenzelm parents: 
14908diff
changeset | 217 | |> map (fn c => if is_ascii_blank c then space else c) | 
| 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 wenzelm parents: 
14908diff
changeset | 218 | |> implode) ^ dots | 
| 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 wenzelm parents: 
14908diff
changeset | 219 | end; | 
| 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 wenzelm parents: 
14908diff
changeset | 220 | |
| 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 wenzelm parents: 
14908diff
changeset | 221 | |
| 14873 | 222 | (* symbol variants *) | 
| 223 | ||
| 37533 
d775bd70f571
explicit treatment of UTF8 character sequences as Isabelle symbols;
 wenzelm parents: 
34095diff
changeset | 224 | datatype sym = | 
| 63936 | 225 | Char of string | UTF8 of string | Sym of string | Control of string | Malformed of string | EOF; | 
| 14873 | 226 | |
| 227 | fun decode s = | |
| 43485 | 228 | if s = "" then EOF | 
| 229 | else if is_char s then Char s | |
| 37533 
d775bd70f571
explicit treatment of UTF8 character sequences as Isabelle symbols;
 wenzelm parents: 
34095diff
changeset | 230 | else if is_utf8 s then UTF8 s | 
| 40523 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 wenzelm parents: 
40509diff
changeset | 231 | else if is_malformed s then Malformed s | 
| 61475 | 232 | else if is_control s then Control (String.substring (s, 3, size s - 4)) | 
| 40523 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 wenzelm parents: 
40509diff
changeset | 233 | else Sym (String.substring (s, 2, size s - 3)); | 
| 14873 | 234 | |
| 67373 | 235 | fun encode (Char s) = s | 
| 236 | | encode (UTF8 s) = s | |
| 237 | | encode (Sym s) = "\092<" ^ s ^ ">" | |
| 238 | | encode (Control s) = "\092<^" ^ s ^ ">" | |
| 239 | | encode (Malformed s) = s | |
| 240 | | encode EOF = ""; | |
| 241 | ||
| 14873 | 242 | |
| 14678 | 243 | (* standard symbol kinds *) | 
| 244 | ||
| 14171 
0cab06e3bbd0
Extended the notion of letter and digit, such that now one may use greek,
 skalberg parents: 
13730diff
changeset | 245 | local | 
| 50235 | 246 | val letter_symbols = | 
| 77723 
b761c91c2447
performance tuning: prefer functor Set() over Table();
 wenzelm parents: 
73198diff
changeset | 247 | Symset.make [ | 
| 62529 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 248 | "\<A>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 249 | "\<B>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 250 | "\<C>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 251 | "\<D>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 252 | "\<E>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 253 | "\<F>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 254 | "\<G>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 255 | "\<H>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 256 | "\<I>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 257 | "\<J>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 258 | "\<K>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 259 | "\<L>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 260 | "\<M>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 261 | "\<N>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 262 | "\<O>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 263 | "\<P>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 264 | "\<Q>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 265 | "\<R>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 266 | "\<S>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 267 | "\<T>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 268 | "\<U>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 269 | "\<V>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 270 | "\<W>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 271 | "\<X>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 272 | "\<Y>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 273 | "\<Z>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 274 | "\<a>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 275 | "\<b>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 276 | "\<c>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 277 | "\<d>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 278 | "\<e>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 279 | "\<f>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 280 | "\<g>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 281 | "\<h>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 282 | "\<i>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 283 | "\<j>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 284 | "\<k>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 285 | "\<l>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 286 | "\<m>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 287 | "\<n>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 288 | "\<o>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 289 | "\<p>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 290 | "\<q>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 291 | "\<r>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 292 | "\<s>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 293 | "\<t>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 294 | "\<u>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 295 | "\<v>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 296 | "\<w>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 297 | "\<x>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 298 | "\<y>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 299 | "\<z>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 300 | "\<AA>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 301 | "\<BB>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 302 | "\<CC>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 303 | "\<DD>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 304 | "\<EE>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 305 | "\<FF>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 306 | "\<GG>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 307 | "\<HH>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 308 | "\<II>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 309 | "\<JJ>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 310 | "\<KK>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 311 | "\<LL>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 312 | "\<MM>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 313 | "\<NN>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 314 | "\<OO>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 315 | "\<PP>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 316 | "\<QQ>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 317 | "\<RR>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 318 | "\<SS>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 319 | "\<TT>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 320 | "\<UU>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 321 | "\<VV>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 322 | "\<WW>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 323 | "\<XX>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 324 | "\<YY>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 325 | "\<ZZ>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 326 | "\<aa>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 327 | "\<bb>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 328 | "\<cc>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 329 | "\<dd>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 330 | "\<ee>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 331 | "\<ff>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 332 | "\<gg>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 333 | "\<hh>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 334 | "\<ii>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 335 | "\<jj>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 336 | "\<kk>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 337 | "\<ll>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 338 | "\<mm>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 339 | "\<nn>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 340 | "\<oo>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 341 | "\<pp>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 342 | "\<qq>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 343 | "\<rr>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 344 | "\<ss>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 345 | "\<tt>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 346 | "\<uu>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 347 | "\<vv>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 348 | "\<ww>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 349 | "\<xx>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 350 | "\<yy>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 351 | "\<zz>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 352 | "\<alpha>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 353 | "\<beta>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 354 | "\<gamma>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 355 | "\<delta>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 356 | "\<epsilon>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 357 | "\<zeta>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 358 | "\<eta>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 359 | "\<theta>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 360 | "\<iota>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 361 | "\<kappa>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 362 | (*"\<lambda>", sic!*) | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 363 | "\<mu>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 364 | "\<nu>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 365 | "\<xi>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 366 | "\<pi>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 367 | "\<rho>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 368 | "\<sigma>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 369 | "\<tau>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 370 | "\<upsilon>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 371 | "\<phi>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 372 | "\<chi>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 373 | "\<psi>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 374 | "\<omega>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 375 | "\<Gamma>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 376 | "\<Delta>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 377 | "\<Theta>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 378 | "\<Lambda>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 379 | "\<Xi>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 380 | "\<Pi>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 381 | "\<Sigma>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 382 | "\<Upsilon>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 383 | "\<Phi>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 384 | "\<Psi>", | 
| 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 385 | "\<Omega>" | 
| 50235 | 386 | ]; | 
| 14171 
0cab06e3bbd0
Extended the notion of letter and digit, such that now one may use greek,
 skalberg parents: 
13730diff
changeset | 387 | in | 
| 50242 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 388 | |
| 77723 
b761c91c2447
performance tuning: prefer functor Set() over Table();
 wenzelm parents: 
73198diff
changeset | 389 | val is_letter_symbol = Symset.member letter_symbols; | 
| 50242 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 390 | |
| 14678 | 391 | end; | 
| 14173 | 392 | |
| 50242 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 393 | datatype kind = Letter | Digit | Quasi | Blank | Other; | 
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 394 | |
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 395 | fun kind s = | 
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 396 | if is_ascii_letter s then Letter | 
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 397 | else if is_ascii_digit s then Digit | 
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 398 | else if is_ascii_quasi s then Quasi | 
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 399 | else if is_ascii_blank s then Blank | 
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 400 | else if is_char s then Other | 
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 401 | else if is_letter_symbol s then Letter | 
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 402 | else Other; | 
| 
56b9c792a98b
support for sub-structured identifier syntax (inactive);
 wenzelm parents: 
50239diff
changeset | 403 | |
| 14678 | 404 | fun is_letter s = kind s = Letter; | 
| 405 | fun is_digit s = kind s = Digit; | |
| 406 | fun is_quasi s = kind s = Quasi; | |
| 407 | fun is_blank s = kind s = Blank; | |
| 6272 | 408 | |
| 73198 
a9eaf8c3b728
tuned signature (e.g. see HTML.control_block in Isabelle/Scala);
 wenzelm parents: 
73163diff
changeset | 409 | val is_control_block = member (op =) ["\<^bsub>", "\<^esub>", "\<^bsup>", "\<^esup>"]; | 
| 
a9eaf8c3b728
tuned signature (e.g. see HTML.control_block in Isabelle/Scala);
 wenzelm parents: 
73163diff
changeset | 410 | val has_control_block = exists is_control_block; | 
| 47850 
c638127b4653
avoid interference of markup for literal tokens, which may contain slightly odd \<^bsub> \<^esub> counted as pseudo-markup (especially relevant for HTML output, e.g. of thm power3_eq_cube);
 wenzelm parents: 
43947diff
changeset | 411 | |
| 14678 | 412 | fun is_quasi_letter s = let val k = kind s in k = Letter orelse k = Quasi end; | 
| 413 | fun is_letdig s = let val k = kind s in k = Letter orelse k = Digit orelse k = Quasi end; | |
| 11010 | 414 | |
| 6272 | 415 | |
| 50237 | 416 | (* escape *) | 
| 417 | ||
| 418 | val esc = fn s => | |
| 419 | if is_char s then s | |
| 420 | else if is_utf8 s then translate_string (fn c => "\\" ^ string_of_int (ord c)) s | |
| 421 | else "\\" ^ s; | |
| 422 | ||
| 63936 | 423 | val escape = implode o map esc o Symbol.explode; | 
| 50237 | 424 | |
| 425 | ||
| 426 | ||
| 427 | (** scanning through symbols **) | |
| 428 | ||
| 429 | (* scanner *) | |
| 430 | ||
| 431 | fun scanner msg scan syms = | |
| 432 | let | |
| 433 | fun message (ss, NONE) = (fn () => msg ^ ": " ^ quote (beginning 10 ss)) | |
| 434 | | message (ss, SOME msg') = (fn () => msg ^ ", " ^ msg' () ^ ": " ^ quote (beginning 10 ss)); | |
| 435 | val finite_scan = Scan.error (Scan.finite stopper (!! message scan)); | |
| 436 | in | |
| 437 | (case finite_scan syms of | |
| 438 | (result, []) => result | |
| 439 | | (_, rest) => error (message (rest, NONE) ())) | |
| 440 | end; | |
| 441 | ||
| 442 | ||
| 50162 | 443 | (* space-separated words *) | 
| 444 | ||
| 445 | val scan_word = | |
| 446 | Scan.many1 is_ascii_blank >> K NONE || | |
| 447 | Scan.many1 (fn s => not (is_ascii_blank s) andalso not_eof s) >> (SOME o implode); | |
| 448 | ||
| 449 | val split_words = scanner "Bad text" (Scan.repeat scan_word >> map_filter I); | |
| 450 | ||
| 63936 | 451 | val explode_words = split_words o Symbol.explode; | 
| 50162 | 452 | |
| 453 | ||
| 14678 | 454 | (* blanks *) | 
| 455 | ||
| 63936 | 456 | val trim_blanks = Symbol.explode #> trim is_blank #> implode; | 
| 14678 | 457 | |
| 458 | ||
| 459 | (* bump string -- treat as base 26 or base 1 numbers *) | |
| 460 | ||
| 62529 
8b7bdfc09f3b
clarified treatment of fragments of Isabelle symbols during bootstrap;
 wenzelm parents: 
62210diff
changeset | 461 | fun symbolic_end (_ :: "\<^sub>" :: _) = true | 
| 53198 
06b096cf89ca
ignore trailing primes, e.g. rename \<alpha>' to \<alpha>'' instead of \<alpha>'a;
 wenzelm parents: 
53016diff
changeset | 462 |   | symbolic_end ("'" :: ss) = symbolic_end ss
 | 
| 55033 | 463 | | symbolic_end (s :: _) = raw_symbolic s | 
| 14908 | 464 | | symbolic_end [] = false; | 
| 14678 | 465 | |
| 466 | fun bump_init str = | |
| 63936 | 467 | if symbolic_end (rev (Symbol.explode str)) then str ^ "'" | 
| 14678 | 468 | else str ^ "a"; | 
| 12904 | 469 | |
| 470 | fun bump_string str = | |
| 471 | let | |
| 472 | fun bump [] = ["a"] | |
| 473 |       | bump ("z" :: ss) = "a" :: bump ss
 | |
| 474 | | bump (s :: ss) = | |
| 64275 
ac2abc987cf9
accomodate Poly/ML repository version, which treats singleton strings as boxed;
 wenzelm parents: 
63936diff
changeset | 475 | if is_char s andalso Char.ord #"a" <= ord s andalso ord s < Char.ord #"z" | 
| 12904 | 476 | then chr (ord s + 1) :: ss | 
| 477 | else "a" :: s :: ss; | |
| 14678 | 478 | |
| 67522 | 479 | val (ss, qs) = apfst rev (chop_suffix is_quasi (Symbol.explode str)); | 
| 14908 | 480 | val ss' = if symbolic_end ss then "'" :: ss else bump ss; | 
| 14678 | 481 | in implode (rev ss' @ qs) end; | 
| 482 | ||
| 12904 | 483 | |
| 67512 | 484 | (* styles *) | 
| 485 | ||
| 486 | val sub = "\092<^sub>"; | |
| 487 | val sup = "\092<^sup>"; | |
| 488 | val bold = "\092<^bold>"; | |
| 489 | ||
| 490 | fun make_style sym = | |
| 491 | Symbol.explode #> maps (fn s => [sym, s]) #> implode; | |
| 492 | ||
| 493 | val make_sub = make_style sub; | |
| 494 | val make_sup = make_style sup; | |
| 495 | val make_bold = make_style bold; | |
| 496 | ||
| 497 | ||
| 6272 | 498 | |
| 29324 | 499 | (** symbol output **) | 
| 14977 
77d88064991a
added escape, export encode_raw, default mode now trivial, tuned;
 wenzelm parents: 
14961diff
changeset | 500 | |
| 81375 
ae5695161423
more robust and uniform metric, still with special treatment motivated by jEdit (see also 0cdfce0bf956);
 wenzelm parents: 
81264diff
changeset | 501 | (* metric *) | 
| 6272 | 502 | |
| 81375 
ae5695161423
more robust and uniform metric, still with special treatment motivated by jEdit (see also 0cdfce0bf956);
 wenzelm parents: 
81264diff
changeset | 503 | fun metric1 s = | 
| 81264 | 504 | if String.isPrefix "\092<longlonglong" s then 4 | 
| 505 | else if String.isPrefix "\092<longlong" s then 3 | |
| 506 | else if String.isPrefix "\092<long" s orelse String.isPrefix "\092<Long" s then 2 | |
| 81375 
ae5695161423
more robust and uniform metric, still with special treatment motivated by jEdit (see also 0cdfce0bf956);
 wenzelm parents: 
81264diff
changeset | 507 | else if is_blank s orelse is_printable s then 1 | 
| 71649 | 508 | else 0; | 
| 14678 | 509 | |
| 81375 
ae5695161423
more robust and uniform metric, still with special treatment motivated by jEdit (see also 0cdfce0bf956);
 wenzelm parents: 
81264diff
changeset | 510 | fun metric ss = fold (fn s => fn n => metric1 s + n) ss 0; | 
| 14678 | 511 | |
| 81375 
ae5695161423
more robust and uniform metric, still with special treatment motivated by jEdit (see also 0cdfce0bf956);
 wenzelm parents: 
81264diff
changeset | 512 | fun output s = (s, metric (Symbol.explode s)); | 
| 29324 | 513 | |
| 514 | ||
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 515 | (*final declarations of this structure!*) | 
| 63936 | 516 | val explode = Symbol.explode; | 
| 81375 
ae5695161423
more robust and uniform metric, still with special treatment motivated by jEdit (see also 0cdfce0bf956);
 wenzelm parents: 
81264diff
changeset | 517 | val length = metric; | 
| 6116 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 518 | |
| 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 wenzelm parents: diff
changeset | 519 | end; |