| author | desharna | 
| Sat, 15 Mar 2025 20:17:03 +0100 | |
| changeset 82282 | 919eb0e67930 | 
| parent 82122 | f67fb2339eeb | 
| permissions | -rw-r--r-- | 
| 27901 | 1 | /* Title: Pure/General/symbol.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 69490 | 4 | Isabelle text symbols. | 
| 27901 | 5 | */ | 
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 55618 | 9 | |
| 36011 
3ff725ac13a4
adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
 wenzelm parents: 
34316diff
changeset | 10 | import scala.collection.mutable | 
| 31522 | 11 | import scala.util.matching.Regex | 
| 48922 | 12 | import scala.annotation.tailrec | 
| 27901 | 13 | |
| 14 | ||
| 75393 | 15 | object Symbol {
 | 
| 43696 | 16 | type Symbol = String | 
| 17 | ||
| 55884 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 18 | // counting Isabelle symbols, starting from 1 | 
| 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 19 | type Offset = Text.Offset | 
| 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 20 | type Range = Text.Range | 
| 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 21 | |
| 43696 | 22 | |
| 61865 | 23 | /* spaces */ | 
| 24 | ||
| 71649 | 25 | val space_char = ' ' | 
| 61865 | 26 | val space = " " | 
| 27 | ||
| 81658 | 28 | private val static_spaces = space * 4000 | 
| 81429 
0ccfc82fff57
minor performance tuning: avoid duplication of Symbol.spaces (e.g. from Pretty.formatted);
 wenzelm parents: 
81375diff
changeset | 29 | |
| 
0ccfc82fff57
minor performance tuning: avoid duplication of Symbol.spaces (e.g. from Pretty.formatted);
 wenzelm parents: 
81375diff
changeset | 30 |   def is_static_spaces(s: String): Boolean = {
 | 
| 
0ccfc82fff57
minor performance tuning: avoid duplication of Symbol.spaces (e.g. from Pretty.formatted);
 wenzelm parents: 
81375diff
changeset | 31 | val n = s.length | 
| 81658 | 32 | n == 0 || n <= static_spaces.length && s(0) == space_char && s.forall(_ == space_char) | 
| 81429 
0ccfc82fff57
minor performance tuning: avoid duplication of Symbol.spaces (e.g. from Pretty.formatted);
 wenzelm parents: 
81375diff
changeset | 33 | } | 
| 61865 | 34 | |
| 75393 | 35 |   def spaces(n: Int): String = {
 | 
| 73120 
c3589f2dff31
more informative errors: simplify diagnosis of spurious failures reported by users;
 wenzelm parents: 
73030diff
changeset | 36 | require(n >= 0, "negative spaces") | 
| 81429 
0ccfc82fff57
minor performance tuning: avoid duplication of Symbol.spaces (e.g. from Pretty.formatted);
 wenzelm parents: 
81375diff
changeset | 37 | if (n == 0) "" | 
| 81658 | 38 | else if (n < static_spaces.length) static_spaces.substring(0, n) | 
| 61865 | 39 | else space * n | 
| 40 | } | |
| 41 | ||
| 42 | ||
| 75238 
e74d162ddf9f
clarified char symbols: cover most European languages;
 wenzelm parents: 
75237diff
changeset | 43 | /* char symbols */ | 
| 
e74d162ddf9f
clarified char symbols: cover most European languages;
 wenzelm parents: 
75237diff
changeset | 44 | |
| 
e74d162ddf9f
clarified char symbols: cover most European languages;
 wenzelm parents: 
75237diff
changeset | 45 | private val char_symbols: Array[Symbol] = | 
| 
e74d162ddf9f
clarified char symbols: cover most European languages;
 wenzelm parents: 
75237diff
changeset | 46 | (0 until 0x500).iterator.map(i => new String(Array(i.toChar))).toArray | 
| 
e74d162ddf9f
clarified char symbols: cover most European languages;
 wenzelm parents: 
75237diff
changeset | 47 | |
| 
e74d162ddf9f
clarified char symbols: cover most European languages;
 wenzelm parents: 
75237diff
changeset | 48 | def char_symbol(c: Char): String = | 
| 
e74d162ddf9f
clarified char symbols: cover most European languages;
 wenzelm parents: 
75237diff
changeset | 49 | if (c < char_symbols.length) char_symbols(c) | 
| 
e74d162ddf9f
clarified char symbols: cover most European languages;
 wenzelm parents: 
75237diff
changeset | 50 | else c.toString | 
| 
e74d162ddf9f
clarified char symbols: cover most European languages;
 wenzelm parents: 
75237diff
changeset | 51 | |
| 
e74d162ddf9f
clarified char symbols: cover most European languages;
 wenzelm parents: 
75237diff
changeset | 52 | |
| 43418 | 53 | /* ASCII characters */ | 
| 54 | ||
| 71649 | 55 | def is_ascii_printable(c: Char): Boolean = space_char <= c && c <= '~' | 
| 56 | ||
| 43418 | 57 | def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' | 
| 55497 | 58 | |
| 43418 | 59 | def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9' | 
| 80548 | 60 | def is_ascii_digit(b: Byte): Boolean = is_ascii_digit(b.toChar) | 
| 55497 | 61 | |
| 62 | def is_ascii_hex(c: Char): Boolean = | |
| 63 | '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' | |
| 64 | ||
| 43418 | 65 | def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\'' | 
| 66 | ||
| 55497 | 67 | def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c) | 
| 68 | ||
| 69448 | 69 | def is_ascii_line_terminator(c: Char): Boolean = "\r\n".contains(c) | 
| 70 | ||
| 43418 | 71 | def is_ascii_letdig(c: Char): Boolean = | 
| 72 | is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c) | |
| 73 | ||
| 74 | def is_ascii_identifier(s: String): Boolean = | |
| 75192 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 75 | s.nonEmpty && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig) | 
| 43418 | 76 | |
| 75393 | 77 |   def ascii(c: Char): Symbol = {
 | 
| 62528 | 78 |     if (c > 127) error("Non-ASCII character: " + quote(c.toString))
 | 
| 75238 
e74d162ddf9f
clarified char symbols: cover most European languages;
 wenzelm parents: 
75237diff
changeset | 79 | else char_symbol(c) | 
| 62528 | 80 | } | 
| 81 | ||
| 66919 | 82 | def is_ascii(s: Symbol): Boolean = s.length == 1 && s(0) < 128 | 
| 83 | ||
| 43418 | 84 | |
| 48775 | 85 | /* symbol matching */ | 
| 27901 | 86 | |
| 48773 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 87 | def is_malformed(s: Symbol): Boolean = | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 88 |     s.length match {
 | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 89 | case 1 => | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 90 | val c = s(0) | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 91 | Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd' | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 92 | case 2 => | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 93 | val c1 = s(0) | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 94 | val c2 = s(1) | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 95 | !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2)) | 
| 48774 | 96 |       case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
 | 
| 48773 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 97 | } | 
| 34137 | 98 | |
| 54734 
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
 wenzelm parents: 
53400diff
changeset | 99 | def is_newline(s: Symbol): Boolean = | 
| 43675 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 100 | s == "\n" || s == "\r" || s == "\r\n" | 
| 38877 | 101 | |
| 75393 | 102 |   class Matcher(text: CharSequence) {
 | 
| 75237 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 103 | private def ok(i: Int): Boolean = 0 <= i && i < text.length | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 104 | private def char(i: Int): Char = if (ok(i)) text.charAt(i) else 0 | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 105 | private def maybe_char(c: Char, i: Int): Int = if (char(i) == c) i + 1 else i | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 106 | |
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 107 | @tailrec private def many_ascii_letdig(i: Int): Int = | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 108 | if (is_ascii_letdig(char(i))) many_ascii_letdig(i + 1) else i | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 109 | private def maybe_ascii_id(i: Int): Int = | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 110 | if (is_ascii_letter(char(i))) many_ascii_letdig(i + 1) else i | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 111 | |
| 75393 | 112 |     def match_length(i: Int): Int = {
 | 
| 75237 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 113 | val a = char(i) | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 114 | val b = char(i + 1) | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 115 | |
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 116 | if (Character.isHighSurrogate(a) && Character.isLowSurrogate(b) || a == '\r' && b == '\n') 2 | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 117 |       else if (a == '\\' && b == '<') maybe_char('>', maybe_ascii_id(maybe_char('^', i + 2))) - i
 | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 118 | else if (ok(i)) 1 | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 119 | else 0 | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 120 | } | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 121 | |
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 122 | def match_symbol(i: Int): String = | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 123 |       match_length(i) match {
 | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 124 | case 0 => "" | 
| 75238 
e74d162ddf9f
clarified char symbols: cover most European languages;
 wenzelm parents: 
75237diff
changeset | 125 | case 1 => char_symbol(text.charAt(i)) | 
| 
e74d162ddf9f
clarified char symbols: cover most European languages;
 wenzelm parents: 
75237diff
changeset | 126 | case n => text.subSequence(i, i + n).toString | 
| 34137 | 127 | } | 
| 31522 | 128 | } | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 129 | |
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 130 | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 131 | /* iterator */ | 
| 33998 | 132 | |
| 43696 | 133 | def iterator(text: CharSequence): Iterator[Symbol] = | 
| 75393 | 134 |     new Iterator[Symbol] {
 | 
| 43489 | 135 | private val matcher = new Matcher(text) | 
| 136 | private var i = 0 | |
| 71601 | 137 | def hasNext: Boolean = i < text.length | 
| 75393 | 138 |       def next(): Symbol = {
 | 
| 75237 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 139 | val s = matcher.match_symbol(i) | 
| 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 140 | i += s.length | 
| 43489 | 141 | s | 
| 142 | } | |
| 33998 | 143 | } | 
| 43489 | 144 | |
| 44949 | 145 | def explode(text: CharSequence): List[Symbol] = iterator(text).toList | 
| 146 | ||
| 64615 | 147 | def length(text: CharSequence): Int = iterator(text).length | 
| 64617 | 148 | |
| 67435 
f83c1842a559
trim blanks -- more thoroughly than in update_cartouches (for single-line comments);
 wenzelm parents: 
67389diff
changeset | 149 | def trim_blanks(text: CharSequence): String = | 
| 71601 | 150 | Library.trim(is_blank, explode(text)).mkString | 
| 67435 
f83c1842a559
trim blanks -- more thoroughly than in update_cartouches (for single-line comments);
 wenzelm parents: 
67389diff
changeset | 151 | |
| 69318 | 152 | def all_blank(str: String): Boolean = | 
| 71601 | 153 | iterator(str).forall(is_blank) | 
| 69318 | 154 | |
| 155 | def trim_blank_lines(text: String): String = | |
| 156 | cat_lines(split_lines(text).dropWhile(all_blank).reverse.dropWhile(all_blank).reverse) | |
| 157 | ||
| 33998 | 158 | |
| 159 | /* decoding offsets */ | |
| 160 | ||
| 75393 | 161 |   object Index {
 | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 162 | private sealed case class Entry(chr: Int, sym: Int) | 
| 52507 | 163 | |
| 56472 | 164 | val empty: Index = new Index(Nil) | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 165 | |
| 75393 | 166 |     def apply(text: CharSequence): Index = {
 | 
| 34137 | 167 | val matcher = new Matcher(text) | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 168 | val buf = new mutable.ListBuffer[Entry] | 
| 31929 | 169 | var chr = 0 | 
| 170 | var sym = 0 | |
| 33998 | 171 |       while (chr < text.length) {
 | 
| 75237 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 wenzelm parents: 
75199diff
changeset | 172 | val n = matcher.match_length(chr) | 
| 34137 | 173 | chr += n | 
| 31929 | 174 | sym += 1 | 
| 34137 | 175 | if (n > 1) buf += Entry(chr, sym) | 
| 31929 | 176 | } | 
| 56472 | 177 | if (buf.isEmpty) empty else new Index(buf.toList) | 
| 31929 | 178 | } | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 179 | } | 
| 55430 | 180 | |
| 75393 | 181 |   final class Index private(entries: List[Index.Entry]) {
 | 
| 56472 | 182 | private val hash: Int = entries.hashCode | 
| 183 | private val index: Array[Index.Entry] = entries.toArray | |
| 184 | ||
| 75393 | 185 |     def decode(symbol_offset: Offset): Text.Offset = {
 | 
| 55884 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 186 | val sym = symbol_offset - 1 | 
| 31929 | 187 | val end = index.length | 
| 75393 | 188 |       @tailrec def bisect(a: Int, b: Int): Int = {
 | 
| 31929 | 189 |         if (a < b) {
 | 
| 190 | val c = (a + b) / 2 | |
| 191 | if (sym < index(c).sym) bisect(a, c) | |
| 192 | else if (c + 1 == end || sym < index(c + 1).sym) c | |
| 193 | else bisect(c + 1, b) | |
| 194 | } | |
| 195 | else -1 | |
| 196 | } | |
| 197 | val i = bisect(0, end) | |
| 198 | if (i < 0) sym | |
| 199 | else index(i).chr + sym - index(i).sym | |
| 200 | } | |
| 71601 | 201 | def decode(symbol_range: Range): Text.Range = symbol_range.map(decode) | 
| 56335 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 202 | |
| 56338 
f968f4e3d520
proper structural hashCode, which is required for Command.File equals (NB: Array has physical object identity);
 wenzelm parents: 
56335diff
changeset | 203 | override def hashCode: Int = hash | 
| 56335 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 204 | override def equals(that: Any): Boolean = | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 205 |       that match {
 | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 206 | case other: Index => index.sameElements(other.index) | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 207 | case _ => false | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 208 | } | 
| 31929 | 209 | } | 
| 210 | ||
| 211 | ||
| 64477 | 212 | /* symbolic text chunks -- without actual text */ | 
| 56746 | 213 | |
| 75393 | 214 |   object Text_Chunk {
 | 
| 56746 | 215 | sealed abstract class Name | 
| 216 | case object Default extends Name | |
| 217 | case class Id(id: Document_ID.Generic) extends Name | |
| 218 | case class File(name: String) extends Name | |
| 219 | ||
| 220 | def apply(text: CharSequence): Text_Chunk = | |
| 76235 | 221 | new Text_Chunk(Text.Range.length(text), Index(text)) | 
| 56746 | 222 | } | 
| 223 | ||
| 75393 | 224 |   final class Text_Chunk private(val range: Text.Range, private val index: Index) {
 | 
| 56746 | 225 | override def hashCode: Int = (range, index).hashCode | 
| 226 | override def equals(that: Any): Boolean = | |
| 227 |       that match {
 | |
| 228 | case other: Text_Chunk => | |
| 229 | range == other.range && | |
| 230 | index == other.index | |
| 231 | case _ => false | |
| 232 | } | |
| 233 | ||
| 57840 | 234 | override def toString: String = "Text_Chunk" + range.toString | 
| 235 | ||
| 56746 | 236 | def decode(symbol_offset: Offset): Text.Offset = index.decode(symbol_offset) | 
| 237 | def decode(symbol_range: Range): Text.Range = index.decode(symbol_range) | |
| 75393 | 238 |     def incorporate(symbol_range: Range): Option[Text.Range] = {
 | 
| 75659 
9bd92ac9328f
more robust Scala 3 indentation, for the sake of IntelliJ IDEA;
 wenzelm parents: 
75393diff
changeset | 239 |       def in(r: Range): Option[Text.Range] = {
 | 
| 56746 | 240 |         range.try_restrict(decode(r)) match {
 | 
| 241 | case Some(r1) if !r1.is_singularity => Some(r1) | |
| 242 | case _ => None | |
| 243 | } | |
| 75659 
9bd92ac9328f
more robust Scala 3 indentation, for the sake of IntelliJ IDEA;
 wenzelm parents: 
75393diff
changeset | 244 | } | 
| 
9bd92ac9328f
more robust Scala 3 indentation, for the sake of IntelliJ IDEA;
 wenzelm parents: 
75393diff
changeset | 245 | in(symbol_range) orElse in(symbol_range - 1) | 
| 56746 | 246 | } | 
| 247 | } | |
| 248 | ||
| 249 | ||
| 33998 | 250 | /* recoding text */ | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 251 | |
| 75393 | 252 |   private class Recoder(list: List[(String, String)]) {
 | 
| 253 |     private val (min, max) = {
 | |
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 254 | var min = '\uffff' | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 255 | var max = '\u0000' | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 256 |       for ((x, _) <- list) {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 257 | val c = x(0) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 258 | if (c < min) min = c | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 259 | if (c > max) max = c | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 260 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 261 | (min, max) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 262 | } | 
| 75393 | 263 |     private val table = {
 | 
| 40443 | 264 | var tab = Map[String, String]() | 
| 265 |       for ((x, y) <- list) {
 | |
| 266 |         tab.get(x) match {
 | |
| 267 | case None => tab += (x -> y) | |
| 268 | case Some(z) => | |
| 62230 | 269 |             error("Duplicate symbol mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z))
 | 
| 40443 | 270 | } | 
| 271 | } | |
| 272 | tab | |
| 273 | } | |
| 75393 | 274 |     def recode(text: String): String = {
 | 
| 80443 | 275 | val n = text.length | 
| 80445 | 276 |       val relevant = {
 | 
| 80441 | 277 | var i = 0 | 
| 80445 | 278 | var found = false | 
| 279 |         while (i < n && !found) {
 | |
| 80441 | 280 | val c = text(i) | 
| 80445 | 281 |           if (min <= c && c <= max) { found = true }
 | 
| 282 | i += 1 | |
| 283 | } | |
| 284 | found | |
| 285 | } | |
| 286 |       if (relevant) {
 | |
| 287 | val matcher = new Symbol.Matcher(text) | |
| 288 |         Library.string_builder(hint = n) { result =>
 | |
| 289 | var i = 0 | |
| 290 |           while (i < n) {
 | |
| 291 | val c = text(i) | |
| 292 |             if (min <= c && c <= max) {
 | |
| 293 | val s = matcher.match_symbol(i) | |
| 294 | result.append(table.getOrElse(s, s)) | |
| 295 | i += s.length | |
| 296 | } | |
| 297 |             else { result.append(c); i += 1 }
 | |
| 80441 | 298 | } | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 299 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 300 | } | 
| 80445 | 301 | else text | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 302 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 303 | } | 
| 27924 | 304 | |
| 27918 | 305 | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 306 | |
| 75194 | 307 | /** defined symbols **/ | 
| 27927 | 308 | |
| 78599 | 309 |   object Argument {
 | 
| 310 | def unapply(s: String): Option[Argument] = | |
| 311 |       try { Some(valueOf(s)) }
 | |
| 312 |       catch { case _: IllegalArgumentException => None}
 | |
| 313 | } | |
| 67311 | 314 | |
| 78599 | 315 |   enum Argument { case none, cartouche, space_cartouche }
 | 
| 75193 | 316 | |
| 75393 | 317 |   object Entry {
 | 
| 75197 | 318 |     private val Name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
 | 
| 319 |     private val Argument = new Properties.String("argument")
 | |
| 320 |     private val Abbrev = new Properties.String("abbrev")
 | |
| 321 |     private val Code = new Properties.String("code")
 | |
| 322 |     private val Font = new Properties.String("font")
 | |
| 323 |     private val Group = new Properties.String("group")
 | |
| 324 | ||
| 75393 | 325 |     def apply(symbol: Symbol, props: Properties.T): Entry = {
 | 
| 75197 | 326 | def err(msg: String): Nothing = error(msg + " for symbol " + quote(symbol)) | 
| 327 | ||
| 328 | val name = | |
| 329 |         symbol match { case Name(a) => a case _ => err("Cannot determine name") }
 | |
| 330 | ||
| 331 | val argument = | |
| 332 |         props match {
 | |
| 333 | case Argument(arg) => | |
| 334 | Symbol.Argument.unapply(arg) getOrElse | |
| 335 |               error("Bad argument: " + quote(arg) + " for symbol " + quote(symbol))
 | |
| 336 | case _ => Symbol.Argument.none | |
| 337 | } | |
| 338 | ||
| 339 | val code = | |
| 340 |         props match {
 | |
| 341 | case Code(s) => | |
| 342 |             try {
 | |
| 343 | val code = Integer.decode(s).intValue | |
| 344 |               if (code >= 128) Some(code) else err("Illegal ASCII code")
 | |
| 345 | } | |
| 346 |             catch { case _: NumberFormatException => err("Bad code") }
 | |
| 347 | case _ => None | |
| 348 | } | |
| 349 | ||
| 78592 | 350 | val groups = | 
| 351 |         proper_list(for (case (Group.name, a) <- props) yield a).getOrElse(List("unsorted"))
 | |
| 75197 | 352 | |
| 78592 | 353 | val abbrevs = for (case (Abbrev.name, a) <- props) yield a | 
| 75197 | 354 | |
| 355 | new Entry(symbol, name, argument, code, Font.unapply(props), groups, abbrevs) | |
| 356 | } | |
| 357 | } | |
| 358 | ||
| 359 | class Entry private( | |
| 360 | val symbol: Symbol, | |
| 361 | val name: String, | |
| 78599 | 362 | val argument: Symbol.Argument, | 
| 75197 | 363 | val code: Option[Int], | 
| 364 | val font: Option[String], | |
| 365 | val groups: List[String], | |
| 75393 | 366 | val abbrevs: List[String] | 
| 367 |   ) {
 | |
| 75197 | 368 | override def toString: String = symbol | 
| 369 | ||
| 370 | val decode: Option[String] = | |
| 371 | code.map(c => new String(Character.toChars(c))) | |
| 75194 | 372 | } | 
| 373 | ||
| 78938 
7774e1372476
clarified loading of symbols: permissive entries in $ISABELLE_SYMBOLS require explicit "?";
 wenzelm parents: 
78599diff
changeset | 374 | lazy val symbols: Symbols = | 
| 
7774e1372476
clarified loading of symbols: permissive entries in $ISABELLE_SYMBOLS require explicit "?";
 wenzelm parents: 
78599diff
changeset | 375 | Symbols.make(cat_lines(Symbols.files().map(File.read))) | 
| 75194 | 376 | |
| 75393 | 377 |   object Symbols {
 | 
| 78938 
7774e1372476
clarified loading of symbols: permissive entries in $ISABELLE_SYMBOLS require explicit "?";
 wenzelm parents: 
78599diff
changeset | 378 | def files(): List[Path] = | 
| 
7774e1372476
clarified loading of symbols: permissive entries in $ISABELLE_SYMBOLS require explicit "?";
 wenzelm parents: 
78599diff
changeset | 379 |       Path.split_permissive_files(Isabelle_System.getenv("ISABELLE_SYMBOLS"))
 | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 380 | |
| 75393 | 381 |     def make(symbols_spec: String): Symbols = {
 | 
| 75193 | 382 |       val No_Decl = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
 | 
| 383 |       val Key = new Regex("""(?xs) (.+): """)
 | |
| 31522 | 384 | |
| 75393 | 385 |       def read_decl(decl: String): (Symbol, Properties.T) = {
 | 
| 75193 | 386 |         def err() = error("Bad symbol declaration: " + decl)
 | 
| 31522 | 387 | |
| 75393 | 388 |         def read_props(props: List[String]): Properties.T = {
 | 
| 75193 | 389 |           props match {
 | 
| 390 | case Nil => Nil | |
| 391 | case _ :: Nil => err() | |
| 392 |             case Key(x) :: y :: rest => (x -> y.replace('\u2423', ' ')) :: read_props(rest)
 | |
| 393 | case _ => err() | |
| 394 | } | |
| 395 | } | |
| 396 |         decl.split("\\s+").toList match {
 | |
| 397 | case sym :: props if sym.length > 1 && !is_malformed(sym) => | |
| 398 | (sym, read_props(props)) | |
| 31522 | 399 | case _ => err() | 
| 400 | } | |
| 401 | } | |
| 402 | ||
| 75197 | 403 | new Symbols( | 
| 75193 | 404 | split_lines(symbols_spec).reverse. | 
| 75197 | 405 |           foldLeft((List.empty[Entry], Set.empty[Symbol])) {
 | 
| 75193 | 406 | case (res, No_Decl()) => res | 
| 407 | case ((list, known), decl) => | |
| 408 | val (sym, props) = read_decl(decl) | |
| 409 | if (known(sym)) (list, known) | |
| 75197 | 410 | else (Entry(sym, props) :: list, known + sym) | 
| 411 | }._1) | |
| 75193 | 412 | } | 
| 413 | } | |
| 31522 | 414 | |
| 75393 | 415 |   class Symbols(val entries: List[Entry]) {
 | 
| 75197 | 416 |     override def toString: String = entries.mkString("Symbols(", ", ", ")")
 | 
| 417 | ||
| 418 | ||
| 53400 | 419 | /* basic properties */ | 
| 420 | ||
| 75197 | 421 | private val entries_map: Map[Symbol, Entry] = | 
| 422 | (for (entry <- entries.iterator) yield entry.symbol -> entry).toMap | |
| 31651 | 423 | |
| 75197 | 424 | def get(sym: Symbol): Option[Entry] = entries_map.get(sym) | 
| 425 | def defined(sym: Symbol): Boolean = entries_map.isDefinedAt(sym) | |
| 426 | ||
| 427 | def defined_code(sym: Symbol): Boolean = | |
| 428 |       get(sym) match { case Some(entry) => entry.code.isDefined case _ => false }
 | |
| 31651 | 429 | |
| 75197 | 430 | val code_defined: Int => Boolean = | 
| 431 | (for (entry <- entries.iterator; code <- entry.code) yield code).toSet | |
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 432 | |
| 75197 | 433 | val groups_code: List[(String, List[Symbol])] = | 
| 434 |       (for {
 | |
| 435 | entry <- entries.iterator if entry.code.isDefined | |
| 436 | group <- entry.groups.iterator | |
| 437 | } yield entry.symbol -> group) | |
| 438 |         .toList.groupBy(_._2).toList.map({ case (g, list) => (g, list.map(_._1)) }).sortBy(_._1)
 | |
| 75195 | 439 | |
| 75199 | 440 | def get_abbrevs(sym: Symbol): List[String] = | 
| 441 |       get(sym) match { case Some(entry) => entry.abbrevs case _ => Nil }
 | |
| 75195 | 442 | |
| 43488 | 443 | |
| 43490 | 444 | /* recoding */ | 
| 31522 | 445 | |
| 75393 | 446 |     private val (decoder, encoder) = {
 | 
| 31522 | 447 | val mapping = | 
| 75197 | 448 | for (entry <- entries; s <- entry.decode) yield entry.symbol -> s | 
| 61376 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 449 | (new Recoder(mapping), new Recoder(for ((x, y) <- mapping) yield (y, x))) | 
| 31522 | 450 | } | 
| 27918 | 451 | |
| 34098 | 452 | def decode(text: String): String = decoder.recode(text) | 
| 453 | def encode(text: String): String = encoder.recode(text) | |
| 34134 | 454 | |
| 75198 | 455 | private def recode_set(elems: Iterable[String]): Set[String] = | 
| 456 | (elems.iterator ++ elems.iterator.map(decode)).toSet | |
| 43490 | 457 | |
| 75198 | 458 | private def recode_map[A](elems: Iterable[(String, A)]): Map[String, A] = | 
| 459 |       (elems.iterator ++ elems.iterator.map({ case (sym, a) => (decode(sym), a) })).toMap
 | |
| 43490 | 460 | |
| 461 | ||
| 462 | /* user fonts */ | |
| 463 | ||
| 43696 | 464 | val fonts: Map[Symbol, String] = | 
| 75198 | 465 | recode_map(for (entry <- entries; font <- entry.font) yield entry.symbol -> font) | 
| 43490 | 466 | |
| 75192 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 467 | val font_names: List[String] = fonts.iterator.map(_._2).toSet.toList | 
| 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 468 | val font_index: Map[String, Int] = (font_names zip font_names.indices.toList).toMap | 
| 43490 | 469 | |
| 75195 | 470 | def lookup_font(sym: Symbol): Option[Int] = fonts.get(sym).map(font_index(_)) | 
| 471 | ||
| 34134 | 472 | |
| 473 | /* classification */ | |
| 474 | ||
| 75198 | 475 | val letters: Set[String] = recode_set(List( | 
| 34134 | 476 | "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", | 
| 477 | "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", | |
| 478 | "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", | |
| 479 | "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", | |
| 480 | ||
| 481 | "\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>", | |
| 482 | "\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>", | |
| 483 | "\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>", | |
| 484 | "\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>", | |
| 485 | "\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>", | |
| 486 | "\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>", | |
| 487 | "\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>", | |
| 488 | "\\<x>", "\\<y>", "\\<z>", | |
| 489 | ||
| 490 | "\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>", | |
| 491 | "\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>", | |
| 492 | "\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>", | |
| 493 | "\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>", | |
| 494 | "\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>", | |
| 495 | "\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>", | |
| 496 | "\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>", | |
| 497 | "\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>", | |
| 498 | "\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>", | |
| 499 | ||
| 500 | "\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>", | |
| 501 | "\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>", | |
| 502 | "\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>", | |
| 503 | "\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>", | |
| 504 | "\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>", | |
| 505 | "\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>", | |
| 75198 | 506 | "\\<Psi>", "\\<Omega>")) | 
| 34134 | 507 | |
| 75198 | 508 | val blanks: Set[String] = recode_set(List(space, "\t", "\n", "\u000B", "\f", "\r", "\r\n")) | 
| 34138 | 509 | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 510 | val sym_chars = | 
| 34138 | 511 |       Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
 | 
| 34134 | 512 | |
| 75197 | 513 | val symbolic: Set[String] = | 
| 75198 | 514 | recode_set(for (entry <- entries if raw_symbolic(entry.symbol)) yield entry.symbol) | 
| 44992 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 515 | |
| 43455 | 516 | |
| 63528 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 517 | /* misc symbols */ | 
| 61579 | 518 | |
| 75192 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 519 | val newline_decoded: Symbol = decode(newline) | 
| 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 520 | val comment_decoded: Symbol = decode(comment) | 
| 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 521 | val cancel_decoded: Symbol = decode(cancel) | 
| 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 522 | val latex_decoded: Symbol = decode(latex) | 
| 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 523 | val marker_decoded: Symbol = decode(marker) | 
| 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 524 | val open_decoded: Symbol = decode(open) | 
| 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 525 | val close_decoded: Symbol = decode(close) | 
| 55033 | 526 | |
| 527 | ||
| 82120 | 528 | /* brackets */ | 
| 529 | ||
| 530 | val open_brackets_decoded = decode(open_brackets) | |
| 531 | val close_brackets_decoded = decode(close_brackets) | |
| 532 | ||
| 533 | ||
| 43488 | 534 | /* control symbols */ | 
| 535 | ||
| 59107 | 536 | val control_decoded: Set[Symbol] = | 
| 75197 | 537 |       (for (entry <- entries.iterator if entry.symbol.startsWith("\\<^"); s <- entry.decode)
 | 
| 538 | yield s).toSet | |
| 43488 | 539 | |
| 75192 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 540 | val sub_decoded: Symbol = decode(sub) | 
| 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 541 | val sup_decoded: Symbol = decode(sup) | 
| 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 542 | val bold_decoded: Symbol = decode(bold) | 
| 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 543 | val emph_decoded: Symbol = decode(emph) | 
| 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 544 | val bsub_decoded: Symbol = decode(bsub) | 
| 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 545 | val esub_decoded: Symbol = decode(esub) | 
| 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 546 | val bsup_decoded: Symbol = decode(bsup) | 
| 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 wenzelm parents: 
73359diff
changeset | 547 | val esup_decoded: Symbol = decode(esup) | 
| 27918 | 548 | } | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 549 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 550 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 551 | /* tables */ | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 552 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 553 | def decode(text: String): String = symbols.decode(text) | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 554 | def encode(text: String): String = symbols.encode(text) | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 555 | |
| 73030 | 556 | def decode_yxml(text: String, cache: XML.Cache = XML.Cache.none): XML.Body = | 
| 80480 
972f7a4cdc0e
clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
 wenzelm parents: 
80447diff
changeset | 557 | YXML.parse_body(YXML.Source(text), recode = decode, cache = cache) | 
| 73030 | 558 | |
| 559 | def decode_yxml_failsafe(text: String, cache: XML.Cache = XML.Cache.none): XML.Body = | |
| 80480 
972f7a4cdc0e
clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
 wenzelm parents: 
80447diff
changeset | 560 | YXML.parse_body_failsafe(YXML.Source(text), recode = decode, cache = cache) | 
| 73030 | 561 | |
| 80437 
2c07b9b2f9f4
minor performance tuning: more direct Bytes with Symbol.encode;
 wenzelm parents: 
78938diff
changeset | 562 | def encode_yxml(body: XML.Body): String = | 
| 
2c07b9b2f9f4
minor performance tuning: more direct Bytes with Symbol.encode;
 wenzelm parents: 
78938diff
changeset | 563 | YXML.string_of_body(body, recode = encode) | 
| 53337 
b3817a0e3211
sort items according to persistent history of frequency of use;
 wenzelm parents: 
53316diff
changeset | 564 | |
| 75393 | 565 |   def decode_strict(text: String): String = {
 | 
| 50291 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 566 | val decoded = decode(text) | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 567 | if (encode(decoded) == text) decoded | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 568 |     else {
 | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 569 | val bad = new mutable.ListBuffer[Symbol] | 
| 80444 | 570 | for (s <- iterator(text) if encode(decode(s)) != s && !bad.contains(s)) bad += s | 
| 50291 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 571 |       error("Bad Unicode symbols in text: " + commas_quote(bad))
 | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 572 | } | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 573 | } | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 574 | |
| 72866 | 575 | def output(unicode_symbols: Boolean, text: String): String = | 
| 576 | if (unicode_symbols) Symbol.decode(text) else Symbol.encode(text) | |
| 577 | ||
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 578 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 579 | /* classification */ | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 580 | |
| 43696 | 581 | def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym) | 
| 582 | def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9' | |
| 583 | def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'" | |
| 584 | def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym) | |
| 585 | def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym) | |
| 44992 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 586 | |
| 55033 | 587 | |
| 67438 | 588 | /* symbolic newline */ | 
| 63528 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 589 | |
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 590 | val newline: Symbol = "\\<newline>" | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 591 | def newline_decoded: Symbol = symbols.newline_decoded | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 592 | |
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 593 | def print_newlines(str: String): String = | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 594 |     if (str.contains('\n'))
 | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 595 |       (for (s <- iterator(str)) yield { if (s == "\n") newline_decoded else s }).mkString
 | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 596 | else str | 
| 61579 | 597 | |
| 67438 | 598 | |
| 599 | /* formal comments */ | |
| 600 | ||
| 61579 | 601 | val comment: Symbol = "\\<comment>" | 
| 67449 | 602 | val cancel: Symbol = "\\<^cancel>" | 
| 603 | val latex: Symbol = "\\<^latex>" | |
| 69891 
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
 wenzelm parents: 
69887diff
changeset | 604 | val marker: Symbol = "\\<^marker>" | 
| 67449 | 605 | |
| 61579 | 606 | def comment_decoded: Symbol = symbols.comment_decoded | 
| 67438 | 607 | def cancel_decoded: Symbol = symbols.cancel_decoded | 
| 608 | def latex_decoded: Symbol = symbols.latex_decoded | |
| 69891 
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
 wenzelm parents: 
69887diff
changeset | 609 | def marker_decoded: Symbol = symbols.marker_decoded | 
| 61579 | 610 | |
| 611 | ||
| 55033 | 612 | /* cartouches */ | 
| 613 | ||
| 61579 | 614 | val open: Symbol = "\\<open>" | 
| 615 | val close: Symbol = "\\<close>" | |
| 55033 | 616 | |
| 617 | def open_decoded: Symbol = symbols.open_decoded | |
| 618 | def close_decoded: Symbol = symbols.close_decoded | |
| 619 | ||
| 620 | def is_open(sym: Symbol): Boolean = sym == open_decoded || sym == open | |
| 621 | def is_close(sym: Symbol): Boolean = sym == close_decoded || sym == close | |
| 622 | ||
| 67131 | 623 | def cartouche(s: String): String = open + s + close | 
| 624 | def cartouche_decoded(s: String): String = open_decoded + s + close_decoded | |
| 625 | ||
| 55033 | 626 | |
| 627 | /* symbols for symbolic identifiers */ | |
| 44992 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 628 | |
| 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 629 | private def raw_symbolic(sym: Symbol): Boolean = | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 630 |     sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^")
 | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 631 | |
| 55033 | 632 | def is_symbolic(sym: Symbol): Boolean = | 
| 633 | !is_open(sym) && !is_close(sym) && (raw_symbolic(sym) || symbols.symbolic.contains(sym)) | |
| 634 | ||
| 635 | def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym) | |
| 636 | ||
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 637 | |
| 82120 | 638 | /* brackets */ | 
| 639 | ||
| 82122 | 640 |   val open_brackets = """([{\<guillemotleft>\<open>\<langle>\<lceil>\<lfloor>\<lparr>\<lbrakk>\<lbrace>\<llangle>\<lblot>"""
 | 
| 641 | val close_brackets = """)]}\<guillemotright>\<close>\<rangle>\<rceil>\<rfloor>\<rparr>\<rbrakk>\<rbrace>\<rrangle>\<rblot>""" | |
| 82120 | 642 | |
| 643 | def open_brackets_decoded = symbols.open_brackets_decoded | |
| 644 | def close_brackets_decoded = symbols.close_brackets_decoded | |
| 645 | ||
| 646 | ||
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 647 | /* control symbols */ | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 648 | |
| 67127 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 wenzelm parents: 
66919diff
changeset | 649 | val control_prefix = "\\<^" | 
| 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 wenzelm parents: 
66919diff
changeset | 650 | val control_suffix = ">" | 
| 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 wenzelm parents: 
66919diff
changeset | 651 | |
| 67255 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 wenzelm parents: 
67131diff
changeset | 652 | def control_name(sym: Symbol): Option[String] = | 
| 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 wenzelm parents: 
67131diff
changeset | 653 | if (is_control_encoded(sym)) | 
| 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 wenzelm parents: 
67131diff
changeset | 654 | Some(sym.substring(control_prefix.length, sym.length - control_suffix.length)) | 
| 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 wenzelm parents: 
67131diff
changeset | 655 | else None | 
| 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 wenzelm parents: 
67131diff
changeset | 656 | |
| 67127 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 wenzelm parents: 
66919diff
changeset | 657 | def is_control_encoded(sym: Symbol): Boolean = | 
| 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 wenzelm parents: 
66919diff
changeset | 658 | sym.startsWith(control_prefix) && sym.endsWith(control_suffix) | 
| 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 wenzelm parents: 
66919diff
changeset | 659 | |
| 59107 | 660 | def is_control(sym: Symbol): Boolean = | 
| 67127 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 wenzelm parents: 
66919diff
changeset | 661 | is_control_encoded(sym) || symbols.control_decoded.contains(sym) | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 662 | |
| 43696 | 663 | def is_controllable(sym: Symbol): Boolean = | 
| 66006 
cec184536dfd
uniform notion of Symbol.is_controllable (see also 265d9300d523);
 wenzelm parents: 
65997diff
changeset | 664 | !is_blank(sym) && !is_control(sym) && !is_open(sym) && !is_close(sym) && | 
| 
cec184536dfd
uniform notion of Symbol.is_controllable (see also 265d9300d523);
 wenzelm parents: 
65997diff
changeset | 665 | !is_malformed(sym) && sym != "\"" | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 666 | |
| 73208 | 667 | val sub: Symbol = "\\<^sub>" | 
| 668 | val sup: Symbol = "\\<^sup>" | |
| 669 | val bold: Symbol = "\\<^bold>" | |
| 670 | val emph: Symbol = "\\<^emph>" | |
| 671 | val bsub: Symbol = "\\<^bsub>" | |
| 672 | val esub: Symbol = "\\<^esub>" | |
| 673 | val bsup: Symbol = "\\<^bsup>" | |
| 674 | val esup: Symbol = "\\<^esup>" | |
| 62103 | 675 | |
| 44238 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 676 | def sub_decoded: Symbol = symbols.sub_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 677 | def sup_decoded: Symbol = symbols.sup_decoded | 
| 62103 | 678 | def bold_decoded: Symbol = symbols.bold_decoded | 
| 679 | def emph_decoded: Symbol = symbols.emph_decoded | |
| 44238 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 680 | def bsub_decoded: Symbol = symbols.bsub_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 681 | def esub_decoded: Symbol = symbols.esub_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 682 | def bsup_decoded: Symbol = symbols.bsup_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 683 | def esup_decoded: Symbol = symbols.esup_decoded | 
| 71649 | 684 | |
| 685 | ||
| 686 | /* metric */ | |
| 687 | ||
| 688 | def is_printable(sym: Symbol): Boolean = | |
| 689 | if (is_ascii(sym)) is_ascii_printable(sym(0)) | |
| 690 | else !is_control(sym) | |
| 691 | ||
| 75393 | 692 |   object Metric extends Pretty.Metric {
 | 
| 71649 | 693 | val unit = 1.0 | 
| 694 | def apply(str: String): Double = | |
| 695 |       (for (s <- iterator(str)) yield {
 | |
| 696 | val sym = encode(s) | |
| 81339 
e181259e539b
more accurate Symbol.Metric, following 6eccae338770;
 wenzelm parents: 
80548diff
changeset | 697 |         if (sym.startsWith("\\<longlonglong")) 4
 | 
| 
e181259e539b
more accurate Symbol.Metric, following 6eccae338770;
 wenzelm parents: 
80548diff
changeset | 698 |         else if (sym.startsWith("\\<longlong")) 3
 | 
| 
e181259e539b
more accurate Symbol.Metric, following 6eccae338770;
 wenzelm parents: 
80548diff
changeset | 699 |         else if (sym.startsWith("\\<long") || sym.startsWith("\\<Long")) 2
 | 
| 81375 
ae5695161423
more robust and uniform metric, still with special treatment motivated by jEdit (see also 0cdfce0bf956);
 wenzelm parents: 
81339diff
changeset | 700 | else if (is_blank(sym) || is_printable(sym)) 1 | 
| 71649 | 701 | else 0 | 
| 702 | }).sum | |
| 703 | } | |
| 27901 | 704 | } |