| author | haftmann | 
| Fri, 26 Oct 2018 08:20:45 +0000 | |
| changeset 69194 | 6d514e128a85 | 
| parent 67449 | 1caeb087d957 | 
| child 69318 | f3351bb4390e | 
| permissions | -rw-r--r-- | 
| 27901 | 1 | /* Title: Pure/General/symbol.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 27924 | 4 | Detecting and recoding Isabelle symbols. | 
| 27901 | 5 | */ | 
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 55618 | 9 | |
| 36011 
3ff725ac13a4
adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
 wenzelm parents: 
34316diff
changeset | 10 | import scala.collection.mutable | 
| 31522 | 11 | import scala.util.matching.Regex | 
| 48922 | 12 | import scala.annotation.tailrec | 
| 27901 | 13 | |
| 14 | ||
| 31522 | 15 | object Symbol | 
| 16 | {
 | |
| 43696 | 17 | type Symbol = String | 
| 18 | ||
| 55884 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 19 | // counting Isabelle symbols, starting from 1 | 
| 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 20 | type Offset = Text.Offset | 
| 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 21 | type Range = Text.Range | 
| 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 22 | |
| 43696 | 23 | |
| 61865 | 24 | /* spaces */ | 
| 25 | ||
| 26 | val space = " " | |
| 27 | ||
| 28 | private val static_spaces = space * 4000 | |
| 29 | ||
| 30 | def spaces(n: Int): String = | |
| 31 |   {
 | |
| 32 | require(n >= 0) | |
| 33 | if (n < static_spaces.length) static_spaces.substring(0, n) | |
| 34 | else space * n | |
| 35 | } | |
| 36 | ||
| 37 | ||
| 43418 | 38 | /* ASCII characters */ | 
| 39 | ||
| 40 | def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' | |
| 55497 | 41 | |
| 43418 | 42 | def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9' | 
| 55497 | 43 | |
| 44 | def is_ascii_hex(c: Char): Boolean = | |
| 45 | '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' | |
| 46 | ||
| 43418 | 47 | def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\'' | 
| 48 | ||
| 55497 | 49 | def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c) | 
| 50 | ||
| 43418 | 51 | def is_ascii_letdig(c: Char): Boolean = | 
| 52 | is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c) | |
| 53 | ||
| 54 | def is_ascii_identifier(s: String): Boolean = | |
| 50238 
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
 wenzelm parents: 
50233diff
changeset | 55 | s.length > 0 && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig) | 
| 43418 | 56 | |
| 62528 | 57 | def ascii(c: Char): Symbol = | 
| 58 |   {
 | |
| 59 |     if (c > 127) error("Non-ASCII character: " + quote(c.toString))
 | |
| 60 | else char_symbols(c.toInt) | |
| 61 | } | |
| 62 | ||
| 66919 | 63 | def is_ascii(s: Symbol): Boolean = s.length == 1 && s(0) < 128 | 
| 64 | ||
| 43418 | 65 | |
| 48775 | 66 | /* symbol matching */ | 
| 27901 | 67 | |
| 48775 | 68 |   private val symbol_total = new Regex("""(?xs)
 | 
| 63936 | 69 | [\ud800-\udbff][\udc00-\udfff] | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""") | 
| 27924 | 70 | |
| 48775 | 71 | private def is_plain(c: Char): Boolean = | 
| 72 | !(c == '\r' || c == '\\' || Character.isHighSurrogate(c)) | |
| 48773 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 73 | |
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 74 | def is_malformed(s: Symbol): Boolean = | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 75 |     s.length match {
 | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 76 | case 1 => | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 77 | val c = s(0) | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 78 | Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd' | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 79 | case 2 => | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 80 | val c1 = s(0) | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 81 | val c2 = s(1) | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 82 | !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2)) | 
| 48774 | 83 |       case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
 | 
| 48773 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 84 | } | 
| 34137 | 85 | |
| 54734 
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
 wenzelm parents: 
53400diff
changeset | 86 | def is_newline(s: Symbol): Boolean = | 
| 43675 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 87 | s == "\n" || s == "\r" || s == "\r\n" | 
| 38877 | 88 | |
| 34137 | 89 | class Matcher(text: CharSequence) | 
| 90 |   {
 | |
| 48775 | 91 | private val matcher = symbol_total.pattern.matcher(text) | 
| 34137 | 92 | def apply(start: Int, end: Int): Int = | 
| 93 |     {
 | |
| 94 | require(0 <= start && start < end && end <= text.length) | |
| 34316 
f879b649ac4c
clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
 wenzelm parents: 
34193diff
changeset | 95 | if (is_plain(text.charAt(start))) 1 | 
| 34138 | 96 |       else {
 | 
| 34137 | 97 | matcher.region(start, end).lookingAt | 
| 98 | matcher.group.length | |
| 99 | } | |
| 100 | } | |
| 31522 | 101 | } | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 102 | |
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 103 | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 104 | /* iterator */ | 
| 33998 | 105 | |
| 43696 | 106 | private val char_symbols: Array[Symbol] = | 
| 43675 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 107 | (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 108 | |
| 43696 | 109 | def iterator(text: CharSequence): Iterator[Symbol] = | 
| 110 | new Iterator[Symbol] | |
| 40522 | 111 |     {
 | 
| 43489 | 112 | private val matcher = new Matcher(text) | 
| 113 | private var i = 0 | |
| 114 | def hasNext = i < text.length | |
| 115 | def next = | |
| 116 |       {
 | |
| 117 | val n = matcher(i, text.length) | |
| 43675 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 118 | val s = | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 119 | if (n == 0) "" | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 120 |           else if (n == 1) {
 | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 121 | val c = text.charAt(i) | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 122 | if (c < char_symbols.length) char_symbols(c) | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 123 | else text.subSequence(i, i + n).toString | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 124 | } | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 125 | else text.subSequence(i, i + n).toString | 
| 43489 | 126 | i += n | 
| 127 | s | |
| 128 | } | |
| 33998 | 129 | } | 
| 43489 | 130 | |
| 44949 | 131 | def explode(text: CharSequence): List[Symbol] = iterator(text).toList | 
| 132 | ||
| 64615 | 133 | def length(text: CharSequence): Int = iterator(text).length | 
| 64617 | 134 | |
| 67435 
f83c1842a559
trim blanks -- more thoroughly than in update_cartouches (for single-line comments);
 wenzelm parents: 
67389diff
changeset | 135 | def trim_blanks(text: CharSequence): String = | 
| 
f83c1842a559
trim blanks -- more thoroughly than in update_cartouches (for single-line comments);
 wenzelm parents: 
67389diff
changeset | 136 | Library.trim(is_blank(_), explode(text)).mkString | 
| 
f83c1842a559
trim blanks -- more thoroughly than in update_cartouches (for single-line comments);
 wenzelm parents: 
67389diff
changeset | 137 | |
| 33998 | 138 | |
| 139 | /* decoding offsets */ | |
| 140 | ||
| 52507 | 141 | object Index | 
| 142 |   {
 | |
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 143 | private sealed case class Entry(chr: Int, sym: Int) | 
| 52507 | 144 | |
| 56472 | 145 | val empty: Index = new Index(Nil) | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 146 | |
| 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 147 | def apply(text: CharSequence): Index = | 
| 31929 | 148 |     {
 | 
| 34137 | 149 | val matcher = new Matcher(text) | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 150 | val buf = new mutable.ListBuffer[Entry] | 
| 31929 | 151 | var chr = 0 | 
| 152 | var sym = 0 | |
| 33998 | 153 |       while (chr < text.length) {
 | 
| 34137 | 154 | val n = matcher(chr, text.length) | 
| 155 | chr += n | |
| 31929 | 156 | sym += 1 | 
| 34137 | 157 | if (n > 1) buf += Entry(chr, sym) | 
| 31929 | 158 | } | 
| 56472 | 159 | if (buf.isEmpty) empty else new Index(buf.toList) | 
| 31929 | 160 | } | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 161 | } | 
| 55430 | 162 | |
| 56472 | 163 | final class Index private(entries: List[Index.Entry]) | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 164 |   {
 | 
| 56472 | 165 | private val hash: Int = entries.hashCode | 
| 166 | private val index: Array[Index.Entry] = entries.toArray | |
| 167 | ||
| 55884 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 168 | def decode(symbol_offset: Offset): Text.Offset = | 
| 31929 | 169 |     {
 | 
| 55884 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 170 | val sym = symbol_offset - 1 | 
| 31929 | 171 | val end = index.length | 
| 48922 | 172 | @tailrec def bisect(a: Int, b: Int): Int = | 
| 31929 | 173 |       {
 | 
| 174 |         if (a < b) {
 | |
| 175 | val c = (a + b) / 2 | |
| 176 | if (sym < index(c).sym) bisect(a, c) | |
| 177 | else if (c + 1 == end || sym < index(c + 1).sym) c | |
| 178 | else bisect(c + 1, b) | |
| 179 | } | |
| 180 | else -1 | |
| 181 | } | |
| 182 | val i = bisect(0, end) | |
| 183 | if (i < 0) sym | |
| 184 | else index(i).chr + sym - index(i).sym | |
| 185 | } | |
| 55884 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 186 | def decode(symbol_range: Range): Text.Range = symbol_range.map(decode(_)) | 
| 56335 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 187 | |
| 56338 
f968f4e3d520
proper structural hashCode, which is required for Command.File equals (NB: Array has physical object identity);
 wenzelm parents: 
56335diff
changeset | 188 | override def hashCode: Int = hash | 
| 56335 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 189 | override def equals(that: Any): Boolean = | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 190 |       that match {
 | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 191 | case other: Index => index.sameElements(other.index) | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 192 | case _ => false | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 193 | } | 
| 31929 | 194 | } | 
| 195 | ||
| 196 | ||
| 64477 | 197 | /* symbolic text chunks -- without actual text */ | 
| 56746 | 198 | |
| 199 | object Text_Chunk | |
| 200 |   {
 | |
| 201 | sealed abstract class Name | |
| 202 | case object Default extends Name | |
| 203 | case class Id(id: Document_ID.Generic) extends Name | |
| 204 | case class File(name: String) extends Name | |
| 205 | ||
| 65335 | 206 | val encode_name: XML.Encode.T[Name] = | 
| 207 |     {
 | |
| 208 | import XML.Encode._ | |
| 209 | variant(List( | |
| 210 |         { case Default => (Nil, Nil) },
 | |
| 211 |         { case Id(a) => (List(long_atom(a)), Nil) },
 | |
| 212 |         { case File(a) => (List(a), Nil) }))
 | |
| 213 | } | |
| 214 | ||
| 215 | val decode_name: XML.Decode.T[Name] = | |
| 216 |     {
 | |
| 217 | import XML.Decode._ | |
| 218 | variant(List( | |
| 219 |         { case (Nil, Nil) => Default },
 | |
| 220 |         { case (List(a), Nil) => Id(long_atom(a)) },
 | |
| 221 |         { case (List(a), Nil) => File(a) }))
 | |
| 222 | } | |
| 223 | ||
| 56746 | 224 | def apply(text: CharSequence): Text_Chunk = | 
| 225 | new Text_Chunk(Text.Range(0, text.length), Index(text)) | |
| 226 | } | |
| 227 | ||
| 228 | final class Text_Chunk private(val range: Text.Range, private val index: Index) | |
| 229 |   {
 | |
| 230 | override def hashCode: Int = (range, index).hashCode | |
| 231 | override def equals(that: Any): Boolean = | |
| 232 |       that match {
 | |
| 233 | case other: Text_Chunk => | |
| 234 | range == other.range && | |
| 235 | index == other.index | |
| 236 | case _ => false | |
| 237 | } | |
| 238 | ||
| 57840 | 239 | override def toString: String = "Text_Chunk" + range.toString | 
| 240 | ||
| 56746 | 241 | def decode(symbol_offset: Offset): Text.Offset = index.decode(symbol_offset) | 
| 242 | def decode(symbol_range: Range): Text.Range = index.decode(symbol_range) | |
| 243 | def incorporate(symbol_range: Range): Option[Text.Range] = | |
| 244 |     {
 | |
| 245 | def in(r: Range): Option[Text.Range] = | |
| 246 |         range.try_restrict(decode(r)) match {
 | |
| 247 | case Some(r1) if !r1.is_singularity => Some(r1) | |
| 248 | case _ => None | |
| 249 | } | |
| 250 | in(symbol_range) orElse in(symbol_range - 1) | |
| 251 | } | |
| 252 | } | |
| 253 | ||
| 254 | ||
| 33998 | 255 | /* recoding text */ | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 256 | |
| 31522 | 257 | private class Recoder(list: List[(String, String)]) | 
| 258 |   {
 | |
| 259 | private val (min, max) = | |
| 260 |     {
 | |
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 261 | var min = '\uffff' | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 262 | var max = '\u0000' | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 263 |       for ((x, _) <- list) {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 264 | val c = x(0) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 265 | if (c < min) min = c | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 266 | if (c > max) max = c | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 267 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 268 | (min, max) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 269 | } | 
| 40443 | 270 | private val table = | 
| 271 |     {
 | |
| 272 | var tab = Map[String, String]() | |
| 273 |       for ((x, y) <- list) {
 | |
| 274 |         tab.get(x) match {
 | |
| 275 | case None => tab += (x -> y) | |
| 276 | case Some(z) => | |
| 62230 | 277 |             error("Duplicate symbol mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z))
 | 
| 40443 | 278 | } | 
| 279 | } | |
| 280 | tab | |
| 281 | } | |
| 31522 | 282 | def recode(text: String): String = | 
| 283 |     {
 | |
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 284 | val len = text.length | 
| 48775 | 285 | val matcher = symbol_total.pattern.matcher(text) | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 286 | val result = new StringBuilder(len) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 287 | var i = 0 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 288 |       while (i < len) {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 289 | val c = text(i) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 290 |         if (min <= c && c <= max) {
 | 
| 31929 | 291 | matcher.region(i, len).lookingAt | 
| 27938 | 292 | val x = matcher.group | 
| 52888 | 293 | result.append(table.getOrElse(x, x)) | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 294 | i = matcher.end | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 295 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 296 |         else { result.append(c); i += 1 }
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 297 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 298 | result.toString | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 299 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 300 | } | 
| 27924 | 301 | |
| 27918 | 302 | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 303 | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 304 | /** symbol interpretation **/ | 
| 27927 | 305 | |
| 67311 | 306 | val ARGUMENT_CARTOUCHE = "cartouche" | 
| 307 | val ARGUMENT_SPACE_CARTOUCHE = "space_cartouche" | |
| 308 | ||
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 309 | private lazy val symbols = | 
| 61959 | 310 |   {
 | 
| 311 | val contents = | |
| 312 |       for (path <- Path.split(Isabelle_System.getenv("ISABELLE_SYMBOLS")) if path.is_file)
 | |
| 313 | yield (File.read(path)) | |
| 314 | new Interpretation(cat_lines(contents)) | |
| 315 | } | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 316 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 317 | private class Interpretation(symbols_spec: String) | 
| 29569 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 wenzelm parents: 
29174diff
changeset | 318 |   {
 | 
| 31522 | 319 | /* read symbols */ | 
| 320 | ||
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 321 |     private val No_Decl = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
 | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 322 |     private val Key = new Regex("""(?xs) (.+): """)
 | 
| 31522 | 323 | |
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 324 | private def read_decl(decl: String): (Symbol, Properties.T) = | 
| 31522 | 325 |     {
 | 
| 326 |       def err() = error("Bad symbol declaration: " + decl)
 | |
| 327 | ||
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 328 | def read_props(props: List[String]): Properties.T = | 
| 31522 | 329 |       {
 | 
| 330 |         props match {
 | |
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 331 | case Nil => Nil | 
| 31522 | 332 | case _ :: Nil => err() | 
| 61174 | 333 |           case Key(x) :: y :: rest => (x -> y.replace('\u2423', ' ')) :: read_props(rest)
 | 
| 31522 | 334 | case _ => err() | 
| 335 | } | |
| 336 | } | |
| 337 |       decl.split("\\s+").toList match {
 | |
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 338 | case sym :: props if sym.length > 1 && !is_malformed(sym) => | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 339 | (sym, read_props(props)) | 
| 34193 | 340 | case _ => err() | 
| 31522 | 341 | } | 
| 342 | } | |
| 343 | ||
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 344 | private val symbols: List[(Symbol, Properties.T)] = | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 345 | (((List.empty[(Symbol, Properties.T)], Set.empty[Symbol]) /: | 
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 346 | split_lines(symbols_spec).reverse) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 347 |         { case (res, No_Decl()) => res
 | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 348 | case ((list, known), decl) => | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 349 | val (sym, props) = read_decl(decl) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 350 | if (known(sym)) (list, known) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 351 | else ((sym, props) :: list, known + sym) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 352 | })._1 | 
| 31522 | 353 | |
| 354 | ||
| 53400 | 355 | /* basic properties */ | 
| 356 | ||
| 357 | val properties: Map[Symbol, Properties.T] = Map(symbols: _*) | |
| 31651 | 358 | |
| 67311 | 359 | val names: Map[Symbol, (String, String)] = | 
| 34134 | 360 |     {
 | 
| 67311 | 361 |       val Name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
 | 
| 362 |       val Argument = new Properties.String("argument")
 | |
| 363 | def argument(sym: Symbol, props: Properties.T): String = | |
| 364 |         props match {
 | |
| 365 | case Argument(arg) => | |
| 366 | if (arg == ARGUMENT_CARTOUCHE || arg == ARGUMENT_SPACE_CARTOUCHE) arg | |
| 367 |             else error("Bad argument: " + quote(arg) + " for symbol " + quote(sym))
 | |
| 368 | case _ => "" | |
| 369 | } | |
| 370 | Map((for ((sym @ Name(a), props) <- symbols) yield sym -> (a, argument(sym, props))): _*) | |
| 31651 | 371 | } | 
| 372 | ||
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 373 | val groups: List[(String, List[Symbol])] = | 
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 374 |       symbols.map({ case (sym, props) =>
 | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 375 |         val gs = for (("group", g) <- props) yield g
 | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 376 | if (gs.isEmpty) List(sym -> "unsorted") else gs.map(sym -> _) | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 377 | }).flatten | 
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 378 |         .groupBy(_._2).toList.map({ case (group, list) => (group, list.map(_._1)) })
 | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 379 | .sortBy(_._1) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 380 | |
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 381 | val abbrevs: Multi_Map[Symbol, String] = | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 382 | Multi_Map(( | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 383 |         for {
 | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 384 | (sym, props) <- symbols | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 385 |           ("abbrev", a) <- props.reverse
 | 
| 60215 | 386 | } yield sym -> a): _*) | 
| 43488 | 387 | |
| 66051 | 388 | val codes: List[(Symbol, Int)] = | 
| 61376 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 389 |     {
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 390 |       val Code = new Properties.String("code")
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 391 |       for {
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 392 | (sym, props) <- symbols | 
| 67311 | 393 | code <- | 
| 61376 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 394 |           props match {
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 395 | case Code(s) => | 
| 67311 | 396 |               try { Some(Integer.decode(s).intValue) }
 | 
| 61376 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 397 |               catch { case _: NumberFormatException => error("Bad code for symbol " + sym) }
 | 
| 67311 | 398 | case _ => None | 
| 61376 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 399 | } | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 400 |       } yield {
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 401 |         if (code < 128) error("Illegal ASCII code for symbol " + sym)
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 402 | else (sym, code) | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 403 | } | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 404 | } | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 405 | |
| 43488 | 406 | |
| 43490 | 407 | /* recoding */ | 
| 31522 | 408 | |
| 409 | private val (decoder, encoder) = | |
| 410 |     {
 | |
| 411 | val mapping = | |
| 61376 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 412 | for ((sym, code) <- codes) yield (sym, new String(Character.toChars(code))) | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 413 | (new Recoder(mapping), new Recoder(for ((x, y) <- mapping) yield (y, x))) | 
| 31522 | 414 | } | 
| 27918 | 415 | |
| 34098 | 416 | def decode(text: String): String = decoder.recode(text) | 
| 417 | def encode(text: String): String = encoder.recode(text) | |
| 34134 | 418 | |
| 43490 | 419 | private def recode_set(elems: String*): Set[String] = | 
| 420 |     {
 | |
| 421 | val content = elems.toList | |
| 422 | Set((content ::: content.map(decode)): _*) | |
| 423 | } | |
| 424 | ||
| 425 | private def recode_map[A](elems: (String, A)*): Map[String, A] = | |
| 426 |     {
 | |
| 427 | val content = elems.toList | |
| 428 |       Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*)
 | |
| 429 | } | |
| 430 | ||
| 431 | ||
| 432 | /* user fonts */ | |
| 433 | ||
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 434 |     private val Font = new Properties.String("font")
 | 
| 43696 | 435 | val fonts: Map[Symbol, String] = | 
| 60215 | 436 | recode_map((for ((sym, Font(font)) <- symbols) yield sym -> font): _*) | 
| 43490 | 437 | |
| 438 | val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList | |
| 439 | val font_index: Map[String, Int] = Map((font_names zip (0 until font_names.length).toList): _*) | |
| 440 | ||
| 34134 | 441 | |
| 442 | /* classification */ | |
| 443 | ||
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 444 | val letters = recode_set( | 
| 34134 | 445 | "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", | 
| 446 | "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", | |
| 447 | "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", | |
| 448 | "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", | |
| 449 | ||
| 450 | "\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>", | |
| 451 | "\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>", | |
| 452 | "\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>", | |
| 453 | "\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>", | |
| 454 | "\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>", | |
| 455 | "\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>", | |
| 456 | "\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>", | |
| 457 | "\\<x>", "\\<y>", "\\<z>", | |
| 458 | ||
| 459 | "\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>", | |
| 460 | "\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>", | |
| 461 | "\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>", | |
| 462 | "\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>", | |
| 463 | "\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>", | |
| 464 | "\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>", | |
| 465 | "\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>", | |
| 466 | "\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>", | |
| 467 | "\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>", | |
| 468 | ||
| 469 | "\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>", | |
| 470 | "\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>", | |
| 471 | "\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>", | |
| 472 | "\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>", | |
| 473 | "\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>", | |
| 474 | "\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>", | |
| 52616 
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
 wenzelm parents: 
52507diff
changeset | 475 | "\\<Psi>", "\\<Omega>") | 
| 34134 | 476 | |
| 61865 | 477 | val blanks = recode_set(space, "\t", "\n", "\u000B", "\f", "\r", "\r\n") | 
| 34138 | 478 | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 479 | val sym_chars = | 
| 34138 | 480 |       Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
 | 
| 34134 | 481 | |
| 44992 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 482 |     val symbolic = recode_set((for { (sym, _) <- symbols; if raw_symbolic(sym) } yield sym): _*)
 | 
| 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 483 | |
| 43455 | 484 | |
| 63528 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 485 | /* misc symbols */ | 
| 61579 | 486 | |
| 63528 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 487 | val newline_decoded = decode(newline) | 
| 61579 | 488 | val comment_decoded = decode(comment) | 
| 67438 | 489 | val cancel_decoded = decode(cancel) | 
| 490 | val latex_decoded = decode(latex) | |
| 55033 | 491 | val open_decoded = decode(open) | 
| 492 | val close_decoded = decode(close) | |
| 493 | ||
| 494 | ||
| 43488 | 495 | /* control symbols */ | 
| 496 | ||
| 59107 | 497 | val control_decoded: Set[Symbol] = | 
| 43488 | 498 |       Set((for ((sym, _) <- symbols if sym.startsWith("\\<^")) yield decode(sym)): _*)
 | 
| 499 | ||
| 62103 | 500 | val sub_decoded = decode(sub) | 
| 501 | val sup_decoded = decode(sup) | |
| 502 | val bold_decoded = decode(bold) | |
| 62104 
fb73c0d7bb37
clarified symbol insertion, depending on buffer encoding;
 wenzelm parents: 
62103diff
changeset | 503 | val emph_decoded = decode(emph) | 
| 65997 
e3dc9ea67a62
output control symbols like ML version, with optionally hidden source;
 wenzelm parents: 
65521diff
changeset | 504 | val bsub_decoded = decode(bsub) | 
| 
e3dc9ea67a62
output control symbols like ML version, with optionally hidden source;
 wenzelm parents: 
65521diff
changeset | 505 | val esub_decoded = decode(esub) | 
| 
e3dc9ea67a62
output control symbols like ML version, with optionally hidden source;
 wenzelm parents: 
65521diff
changeset | 506 | val bsup_decoded = decode(bsup) | 
| 
e3dc9ea67a62
output control symbols like ML version, with optionally hidden source;
 wenzelm parents: 
65521diff
changeset | 507 | val esup_decoded = decode(esup) | 
| 27918 | 508 | } | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 509 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 510 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 511 | /* tables */ | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 512 | |
| 53400 | 513 | def properties: Map[Symbol, Properties.T] = symbols.properties | 
| 67311 | 514 | def names: Map[Symbol, (String, String)] = symbols.names | 
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 515 | def groups: List[(String, List[Symbol])] = symbols.groups | 
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 516 | def abbrevs: Multi_Map[Symbol, String] = symbols.abbrevs | 
| 66051 | 517 | def codes: List[(Symbol, Int)] = symbols.codes | 
| 67389 | 518 | def groups_code: List[(String, List[Symbol])] = | 
| 519 |   {
 | |
| 520 | val has_code = codes.iterator.map(_._1).toSet | |
| 521 |     groups.flatMap({ case (group, symbols) =>
 | |
| 522 | val symbols1 = symbols.filter(has_code) | |
| 523 | if (symbols1.isEmpty) None else Some((group, symbols1)) | |
| 524 | }) | |
| 525 | } | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 526 | |
| 67304 
3cf05d7cf174
more robust treatment of conflicts with existing Unicode text;
 wenzelm parents: 
67255diff
changeset | 527 | lazy val is_code: Int => Boolean = codes.map(_._2).toSet | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 528 | def decode(text: String): String = symbols.decode(text) | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 529 | def encode(text: String): String = symbols.encode(text) | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 530 | |
| 65344 
b99283eed13c
clarified YXML vs. symbol encoding: operate on whole message;
 wenzelm parents: 
65335diff
changeset | 531 | def decode_yxml(text: String): XML.Body = YXML.parse_body(decode(text)) | 
| 
b99283eed13c
clarified YXML vs. symbol encoding: operate on whole message;
 wenzelm parents: 
65335diff
changeset | 532 | def decode_yxml_failsafe(text: String): XML.Body = YXML.parse_body_failsafe(decode(text)) | 
| 
b99283eed13c
clarified YXML vs. symbol encoding: operate on whole message;
 wenzelm parents: 
65335diff
changeset | 533 | def encode_yxml(body: XML.Body): String = encode(YXML.string_of_body(body)) | 
| 53337 
b3817a0e3211
sort items according to persistent history of frequency of use;
 wenzelm parents: 
53316diff
changeset | 534 | |
| 50291 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 535 | def decode_strict(text: String): String = | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 536 |   {
 | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 537 | val decoded = decode(text) | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 538 | if (encode(decoded) == text) decoded | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 539 |     else {
 | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 540 | val bad = new mutable.ListBuffer[Symbol] | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 541 | for (s <- iterator(text) if encode(decode(s)) != s && !bad.contains(s)) | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 542 | bad += s | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 543 |       error("Bad Unicode symbols in text: " + commas_quote(bad))
 | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 544 | } | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 545 | } | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 546 | |
| 43696 | 547 | def fonts: Map[Symbol, String] = symbols.fonts | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 548 | def font_names: List[String] = symbols.font_names | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 549 | def font_index: Map[String, Int] = symbols.font_index | 
| 43696 | 550 | def lookup_font(sym: Symbol): Option[Int] = symbols.fonts.get(sym).map(font_index(_)) | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 551 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 552 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 553 | /* classification */ | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 554 | |
| 43696 | 555 | def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym) | 
| 556 | def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9' | |
| 557 | def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'" | |
| 558 | def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym) | |
| 559 | def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym) | |
| 44992 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 560 | |
| 55033 | 561 | |
| 67438 | 562 | /* symbolic newline */ | 
| 63528 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 563 | |
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 564 | val newline: Symbol = "\\<newline>" | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 565 | def newline_decoded: Symbol = symbols.newline_decoded | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 566 | |
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 567 | def print_newlines(str: String): String = | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 568 |     if (str.contains('\n'))
 | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 569 |       (for (s <- iterator(str)) yield { if (s == "\n") newline_decoded else s }).mkString
 | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 570 | else str | 
| 61579 | 571 | |
| 67438 | 572 | |
| 573 | /* formal comments */ | |
| 574 | ||
| 61579 | 575 | val comment: Symbol = "\\<comment>" | 
| 67449 | 576 | val cancel: Symbol = "\\<^cancel>" | 
| 577 | val latex: Symbol = "\\<^latex>" | |
| 578 | ||
| 61579 | 579 | def comment_decoded: Symbol = symbols.comment_decoded | 
| 67438 | 580 | def cancel_decoded: Symbol = symbols.cancel_decoded | 
| 581 | def latex_decoded: Symbol = symbols.latex_decoded | |
| 61579 | 582 | |
| 583 | ||
| 55033 | 584 | /* cartouches */ | 
| 585 | ||
| 61579 | 586 | val open: Symbol = "\\<open>" | 
| 587 | val close: Symbol = "\\<close>" | |
| 55033 | 588 | |
| 589 | def open_decoded: Symbol = symbols.open_decoded | |
| 590 | def close_decoded: Symbol = symbols.close_decoded | |
| 591 | ||
| 592 | def is_open(sym: Symbol): Boolean = sym == open_decoded || sym == open | |
| 593 | def is_close(sym: Symbol): Boolean = sym == close_decoded || sym == close | |
| 594 | ||
| 67131 | 595 | def cartouche(s: String): String = open + s + close | 
| 596 | def cartouche_decoded(s: String): String = open_decoded + s + close_decoded | |
| 597 | ||
| 55033 | 598 | |
| 599 | /* symbols for symbolic identifiers */ | |
| 44992 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 600 | |
| 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 601 | private def raw_symbolic(sym: Symbol): Boolean = | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 602 |     sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^")
 | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 603 | |
| 55033 | 604 | def is_symbolic(sym: Symbol): Boolean = | 
| 605 | !is_open(sym) && !is_close(sym) && (raw_symbolic(sym) || symbols.symbolic.contains(sym)) | |
| 606 | ||
| 607 | def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym) | |
| 608 | ||
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 609 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 610 | /* control symbols */ | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 611 | |
| 67127 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 wenzelm parents: 
66919diff
changeset | 612 | val control_prefix = "\\<^" | 
| 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 wenzelm parents: 
66919diff
changeset | 613 | val control_suffix = ">" | 
| 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 wenzelm parents: 
66919diff
changeset | 614 | |
| 67255 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 wenzelm parents: 
67131diff
changeset | 615 | def control_name(sym: Symbol): Option[String] = | 
| 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 wenzelm parents: 
67131diff
changeset | 616 | if (is_control_encoded(sym)) | 
| 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 wenzelm parents: 
67131diff
changeset | 617 | Some(sym.substring(control_prefix.length, sym.length - control_suffix.length)) | 
| 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 wenzelm parents: 
67131diff
changeset | 618 | else None | 
| 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 wenzelm parents: 
67131diff
changeset | 619 | |
| 67127 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 wenzelm parents: 
66919diff
changeset | 620 | def is_control_encoded(sym: Symbol): Boolean = | 
| 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 wenzelm parents: 
66919diff
changeset | 621 | sym.startsWith(control_prefix) && sym.endsWith(control_suffix) | 
| 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 wenzelm parents: 
66919diff
changeset | 622 | |
| 59107 | 623 | def is_control(sym: Symbol): Boolean = | 
| 67127 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 wenzelm parents: 
66919diff
changeset | 624 | is_control_encoded(sym) || symbols.control_decoded.contains(sym) | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 625 | |
| 43696 | 626 | def is_controllable(sym: Symbol): Boolean = | 
| 66006 
cec184536dfd
uniform notion of Symbol.is_controllable (see also 265d9300d523);
 wenzelm parents: 
65997diff
changeset | 627 | !is_blank(sym) && !is_control(sym) && !is_open(sym) && !is_close(sym) && | 
| 
cec184536dfd
uniform notion of Symbol.is_controllable (see also 265d9300d523);
 wenzelm parents: 
65997diff
changeset | 628 | !is_malformed(sym) && sym != "\"" | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 629 | |
| 62103 | 630 | val sub = "\\<^sub>" | 
| 631 | val sup = "\\<^sup>" | |
| 632 | val bold = "\\<^bold>" | |
| 62104 
fb73c0d7bb37
clarified symbol insertion, depending on buffer encoding;
 wenzelm parents: 
62103diff
changeset | 633 | val emph = "\\<^emph>" | 
| 65997 
e3dc9ea67a62
output control symbols like ML version, with optionally hidden source;
 wenzelm parents: 
65521diff
changeset | 634 | val bsub = "\\<^bsub>" | 
| 
e3dc9ea67a62
output control symbols like ML version, with optionally hidden source;
 wenzelm parents: 
65521diff
changeset | 635 | val esub = "\\<^esub>" | 
| 
e3dc9ea67a62
output control symbols like ML version, with optionally hidden source;
 wenzelm parents: 
65521diff
changeset | 636 | val bsup = "\\<^bsup>" | 
| 
e3dc9ea67a62
output control symbols like ML version, with optionally hidden source;
 wenzelm parents: 
65521diff
changeset | 637 | val esup = "\\<^esup>" | 
| 62103 | 638 | |
| 44238 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 639 | def sub_decoded: Symbol = symbols.sub_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 640 | def sup_decoded: Symbol = symbols.sup_decoded | 
| 62103 | 641 | def bold_decoded: Symbol = symbols.bold_decoded | 
| 642 | def emph_decoded: Symbol = symbols.emph_decoded | |
| 44238 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 643 | def bsub_decoded: Symbol = symbols.bsub_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 644 | def esub_decoded: Symbol = symbols.esub_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 645 | def bsup_decoded: Symbol = symbols.bsup_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 646 | def esup_decoded: Symbol = symbols.esup_decoded | 
| 27901 | 647 | } |