| author | matichuk <daniel.matichuk@nicta.com.au> | 
| Mon, 30 May 2016 16:11:53 +1000 | |
| changeset 63185 | 08369e33c185 | 
| parent 62528 | c8c532b22947 | 
| child 63528 | 0f39f59317c1 | 
| permissions | -rw-r--r-- | 
| 27901 | 1 | /* Title: Pure/General/symbol.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 27924 | 4 | Detecting and recoding Isabelle symbols. | 
| 27901 | 5 | */ | 
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 55618 | 9 | |
| 36011 
3ff725ac13a4
adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
 wenzelm parents: 
34316diff
changeset | 10 | import scala.collection.mutable | 
| 31522 | 11 | import scala.util.matching.Regex | 
| 48922 | 12 | import scala.annotation.tailrec | 
| 27901 | 13 | |
| 14 | ||
| 31522 | 15 | object Symbol | 
| 16 | {
 | |
| 43696 | 17 | type Symbol = String | 
| 18 | ||
| 55884 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 19 | // counting Isabelle symbols, starting from 1 | 
| 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 20 | type Offset = Text.Offset | 
| 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 21 | type Range = Text.Range | 
| 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 22 | |
| 43696 | 23 | |
| 61865 | 24 | /* spaces */ | 
| 25 | ||
| 26 | val space = " " | |
| 27 | ||
| 28 | private val static_spaces = space * 4000 | |
| 29 | ||
| 30 | def spaces(n: Int): String = | |
| 31 |   {
 | |
| 32 | require(n >= 0) | |
| 33 | if (n < static_spaces.length) static_spaces.substring(0, n) | |
| 34 | else space * n | |
| 35 | } | |
| 36 | ||
| 37 | ||
| 43418 | 38 | /* ASCII characters */ | 
| 39 | ||
| 40 | def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' | |
| 55497 | 41 | |
| 43418 | 42 | def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9' | 
| 55497 | 43 | |
| 44 | def is_ascii_hex(c: Char): Boolean = | |
| 45 | '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' | |
| 46 | ||
| 43418 | 47 | def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\'' | 
| 48 | ||
| 55497 | 49 | def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c) | 
| 50 | ||
| 43418 | 51 | def is_ascii_letdig(c: Char): Boolean = | 
| 52 | is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c) | |
| 53 | ||
| 54 | def is_ascii_identifier(s: String): Boolean = | |
| 50238 
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
 wenzelm parents: 
50233diff
changeset | 55 | s.length > 0 && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig) | 
| 43418 | 56 | |
| 62528 | 57 | def ascii(c: Char): Symbol = | 
| 58 |   {
 | |
| 59 |     if (c > 127) error("Non-ASCII character: " + quote(c.toString))
 | |
| 60 | else char_symbols(c.toInt) | |
| 61 | } | |
| 62 | ||
| 43418 | 63 | |
| 48775 | 64 | /* symbol matching */ | 
| 27901 | 65 | |
| 48775 | 66 |   private val symbol_total = new Regex("""(?xs)
 | 
| 67 | [\ud800-\udbff][\udc00-\udfff] | \r\n | | |
| 68 | \\ < (?: \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* | \^? ([A-Za-z][A-Za-z0-9_']*)? ) >? | | |
| 69 | .""") | |
| 27924 | 70 | |
| 48775 | 71 | private def is_plain(c: Char): Boolean = | 
| 72 | !(c == '\r' || c == '\\' || Character.isHighSurrogate(c)) | |
| 48773 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 73 | |
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 74 | def is_malformed(s: Symbol): Boolean = | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 75 |     s.length match {
 | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 76 | case 1 => | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 77 | val c = s(0) | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 78 | Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd' | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 79 | case 2 => | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 80 | val c1 = s(0) | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 81 | val c2 = s(1) | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 82 | !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2)) | 
| 48774 | 83 |       case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
 | 
| 48773 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 84 | } | 
| 34137 | 85 | |
| 54734 
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
 wenzelm parents: 
53400diff
changeset | 86 | def is_newline(s: Symbol): Boolean = | 
| 43675 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 87 | s == "\n" || s == "\r" || s == "\r\n" | 
| 38877 | 88 | |
| 34137 | 89 | class Matcher(text: CharSequence) | 
| 90 |   {
 | |
| 48775 | 91 | private val matcher = symbol_total.pattern.matcher(text) | 
| 34137 | 92 | def apply(start: Int, end: Int): Int = | 
| 93 |     {
 | |
| 94 | require(0 <= start && start < end && end <= text.length) | |
| 34316 
f879b649ac4c
clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
 wenzelm parents: 
34193diff
changeset | 95 | if (is_plain(text.charAt(start))) 1 | 
| 34138 | 96 |       else {
 | 
| 34137 | 97 | matcher.region(start, end).lookingAt | 
| 98 | matcher.group.length | |
| 99 | } | |
| 100 | } | |
| 31522 | 101 | } | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 102 | |
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 103 | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 104 | /* iterator */ | 
| 33998 | 105 | |
| 43696 | 106 | private val char_symbols: Array[Symbol] = | 
| 43675 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 107 | (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 108 | |
| 43696 | 109 | def iterator(text: CharSequence): Iterator[Symbol] = | 
| 110 | new Iterator[Symbol] | |
| 40522 | 111 |     {
 | 
| 43489 | 112 | private val matcher = new Matcher(text) | 
| 113 | private var i = 0 | |
| 114 | def hasNext = i < text.length | |
| 115 | def next = | |
| 116 |       {
 | |
| 117 | val n = matcher(i, text.length) | |
| 43675 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 118 | val s = | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 119 | if (n == 0) "" | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 120 |           else if (n == 1) {
 | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 121 | val c = text.charAt(i) | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 122 | if (c < char_symbols.length) char_symbols(c) | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 123 | else text.subSequence(i, i + n).toString | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 124 | } | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 125 | else text.subSequence(i, i + n).toString | 
| 43489 | 126 | i += n | 
| 127 | s | |
| 128 | } | |
| 33998 | 129 | } | 
| 43489 | 130 | |
| 44949 | 131 | def explode(text: CharSequence): List[Symbol] = iterator(text).toList | 
| 132 | ||
| 48922 | 133 | def advance_line_column(pos: (Int, Int), text: CharSequence): (Int, Int) = | 
| 134 |   {
 | |
| 135 | var (line, column) = pos | |
| 136 |     for (sym <- iterator(text)) {
 | |
| 54734 
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
 wenzelm parents: 
53400diff
changeset | 137 |       if (is_newline(sym)) { line += 1; column = 1 }
 | 
| 48922 | 138 | else column += 1 | 
| 139 | } | |
| 140 | (line, column) | |
| 141 | } | |
| 142 | ||
| 33998 | 143 | |
| 144 | /* decoding offsets */ | |
| 145 | ||
| 52507 | 146 | object Index | 
| 147 |   {
 | |
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 148 | private sealed case class Entry(chr: Int, sym: Int) | 
| 52507 | 149 | |
| 56472 | 150 | val empty: Index = new Index(Nil) | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 151 | |
| 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 152 | def apply(text: CharSequence): Index = | 
| 31929 | 153 |     {
 | 
| 34137 | 154 | val matcher = new Matcher(text) | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 155 | val buf = new mutable.ListBuffer[Entry] | 
| 31929 | 156 | var chr = 0 | 
| 157 | var sym = 0 | |
| 33998 | 158 |       while (chr < text.length) {
 | 
| 34137 | 159 | val n = matcher(chr, text.length) | 
| 160 | chr += n | |
| 31929 | 161 | sym += 1 | 
| 34137 | 162 | if (n > 1) buf += Entry(chr, sym) | 
| 31929 | 163 | } | 
| 56472 | 164 | if (buf.isEmpty) empty else new Index(buf.toList) | 
| 31929 | 165 | } | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 166 | } | 
| 55430 | 167 | |
| 56472 | 168 | final class Index private(entries: List[Index.Entry]) | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 169 |   {
 | 
| 56472 | 170 | private val hash: Int = entries.hashCode | 
| 171 | private val index: Array[Index.Entry] = entries.toArray | |
| 172 | ||
| 55884 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 173 | def decode(symbol_offset: Offset): Text.Offset = | 
| 31929 | 174 |     {
 | 
| 55884 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 175 | val sym = symbol_offset - 1 | 
| 31929 | 176 | val end = index.length | 
| 48922 | 177 | @tailrec def bisect(a: Int, b: Int): Int = | 
| 31929 | 178 |       {
 | 
| 179 |         if (a < b) {
 | |
| 180 | val c = (a + b) / 2 | |
| 181 | if (sym < index(c).sym) bisect(a, c) | |
| 182 | else if (c + 1 == end || sym < index(c + 1).sym) c | |
| 183 | else bisect(c + 1, b) | |
| 184 | } | |
| 185 | else -1 | |
| 186 | } | |
| 187 | val i = bisect(0, end) | |
| 188 | if (i < 0) sym | |
| 189 | else index(i).chr + sym - index(i).sym | |
| 190 | } | |
| 55884 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 191 | def decode(symbol_range: Range): Text.Range = symbol_range.map(decode(_)) | 
| 56335 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 192 | |
| 56338 
f968f4e3d520
proper structural hashCode, which is required for Command.File equals (NB: Array has physical object identity);
 wenzelm parents: 
56335diff
changeset | 193 | override def hashCode: Int = hash | 
| 56335 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 194 | override def equals(that: Any): Boolean = | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 195 |       that match {
 | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 196 | case other: Index => index.sameElements(other.index) | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 197 | case _ => false | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 198 | } | 
| 31929 | 199 | } | 
| 200 | ||
| 201 | ||
| 56746 | 202 | /* text chunks */ | 
| 203 | ||
| 204 | object Text_Chunk | |
| 205 |   {
 | |
| 206 | sealed abstract class Name | |
| 207 | case object Default extends Name | |
| 208 | case class Id(id: Document_ID.Generic) extends Name | |
| 209 | case class File(name: String) extends Name | |
| 210 | ||
| 211 | def apply(text: CharSequence): Text_Chunk = | |
| 212 | new Text_Chunk(Text.Range(0, text.length), Index(text)) | |
| 213 | } | |
| 214 | ||
| 215 | final class Text_Chunk private(val range: Text.Range, private val index: Index) | |
| 216 |   {
 | |
| 217 | override def hashCode: Int = (range, index).hashCode | |
| 218 | override def equals(that: Any): Boolean = | |
| 219 |       that match {
 | |
| 220 | case other: Text_Chunk => | |
| 221 | range == other.range && | |
| 222 | index == other.index | |
| 223 | case _ => false | |
| 224 | } | |
| 225 | ||
| 57840 | 226 | override def toString: String = "Text_Chunk" + range.toString | 
| 227 | ||
| 56746 | 228 | def decode(symbol_offset: Offset): Text.Offset = index.decode(symbol_offset) | 
| 229 | def decode(symbol_range: Range): Text.Range = index.decode(symbol_range) | |
| 230 | def incorporate(symbol_range: Range): Option[Text.Range] = | |
| 231 |     {
 | |
| 232 | def in(r: Range): Option[Text.Range] = | |
| 233 |         range.try_restrict(decode(r)) match {
 | |
| 234 | case Some(r1) if !r1.is_singularity => Some(r1) | |
| 235 | case _ => None | |
| 236 | } | |
| 237 | in(symbol_range) orElse in(symbol_range - 1) | |
| 238 | } | |
| 239 | } | |
| 240 | ||
| 241 | ||
| 33998 | 242 | /* recoding text */ | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 243 | |
| 31522 | 244 | private class Recoder(list: List[(String, String)]) | 
| 245 |   {
 | |
| 246 | private val (min, max) = | |
| 247 |     {
 | |
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 248 | var min = '\uffff' | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 249 | var max = '\u0000' | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 250 |       for ((x, _) <- list) {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 251 | val c = x(0) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 252 | if (c < min) min = c | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 253 | if (c > max) max = c | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 254 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 255 | (min, max) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 256 | } | 
| 40443 | 257 | private val table = | 
| 258 |     {
 | |
| 259 | var tab = Map[String, String]() | |
| 260 |       for ((x, y) <- list) {
 | |
| 261 |         tab.get(x) match {
 | |
| 262 | case None => tab += (x -> y) | |
| 263 | case Some(z) => | |
| 62230 | 264 |             error("Duplicate symbol mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z))
 | 
| 40443 | 265 | } | 
| 266 | } | |
| 267 | tab | |
| 268 | } | |
| 31522 | 269 | def recode(text: String): String = | 
| 270 |     {
 | |
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 271 | val len = text.length | 
| 48775 | 272 | val matcher = symbol_total.pattern.matcher(text) | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 273 | val result = new StringBuilder(len) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 274 | var i = 0 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 275 |       while (i < len) {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 276 | val c = text(i) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 277 |         if (min <= c && c <= max) {
 | 
| 31929 | 278 | matcher.region(i, len).lookingAt | 
| 27938 | 279 | val x = matcher.group | 
| 52888 | 280 | result.append(table.getOrElse(x, x)) | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 281 | i = matcher.end | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 282 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 283 |         else { result.append(c); i += 1 }
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 284 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 285 | result.toString | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 286 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 287 | } | 
| 27924 | 288 | |
| 27918 | 289 | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 290 | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 291 | /** symbol interpretation **/ | 
| 27927 | 292 | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 293 | private lazy val symbols = | 
| 61959 | 294 |   {
 | 
| 295 | val contents = | |
| 296 |       for (path <- Path.split(Isabelle_System.getenv("ISABELLE_SYMBOLS")) if path.is_file)
 | |
| 297 | yield (File.read(path)) | |
| 298 | new Interpretation(cat_lines(contents)) | |
| 299 | } | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 300 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 301 | private class Interpretation(symbols_spec: String) | 
| 29569 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 wenzelm parents: 
29174diff
changeset | 302 |   {
 | 
| 31522 | 303 | /* read symbols */ | 
| 304 | ||
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 305 |     private val No_Decl = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
 | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 306 |     private val Key = new Regex("""(?xs) (.+): """)
 | 
| 31522 | 307 | |
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 308 | private def read_decl(decl: String): (Symbol, Properties.T) = | 
| 31522 | 309 |     {
 | 
| 310 |       def err() = error("Bad symbol declaration: " + decl)
 | |
| 311 | ||
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 312 | def read_props(props: List[String]): Properties.T = | 
| 31522 | 313 |       {
 | 
| 314 |         props match {
 | |
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 315 | case Nil => Nil | 
| 31522 | 316 | case _ :: Nil => err() | 
| 61174 | 317 |           case Key(x) :: y :: rest => (x -> y.replace('\u2423', ' ')) :: read_props(rest)
 | 
| 31522 | 318 | case _ => err() | 
| 319 | } | |
| 320 | } | |
| 321 |       decl.split("\\s+").toList match {
 | |
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 322 | case sym :: props if sym.length > 1 && !is_malformed(sym) => | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 323 | (sym, read_props(props)) | 
| 34193 | 324 | case _ => err() | 
| 31522 | 325 | } | 
| 326 | } | |
| 327 | ||
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 328 | private val symbols: List[(Symbol, Properties.T)] = | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 329 | (((List.empty[(Symbol, Properties.T)], Set.empty[Symbol]) /: | 
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 330 | split_lines(symbols_spec).reverse) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 331 |         { case (res, No_Decl()) => res
 | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 332 | case ((list, known), decl) => | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 333 | val (sym, props) = read_decl(decl) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 334 | if (known(sym)) (list, known) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 335 | else ((sym, props) :: list, known + sym) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 336 | })._1 | 
| 31522 | 337 | |
| 338 | ||
| 53400 | 339 | /* basic properties */ | 
| 340 | ||
| 341 | val properties: Map[Symbol, Properties.T] = Map(symbols: _*) | |
| 31651 | 342 | |
| 43696 | 343 | val names: Map[Symbol, String] = | 
| 34134 | 344 |     {
 | 
| 43456 
8a6de1a6e1dc
names for control symbols without "^", which is relevant for completion;
 wenzelm parents: 
43455diff
changeset | 345 |       val name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
 | 
| 60215 | 346 | Map((for ((sym @ name(a), _) <- symbols) yield sym -> a): _*) | 
| 31651 | 347 | } | 
| 348 | ||
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 349 | val groups: List[(String, List[Symbol])] = | 
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 350 |       symbols.map({ case (sym, props) =>
 | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 351 |         val gs = for (("group", g) <- props) yield g
 | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 352 | if (gs.isEmpty) List(sym -> "unsorted") else gs.map(sym -> _) | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 353 | }).flatten | 
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 354 |         .groupBy(_._2).toList.map({ case (group, list) => (group, list.map(_._1)) })
 | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 355 | .sortBy(_._1) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 356 | |
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 357 | val abbrevs: Multi_Map[Symbol, String] = | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 358 | Multi_Map(( | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 359 |         for {
 | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 360 | (sym, props) <- symbols | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 361 |           ("abbrev", a) <- props.reverse
 | 
| 60215 | 362 | } yield sym -> a): _*) | 
| 43488 | 363 | |
| 61376 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 364 | val codes: List[(String, Int)] = | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 365 |     {
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 366 |       val Code = new Properties.String("code")
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 367 |       for {
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 368 | (sym, props) <- symbols | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 369 | code = | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 370 |           props match {
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 371 | case Code(s) => | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 372 |               try { Integer.decode(s).intValue }
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 373 |               catch { case _: NumberFormatException => error("Bad code for symbol " + sym) }
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 374 |             case _ => error("Missing code for symbol " + sym)
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 375 | } | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 376 |       } yield {
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 377 |         if (code < 128) error("Illegal ASCII code for symbol " + sym)
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 378 | else (sym, code) | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 379 | } | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 380 | } | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 381 | |
| 43488 | 382 | |
| 43490 | 383 | /* recoding */ | 
| 31522 | 384 | |
| 385 | private val (decoder, encoder) = | |
| 386 |     {
 | |
| 387 | val mapping = | |
| 61376 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 388 | for ((sym, code) <- codes) yield (sym, new String(Character.toChars(code))) | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 389 | (new Recoder(mapping), new Recoder(for ((x, y) <- mapping) yield (y, x))) | 
| 31522 | 390 | } | 
| 27918 | 391 | |
| 34098 | 392 | def decode(text: String): String = decoder.recode(text) | 
| 393 | def encode(text: String): String = encoder.recode(text) | |
| 34134 | 394 | |
| 43490 | 395 | private def recode_set(elems: String*): Set[String] = | 
| 396 |     {
 | |
| 397 | val content = elems.toList | |
| 398 | Set((content ::: content.map(decode)): _*) | |
| 399 | } | |
| 400 | ||
| 401 | private def recode_map[A](elems: (String, A)*): Map[String, A] = | |
| 402 |     {
 | |
| 403 | val content = elems.toList | |
| 404 |       Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*)
 | |
| 405 | } | |
| 406 | ||
| 407 | ||
| 408 | /* user fonts */ | |
| 409 | ||
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 410 |     private val Font = new Properties.String("font")
 | 
| 43696 | 411 | val fonts: Map[Symbol, String] = | 
| 60215 | 412 | recode_map((for ((sym, Font(font)) <- symbols) yield sym -> font): _*) | 
| 43490 | 413 | |
| 414 | val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList | |
| 415 | val font_index: Map[String, Int] = Map((font_names zip (0 until font_names.length).toList): _*) | |
| 416 | ||
| 34134 | 417 | |
| 418 | /* classification */ | |
| 419 | ||
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 420 | val letters = recode_set( | 
| 34134 | 421 | "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", | 
| 422 | "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", | |
| 423 | "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", | |
| 424 | "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", | |
| 425 | ||
| 426 | "\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>", | |
| 427 | "\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>", | |
| 428 | "\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>", | |
| 429 | "\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>", | |
| 430 | "\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>", | |
| 431 | "\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>", | |
| 432 | "\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>", | |
| 433 | "\\<x>", "\\<y>", "\\<z>", | |
| 434 | ||
| 435 | "\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>", | |
| 436 | "\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>", | |
| 437 | "\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>", | |
| 438 | "\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>", | |
| 439 | "\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>", | |
| 440 | "\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>", | |
| 441 | "\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>", | |
| 442 | "\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>", | |
| 443 | "\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>", | |
| 444 | ||
| 445 | "\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>", | |
| 446 | "\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>", | |
| 447 | "\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>", | |
| 448 | "\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>", | |
| 449 | "\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>", | |
| 450 | "\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>", | |
| 52616 
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
 wenzelm parents: 
52507diff
changeset | 451 | "\\<Psi>", "\\<Omega>") | 
| 34134 | 452 | |
| 61865 | 453 | val blanks = recode_set(space, "\t", "\n", "\u000B", "\f", "\r", "\r\n") | 
| 34138 | 454 | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 455 | val sym_chars = | 
| 34138 | 456 |       Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
 | 
| 34134 | 457 | |
| 44992 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 458 |     val symbolic = recode_set((for { (sym, _) <- symbols; if raw_symbolic(sym) } yield sym): _*)
 | 
| 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 459 | |
| 43455 | 460 | |
| 61579 | 461 | /* comment */ | 
| 462 | ||
| 463 | val comment_decoded = decode(comment) | |
| 464 | ||
| 465 | ||
| 55033 | 466 | /* cartouches */ | 
| 467 | ||
| 468 | val open_decoded = decode(open) | |
| 469 | val close_decoded = decode(close) | |
| 470 | ||
| 471 | ||
| 43488 | 472 | /* control symbols */ | 
| 473 | ||
| 59107 | 474 | val control_decoded: Set[Symbol] = | 
| 43488 | 475 |       Set((for ((sym, _) <- symbols if sym.startsWith("\\<^")) yield decode(sym)): _*)
 | 
| 476 | ||
| 62103 | 477 | val sub_decoded = decode(sub) | 
| 478 | val sup_decoded = decode(sup) | |
| 479 | val bold_decoded = decode(bold) | |
| 62104 
fb73c0d7bb37
clarified symbol insertion, depending on buffer encoding;
 wenzelm parents: 
62103diff
changeset | 480 | val emph_decoded = decode(emph) | 
| 43511 | 481 |     val bsub_decoded = decode("\\<^bsub>")
 | 
| 482 |     val esub_decoded = decode("\\<^esub>")
 | |
| 483 |     val bsup_decoded = decode("\\<^bsup>")
 | |
| 484 |     val esup_decoded = decode("\\<^esup>")
 | |
| 27918 | 485 | } | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 486 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 487 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 488 | /* tables */ | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 489 | |
| 53400 | 490 | def properties: Map[Symbol, Properties.T] = symbols.properties | 
| 43696 | 491 | def names: Map[Symbol, String] = symbols.names | 
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 492 | def groups: List[(String, List[Symbol])] = symbols.groups | 
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 493 | def abbrevs: Multi_Map[Symbol, String] = symbols.abbrevs | 
| 61376 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 494 | def codes: List[(String, Int)] = symbols.codes | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 495 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 496 | def decode(text: String): String = symbols.decode(text) | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 497 | def encode(text: String): String = symbols.encode(text) | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 498 | |
| 53337 
b3817a0e3211
sort items according to persistent history of frequency of use;
 wenzelm parents: 
53316diff
changeset | 499 | def decode_string: XML.Decode.T[String] = (x => decode(XML.Decode.string(x))) | 
| 
b3817a0e3211
sort items according to persistent history of frequency of use;
 wenzelm parents: 
53316diff
changeset | 500 | def encode_string: XML.Encode.T[String] = (x => XML.Encode.string(encode(x))) | 
| 
b3817a0e3211
sort items according to persistent history of frequency of use;
 wenzelm parents: 
53316diff
changeset | 501 | |
| 50291 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 502 | def decode_strict(text: String): String = | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 503 |   {
 | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 504 | val decoded = decode(text) | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 505 | if (encode(decoded) == text) decoded | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 506 |     else {
 | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 507 | val bad = new mutable.ListBuffer[Symbol] | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 508 | for (s <- iterator(text) if encode(decode(s)) != s && !bad.contains(s)) | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 509 | bad += s | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 510 |       error("Bad Unicode symbols in text: " + commas_quote(bad))
 | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 511 | } | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 512 | } | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 513 | |
| 43696 | 514 | def fonts: Map[Symbol, String] = symbols.fonts | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 515 | def font_names: List[String] = symbols.font_names | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 516 | def font_index: Map[String, Int] = symbols.font_index | 
| 43696 | 517 | def lookup_font(sym: Symbol): Option[Int] = symbols.fonts.get(sym).map(font_index(_)) | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 518 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 519 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 520 | /* classification */ | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 521 | |
| 43696 | 522 | def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym) | 
| 523 | def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9' | |
| 524 | def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'" | |
| 525 | def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym) | |
| 526 | def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym) | |
| 44992 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 527 | |
| 55033 | 528 | |
| 61579 | 529 | /* comment */ | 
| 530 | ||
| 531 | val comment: Symbol = "\\<comment>" | |
| 532 | def comment_decoded: Symbol = symbols.comment_decoded | |
| 533 | ||
| 534 | ||
| 55033 | 535 | /* cartouches */ | 
| 536 | ||
| 61579 | 537 | val open: Symbol = "\\<open>" | 
| 538 | val close: Symbol = "\\<close>" | |
| 55033 | 539 | |
| 540 | def open_decoded: Symbol = symbols.open_decoded | |
| 541 | def close_decoded: Symbol = symbols.close_decoded | |
| 542 | ||
| 543 | def is_open(sym: Symbol): Boolean = sym == open_decoded || sym == open | |
| 544 | def is_close(sym: Symbol): Boolean = sym == close_decoded || sym == close | |
| 545 | ||
| 546 | ||
| 547 | /* symbols for symbolic identifiers */ | |
| 44992 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 548 | |
| 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 549 | private def raw_symbolic(sym: Symbol): Boolean = | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 550 |     sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^")
 | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 551 | |
| 55033 | 552 | def is_symbolic(sym: Symbol): Boolean = | 
| 553 | !is_open(sym) && !is_close(sym) && (raw_symbolic(sym) || symbols.symbolic.contains(sym)) | |
| 554 | ||
| 555 | def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym) | |
| 556 | ||
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 557 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 558 | /* control symbols */ | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 559 | |
| 59107 | 560 | def is_control(sym: Symbol): Boolean = | 
| 61470 | 561 |     (sym.startsWith("\\<^") && sym.endsWith(">")) || symbols.control_decoded.contains(sym)
 | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 562 | |
| 43696 | 563 | def is_controllable(sym: Symbol): Boolean = | 
| 59107 | 564 | !is_blank(sym) && !is_control(sym) && !is_open(sym) && !is_close(sym) && !is_malformed(sym) | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 565 | |
| 62103 | 566 | val sub = "\\<^sub>" | 
| 567 | val sup = "\\<^sup>" | |
| 568 | val bold = "\\<^bold>" | |
| 62104 
fb73c0d7bb37
clarified symbol insertion, depending on buffer encoding;
 wenzelm parents: 
62103diff
changeset | 569 | val emph = "\\<^emph>" | 
| 62103 | 570 | |
| 44238 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 571 | def sub_decoded: Symbol = symbols.sub_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 572 | def sup_decoded: Symbol = symbols.sup_decoded | 
| 62103 | 573 | def bold_decoded: Symbol = symbols.bold_decoded | 
| 574 | def emph_decoded: Symbol = symbols.emph_decoded | |
| 44238 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 575 | def bsub_decoded: Symbol = symbols.bsub_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 576 | def esub_decoded: Symbol = symbols.esub_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 577 | def bsup_decoded: Symbol = symbols.bsup_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 578 | def esup_decoded: Symbol = symbols.esup_decoded | 
| 27901 | 579 | } |