| author | wenzelm | 
| Tue, 14 Mar 2017 16:20:07 +0100 | |
| changeset 65232 | ca571c8c0788 | 
| parent 65196 | e8760a98db78 | 
| child 65335 | 7634d33c1a79 | 
| permissions | -rw-r--r-- | 
| 27901 | 1 | /* Title: Pure/General/symbol.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 27924 | 4 | Detecting and recoding Isabelle symbols. | 
| 27901 | 5 | */ | 
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 55618 | 9 | |
| 36011 
3ff725ac13a4
adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
 wenzelm parents: 
34316diff
changeset | 10 | import scala.collection.mutable | 
| 31522 | 11 | import scala.util.matching.Regex | 
| 48922 | 12 | import scala.annotation.tailrec | 
| 27901 | 13 | |
| 14 | ||
| 31522 | 15 | object Symbol | 
| 16 | {
 | |
| 43696 | 17 | type Symbol = String | 
| 18 | ||
| 55884 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 19 | // counting Isabelle symbols, starting from 1 | 
| 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 20 | type Offset = Text.Offset | 
| 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 21 | type Range = Text.Range | 
| 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 22 | |
| 43696 | 23 | |
| 61865 | 24 | /* spaces */ | 
| 25 | ||
| 26 | val space = " " | |
| 27 | ||
| 28 | private val static_spaces = space * 4000 | |
| 29 | ||
| 30 | def spaces(n: Int): String = | |
| 31 |   {
 | |
| 32 | require(n >= 0) | |
| 33 | if (n < static_spaces.length) static_spaces.substring(0, n) | |
| 34 | else space * n | |
| 35 | } | |
| 36 | ||
| 37 | ||
| 43418 | 38 | /* ASCII characters */ | 
| 39 | ||
| 40 | def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' | |
| 55497 | 41 | |
| 43418 | 42 | def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9' | 
| 55497 | 43 | |
| 44 | def is_ascii_hex(c: Char): Boolean = | |
| 45 | '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' | |
| 46 | ||
| 43418 | 47 | def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\'' | 
| 48 | ||
| 55497 | 49 | def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c) | 
| 50 | ||
| 43418 | 51 | def is_ascii_letdig(c: Char): Boolean = | 
| 52 | is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c) | |
| 53 | ||
| 54 | def is_ascii_identifier(s: String): Boolean = | |
| 50238 
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
 wenzelm parents: 
50233diff
changeset | 55 | s.length > 0 && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig) | 
| 43418 | 56 | |
| 62528 | 57 | def ascii(c: Char): Symbol = | 
| 58 |   {
 | |
| 59 |     if (c > 127) error("Non-ASCII character: " + quote(c.toString))
 | |
| 60 | else char_symbols(c.toInt) | |
| 61 | } | |
| 62 | ||
| 43418 | 63 | |
| 48775 | 64 | /* symbol matching */ | 
| 27901 | 65 | |
| 48775 | 66 |   private val symbol_total = new Regex("""(?xs)
 | 
| 63936 | 67 | [\ud800-\udbff][\udc00-\udfff] | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""") | 
| 27924 | 68 | |
| 48775 | 69 | private def is_plain(c: Char): Boolean = | 
| 70 | !(c == '\r' || c == '\\' || Character.isHighSurrogate(c)) | |
| 48773 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 71 | |
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 72 | def is_malformed(s: Symbol): Boolean = | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 73 |     s.length match {
 | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 74 | case 1 => | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 75 | val c = s(0) | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 76 | Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd' | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 77 | case 2 => | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 78 | val c1 = s(0) | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 79 | val c2 = s(1) | 
| 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 80 | !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2)) | 
| 48774 | 81 |       case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
 | 
| 48773 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 wenzelm parents: 
48704diff
changeset | 82 | } | 
| 34137 | 83 | |
| 54734 
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
 wenzelm parents: 
53400diff
changeset | 84 | def is_newline(s: Symbol): Boolean = | 
| 43675 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 85 | s == "\n" || s == "\r" || s == "\r\n" | 
| 38877 | 86 | |
| 34137 | 87 | class Matcher(text: CharSequence) | 
| 88 |   {
 | |
| 48775 | 89 | private val matcher = symbol_total.pattern.matcher(text) | 
| 34137 | 90 | def apply(start: Int, end: Int): Int = | 
| 91 |     {
 | |
| 92 | require(0 <= start && start < end && end <= text.length) | |
| 34316 
f879b649ac4c
clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
 wenzelm parents: 
34193diff
changeset | 93 | if (is_plain(text.charAt(start))) 1 | 
| 34138 | 94 |       else {
 | 
| 34137 | 95 | matcher.region(start, end).lookingAt | 
| 96 | matcher.group.length | |
| 97 | } | |
| 98 | } | |
| 31522 | 99 | } | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 100 | |
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 101 | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 102 | /* iterator */ | 
| 33998 | 103 | |
| 43696 | 104 | private val char_symbols: Array[Symbol] = | 
| 43675 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 105 | (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 106 | |
| 43696 | 107 | def iterator(text: CharSequence): Iterator[Symbol] = | 
| 108 | new Iterator[Symbol] | |
| 40522 | 109 |     {
 | 
| 43489 | 110 | private val matcher = new Matcher(text) | 
| 111 | private var i = 0 | |
| 112 | def hasNext = i < text.length | |
| 113 | def next = | |
| 114 |       {
 | |
| 115 | val n = matcher(i, text.length) | |
| 43675 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 116 | val s = | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 117 | if (n == 0) "" | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 118 |           else if (n == 1) {
 | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 119 | val c = text.charAt(i) | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 120 | if (c < char_symbols.length) char_symbols(c) | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 121 | else text.subSequence(i, i + n).toString | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 122 | } | 
| 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 wenzelm parents: 
43511diff
changeset | 123 | else text.subSequence(i, i + n).toString | 
| 43489 | 124 | i += n | 
| 125 | s | |
| 126 | } | |
| 33998 | 127 | } | 
| 43489 | 128 | |
| 44949 | 129 | def explode(text: CharSequence): List[Symbol] = iterator(text).toList | 
| 130 | ||
| 64615 | 131 | def length(text: CharSequence): Int = iterator(text).length | 
| 64617 | 132 | |
| 33998 | 133 | |
| 134 | /* decoding offsets */ | |
| 135 | ||
| 52507 | 136 | object Index | 
| 137 |   {
 | |
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 138 | private sealed case class Entry(chr: Int, sym: Int) | 
| 52507 | 139 | |
| 56472 | 140 | val empty: Index = new Index(Nil) | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 141 | |
| 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 142 | def apply(text: CharSequence): Index = | 
| 31929 | 143 |     {
 | 
| 34137 | 144 | val matcher = new Matcher(text) | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 145 | val buf = new mutable.ListBuffer[Entry] | 
| 31929 | 146 | var chr = 0 | 
| 147 | var sym = 0 | |
| 33998 | 148 |       while (chr < text.length) {
 | 
| 34137 | 149 | val n = matcher(chr, text.length) | 
| 150 | chr += n | |
| 31929 | 151 | sym += 1 | 
| 34137 | 152 | if (n > 1) buf += Entry(chr, sym) | 
| 31929 | 153 | } | 
| 56472 | 154 | if (buf.isEmpty) empty else new Index(buf.toList) | 
| 31929 | 155 | } | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 156 | } | 
| 55430 | 157 | |
| 56472 | 158 | final class Index private(entries: List[Index.Entry]) | 
| 56471 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 wenzelm parents: 
56338diff
changeset | 159 |   {
 | 
| 56472 | 160 | private val hash: Int = entries.hashCode | 
| 161 | private val index: Array[Index.Entry] = entries.toArray | |
| 162 | ||
| 55884 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 163 | def decode(symbol_offset: Offset): Text.Offset = | 
| 31929 | 164 |     {
 | 
| 55884 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 165 | val sym = symbol_offset - 1 | 
| 31929 | 166 | val end = index.length | 
| 48922 | 167 | @tailrec def bisect(a: Int, b: Int): Int = | 
| 31929 | 168 |       {
 | 
| 169 |         if (a < b) {
 | |
| 170 | val c = (a + b) / 2 | |
| 171 | if (sym < index(c).sym) bisect(a, c) | |
| 172 | else if (c + 1 == end || sym < index(c + 1).sym) c | |
| 173 | else bisect(c + 1, b) | |
| 174 | } | |
| 175 | else -1 | |
| 176 | } | |
| 177 | val i = bisect(0, end) | |
| 178 | if (i < 0) sym | |
| 179 | else index(i).chr + sym - index(i).sym | |
| 180 | } | |
| 55884 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 wenzelm parents: 
55618diff
changeset | 181 | def decode(symbol_range: Range): Text.Range = symbol_range.map(decode(_)) | 
| 56335 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 182 | |
| 56338 
f968f4e3d520
proper structural hashCode, which is required for Command.File equals (NB: Array has physical object identity);
 wenzelm parents: 
56335diff
changeset | 183 | override def hashCode: Int = hash | 
| 56335 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 184 | override def equals(that: Any): Boolean = | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 185 |       that match {
 | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 186 | case other: Index => index.sameElements(other.index) | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 187 | case _ => false | 
| 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 wenzelm parents: 
55884diff
changeset | 188 | } | 
| 31929 | 189 | } | 
| 190 | ||
| 191 | ||
| 64477 | 192 | /* symbolic text chunks -- without actual text */ | 
| 56746 | 193 | |
| 194 | object Text_Chunk | |
| 195 |   {
 | |
| 196 | sealed abstract class Name | |
| 197 | case object Default extends Name | |
| 198 | case class Id(id: Document_ID.Generic) extends Name | |
| 199 | case class File(name: String) extends Name | |
| 200 | ||
| 201 | def apply(text: CharSequence): Text_Chunk = | |
| 202 | new Text_Chunk(Text.Range(0, text.length), Index(text)) | |
| 203 | } | |
| 204 | ||
| 205 | final class Text_Chunk private(val range: Text.Range, private val index: Index) | |
| 206 |   {
 | |
| 207 | override def hashCode: Int = (range, index).hashCode | |
| 208 | override def equals(that: Any): Boolean = | |
| 209 |       that match {
 | |
| 210 | case other: Text_Chunk => | |
| 211 | range == other.range && | |
| 212 | index == other.index | |
| 213 | case _ => false | |
| 214 | } | |
| 215 | ||
| 57840 | 216 | override def toString: String = "Text_Chunk" + range.toString | 
| 217 | ||
| 56746 | 218 | def decode(symbol_offset: Offset): Text.Offset = index.decode(symbol_offset) | 
| 219 | def decode(symbol_range: Range): Text.Range = index.decode(symbol_range) | |
| 220 | def incorporate(symbol_range: Range): Option[Text.Range] = | |
| 221 |     {
 | |
| 222 | def in(r: Range): Option[Text.Range] = | |
| 223 |         range.try_restrict(decode(r)) match {
 | |
| 224 | case Some(r1) if !r1.is_singularity => Some(r1) | |
| 225 | case _ => None | |
| 226 | } | |
| 227 | in(symbol_range) orElse in(symbol_range - 1) | |
| 228 | } | |
| 229 | } | |
| 230 | ||
| 231 | ||
| 33998 | 232 | /* recoding text */ | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 233 | |
| 31522 | 234 | private class Recoder(list: List[(String, String)]) | 
| 235 |   {
 | |
| 236 | private val (min, max) = | |
| 237 |     {
 | |
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 238 | var min = '\uffff' | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 239 | var max = '\u0000' | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 240 |       for ((x, _) <- list) {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 241 | val c = x(0) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 242 | if (c < min) min = c | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 243 | if (c > max) max = c | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 244 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 245 | (min, max) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 246 | } | 
| 40443 | 247 | private val table = | 
| 248 |     {
 | |
| 249 | var tab = Map[String, String]() | |
| 250 |       for ((x, y) <- list) {
 | |
| 251 |         tab.get(x) match {
 | |
| 252 | case None => tab += (x -> y) | |
| 253 | case Some(z) => | |
| 62230 | 254 |             error("Duplicate symbol mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z))
 | 
| 40443 | 255 | } | 
| 256 | } | |
| 257 | tab | |
| 258 | } | |
| 31522 | 259 | def recode(text: String): String = | 
| 260 |     {
 | |
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 261 | val len = text.length | 
| 48775 | 262 | val matcher = symbol_total.pattern.matcher(text) | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 263 | val result = new StringBuilder(len) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 264 | var i = 0 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 265 |       while (i < len) {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 266 | val c = text(i) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 267 |         if (min <= c && c <= max) {
 | 
| 31929 | 268 | matcher.region(i, len).lookingAt | 
| 27938 | 269 | val x = matcher.group | 
| 52888 | 270 | result.append(table.getOrElse(x, x)) | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 271 | i = matcher.end | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 272 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 273 |         else { result.append(c); i += 1 }
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 274 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 275 | result.toString | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 276 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 277 | } | 
| 27924 | 278 | |
| 27918 | 279 | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 280 | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 281 | /** symbol interpretation **/ | 
| 27927 | 282 | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 283 | private lazy val symbols = | 
| 61959 | 284 |   {
 | 
| 285 | val contents = | |
| 286 |       for (path <- Path.split(Isabelle_System.getenv("ISABELLE_SYMBOLS")) if path.is_file)
 | |
| 287 | yield (File.read(path)) | |
| 288 | new Interpretation(cat_lines(contents)) | |
| 289 | } | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 290 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 291 | private class Interpretation(symbols_spec: String) | 
| 29569 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 wenzelm parents: 
29174diff
changeset | 292 |   {
 | 
| 31522 | 293 | /* read symbols */ | 
| 294 | ||
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 295 |     private val No_Decl = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
 | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 296 |     private val Key = new Regex("""(?xs) (.+): """)
 | 
| 31522 | 297 | |
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 298 | private def read_decl(decl: String): (Symbol, Properties.T) = | 
| 31522 | 299 |     {
 | 
| 300 |       def err() = error("Bad symbol declaration: " + decl)
 | |
| 301 | ||
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 302 | def read_props(props: List[String]): Properties.T = | 
| 31522 | 303 |       {
 | 
| 304 |         props match {
 | |
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 305 | case Nil => Nil | 
| 31522 | 306 | case _ :: Nil => err() | 
| 61174 | 307 |           case Key(x) :: y :: rest => (x -> y.replace('\u2423', ' ')) :: read_props(rest)
 | 
| 31522 | 308 | case _ => err() | 
| 309 | } | |
| 310 | } | |
| 311 |       decl.split("\\s+").toList match {
 | |
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 312 | case sym :: props if sym.length > 1 && !is_malformed(sym) => | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 313 | (sym, read_props(props)) | 
| 34193 | 314 | case _ => err() | 
| 31522 | 315 | } | 
| 316 | } | |
| 317 | ||
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 318 | private val symbols: List[(Symbol, Properties.T)] = | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 319 | (((List.empty[(Symbol, Properties.T)], Set.empty[Symbol]) /: | 
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 320 | split_lines(symbols_spec).reverse) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 321 |         { case (res, No_Decl()) => res
 | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 322 | case ((list, known), decl) => | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 323 | val (sym, props) = read_decl(decl) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 324 | if (known(sym)) (list, known) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 325 | else ((sym, props) :: list, known + sym) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 326 | })._1 | 
| 31522 | 327 | |
| 328 | ||
| 53400 | 329 | /* basic properties */ | 
| 330 | ||
| 331 | val properties: Map[Symbol, Properties.T] = Map(symbols: _*) | |
| 31651 | 332 | |
| 43696 | 333 | val names: Map[Symbol, String] = | 
| 34134 | 334 |     {
 | 
| 43456 
8a6de1a6e1dc
names for control symbols without "^", which is relevant for completion;
 wenzelm parents: 
43455diff
changeset | 335 |       val name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
 | 
| 60215 | 336 | Map((for ((sym @ name(a), _) <- symbols) yield sym -> a): _*) | 
| 31651 | 337 | } | 
| 338 | ||
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 339 | val groups: List[(String, List[Symbol])] = | 
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 340 |       symbols.map({ case (sym, props) =>
 | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 341 |         val gs = for (("group", g) <- props) yield g
 | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 342 | if (gs.isEmpty) List(sym -> "unsorted") else gs.map(sym -> _) | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 343 | }).flatten | 
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 344 |         .groupBy(_._2).toList.map({ case (group, list) => (group, list.map(_._1)) })
 | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 345 | .sortBy(_._1) | 
| 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 346 | |
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 347 | val abbrevs: Multi_Map[Symbol, String] = | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 348 | Multi_Map(( | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 349 |         for {
 | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 350 | (sym, props) <- symbols | 
| 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 351 |           ("abbrev", a) <- props.reverse
 | 
| 60215 | 352 | } yield sym -> a): _*) | 
| 43488 | 353 | |
| 61376 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 354 | val codes: List[(String, Int)] = | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 355 |     {
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 356 |       val Code = new Properties.String("code")
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 357 |       for {
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 358 | (sym, props) <- symbols | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 359 | code = | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 360 |           props match {
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 361 | case Code(s) => | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 362 |               try { Integer.decode(s).intValue }
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 363 |               catch { case _: NumberFormatException => error("Bad code for symbol " + sym) }
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 364 |             case _ => error("Missing code for symbol " + sym)
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 365 | } | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 366 |       } yield {
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 367 |         if (code < 128) error("Illegal ASCII code for symbol " + sym)
 | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 368 | else (sym, code) | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 369 | } | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 370 | } | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 371 | |
| 43488 | 372 | |
| 43490 | 373 | /* recoding */ | 
| 31522 | 374 | |
| 375 | private val (decoder, encoder) = | |
| 376 |     {
 | |
| 377 | val mapping = | |
| 61376 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 378 | for ((sym, code) <- codes) yield (sym, new String(Character.toChars(code))) | 
| 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 379 | (new Recoder(mapping), new Recoder(for ((x, y) <- mapping) yield (y, x))) | 
| 31522 | 380 | } | 
| 27918 | 381 | |
| 34098 | 382 | def decode(text: String): String = decoder.recode(text) | 
| 383 | def encode(text: String): String = encoder.recode(text) | |
| 34134 | 384 | |
| 43490 | 385 | private def recode_set(elems: String*): Set[String] = | 
| 386 |     {
 | |
| 387 | val content = elems.toList | |
| 388 | Set((content ::: content.map(decode)): _*) | |
| 389 | } | |
| 390 | ||
| 391 | private def recode_map[A](elems: (String, A)*): Map[String, A] = | |
| 392 |     {
 | |
| 393 | val content = elems.toList | |
| 394 |       Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*)
 | |
| 395 | } | |
| 396 | ||
| 397 | ||
| 398 | /* user fonts */ | |
| 399 | ||
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 400 |     private val Font = new Properties.String("font")
 | 
| 43696 | 401 | val fonts: Map[Symbol, String] = | 
| 60215 | 402 | recode_map((for ((sym, Font(font)) <- symbols) yield sym -> font): _*) | 
| 43490 | 403 | |
| 404 | val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList | |
| 405 | val font_index: Map[String, Int] = Map((font_names zip (0 until font_names.length).toList): _*) | |
| 406 | ||
| 34134 | 407 | |
| 408 | /* classification */ | |
| 409 | ||
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 410 | val letters = recode_set( | 
| 34134 | 411 | "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", | 
| 412 | "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", | |
| 413 | "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", | |
| 414 | "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", | |
| 415 | ||
| 416 | "\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>", | |
| 417 | "\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>", | |
| 418 | "\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>", | |
| 419 | "\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>", | |
| 420 | "\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>", | |
| 421 | "\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>", | |
| 422 | "\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>", | |
| 423 | "\\<x>", "\\<y>", "\\<z>", | |
| 424 | ||
| 425 | "\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>", | |
| 426 | "\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>", | |
| 427 | "\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>", | |
| 428 | "\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>", | |
| 429 | "\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>", | |
| 430 | "\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>", | |
| 431 | "\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>", | |
| 432 | "\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>", | |
| 433 | "\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>", | |
| 434 | ||
| 435 | "\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>", | |
| 436 | "\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>", | |
| 437 | "\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>", | |
| 438 | "\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>", | |
| 439 | "\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>", | |
| 440 | "\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>", | |
| 52616 
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
 wenzelm parents: 
52507diff
changeset | 441 | "\\<Psi>", "\\<Omega>") | 
| 34134 | 442 | |
| 61865 | 443 | val blanks = recode_set(space, "\t", "\n", "\u000B", "\f", "\r", "\r\n") | 
| 34138 | 444 | |
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 445 | val sym_chars = | 
| 34138 | 446 |       Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
 | 
| 34134 | 447 | |
| 44992 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 448 |     val symbolic = recode_set((for { (sym, _) <- symbols; if raw_symbolic(sym) } yield sym): _*)
 | 
| 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 449 | |
| 43455 | 450 | |
| 63528 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 451 | /* misc symbols */ | 
| 61579 | 452 | |
| 63528 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 453 | val newline_decoded = decode(newline) | 
| 61579 | 454 | val comment_decoded = decode(comment) | 
| 455 | ||
| 456 | ||
| 55033 | 457 | /* cartouches */ | 
| 458 | ||
| 459 | val open_decoded = decode(open) | |
| 460 | val close_decoded = decode(close) | |
| 461 | ||
| 462 | ||
| 43488 | 463 | /* control symbols */ | 
| 464 | ||
| 59107 | 465 | val control_decoded: Set[Symbol] = | 
| 43488 | 466 |       Set((for ((sym, _) <- symbols if sym.startsWith("\\<^")) yield decode(sym)): _*)
 | 
| 467 | ||
| 62103 | 468 | val sub_decoded = decode(sub) | 
| 469 | val sup_decoded = decode(sup) | |
| 470 | val bold_decoded = decode(bold) | |
| 62104 
fb73c0d7bb37
clarified symbol insertion, depending on buffer encoding;
 wenzelm parents: 
62103diff
changeset | 471 | val emph_decoded = decode(emph) | 
| 43511 | 472 |     val bsub_decoded = decode("\\<^bsub>")
 | 
| 473 |     val esub_decoded = decode("\\<^esub>")
 | |
| 474 |     val bsup_decoded = decode("\\<^bsup>")
 | |
| 475 |     val esup_decoded = decode("\\<^esup>")
 | |
| 27918 | 476 | } | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 477 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 478 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 479 | /* tables */ | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 480 | |
| 53400 | 481 | def properties: Map[Symbol, Properties.T] = symbols.properties | 
| 43696 | 482 | def names: Map[Symbol, String] = symbols.names | 
| 50136 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 wenzelm parents: 
48922diff
changeset | 483 | def groups: List[(String, List[Symbol])] = symbols.groups | 
| 53316 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 wenzelm parents: 
53021diff
changeset | 484 | def abbrevs: Multi_Map[Symbol, String] = symbols.abbrevs | 
| 61376 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 wenzelm parents: 
61174diff
changeset | 485 | def codes: List[(String, Int)] = symbols.codes | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 486 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 487 | def decode(text: String): String = symbols.decode(text) | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 488 | def encode(text: String): String = symbols.encode(text) | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 489 | |
| 53337 
b3817a0e3211
sort items according to persistent history of frequency of use;
 wenzelm parents: 
53316diff
changeset | 490 | def decode_string: XML.Decode.T[String] = (x => decode(XML.Decode.string(x))) | 
| 
b3817a0e3211
sort items according to persistent history of frequency of use;
 wenzelm parents: 
53316diff
changeset | 491 | def encode_string: XML.Encode.T[String] = (x => XML.Encode.string(encode(x))) | 
| 
b3817a0e3211
sort items according to persistent history of frequency of use;
 wenzelm parents: 
53316diff
changeset | 492 | |
| 50291 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 493 | def decode_strict(text: String): String = | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 494 |   {
 | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 495 | val decoded = decode(text) | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 496 | if (encode(decoded) == text) decoded | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 497 |     else {
 | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 498 | val bad = new mutable.ListBuffer[Symbol] | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 499 | for (s <- iterator(text) if encode(decode(s)) != s && !bad.contains(s)) | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 500 | bad += s | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 501 |       error("Bad Unicode symbols in text: " + commas_quote(bad))
 | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 502 | } | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 503 | } | 
| 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 wenzelm parents: 
50238diff
changeset | 504 | |
| 43696 | 505 | def fonts: Map[Symbol, String] = symbols.fonts | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 506 | def font_names: List[String] = symbols.font_names | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 507 | def font_index: Map[String, Int] = symbols.font_index | 
| 43696 | 508 | def lookup_font(sym: Symbol): Option[Int] = symbols.fonts.get(sym).map(font_index(_)) | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 509 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 510 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 511 | /* classification */ | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 512 | |
| 43696 | 513 | def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym) | 
| 514 | def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9' | |
| 515 | def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'" | |
| 516 | def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym) | |
| 517 | def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym) | |
| 44992 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 518 | |
| 55033 | 519 | |
| 63528 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 520 | /* misc symbols */ | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 521 | |
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 522 | val newline: Symbol = "\\<newline>" | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 523 | def newline_decoded: Symbol = symbols.newline_decoded | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 524 | |
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 525 | def print_newlines(str: String): String = | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 526 |     if (str.contains('\n'))
 | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 527 |       (for (s <- iterator(str)) yield { if (s == "\n") newline_decoded else s }).mkString
 | 
| 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 wenzelm parents: 
62528diff
changeset | 528 | else str | 
| 61579 | 529 | |
| 530 | val comment: Symbol = "\\<comment>" | |
| 531 | def comment_decoded: Symbol = symbols.comment_decoded | |
| 532 | ||
| 533 | ||
| 55033 | 534 | /* cartouches */ | 
| 535 | ||
| 61579 | 536 | val open: Symbol = "\\<open>" | 
| 537 | val close: Symbol = "\\<close>" | |
| 55033 | 538 | |
| 539 | def open_decoded: Symbol = symbols.open_decoded | |
| 540 | def close_decoded: Symbol = symbols.close_decoded | |
| 541 | ||
| 542 | def is_open(sym: Symbol): Boolean = sym == open_decoded || sym == open | |
| 543 | def is_close(sym: Symbol): Boolean = sym == close_decoded || sym == close | |
| 544 | ||
| 545 | ||
| 546 | /* symbols for symbolic identifiers */ | |
| 44992 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 547 | |
| 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 wenzelm parents: 
44949diff
changeset | 548 | private def raw_symbolic(sym: Symbol): Boolean = | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 549 |     sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^")
 | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 550 | |
| 55033 | 551 | def is_symbolic(sym: Symbol): Boolean = | 
| 552 | !is_open(sym) && !is_close(sym) && (raw_symbolic(sym) || symbols.symbolic.contains(sym)) | |
| 553 | ||
| 554 | def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym) | |
| 555 | ||
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 556 | |
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 557 | /* control symbols */ | 
| 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 558 | |
| 59107 | 559 | def is_control(sym: Symbol): Boolean = | 
| 61470 | 560 |     (sym.startsWith("\\<^") && sym.endsWith(">")) || symbols.control_decoded.contains(sym)
 | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 561 | |
| 43696 | 562 | def is_controllable(sym: Symbol): Boolean = | 
| 59107 | 563 | !is_blank(sym) && !is_control(sym) && !is_open(sym) && !is_close(sym) && !is_malformed(sym) | 
| 43695 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 wenzelm parents: 
43675diff
changeset | 564 | |
| 62103 | 565 | val sub = "\\<^sub>" | 
| 566 | val sup = "\\<^sup>" | |
| 567 | val bold = "\\<^bold>" | |
| 62104 
fb73c0d7bb37
clarified symbol insertion, depending on buffer encoding;
 wenzelm parents: 
62103diff
changeset | 568 | val emph = "\\<^emph>" | 
| 62103 | 569 | |
| 44238 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 570 | def sub_decoded: Symbol = symbols.sub_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 571 | def sup_decoded: Symbol = symbols.sup_decoded | 
| 62103 | 572 | def bold_decoded: Symbol = symbols.bold_decoded | 
| 573 | def emph_decoded: Symbol = symbols.emph_decoded | |
| 44238 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 574 | def bsub_decoded: Symbol = symbols.bsub_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 575 | def esub_decoded: Symbol = symbols.esub_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 576 | def bsup_decoded: Symbol = symbols.bsup_decoded | 
| 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 wenzelm parents: 
44181diff
changeset | 577 | def esup_decoded: Symbol = symbols.esup_decoded | 
| 27901 | 578 | } |