author | wenzelm |
Mon, 20 Mar 2017 20:43:26 +0100 | |
changeset 65335 | 7634d33c1a79 |
parent 65196 | e8760a98db78 |
child 65344 | b99283eed13c |
permissions | -rw-r--r-- |
27901 | 1 |
/* Title: Pure/General/symbol.scala |
2 |
Author: Makarius |
|
3 |
||
27924 | 4 |
Detecting and recoding Isabelle symbols. |
27901 | 5 |
*/ |
6 |
||
7 |
package isabelle |
|
8 |
||
55618 | 9 |
|
36011
3ff725ac13a4
adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
wenzelm
parents:
34316
diff
changeset
|
10 |
import scala.collection.mutable |
31522 | 11 |
import scala.util.matching.Regex |
48922 | 12 |
import scala.annotation.tailrec |
27901 | 13 |
|
14 |
||
31522 | 15 |
object Symbol |
16 |
{ |
|
43696 | 17 |
type Symbol = String |
18 |
||
55884
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset
|
19 |
// counting Isabelle symbols, starting from 1 |
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset
|
20 |
type Offset = Text.Offset |
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset
|
21 |
type Range = Text.Range |
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset
|
22 |
|
43696 | 23 |
|
61865 | 24 |
/* spaces */ |
25 |
||
26 |
val space = " " |
|
27 |
||
28 |
private val static_spaces = space * 4000 |
|
29 |
||
30 |
def spaces(n: Int): String = |
|
31 |
{ |
|
32 |
require(n >= 0) |
|
33 |
if (n < static_spaces.length) static_spaces.substring(0, n) |
|
34 |
else space * n |
|
35 |
} |
|
36 |
||
37 |
||
43418 | 38 |
/* ASCII characters */ |
39 |
||
40 |
def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' |
|
55497 | 41 |
|
43418 | 42 |
def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9' |
55497 | 43 |
|
44 |
def is_ascii_hex(c: Char): Boolean = |
|
45 |
'0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' |
|
46 |
||
43418 | 47 |
def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\'' |
48 |
||
55497 | 49 |
def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c) |
50 |
||
43418 | 51 |
def is_ascii_letdig(c: Char): Boolean = |
52 |
is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c) |
|
53 |
||
54 |
def is_ascii_identifier(s: String): Boolean = |
|
50238
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
wenzelm
parents:
50233
diff
changeset
|
55 |
s.length > 0 && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig) |
43418 | 56 |
|
62528 | 57 |
def ascii(c: Char): Symbol = |
58 |
{ |
|
59 |
if (c > 127) error("Non-ASCII character: " + quote(c.toString)) |
|
60 |
else char_symbols(c.toInt) |
|
61 |
} |
|
62 |
||
43418 | 63 |
|
48775 | 64 |
/* symbol matching */ |
27901 | 65 |
|
48775 | 66 |
private val symbol_total = new Regex("""(?xs) |
63936 | 67 |
[\ud800-\udbff][\udc00-\udfff] | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""") |
27924 | 68 |
|
48775 | 69 |
private def is_plain(c: Char): Boolean = |
70 |
!(c == '\r' || c == '\\' || Character.isHighSurrogate(c)) |
|
48773
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
71 |
|
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
72 |
def is_malformed(s: Symbol): Boolean = |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
73 |
s.length match { |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
74 |
case 1 => |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
75 |
val c = s(0) |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
76 |
Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd' |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
77 |
case 2 => |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
78 |
val c1 = s(0) |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
79 |
val c2 = s(1) |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
80 |
!(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2)) |
48774 | 81 |
case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>" |
48773
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
82 |
} |
34137 | 83 |
|
54734
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
wenzelm
parents:
53400
diff
changeset
|
84 |
def is_newline(s: Symbol): Boolean = |
43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset
|
85 |
s == "\n" || s == "\r" || s == "\r\n" |
38877 | 86 |
|
34137 | 87 |
class Matcher(text: CharSequence) |
88 |
{ |
|
48775 | 89 |
private val matcher = symbol_total.pattern.matcher(text) |
34137 | 90 |
def apply(start: Int, end: Int): Int = |
91 |
{ |
|
92 |
require(0 <= start && start < end && end <= text.length) |
|
34316
f879b649ac4c
clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
wenzelm
parents:
34193
diff
changeset
|
93 |
if (is_plain(text.charAt(start))) 1 |
34138 | 94 |
else { |
34137 | 95 |
matcher.region(start, end).lookingAt |
96 |
matcher.group.length |
|
97 |
} |
|
98 |
} |
|
31522 | 99 |
} |
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
100 |
|
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
101 |
|
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
102 |
/* iterator */ |
33998 | 103 |
|
43696 | 104 |
private val char_symbols: Array[Symbol] = |
43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset
|
105 |
(0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray |
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset
|
106 |
|
43696 | 107 |
def iterator(text: CharSequence): Iterator[Symbol] = |
108 |
new Iterator[Symbol] |
|
40522 | 109 |
{ |
43489 | 110 |
private val matcher = new Matcher(text) |
111 |
private var i = 0 |
|
112 |
def hasNext = i < text.length |
|
113 |
def next = |
|
114 |
{ |
|
115 |
val n = matcher(i, text.length) |
|
43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset
|
116 |
val s = |
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset
|
117 |
if (n == 0) "" |
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset
|
118 |
else if (n == 1) { |
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset
|
119 |
val c = text.charAt(i) |
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset
|
120 |
if (c < char_symbols.length) char_symbols(c) |
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset
|
121 |
else text.subSequence(i, i + n).toString |
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset
|
122 |
} |
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset
|
123 |
else text.subSequence(i, i + n).toString |
43489 | 124 |
i += n |
125 |
s |
|
126 |
} |
|
33998 | 127 |
} |
43489 | 128 |
|
44949 | 129 |
def explode(text: CharSequence): List[Symbol] = iterator(text).toList |
130 |
||
64615 | 131 |
def length(text: CharSequence): Int = iterator(text).length |
64617 | 132 |
|
33998 | 133 |
|
134 |
/* decoding offsets */ |
|
135 |
||
52507 | 136 |
object Index |
137 |
{ |
|
56471
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset
|
138 |
private sealed case class Entry(chr: Int, sym: Int) |
52507 | 139 |
|
56472 | 140 |
val empty: Index = new Index(Nil) |
56471
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset
|
141 |
|
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset
|
142 |
def apply(text: CharSequence): Index = |
31929 | 143 |
{ |
34137 | 144 |
val matcher = new Matcher(text) |
56471
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset
|
145 |
val buf = new mutable.ListBuffer[Entry] |
31929 | 146 |
var chr = 0 |
147 |
var sym = 0 |
|
33998 | 148 |
while (chr < text.length) { |
34137 | 149 |
val n = matcher(chr, text.length) |
150 |
chr += n |
|
31929 | 151 |
sym += 1 |
34137 | 152 |
if (n > 1) buf += Entry(chr, sym) |
31929 | 153 |
} |
56472 | 154 |
if (buf.isEmpty) empty else new Index(buf.toList) |
31929 | 155 |
} |
56471
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset
|
156 |
} |
55430 | 157 |
|
56472 | 158 |
final class Index private(entries: List[Index.Entry]) |
56471
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset
|
159 |
{ |
56472 | 160 |
private val hash: Int = entries.hashCode |
161 |
private val index: Array[Index.Entry] = entries.toArray |
|
162 |
||
55884
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset
|
163 |
def decode(symbol_offset: Offset): Text.Offset = |
31929 | 164 |
{ |
55884
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset
|
165 |
val sym = symbol_offset - 1 |
31929 | 166 |
val end = index.length |
48922 | 167 |
@tailrec def bisect(a: Int, b: Int): Int = |
31929 | 168 |
{ |
169 |
if (a < b) { |
|
170 |
val c = (a + b) / 2 |
|
171 |
if (sym < index(c).sym) bisect(a, c) |
|
172 |
else if (c + 1 == end || sym < index(c + 1).sym) c |
|
173 |
else bisect(c + 1, b) |
|
174 |
} |
|
175 |
else -1 |
|
176 |
} |
|
177 |
val i = bisect(0, end) |
|
178 |
if (i < 0) sym |
|
179 |
else index(i).chr + sym - index(i).sym |
|
180 |
} |
|
55884
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset
|
181 |
def decode(symbol_range: Range): Text.Range = symbol_range.map(decode(_)) |
56335
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset
|
182 |
|
56338
f968f4e3d520
proper structural hashCode, which is required for Command.File equals (NB: Array has physical object identity);
wenzelm
parents:
56335
diff
changeset
|
183 |
override def hashCode: Int = hash |
56335
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset
|
184 |
override def equals(that: Any): Boolean = |
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset
|
185 |
that match { |
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset
|
186 |
case other: Index => index.sameElements(other.index) |
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset
|
187 |
case _ => false |
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset
|
188 |
} |
31929 | 189 |
} |
190 |
||
191 |
||
64477 | 192 |
/* symbolic text chunks -- without actual text */ |
56746 | 193 |
|
194 |
object Text_Chunk |
|
195 |
{ |
|
196 |
sealed abstract class Name |
|
197 |
case object Default extends Name |
|
198 |
case class Id(id: Document_ID.Generic) extends Name |
|
199 |
case class File(name: String) extends Name |
|
200 |
||
65335 | 201 |
val encode_name: XML.Encode.T[Name] = |
202 |
{ |
|
203 |
import XML.Encode._ |
|
204 |
variant(List( |
|
205 |
{ case Default => (Nil, Nil) }, |
|
206 |
{ case Id(a) => (List(long_atom(a)), Nil) }, |
|
207 |
{ case File(a) => (List(a), Nil) })) |
|
208 |
} |
|
209 |
||
210 |
val decode_name: XML.Decode.T[Name] = |
|
211 |
{ |
|
212 |
import XML.Decode._ |
|
213 |
variant(List( |
|
214 |
{ case (Nil, Nil) => Default }, |
|
215 |
{ case (List(a), Nil) => Id(long_atom(a)) }, |
|
216 |
{ case (List(a), Nil) => File(a) })) |
|
217 |
} |
|
218 |
||
219 |
||
56746 | 220 |
def apply(text: CharSequence): Text_Chunk = |
221 |
new Text_Chunk(Text.Range(0, text.length), Index(text)) |
|
222 |
} |
|
223 |
||
224 |
final class Text_Chunk private(val range: Text.Range, private val index: Index) |
|
225 |
{ |
|
226 |
override def hashCode: Int = (range, index).hashCode |
|
227 |
override def equals(that: Any): Boolean = |
|
228 |
that match { |
|
229 |
case other: Text_Chunk => |
|
230 |
range == other.range && |
|
231 |
index == other.index |
|
232 |
case _ => false |
|
233 |
} |
|
234 |
||
57840 | 235 |
override def toString: String = "Text_Chunk" + range.toString |
236 |
||
56746 | 237 |
def decode(symbol_offset: Offset): Text.Offset = index.decode(symbol_offset) |
238 |
def decode(symbol_range: Range): Text.Range = index.decode(symbol_range) |
|
239 |
def incorporate(symbol_range: Range): Option[Text.Range] = |
|
240 |
{ |
|
241 |
def in(r: Range): Option[Text.Range] = |
|
242 |
range.try_restrict(decode(r)) match { |
|
243 |
case Some(r1) if !r1.is_singularity => Some(r1) |
|
244 |
case _ => None |
|
245 |
} |
|
246 |
in(symbol_range) orElse in(symbol_range - 1) |
|
247 |
} |
|
248 |
} |
|
249 |
||
250 |
||
33998 | 251 |
/* recoding text */ |
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
252 |
|
31522 | 253 |
private class Recoder(list: List[(String, String)]) |
254 |
{ |
|
255 |
private val (min, max) = |
|
256 |
{ |
|
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
257 |
var min = '\uffff' |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
258 |
var max = '\u0000' |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
259 |
for ((x, _) <- list) { |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
260 |
val c = x(0) |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
261 |
if (c < min) min = c |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
262 |
if (c > max) max = c |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
263 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
264 |
(min, max) |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
265 |
} |
40443 | 266 |
private val table = |
267 |
{ |
|
268 |
var tab = Map[String, String]() |
|
269 |
for ((x, y) <- list) { |
|
270 |
tab.get(x) match { |
|
271 |
case None => tab += (x -> y) |
|
272 |
case Some(z) => |
|
62230 | 273 |
error("Duplicate symbol mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z)) |
40443 | 274 |
} |
275 |
} |
|
276 |
tab |
|
277 |
} |
|
31522 | 278 |
def recode(text: String): String = |
279 |
{ |
|
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
280 |
val len = text.length |
48775 | 281 |
val matcher = symbol_total.pattern.matcher(text) |
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
282 |
val result = new StringBuilder(len) |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
283 |
var i = 0 |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
284 |
while (i < len) { |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
285 |
val c = text(i) |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
286 |
if (min <= c && c <= max) { |
31929 | 287 |
matcher.region(i, len).lookingAt |
27938 | 288 |
val x = matcher.group |
52888 | 289 |
result.append(table.getOrElse(x, x)) |
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
290 |
i = matcher.end |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
291 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
292 |
else { result.append(c); i += 1 } |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
293 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
294 |
result.toString |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
295 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
296 |
} |
27924 | 297 |
|
27918 | 298 |
|
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
299 |
|
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
300 |
/** symbol interpretation **/ |
27927 | 301 |
|
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
302 |
private lazy val symbols = |
61959 | 303 |
{ |
304 |
val contents = |
|
305 |
for (path <- Path.split(Isabelle_System.getenv("ISABELLE_SYMBOLS")) if path.is_file) |
|
306 |
yield (File.read(path)) |
|
307 |
new Interpretation(cat_lines(contents)) |
|
308 |
} |
|
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
309 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
310 |
private class Interpretation(symbols_spec: String) |
29569
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents:
29174
diff
changeset
|
311 |
{ |
31522 | 312 |
/* read symbols */ |
313 |
||
50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
314 |
private val No_Decl = new Regex("""(?xs) ^\s* (?: \#.* )? $ """) |
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
315 |
private val Key = new Regex("""(?xs) (.+): """) |
31522 | 316 |
|
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
317 |
private def read_decl(decl: String): (Symbol, Properties.T) = |
31522 | 318 |
{ |
319 |
def err() = error("Bad symbol declaration: " + decl) |
|
320 |
||
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
321 |
def read_props(props: List[String]): Properties.T = |
31522 | 322 |
{ |
323 |
props match { |
|
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
324 |
case Nil => Nil |
31522 | 325 |
case _ :: Nil => err() |
61174 | 326 |
case Key(x) :: y :: rest => (x -> y.replace('\u2423', ' ')) :: read_props(rest) |
31522 | 327 |
case _ => err() |
328 |
} |
|
329 |
} |
|
330 |
decl.split("\\s+").toList match { |
|
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
331 |
case sym :: props if sym.length > 1 && !is_malformed(sym) => |
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
332 |
(sym, read_props(props)) |
34193 | 333 |
case _ => err() |
31522 | 334 |
} |
335 |
} |
|
336 |
||
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
337 |
private val symbols: List[(Symbol, Properties.T)] = |
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
338 |
(((List.empty[(Symbol, Properties.T)], Set.empty[Symbol]) /: |
50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
339 |
split_lines(symbols_spec).reverse) |
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
340 |
{ case (res, No_Decl()) => res |
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
341 |
case ((list, known), decl) => |
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
342 |
val (sym, props) = read_decl(decl) |
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
343 |
if (known(sym)) (list, known) |
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
344 |
else ((sym, props) :: list, known + sym) |
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
345 |
})._1 |
31522 | 346 |
|
347 |
||
53400 | 348 |
/* basic properties */ |
349 |
||
350 |
val properties: Map[Symbol, Properties.T] = Map(symbols: _*) |
|
31651 | 351 |
|
43696 | 352 |
val names: Map[Symbol, String] = |
34134 | 353 |
{ |
43456
8a6de1a6e1dc
names for control symbols without "^", which is relevant for completion;
wenzelm
parents:
43455
diff
changeset
|
354 |
val name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""") |
60215 | 355 |
Map((for ((sym @ name(a), _) <- symbols) yield sym -> a): _*) |
31651 | 356 |
} |
357 |
||
50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
358 |
val groups: List[(String, List[Symbol])] = |
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
359 |
symbols.map({ case (sym, props) => |
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
360 |
val gs = for (("group", g) <- props) yield g |
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
361 |
if (gs.isEmpty) List(sym -> "unsorted") else gs.map(sym -> _) |
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
362 |
}).flatten |
50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
363 |
.groupBy(_._2).toList.map({ case (group, list) => (group, list.map(_._1)) }) |
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
364 |
.sortBy(_._1) |
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
365 |
|
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
366 |
val abbrevs: Multi_Map[Symbol, String] = |
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
367 |
Multi_Map(( |
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
368 |
for { |
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
369 |
(sym, props) <- symbols |
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
370 |
("abbrev", a) <- props.reverse |
60215 | 371 |
} yield sym -> a): _*) |
43488 | 372 |
|
61376
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
373 |
val codes: List[(String, Int)] = |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
374 |
{ |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
375 |
val Code = new Properties.String("code") |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
376 |
for { |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
377 |
(sym, props) <- symbols |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
378 |
code = |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
379 |
props match { |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
380 |
case Code(s) => |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
381 |
try { Integer.decode(s).intValue } |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
382 |
catch { case _: NumberFormatException => error("Bad code for symbol " + sym) } |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
383 |
case _ => error("Missing code for symbol " + sym) |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
384 |
} |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
385 |
} yield { |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
386 |
if (code < 128) error("Illegal ASCII code for symbol " + sym) |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
387 |
else (sym, code) |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
388 |
} |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
389 |
} |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
390 |
|
43488 | 391 |
|
43490 | 392 |
/* recoding */ |
31522 | 393 |
|
394 |
private val (decoder, encoder) = |
|
395 |
{ |
|
396 |
val mapping = |
|
61376
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
397 |
for ((sym, code) <- codes) yield (sym, new String(Character.toChars(code))) |
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
398 |
(new Recoder(mapping), new Recoder(for ((x, y) <- mapping) yield (y, x))) |
31522 | 399 |
} |
27918 | 400 |
|
34098 | 401 |
def decode(text: String): String = decoder.recode(text) |
402 |
def encode(text: String): String = encoder.recode(text) |
|
34134 | 403 |
|
43490 | 404 |
private def recode_set(elems: String*): Set[String] = |
405 |
{ |
|
406 |
val content = elems.toList |
|
407 |
Set((content ::: content.map(decode)): _*) |
|
408 |
} |
|
409 |
||
410 |
private def recode_map[A](elems: (String, A)*): Map[String, A] = |
|
411 |
{ |
|
412 |
val content = elems.toList |
|
413 |
Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*) |
|
414 |
} |
|
415 |
||
416 |
||
417 |
/* user fonts */ |
|
418 |
||
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
419 |
private val Font = new Properties.String("font") |
43696 | 420 |
val fonts: Map[Symbol, String] = |
60215 | 421 |
recode_map((for ((sym, Font(font)) <- symbols) yield sym -> font): _*) |
43490 | 422 |
|
423 |
val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList |
|
424 |
val font_index: Map[String, Int] = Map((font_names zip (0 until font_names.length).toList): _*) |
|
425 |
||
34134 | 426 |
|
427 |
/* classification */ |
|
428 |
||
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
429 |
val letters = recode_set( |
34134 | 430 |
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", |
431 |
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", |
|
432 |
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", |
|
433 |
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", |
|
434 |
||
435 |
"\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>", |
|
436 |
"\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>", |
|
437 |
"\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>", |
|
438 |
"\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>", |
|
439 |
"\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>", |
|
440 |
"\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>", |
|
441 |
"\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>", |
|
442 |
"\\<x>", "\\<y>", "\\<z>", |
|
443 |
||
444 |
"\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>", |
|
445 |
"\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>", |
|
446 |
"\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>", |
|
447 |
"\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>", |
|
448 |
"\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>", |
|
449 |
"\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>", |
|
450 |
"\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>", |
|
451 |
"\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>", |
|
452 |
"\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>", |
|
453 |
||
454 |
"\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>", |
|
455 |
"\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>", |
|
456 |
"\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>", |
|
457 |
"\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>", |
|
458 |
"\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>", |
|
459 |
"\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>", |
|
52616
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
wenzelm
parents:
52507
diff
changeset
|
460 |
"\\<Psi>", "\\<Omega>") |
34134 | 461 |
|
61865 | 462 |
val blanks = recode_set(space, "\t", "\n", "\u000B", "\f", "\r", "\r\n") |
34138 | 463 |
|
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
464 |
val sym_chars = |
34138 | 465 |
Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~") |
34134 | 466 |
|
44992
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset
|
467 |
val symbolic = recode_set((for { (sym, _) <- symbols; if raw_symbolic(sym) } yield sym): _*) |
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset
|
468 |
|
43455 | 469 |
|
63528
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
470 |
/* misc symbols */ |
61579 | 471 |
|
63528
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
472 |
val newline_decoded = decode(newline) |
61579 | 473 |
val comment_decoded = decode(comment) |
474 |
||
475 |
||
55033 | 476 |
/* cartouches */ |
477 |
||
478 |
val open_decoded = decode(open) |
|
479 |
val close_decoded = decode(close) |
|
480 |
||
481 |
||
43488 | 482 |
/* control symbols */ |
483 |
||
59107 | 484 |
val control_decoded: Set[Symbol] = |
43488 | 485 |
Set((for ((sym, _) <- symbols if sym.startsWith("\\<^")) yield decode(sym)): _*) |
486 |
||
62103 | 487 |
val sub_decoded = decode(sub) |
488 |
val sup_decoded = decode(sup) |
|
489 |
val bold_decoded = decode(bold) |
|
62104
fb73c0d7bb37
clarified symbol insertion, depending on buffer encoding;
wenzelm
parents:
62103
diff
changeset
|
490 |
val emph_decoded = decode(emph) |
43511 | 491 |
val bsub_decoded = decode("\\<^bsub>") |
492 |
val esub_decoded = decode("\\<^esub>") |
|
493 |
val bsup_decoded = decode("\\<^bsup>") |
|
494 |
val esup_decoded = decode("\\<^esup>") |
|
27918 | 495 |
} |
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
496 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
497 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
498 |
/* tables */ |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
499 |
|
53400 | 500 |
def properties: Map[Symbol, Properties.T] = symbols.properties |
43696 | 501 |
def names: Map[Symbol, String] = symbols.names |
50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
502 |
def groups: List[(String, List[Symbol])] = symbols.groups |
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset
|
503 |
def abbrevs: Multi_Map[Symbol, String] = symbols.abbrevs |
61376
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
504 |
def codes: List[(String, Int)] = symbols.codes |
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
505 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
506 |
def decode(text: String): String = symbols.decode(text) |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
507 |
def encode(text: String): String = symbols.encode(text) |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
508 |
|
53337
b3817a0e3211
sort items according to persistent history of frequency of use;
wenzelm
parents:
53316
diff
changeset
|
509 |
def decode_string: XML.Decode.T[String] = (x => decode(XML.Decode.string(x))) |
b3817a0e3211
sort items according to persistent history of frequency of use;
wenzelm
parents:
53316
diff
changeset
|
510 |
def encode_string: XML.Encode.T[String] = (x => XML.Encode.string(encode(x))) |
b3817a0e3211
sort items according to persistent history of frequency of use;
wenzelm
parents:
53316
diff
changeset
|
511 |
|
50291
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
512 |
def decode_strict(text: String): String = |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
513 |
{ |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
514 |
val decoded = decode(text) |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
515 |
if (encode(decoded) == text) decoded |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
516 |
else { |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
517 |
val bad = new mutable.ListBuffer[Symbol] |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
518 |
for (s <- iterator(text) if encode(decode(s)) != s && !bad.contains(s)) |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
519 |
bad += s |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
520 |
error("Bad Unicode symbols in text: " + commas_quote(bad)) |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
521 |
} |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
522 |
} |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
523 |
|
43696 | 524 |
def fonts: Map[Symbol, String] = symbols.fonts |
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
525 |
def font_names: List[String] = symbols.font_names |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
526 |
def font_index: Map[String, Int] = symbols.font_index |
43696 | 527 |
def lookup_font(sym: Symbol): Option[Int] = symbols.fonts.get(sym).map(font_index(_)) |
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
528 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
529 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
530 |
/* classification */ |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
531 |
|
43696 | 532 |
def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym) |
533 |
def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9' |
|
534 |
def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'" |
|
535 |
def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym) |
|
536 |
def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym) |
|
44992
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset
|
537 |
|
55033 | 538 |
|
63528
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
539 |
/* misc symbols */ |
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
540 |
|
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
541 |
val newline: Symbol = "\\<newline>" |
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
542 |
def newline_decoded: Symbol = symbols.newline_decoded |
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
543 |
|
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
544 |
def print_newlines(str: String): String = |
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
545 |
if (str.contains('\n')) |
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
546 |
(for (s <- iterator(str)) yield { if (s == "\n") newline_decoded else s }).mkString |
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
547 |
else str |
61579 | 548 |
|
549 |
val comment: Symbol = "\\<comment>" |
|
550 |
def comment_decoded: Symbol = symbols.comment_decoded |
|
551 |
||
552 |
||
55033 | 553 |
/* cartouches */ |
554 |
||
61579 | 555 |
val open: Symbol = "\\<open>" |
556 |
val close: Symbol = "\\<close>" |
|
55033 | 557 |
|
558 |
def open_decoded: Symbol = symbols.open_decoded |
|
559 |
def close_decoded: Symbol = symbols.close_decoded |
|
560 |
||
561 |
def is_open(sym: Symbol): Boolean = sym == open_decoded || sym == open |
|
562 |
def is_close(sym: Symbol): Boolean = sym == close_decoded || sym == close |
|
563 |
||
564 |
||
565 |
/* symbols for symbolic identifiers */ |
|
44992
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset
|
566 |
|
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset
|
567 |
private def raw_symbolic(sym: Symbol): Boolean = |
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
568 |
sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^") |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
569 |
|
55033 | 570 |
def is_symbolic(sym: Symbol): Boolean = |
571 |
!is_open(sym) && !is_close(sym) && (raw_symbolic(sym) || symbols.symbolic.contains(sym)) |
|
572 |
||
573 |
def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym) |
|
574 |
||
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
575 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
576 |
/* control symbols */ |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
577 |
|
59107 | 578 |
def is_control(sym: Symbol): Boolean = |
61470 | 579 |
(sym.startsWith("\\<^") && sym.endsWith(">")) || symbols.control_decoded.contains(sym) |
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
580 |
|
43696 | 581 |
def is_controllable(sym: Symbol): Boolean = |
59107 | 582 |
!is_blank(sym) && !is_control(sym) && !is_open(sym) && !is_close(sym) && !is_malformed(sym) |
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
583 |
|
62103 | 584 |
val sub = "\\<^sub>" |
585 |
val sup = "\\<^sup>" |
|
586 |
val bold = "\\<^bold>" |
|
62104
fb73c0d7bb37
clarified symbol insertion, depending on buffer encoding;
wenzelm
parents:
62103
diff
changeset
|
587 |
val emph = "\\<^emph>" |
62103 | 588 |
|
44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset
|
589 |
def sub_decoded: Symbol = symbols.sub_decoded |
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset
|
590 |
def sup_decoded: Symbol = symbols.sup_decoded |
62103 | 591 |
def bold_decoded: Symbol = symbols.bold_decoded |
592 |
def emph_decoded: Symbol = symbols.emph_decoded |
|
44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset
|
593 |
def bsub_decoded: Symbol = symbols.bsub_decoded |
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset
|
594 |
def esub_decoded: Symbol = symbols.esub_decoded |
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset
|
595 |
def bsup_decoded: Symbol = symbols.bsup_decoded |
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset
|
596 |
def esup_decoded: Symbol = symbols.esup_decoded |
27901 | 597 |
} |