author | wenzelm |
Tue, 07 Aug 2012 12:10:26 +0200 | |
changeset 48704 | 85a3de10567d |
parent 48550 | 97592027a2a8 |
child 48773 | 0e1bab274672 |
permissions | -rw-r--r-- |
27901 | 1 |
/* Title: Pure/General/symbol.scala |
2 |
Author: Makarius |
|
3 |
||
27924 | 4 |
Detecting and recoding Isabelle symbols. |
27901 | 5 |
*/ |
6 |
||
7 |
package isabelle |
|
8 |
||
36011
3ff725ac13a4
adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
wenzelm
parents:
34316
diff
changeset
|
9 |
import scala.collection.mutable |
31522 | 10 |
import scala.util.matching.Regex |
27901 | 11 |
|
12 |
||
31522 | 13 |
object Symbol |
14 |
{ |
|
43696 | 15 |
/* Isabelle symbols are represented as plain strings; this alias is used in signatures below. */
type Symbol = String |
16 |
||
17 |
||
43418 | 18 |
/* ASCII characters */ |
19 |
||
20 |
/** Tests whether `c` is an ASCII letter, i.e. within `A`-`Z` or `a`-`z`. */
def is_ascii_letter(c: Char): Boolean =
{
  val upper = 'A' <= c && c <= 'Z'
  val lower = 'a' <= c && c <= 'z'
  upper || lower
}
|
21 |
/** Tests whether `c` is an ASCII decimal digit `0`-`9`. */
def is_ascii_digit(c: Char): Boolean =
  c >= '0' && c <= '9'
|
22 |
/** Tests whether `c` is one of the "quasi letters": underscore or single quote. */
def is_ascii_quasi(c: Char): Boolean =
  c match {
    case '_' | '\'' => true
    case _ => false
  }
|
23 |
||
24 |
/** Tests whether `c` is an ASCII letter, digit, or quasi letter (`_` or `'`). */
def is_ascii_letdig(c: Char): Boolean =
  if (is_ascii_letter(c)) true
  else is_ascii_digit(c) || is_ascii_quasi(c)
|
26 |
||
27 |
/**
 * Tests whether `s` is a non-empty ASCII identifier: an ASCII letter
 * followed by any number of ASCII letters, digits, `_` or `'`.
 */
def is_ascii_identifier(s: String): Boolean =
  !s.isEmpty &&
    is_ascii_letter(s.charAt(0)) &&
    (1 until s.length).forall(i => is_ascii_letdig(s.charAt(i)))
|
29 |
||
30 |
||
33998 | 31 |
/* Symbol regexps */ |
27901 | 32 |
|
31522 | 33 |
/* All patterns below use (?xs): whitespace inside the pattern is ignored and
   "." also matches line terminators. */

// one char that is neither CR, backslash, surrogate nor U+FFFD -- or a
// high/low surrogate pair
private val plain = """(?xs)
    [^\r\\\ud800-\udfff\ufffd] | [\ud800-\udbff][\udc00-\udfff] """.r

// LF, CR LF, or single CR
private val physical_newline = """(?xs) \n | \r\n | \r """.r

// \<name>, \<^name>, or \<^raw:...>
private val symbol = """(?xs)
    \\ < (?:
    \^? [A-Za-z][A-Za-z0-9_']* |
    \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""".r

// high surrogate or U+FFFD that does not start a proper symbol,
// or an unfinished \< or \<^
private val malformed_symbol =
  ("(?xs) (?!" + symbol + ")" +
    """ [\ud800-\udbff\ufffd] | \\<\^? """).r

// total pattern: alternatives in the order above, with a final catch-all for
// any single char, so that matching never fails on non-empty input
val regex_total: Regex =
  new Regex(
    List(plain, physical_newline, symbol, malformed_symbol).mkString("|") + "| .")
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
48 |
|
34137 | 49 |
|
50 |
/* basic matching */ |
|
51 |
||
37556
2bf29095d26f
treat alternative newline symbols as in Isabelle/ML;
wenzelm
parents:
36816
diff
changeset
|
52 |
/**
 * Tests whether `c` on its own is a plain symbol, so that no regex
 * matching is needed for it.
 *
 * Not plain are: carriage return (may start a two-char newline), backslash
 * (may start a named symbol), surrogates (part of a pair, or malformed when
 * isolated), and the Unicode replacement character U+FFFD.  The latter is
 * excluded here to agree with the `plain` regex, which rejects it as well;
 * otherwise `is_malformed("\ufffd")` would short-circuit to false.
 */
def is_plain(c: Char): Boolean =
  !(c == '\r' || c == '\\' || '\ud800' <= c && c <= '\udfff' || c == '\ufffd')
34137 | 53 |
|
43696 | 54 |
/** Tests whether `s` is one of the physical newline symbols: LF, CR, or CR LF. */
def is_physical_newline(s: Symbol): Boolean =
  s match {
    case "\n" | "\r" | "\r\n" => true
    case _ => false
  }
38877 | 56 |
|
43696 | 57 |
/**
 * Tests whether `s` as a whole matches the `malformed_symbol` pattern.
 * A single plain char is rejected up front, without consulting the regex.
 */
def is_malformed(s: Symbol): Boolean =
{
  val single_plain = s.length == 1 && is_plain(s(0))
  if (single_plain) false
  else malformed_symbol.pattern.matcher(s).matches
}
34137 | 59 |
|
60 |
/**
 * Determines the length of the symbol at a given position of `text`.
 * One java.util.regex matcher for `regex_total` is created per instance and
 * re-targeted via `region` on each call.
 */
class Matcher(text: CharSequence)
{
  private val total = regex_total.pattern.matcher(text)

  /**
   * Length (in chars) of the symbol that starts at `start`, looking no
   * further than `end`.  Requires `0 <= start < end <= text.length`.
   */
  def apply(start: Int, end: Int): Int =
  {
    require(0 <= start && start < end && end <= text.length)
    text.charAt(start) match {
      // fast path: no regex matching for plain chars
      case c if is_plain(c) => 1
      case _ =>
        total.region(start, end).lookingAt
        total.end - total.start
    }
  }
}
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
73 |
|
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
74 |
|
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
75 |
/* iterator */ |
33998 | 76 |
|
43696 | 77 |
/* preallocated one-char strings for the chars 0 .. 255, indexed by char code */
private val char_symbols: Array[Symbol] =
  Array.tabulate(256)(code => code.toChar.toString)
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset
|
79 |
|
43696 | 80 |
/**
 * Iterates over the symbols of `text`, in order.  Symbol boundaries are
 * those determined by `Matcher`; single chars below 256 are served from the
 * preallocated `char_symbols` table instead of allocating a new string.
 *
 * Calling `next` on an exhausted iterator throws NoSuchElementException.
 */
def iterator(text: CharSequence): Iterator[Symbol] =
  new Iterator[Symbol]
  {
    private val matcher = new Matcher(text)
    private var i = 0

    def hasNext: Boolean = i < text.length

    def next: Symbol =
    {
      // without this guard the failure would surface as a "requirement
      // failed" IllegalArgumentException from Matcher.apply
      if (!hasNext) throw new NoSuchElementException("next on empty iterator")

      // n >= 1 here: the total regex ends with a catch-all for one char
      val n = matcher(i, text.length)
      val s =
        if (n == 1) {
          val c = text.charAt(i)
          if (c < char_symbols.length) char_symbols(c)
          else text.subSequence(i, i + n).toString
        }
        else text.subSequence(i, i + n).toString
      i += n
      s
    }
  }
43489 | 101 |
|
44949 | 102 |
/** All symbols of `text` as a list, in order of occurrence (see `iterator`). */
def explode(text: CharSequence): List[Symbol] =
{
  val it = iterator(text)
  it.toList
}
103 |
||
33998 | 104 |
|
105 |
/* decoding offsets */ |
|
106 |
||
107 |
/**
 * Index for translating symbol offsets of `text` into char offsets.
 * Only symbols that occupy more than one char are recorded.
 */
class Index(text: CharSequence)
{
  /* chr: char offset right after a multi-char symbol; sym: number of symbols
     up to and including that symbol */
  sealed case class Entry(chr: Int, sym: Int)

  val index: Array[Entry] =
  {
    val matcher = new Matcher(text)
    val entries = new mutable.ArrayBuffer[Entry]
    var chr_pos = 0
    var sym_pos = 0
    while (chr_pos < text.length) {
      val n = matcher(chr_pos, text.length)
      chr_pos += n
      sym_pos += 1
      if (n > 1) entries += Entry(chr_pos, sym_pos)
    }
    entries.toArray
  }

  /**
   * Translates the 1-based symbol offset `sym1` into a 0-based char offset,
   * by binary search for the last entry whose `sym` does not exceed the
   * (0-based) symbol offset.
   */
  def decode(sym1: Int): Int =
  {
    val sym = sym1 - 1
    val end = index.length

    var lo = 0
    var hi = end
    var found = -1
    var searching = true
    while (searching && lo < hi) {
      val mid = (lo + hi) / 2
      if (sym < index(mid).sym) hi = mid
      else if (mid + 1 == end || sym < index(mid + 1).sym) {
        found = mid
        searching = false
      }
      else lo = mid + 1
    }

    // no multi-char symbol before this offset: symbols and chars coincide
    if (found < 0) sym
    else index(found).chr + sym - index(found).sym
  }

  /** Translates both ends of `range` via `decode`. */
  def decode(range: Text.Range): Text.Range = range.map(offset => decode(offset))
}
144 |
||
145 |
||
33998 | 146 |
/* recoding text */ |
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
147 |
|
31522 | 148 |
/**
 * Rewrites text symbol by symbol according to a table of (source, target)
 * pairs.  Construction fails via `error` if a source occurs twice.
 */
private class Recoder(list: List[(String, String)])
{
  /* smallest and largest first char over all sources: a text char outside
     of this range cannot start a table entry and is copied as is */
  private val (min, max) =
    list.foldLeft(('\uffff', '\u0000')) {
      case ((lo, hi), (x, _)) =>
        val c = x(0)
        (if (c < lo) c else lo, if (c > hi) c else hi)
    }

  private val table: Map[String, String] =
    list.foldLeft(Map[String, String]()) {
      case (tab, (x, y)) =>
        tab.get(x) match {
          case Some(z) =>
            error("Duplicate mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z))
          case None => tab + (x -> y)
        }
    }

  /**
   * Replaces every symbol of `text` that has a table entry by its target;
   * everything else is copied unchanged.
   */
  def recode(text: String): String =
  {
    val length = text.length
    val total = regex_total.pattern.matcher(text)
    val out = new StringBuilder(length)
    var pos = 0
    while (pos < length) {
      val c = text.charAt(pos)
      if (c < min || max < c) { out.append(c); pos += 1 }
      else {
        total.region(pos, length).lookingAt
        val sym = total.group
        out.append(table.getOrElse(sym, sym))
        pos = total.end
      }
    }
    out.toString
  }
}
27924 | 192 |
|
27918 | 193 |
|
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
194 |
|
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
195 |
/** symbol interpretation **/ |
27927 | 196 |
|
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
197 |
/* global symbol interpretation, built on first use from the files listed in
   the ISABELLE_SYMBOLS setting */
private lazy val symbols =
{
  val spec_path = Path.split(Isabelle_System.getenv_strict("ISABELLE_SYMBOLS"))
  new Interpretation(File.try_read(spec_path))
}
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
199 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
200 |
/**
 * Interpretation of symbols according to a textual specification.
 *
 * `symbols_spec` holds one declaration per line: a symbol followed by
 * whitespace-separated `key: value` pairs.  Blank lines and lines starting
 * with `#` are ignored.  Every symbol needs a `code` property giving a valid
 * non-ASCII Unicode code point; bad declarations are reported via `error`.
 */
private class Interpretation(symbols_spec: String)
{
  /* read symbols */

  private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
  private val key = new Regex("""(?xs) (.+): """)

  private def read_decl(decl: String): (Symbol, Map[String, String]) =
  {
    def err() = error("Bad symbol declaration: " + decl)

    // pairs of "key:" and value; the first occurrence of a key wins
    def read_props(props: List[String]): Map[String, String] =
      props match {
        case Nil => Map()
        case _ :: Nil => err()
        case key(x) :: y :: rest => read_props(rest) + (x -> y)
        case _ => err()
      }

    decl.split("\\s+").toList match {
      case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props))
      case _ => err()
    }
  }

  // passing through Map means that a later declaration of the same symbol
  // replaces an earlier one
  private val symbols: List[(Symbol, Map[String, String])] =
    Map((
      for (decl <- split_lines(symbols_spec) if !empty.pattern.matcher(decl).matches)
        yield read_decl(decl)): _*).toList


  /* misc properties */

  // symbol name without the surrounding \< > and without the control marker ^
  val names: Map[Symbol, String] =
  {
    val name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
    Map(symbols.collect({ case (sym @ name(a), _) => sym -> a }): _*)
  }

  val abbrevs: Map[Symbol, String] =
    Map(symbols.collect(
      { case (sym, props) if props.isDefinedAt("abbrev") => sym -> props("abbrev") }): _*)


  /* recoding */

  private val (decoder, encoder) =
  {
    val mapping =
      for {
        (sym, props) <- symbols
        code =
          try { Integer.decode(props("code")).intValue }
          catch {
            case _: NoSuchElementException => error("Missing code for symbol " + sym)
            case _: NumberFormatException => error("Bad code for symbol " + sym)
          }
      } yield {
        // check the range before Character.toChars, which would otherwise
        // throw a bare IllegalArgumentException that does not name the symbol
        if (!Character.isValidCodePoint(code)) error("Bad code for symbol " + sym)
        else if (code < 128) error("Illegal ASCII code for symbol " + sym)
        else (sym, new String(Character.toChars(code)))
      }
    (new Recoder(mapping),
     new Recoder(mapping map { case (x, y) => (y, x) }))
  }

  def decode(text: String): String = decoder.recode(text)
  def encode(text: String): String = encoder.recode(text)

  // the given elements together with their decoded forms
  private def recode_set(elems: String*): Set[String] =
  {
    val content = elems.toList
    Set((content ::: content.map(decode)): _*)
  }

  // the given entries together with entries for the decoded keys
  private def recode_map[A](elems: (String, A)*): Map[String, A] =
  {
    val content = elems.toList
    Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*)
  }


  /* user fonts */

  val fonts: Map[Symbol, String] =
    recode_map(symbols.collect(
      { case (sym, props) if props.isDefinedAt("font") => sym -> props("font") }): _*)

  val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList
  val font_index: Map[String, Int] = Map(font_names.zipWithIndex: _*)


  /* classification */

  val letters = recode_set(
    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
    "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
    "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",

    "\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>",
    "\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>",
    "\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>",
    "\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>",
    "\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>",
    "\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>",
    "\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>",
    "\\<x>", "\\<y>", "\\<z>",

    "\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>",
    "\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>",
    "\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>",
    "\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>",
    "\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>",
    "\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>",
    "\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>",
    "\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>",
    "\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>",

    "\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>",
    "\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>",
    "\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>",
    "\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>",
    "\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>",
    "\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>",
    "\\<Psi>", "\\<Omega>",

    "\\<^isub>", "\\<^isup>")

  val blanks =
    recode_set(" ", "\t", "\n", "\u000B", "\f", "\r", "\r\n", "\\<spacespace>", "\\<^newline>")

  val sym_chars =
    Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")

  val symbolic =
    recode_set(symbols.collect({ case (sym, _) if raw_symbolic(sym) => sym }): _*)


  /* control symbols */

  val ctrl_decoded: Set[Symbol] =
    Set(symbols.collect({ case (sym, _) if sym.startsWith("\\<^") => decode(sym) }): _*)

  val sub_decoded = decode("\\<^sub>")
  val sup_decoded = decode("\\<^sup>")
  val isub_decoded = decode("\\<^isub>")
  val isup_decoded = decode("\\<^isup>")
  val bsub_decoded = decode("\\<^bsub>")
  val esub_decoded = decode("\\<^esub>")
  val bsup_decoded = decode("\\<^bsup>")
  val esup_decoded = decode("\\<^esup>")
  val bold_decoded = decode("\\<^bold>")
}
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
356 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
357 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
358 |
/* tables */ |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
359 |
|
43696 | 360 |
/* The members below delegate to the lazily created global interpretation
   `symbols`, so the first call of any of them triggers its construction. */

/** Names of the declared symbols, as defined by `Interpretation.names`. */
def names: Map[Symbol, String] = symbols.names

/** Declared "abbrev" property of symbols, for those that have one. */
def abbrevs: Map[Symbol, String] = symbols.abbrevs


/** Recodes `text` with the decoder of the global interpretation. */
def decode(text: String): String = symbols.decode(text)

/** Recodes `text` with the encoder of the global interpretation. */
def encode(text: String): String = symbols.encode(text)


/** Declared "font" property, keyed by symbols and their decoded forms. */
def fonts: Map[Symbol, String] = symbols.fonts

/** The distinct font names that occur in `fonts`. */
def font_names: List[String] = symbols.font_names

/** Position of each font name within `font_names`. */
def font_index: Map[String, Int] = symbols.font_index

/** Index of the font declared for `sym`, if any. */
def lookup_font(sym: Symbol): Option[Int] =
  for (font <- symbols.fonts.get(sym)) yield font_index(font)
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
370 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
371 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
372 |
/* classification */ |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
373 |
|
43696 | 374 |
/** Tests membership of `sym` in the letter set of the global interpretation. */
def is_letter(sym: Symbol): Boolean = symbols.letters(sym)

/** Tests whether `sym` is a single ASCII digit. */
def is_digit(sym: Symbol): Boolean =
  sym.length == 1 && is_ascii_digit(sym(0))

/** Tests whether `sym` is `_` or `'`. */
def is_quasi(sym: Symbol): Boolean =
  sym match {
    case "_" | "'" => true
    case _ => false
  }

/** Tests whether `sym` is a letter, digit, or quasi letter. */
def is_letdig(sym: Symbol): Boolean =
  if (is_letter(sym)) true
  else is_digit(sym) || is_quasi(sym)

/** Tests membership of `sym` in the blank set of the global interpretation. */
def is_blank(sym: Symbol): Boolean = symbols.blanks(sym)


/** Tests whether `sym` is one of the ASCII chars listed in `sym_chars`. */
def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars(sym)

/**
 * Tests whether `sym` is symbolic: either of the raw form `\<...>` without
 * control marker, or a member (possibly decoded) of the declared symbolic set.
 */
def is_symbolic(sym: Symbol): Boolean =
  if (raw_symbolic(sym)) true
  else symbols.symbolic(sym)

/* encoded form \<...> that is not a control symbol \<^...> */
private def raw_symbolic(sym: Symbol): Boolean =
{
  val bracketed = sym.startsWith("\\<") && sym.endsWith(">")
  bracketed && !sym.startsWith("\\<^")
}
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
385 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
386 |
|
44992
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset
|
387 |
|
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset
|
388 |
|
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
389 |
/* control symbols */ |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
390 |
|
43696 | 391 |
/**
 * Tests whether `sym` is a control symbol: either in encoded form `\<^...`,
 * or the decoded form of a declared control symbol.
 */
def is_ctrl(sym: Symbol): Boolean =
  if (sym.startsWith("\\<^")) true
  else symbols.ctrl_decoded(sym)

/** Tests whether `sym` is neither blank, nor a control symbol, nor malformed. */
def is_controllable(sym: Symbol): Boolean =
  !(is_blank(sym) || is_ctrl(sym) || is_malformed(sym))


/* decoded forms of specific control symbols, taken from the global interpretation */

def sub_decoded: Symbol = symbols.sub_decoded
def sup_decoded: Symbol = symbols.sup_decoded
def isub_decoded: Symbol = symbols.isub_decoded
def isup_decoded: Symbol = symbols.isup_decoded
def bsub_decoded: Symbol = symbols.bsub_decoded
def esub_decoded: Symbol = symbols.esub_decoded
def bsup_decoded: Symbol = symbols.bsup_decoded
def esup_decoded: Symbol = symbols.esup_decoded
def bold_decoded: Symbol = symbols.bold_decoded
27901 | 406 |
} |