author  wenzelm 
Fri, 24 Aug 2012 19:35:44 +0200  
changeset 48922  6f3ccfa7818d 
parent 48775  92ceb058391f 
child 50136  a96bd08258a2 
permissions  rwrr 
27901  1 
/* Title: Pure/General/symbol.scala 
2 
Author: Makarius 

3 

27924  4 
Detecting and recoding Isabelle symbols. 
27901  5 
*/ 
6 

7 
package isabelle 

8 

36011
3ff725ac13a4
adapted to Scala 2.8.0 Beta1  with notable changes to scala.collection;
wenzelm
parents:
34316
diff
changeset

9 
import scala.collection.mutable 
31522  10 
import scala.util.matching.Regex 
48922  11 
import scala.annotation.tailrec 
27901  12 

13 

31522  14 
object Symbol 
15 
{ 

43696  16 
type Symbol = String 
17 

18 

43418  19 
/* ASCII characters */ 
20 

21 
def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z'  'a' <= c && c <= 'z' 

22 
def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9' 

23 
def is_ascii_quasi(c: Char): Boolean = c == '_'  c == '\'' 

24 

25 
def is_ascii_letdig(c: Char): Boolean = 

26 
is_ascii_letter(c)  is_ascii_digit(c)  is_ascii_quasi(c) 

27 

28 
def is_ascii_identifier(s: String): Boolean = 

29 
s.length > 0 && is_ascii_letter(s(0)) && s.substring(1).forall(is_ascii_letdig) 

30 

31 

48775  32 
/* symbol matching */ 
27901  33 

48775  34 
private val symbol_total = new Regex("""(?xs) 
35 
[\ud800\udbff][\udc00\udfff]  \r\n  

36 
\\ < (?: \^raw: [\x20\x7e\u0100\uffff && [^.>]]*  \^? ([AZaz][AZaz09_']*)? ) >?  

37 
.""") 

27924  38 

48775  39 
private def is_plain(c: Char): Boolean = 
40 
!(c == '\r'  c == '\\'  Character.isHighSurrogate(c)) 

48773
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

41 

0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

42 
def is_malformed(s: Symbol): Boolean = 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

43 
s.length match { 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

44 
case 1 => 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

45 
val c = s(0) 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

46 
Character.isHighSurrogate(c)  Character.isLowSurrogate(c)  c == '\ufffd' 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

47 
case 2 => 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

48 
val c1 = s(0) 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

49 
val c2 = s(1) 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

50 
!(c1 == '\r' && c2 == '\n'  Character.isSurrogatePair(c1, c2)) 
48774  51 
case _ => !s.endsWith(">")  s == "\\<>"  s == "\\<^>" 
48773
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

52 
} 
34137  53 

43696  54 
def is_physical_newline(s: Symbol): Boolean = 
43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

55 
s == "\n"  s == "\r"  s == "\r\n" 
38877  56 

34137  57 
class Matcher(text: CharSequence) 
58 
{ 

48775  59 
private val matcher = symbol_total.pattern.matcher(text) 
34137  60 
def apply(start: Int, end: Int): Int = 
61 
{ 

62 
require(0 <= start && start < end && end <= text.length) 

34316
f879b649ac4c
clarified Symbol.is_plain/is_wellformed  is_closed was rejecting plain backslashes;
wenzelm
parents:
34193
diff
changeset

63 
if (is_plain(text.charAt(start))) 1 
34138  64 
else { 
34137  65 
matcher.region(start, end).lookingAt 
66 
matcher.group.length 

67 
} 

68 
} 

31522  69 
} 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

70 

fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

71 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

72 
/* iterator */ 
33998  73 

43696  74 
private val char_symbols: Array[Symbol] = 
43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

75 
(0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

76 

43696  77 
def iterator(text: CharSequence): Iterator[Symbol] = 
78 
new Iterator[Symbol] 

40522  79 
{ 
43489  80 
private val matcher = new Matcher(text) 
81 
private var i = 0 

82 
def hasNext = i < text.length 

83 
def next = 

84 
{ 

85 
val n = matcher(i, text.length) 

43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

86 
val s = 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

87 
if (n == 0) "" 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

88 
else if (n == 1) { 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

89 
val c = text.charAt(i) 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

90 
if (c < char_symbols.length) char_symbols(c) 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

91 
else text.subSequence(i, i + n).toString 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

92 
} 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

93 
else text.subSequence(i, i + n).toString 
43489  94 
i += n 
95 
s 

96 
} 

33998  97 
} 
43489  98 

44949  99 
def explode(text: CharSequence): List[Symbol] = iterator(text).toList 
100 

48922  101 
def advance_line_column(pos: (Int, Int), text: CharSequence): (Int, Int) = 
102 
{ 

103 
var (line, column) = pos 

104 
for (sym < iterator(text)) { 

105 
if (is_physical_newline(sym)) { line += 1; column = 1 } 

106 
else column += 1 

107 
} 

108 
(line, column) 

109 
} 

110 

33998  111 

112 
/* decoding offsets */ 

113 

114 
class Index(text: CharSequence) 

31929  115 
{ 
43714  116 
sealed case class Entry(chr: Int, sym: Int) 
31929  117 
val index: Array[Entry] = 
118 
{ 

34137  119 
val matcher = new Matcher(text) 
31929  120 
val buf = new mutable.ArrayBuffer[Entry] 
121 
var chr = 0 

122 
var sym = 0 

33998  123 
while (chr < text.length) { 
34137  124 
val n = matcher(chr, text.length) 
125 
chr += n 

31929  126 
sym += 1 
34137  127 
if (n > 1) buf += Entry(chr, sym) 
31929  128 
} 
129 
buf.toArray 

130 
} 

38479
e628da370072
more efficient Markup_Tree, based on branches sorted by quasiorder;
wenzelm
parents:
37556
diff
changeset

131 
def decode(sym1: Int): Int = 
31929  132 
{ 
38479
e628da370072
more efficient Markup_Tree, based on branches sorted by quasiorder;
wenzelm
parents:
37556
diff
changeset

133 
val sym = sym1  1 
31929  134 
val end = index.length 
48922  135 
@tailrec def bisect(a: Int, b: Int): Int = 
31929  136 
{ 
137 
if (a < b) { 

138 
val c = (a + b) / 2 

139 
if (sym < index(c).sym) bisect(a, c) 

140 
else if (c + 1 == end  sym < index(c + 1).sym) c 

141 
else bisect(c + 1, b) 

142 
} 

143 
else 1 

144 
} 

145 
val i = bisect(0, end) 

146 
if (i < 0) sym 

147 
else index(i).chr + sym  index(i).sym 

148 
} 

38479
e628da370072
more efficient Markup_Tree, based on branches sorted by quasiorder;
wenzelm
parents:
37556
diff
changeset

149 
def decode(range: Text.Range): Text.Range = range.map(decode(_)) 
31929  150 
} 
151 

152 

33998  153 
/* recoding text */ 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

154 

31522  155 
private class Recoder(list: List[(String, String)]) 
156 
{ 

157 
private val (min, max) = 

158 
{ 

27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

159 
var min = '\uffff' 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

160 
var max = '\u0000' 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

161 
for ((x, _) < list) { 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

162 
val c = x(0) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

163 
if (c < min) min = c 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

164 
if (c > max) max = c 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

165 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

166 
(min, max) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

167 
} 
40443  168 
private val table = 
169 
{ 

170 
var tab = Map[String, String]() 

171 
for ((x, y) < list) { 

172 
tab.get(x) match { 

173 
case None => tab += (x > y) 

174 
case Some(z) => 

44181  175 
error("Duplicate mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z)) 
40443  176 
} 
177 
} 

178 
tab 

179 
} 

31522  180 
def recode(text: String): String = 
181 
{ 

27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

182 
val len = text.length 
48775  183 
val matcher = symbol_total.pattern.matcher(text) 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

184 
val result = new StringBuilder(len) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

185 
var i = 0 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

186 
while (i < len) { 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

187 
val c = text(i) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

188 
if (min <= c && c <= max) { 
31929  189 
matcher.region(i, len).lookingAt 
27938  190 
val x = matcher.group 
31522  191 
result.append(table.get(x) getOrElse x) 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

192 
i = matcher.end 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

193 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

194 
else { result.append(c); i += 1 } 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

195 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

196 
result.toString 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

197 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

198 
} 
27924  199 

27918  200 

27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset

201 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

202 
/** symbol interpretation **/ 
27927  203 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

204 
private lazy val symbols = 
48550  205 
new Interpretation(File.try_read(Path.split(Isabelle_System.getenv_strict("ISABELLE_SYMBOLS")))) 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

206 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

207 
private class Interpretation(symbols_spec: String) 
29569
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents:
29174
diff
changeset

208 
{ 
31522  209 
/* read symbols */ 
210 

211 
private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """) 

212 
private val key = new Regex("""(?xs) (.+): """) 

213 

43696  214 
private def read_decl(decl: String): (Symbol, Map[String, String]) = 
31522  215 
{ 
216 
def err() = error("Bad symbol declaration: " + decl) 

217 

218 
def read_props(props: List[String]): Map[String, String] = 

219 
{ 

220 
props match { 

221 
case Nil => Map() 

222 
case _ :: Nil => err() 

223 
case key(x) :: y :: rest => read_props(rest) + (x > y) 

224 
case _ => err() 

225 
} 

226 
} 

227 
decl.split("\\s+").toList match { 

40523
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40522
diff
changeset

228 
case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props)) 
34193  229 
case _ => err() 
31522  230 
} 
231 
} 

232 

43696  233 
private val symbols: List[(Symbol, Map[String, String])] = 
40443  234 
Map(( 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

235 
for (decl < split_lines(symbols_spec) if !empty.pattern.matcher(decl).matches) 
47993  236 
yield read_decl(decl)): _*).toList 
31522  237 

238 

31651  239 
/* misc properties */ 
240 

43696  241 
val names: Map[Symbol, String] = 
34134  242 
{ 
43456
8a6de1a6e1dc
names for control symbols without "^", which is relevant for completion;
wenzelm
parents:
43455
diff
changeset

243 
val name = new Regex("""\\<\^?([AZaz][AZaz09_']*)>""") 
31651  244 
Map((for ((sym @ name(a), _) < symbols) yield (sym > a)): _*) 
245 
} 

246 

43696  247 
val abbrevs: Map[Symbol, String] = 
43488  248 
Map(( 
249 
for ((sym, props) < symbols if props.isDefinedAt("abbrev")) 

250 
yield (sym > props("abbrev"))): _*) 

251 

252 

43490  253 
/* recoding */ 
31522  254 

255 
private val (decoder, encoder) = 

256 
{ 

257 
val mapping = 

258 
for { 

259 
(sym, props) < symbols 

46997  260 
code = 
31522  261 
try { Integer.decode(props("code")).intValue } 
262 
catch { 

263 
case _: NoSuchElementException => error("Missing code for symbol " + sym) 

264 
case _: NumberFormatException => error("Bad code for symbol " + sym) 

265 
} 

46997  266 
ch = new String(Character.toChars(code)) 
34193  267 
} yield { 
268 
if (code < 128) error("Illegal ASCII code for symbol " + sym) 

269 
else (sym, ch) 

270 
} 

31545
5f1f0a20af4d
discontinued escaped symbols such as \\<forall>  only one backslash should be used;
wenzelm
parents:
31523
diff
changeset

271 
(new Recoder(mapping), 
31548  272 
new Recoder(mapping map { case (x, y) => (y, x) })) 
31522  273 
} 
27918  274 

34098  275 
def decode(text: String): String = decoder.recode(text) 
276 
def encode(text: String): String = encoder.recode(text) 

34134  277 

43490  278 
private def recode_set(elems: String*): Set[String] = 
279 
{ 

280 
val content = elems.toList 

281 
Set((content ::: content.map(decode)): _*) 

282 
} 

283 

284 
private def recode_map[A](elems: (String, A)*): Map[String, A] = 

285 
{ 

286 
val content = elems.toList 

287 
Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*) 

288 
} 

289 

290 

291 
/* user fonts */ 

292 

43696  293 
val fonts: Map[Symbol, String] = 
43490  294 
recode_map(( 
295 
for ((sym, props) < symbols if props.isDefinedAt("font")) 

296 
yield (sym > props("font"))): _*) 

297 

298 
val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList 

299 
val font_index: Map[String, Int] = Map((font_names zip (0 until font_names.length).toList): _*) 

300 

34134  301 

302 
/* classification */ 

303 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

304 
val letters = recode_set( 
34134  305 
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", 
306 
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", 

307 
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", 

308 
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", 

309 

310 
"\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>", 

311 
"\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>", 

312 
"\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>", 

313 
"\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>", 

314 
"\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>", 

315 
"\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>", 

316 
"\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>", 

317 
"\\<x>", "\\<y>", "\\<z>", 

318 

319 
"\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>", 

320 
"\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>", 

321 
"\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>", 

322 
"\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>", 

323 
"\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>", 

324 
"\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>", 

325 
"\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>", 

326 
"\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>", 

327 
"\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>", 

328 

329 
"\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>", 

330 
"\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>", 

331 
"\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>", 

332 
"\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>", 

333 
"\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>", 

334 
"\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>", 

335 
"\\<Psi>", "\\<Omega>", 

336 

337 
"\\<^isub>", "\\<^isup>") 

338 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

339 
val blanks = 
48704
85a3de10567d
tuned signature  make Pretty less dependent on Symbol;
wenzelm
parents:
48550
diff
changeset

340 
recode_set(" ", "\t", "\n", "\u000B", "\f", "\r", "\r\n", "\\<spacespace>", "\\<^newline>") 
34138  341 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

342 
val sym_chars = 
34138  343 
Set("!", "#", "$", "%", "&", "*", "+", "", "/", "<", "=", ">", "?", "@", "^", "_", "", "~") 
34134  344 

44992
aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

345 
val symbolic = recode_set((for { (sym, _) < symbols; if raw_symbolic(sym) } yield sym): _*) 
aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

346 

43455  347 

43488  348 
/* control symbols */ 
349 

43696  350 
val ctrl_decoded: Set[Symbol] = 
43488  351 
Set((for ((sym, _) < symbols if sym.startsWith("\\<^")) yield decode(sym)): _*) 
352 

44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

353 
val sub_decoded = decode("\\<^sub>") 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

354 
val sup_decoded = decode("\\<^sup>") 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

355 
val isub_decoded = decode("\\<^isub>") 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

356 
val isup_decoded = decode("\\<^isup>") 
43511  357 
val bsub_decoded = decode("\\<^bsub>") 
358 
val esub_decoded = decode("\\<^esub>") 

359 
val bsup_decoded = decode("\\<^bsup>") 

360 
val esup_decoded = decode("\\<^esup>") 

44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

361 
val bold_decoded = decode("\\<^bold>") 
27918  362 
} 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

363 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

364 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

365 
/* tables */ 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

366 

43696  367 
def names: Map[Symbol, String] = symbols.names 
368 
def abbrevs: Map[Symbol, String] = symbols.abbrevs 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

369 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

370 
def decode(text: String): String = symbols.decode(text) 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

371 
def encode(text: String): String = symbols.encode(text) 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

372 

43696  373 
def fonts: Map[Symbol, String] = symbols.fonts 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

374 
def font_names: List[String] = symbols.font_names 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

375 
def font_index: Map[String, Int] = symbols.font_index 
43696  376 
def lookup_font(sym: Symbol): Option[Int] = symbols.fonts.get(sym).map(font_index(_)) 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

377 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

378 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

379 
/* classification */ 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

380 

43696  381 
def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym) 
382 
def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9' 

383 
def is_quasi(sym: Symbol): Boolean = sym == "_"  sym == "'" 

384 
def is_letdig(sym: Symbol): Boolean = is_letter(sym)  is_digit(sym)  is_quasi(sym) 

385 
def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym) 

44992
aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

386 

43696  387 
def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym) 
44992
aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

388 
def is_symbolic(sym: Symbol): Boolean = raw_symbolic(sym)  symbols.symbolic.contains(sym) 
aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

389 

aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

390 
private def raw_symbolic(sym: Symbol): Boolean = 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

391 
sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^") 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

392 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

393 

44992
aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

394 

aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

395 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

396 
/* control symbols */ 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

397 

43696  398 
def is_ctrl(sym: Symbol): Boolean = 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

399 
sym.startsWith("\\<^")  symbols.ctrl_decoded.contains(sym) 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

400 

43696  401 
def is_controllable(sym: Symbol): Boolean = 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

402 
!is_blank(sym) && !is_ctrl(sym) && !is_malformed(sym) 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

403 

44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

404 
def sub_decoded: Symbol = symbols.sub_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

405 
def sup_decoded: Symbol = symbols.sup_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

406 
def isub_decoded: Symbol = symbols.isub_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

407 
def isup_decoded: Symbol = symbols.isup_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

408 
def bsub_decoded: Symbol = symbols.bsub_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

409 
def esub_decoded: Symbol = symbols.esub_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

410 
def bsup_decoded: Symbol = symbols.bsup_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

411 
def esup_decoded: Symbol = symbols.esup_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

412 
def bold_decoded: Symbol = symbols.bold_decoded 
27901  413 
} 