author  wenzelm 
Sat, 17 Sep 2011 16:19:40 +0200  
changeset 44949  b49d7f1066c8 
parent 44238  36120feb70ed 
child 44992  aa34d2d049ce 
permissions  rwrr 
27901  1 
/* Title: Pure/General/symbol.scala 
2 
Author: Makarius 

3 

27924  4 
Detecting and recoding Isabelle symbols. 
27901  5 
*/ 
6 

7 
package isabelle 

8 

27918  9 
import scala.io.Source 
36011
3ff725ac13a4
adapted to Scala 2.8.0 Beta1  with notable changes to scala.collection;
wenzelm
parents:
34316
diff
changeset

10 
import scala.collection.mutable 
31522  11 
import scala.util.matching.Regex 
27901  12 

13 

31522  14 
object Symbol 
15 
{ 

43696  16 
type Symbol = String 
17 

18 

36763  19 
/* spaces */ 
20 

36816  21 
val spc = ' ' 
22 
val space = " " 

23 

24 
private val static_spaces = space * 4000 

36763  25 

26 
def spaces(k: Int): String = 

27 
{ 

28 
require(k >= 0) 

29 
if (k < static_spaces.length) static_spaces.substring(0, k) 

36816  30 
else space * k 
36763  31 
} 
32 

33 

43418  34 
/* ASCII characters */ 
35 

36 
def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z'  'a' <= c && c <= 'z' 

37 
def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9' 

38 
def is_ascii_quasi(c: Char): Boolean = c == '_'  c == '\'' 

39 

40 
def is_ascii_letdig(c: Char): Boolean = 

41 
is_ascii_letter(c)  is_ascii_digit(c)  is_ascii_quasi(c) 

42 

43 
def is_ascii_identifier(s: String): Boolean = 

44 
s.length > 0 && is_ascii_letter(s(0)) && s.substring(1).forall(is_ascii_letdig) 

45 

46 

33998  47 
/* Symbol regexps */ 
27901  48 

31522  49 
private val plain = new Regex("""(?xs) 
40524
6131d7a78ad3
treat Unicode "replacement character" (i.e. decoding error) is malformed;
wenzelm
parents:
40523
diff
changeset

50 
[^\r\\\ud800\udfff\ufffd]  [\ud800\udbff][\udc00\udfff] """) 
37556
2bf29095d26f
treat alternative newline symbols as in Isabelle/ML;
wenzelm
parents:
36816
diff
changeset

51 

40522  52 
private val physical_newline = new Regex("""(?xs) \n  \r\n  \r """) 
27901  53 

31522  54 
private val symbol = new Regex("""(?xs) 
31545
5f1f0a20af4d
discontinued escaped symbols such as \\<forall>  only one backslash should be used;
wenzelm
parents:
31523
diff
changeset

55 
\\ < (?: 
27924  56 
\^? [AZaz][AZaz09_']*  
57 
\^raw: [\x20\x7e\u0100\uffff && [^.>]]* ) >""") 

58 

40523
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40522
diff
changeset

59 
private val malformed_symbol = new Regex("(?xs) (?!" + symbol + ")" + 
40529  60 
""" [\ud800\udbff\ufffd]  \\<\^? """) 
27924  61 

40523
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40522
diff
changeset

62 
val regex_total = 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40522
diff
changeset

63 
new Regex(plain + "" + physical_newline + "" + symbol + "" + malformed_symbol + " .") 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

64 

34137  65 

66 
/* basic matching */ 

67 

37556
2bf29095d26f
treat alternative newline symbols as in Isabelle/ML;
wenzelm
parents:
36816
diff
changeset

68 
def is_plain(c: Char): Boolean = !(c == '\r'  c == '\\'  '\ud800' <= c && c <= '\udfff') 
34137  69 

43696  70 
def is_physical_newline(s: Symbol): Boolean = 
43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

71 
s == "\n"  s == "\r"  s == "\r\n" 
38877  72 

43696  73 
def is_malformed(s: Symbol): Boolean = 
43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

74 
!(s.length == 1 && is_plain(s(0))) && malformed_symbol.pattern.matcher(s).matches 
34137  75 

76 
class Matcher(text: CharSequence) 

77 
{ 

40523
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40522
diff
changeset

78 
private val matcher = regex_total.pattern.matcher(text) 
34137  79 
def apply(start: Int, end: Int): Int = 
80 
{ 

81 
require(0 <= start && start < end && end <= text.length) 

34316
f879b649ac4c
clarified Symbol.is_plain/is_wellformed  is_closed was rejecting plain backslashes;
wenzelm
parents:
34193
diff
changeset

82 
if (is_plain(text.charAt(start))) 1 
34138  83 
else { 
34137  84 
matcher.region(start, end).lookingAt 
85 
matcher.group.length 

86 
} 

87 
} 

31522  88 
} 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

89 

fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

90 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

91 
/* iterator */ 
33998  92 

43696  93 
private val char_symbols: Array[Symbol] = 
43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

94 
(0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

95 

43696  96 
def iterator(text: CharSequence): Iterator[Symbol] = 
97 
new Iterator[Symbol] 

40522  98 
{ 
43489  99 
private val matcher = new Matcher(text) 
100 
private var i = 0 

101 
def hasNext = i < text.length 

102 
def next = 

103 
{ 

104 
val n = matcher(i, text.length) 

43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

105 
val s = 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

106 
if (n == 0) "" 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

107 
else if (n == 1) { 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

108 
val c = text.charAt(i) 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

109 
if (c < char_symbols.length) char_symbols(c) 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

110 
else text.subSequence(i, i + n).toString 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

111 
} 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

112 
else text.subSequence(i, i + n).toString 
43489  113 
i += n 
114 
s 

115 
} 

33998  116 
} 
43489  117 

44949  118 
def explode(text: CharSequence): List[Symbol] = iterator(text).toList 
119 

33998  120 

121 
/* decoding offsets */ 

122 

123 
class Index(text: CharSequence) 

31929  124 
{ 
43714  125 
sealed case class Entry(chr: Int, sym: Int) 
31929  126 
val index: Array[Entry] = 
127 
{ 

34137  128 
val matcher = new Matcher(text) 
31929  129 
val buf = new mutable.ArrayBuffer[Entry] 
130 
var chr = 0 

131 
var sym = 0 

33998  132 
while (chr < text.length) { 
34137  133 
val n = matcher(chr, text.length) 
134 
chr += n 

31929  135 
sym += 1 
34137  136 
if (n > 1) buf += Entry(chr, sym) 
31929  137 
} 
138 
buf.toArray 

139 
} 

38479
e628da370072
more efficient Markup_Tree, based on branches sorted by quasiorder;
wenzelm
parents:
37556
diff
changeset

140 
def decode(sym1: Int): Int = 
31929  141 
{ 
38479
e628da370072
more efficient Markup_Tree, based on branches sorted by quasiorder;
wenzelm
parents:
37556
diff
changeset

142 
val sym = sym1  1 
31929  143 
val end = index.length 
144 
def bisect(a: Int, b: Int): Int = 

145 
{ 

146 
if (a < b) { 

147 
val c = (a + b) / 2 

148 
if (sym < index(c).sym) bisect(a, c) 

149 
else if (c + 1 == end  sym < index(c + 1).sym) c 

150 
else bisect(c + 1, b) 

151 
} 

152 
else 1 

153 
} 

154 
val i = bisect(0, end) 

155 
if (i < 0) sym 

156 
else index(i).chr + sym  index(i).sym 

157 
} 

38479
e628da370072
more efficient Markup_Tree, based on branches sorted by quasiorder;
wenzelm
parents:
37556
diff
changeset

158 
def decode(range: Text.Range): Text.Range = range.map(decode(_)) 
31929  159 
} 
160 

161 

33998  162 
/* recoding text */ 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

163 

31522  164 
private class Recoder(list: List[(String, String)]) 
165 
{ 

166 
private val (min, max) = 

167 
{ 

27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

168 
var min = '\uffff' 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

169 
var max = '\u0000' 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

170 
for ((x, _) < list) { 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

171 
val c = x(0) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

172 
if (c < min) min = c 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

173 
if (c > max) max = c 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

174 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

175 
(min, max) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

176 
} 
40443  177 
private val table = 
178 
{ 

179 
var tab = Map[String, String]() 

180 
for ((x, y) < list) { 

181 
tab.get(x) match { 

182 
case None => tab += (x > y) 

183 
case Some(z) => 

44181  184 
error("Duplicate mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z)) 
40443  185 
} 
186 
} 

187 
tab 

188 
} 

31522  189 
def recode(text: String): String = 
190 
{ 

27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

191 
val len = text.length 
40523
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40522
diff
changeset

192 
val matcher = regex_total.pattern.matcher(text) 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

193 
val result = new StringBuilder(len) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

194 
var i = 0 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

195 
while (i < len) { 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

196 
val c = text(i) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

197 
if (min <= c && c <= max) { 
31929  198 
matcher.region(i, len).lookingAt 
27938  199 
val x = matcher.group 
31522  200 
result.append(table.get(x) getOrElse x) 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

201 
i = matcher.end 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

202 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

203 
else { result.append(c); i += 1 } 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

204 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

205 
result.toString 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

206 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

207 
} 
27924  208 

27918  209 

27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset

210 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

211 
/** symbol interpretation **/ 
27927  212 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

213 
private lazy val symbols = 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

214 
new Interpretation( 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

215 
Isabelle_System.try_read(Path.split(Isabelle_System.getenv_strict("ISABELLE_SYMBOLS")))) 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

216 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

217 
private class Interpretation(symbols_spec: String) 
29569
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents:
29174
diff
changeset

218 
{ 
31522  219 
/* read symbols */ 
220 

221 
private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """) 

222 
private val key = new Regex("""(?xs) (.+): """) 

223 

43696  224 
private def read_decl(decl: String): (Symbol, Map[String, String]) = 
31522  225 
{ 
226 
def err() = error("Bad symbol declaration: " + decl) 

227 

228 
def read_props(props: List[String]): Map[String, String] = 

229 
{ 

230 
props match { 

231 
case Nil => Map() 

232 
case _ :: Nil => err() 

233 
case key(x) :: y :: rest => read_props(rest) + (x > y) 

234 
case _ => err() 

235 
} 

236 
} 

237 
decl.split("\\s+").toList match { 

40523
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40522
diff
changeset

238 
case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props)) 
34193  239 
case _ => err() 
31522  240 
} 
241 
} 

242 

43696  243 
private val symbols: List[(Symbol, Map[String, String])] = 
40443  244 
Map(( 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

245 
for (decl < split_lines(symbols_spec) if !empty.pattern.matcher(decl).matches) 
40443  246 
yield read_decl(decl)): _*) toList 
31522  247 

248 

31651  249 
/* misc properties */ 
250 

43696  251 
val names: Map[Symbol, String] = 
34134  252 
{ 
43456
8a6de1a6e1dc
names for control symbols without "^", which is relevant for completion;
wenzelm
parents:
43455
diff
changeset

253 
val name = new Regex("""\\<\^?([AZaz][AZaz09_']*)>""") 
31651  254 
Map((for ((sym @ name(a), _) < symbols) yield (sym > a)): _*) 
255 
} 

256 

43696  257 
val abbrevs: Map[Symbol, String] = 
43488  258 
Map(( 
259 
for ((sym, props) < symbols if props.isDefinedAt("abbrev")) 

260 
yield (sym > props("abbrev"))): _*) 

261 

262 

43490  263 
/* recoding */ 
31522  264 

265 
private val (decoder, encoder) = 

266 
{ 

267 
val mapping = 

268 
for { 

269 
(sym, props) < symbols 

270 
val code = 

271 
try { Integer.decode(props("code")).intValue } 

272 
catch { 

273 
case _: NoSuchElementException => error("Missing code for symbol " + sym) 

274 
case _: NumberFormatException => error("Bad code for symbol " + sym) 

275 
} 

276 
val ch = new String(Character.toChars(code)) 

34193  277 
} yield { 
278 
if (code < 128) error("Illegal ASCII code for symbol " + sym) 

279 
else (sym, ch) 

280 
} 

31545
5f1f0a20af4d
discontinued escaped symbols such as \\<forall>  only one backslash should be used;
wenzelm
parents:
31523
diff
changeset

281 
(new Recoder(mapping), 
31548  282 
new Recoder(mapping map { case (x, y) => (y, x) })) 
31522  283 
} 
27918  284 

34098  285 
def decode(text: String): String = decoder.recode(text) 
286 
def encode(text: String): String = encoder.recode(text) 

34134  287 

43490  288 
private def recode_set(elems: String*): Set[String] = 
289 
{ 

290 
val content = elems.toList 

291 
Set((content ::: content.map(decode)): _*) 

292 
} 

293 

294 
private def recode_map[A](elems: (String, A)*): Map[String, A] = 

295 
{ 

296 
val content = elems.toList 

297 
Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*) 

298 
} 

299 

300 

301 
/* user fonts */ 

302 

43696  303 
val fonts: Map[Symbol, String] = 
43490  304 
recode_map(( 
305 
for ((sym, props) < symbols if props.isDefinedAt("font")) 

306 
yield (sym > props("font"))): _*) 

307 

308 
val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList 

309 
val font_index: Map[String, Int] = Map((font_names zip (0 until font_names.length).toList): _*) 

310 

34134  311 

312 
/* classification */ 

313 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

314 
val letters = recode_set( 
34134  315 
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", 
316 
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", 

317 
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", 

318 
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", 

319 

320 
"\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>", 

321 
"\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>", 

322 
"\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>", 

323 
"\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>", 

324 
"\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>", 

325 
"\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>", 

326 
"\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>", 

327 
"\\<x>", "\\<y>", "\\<z>", 

328 

329 
"\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>", 

330 
"\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>", 

331 
"\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>", 

332 
"\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>", 

333 
"\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>", 

334 
"\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>", 

335 
"\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>", 

336 
"\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>", 

337 
"\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>", 

338 

339 
"\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>", 

340 
"\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>", 

341 
"\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>", 

342 
"\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>", 

343 
"\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>", 

344 
"\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>", 

345 
"\\<Psi>", "\\<Omega>", 

346 

347 
"\\<^isub>", "\\<^isup>") 

348 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

349 
val blanks = 
43490  350 
recode_set(space, "\t", "\n", "\u000B", "\f", "\r", "\\<spacespace>", "\\<^newline>") 
34138  351 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

352 
val sym_chars = 
34138  353 
Set("!", "#", "$", "%", "&", "*", "+", "", "/", "<", "=", ">", "?", "@", "^", "_", "", "~") 
34134  354 

43455  355 

43488  356 
/* control symbols */ 
357 

43696  358 
val ctrl_decoded: Set[Symbol] = 
43488  359 
Set((for ((sym, _) < symbols if sym.startsWith("\\<^")) yield decode(sym)): _*) 
360 

44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

361 
val sub_decoded = decode("\\<^sub>") 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

362 
val sup_decoded = decode("\\<^sup>") 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

363 
val isub_decoded = decode("\\<^isub>") 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

364 
val isup_decoded = decode("\\<^isup>") 
43511  365 
val bsub_decoded = decode("\\<^bsub>") 
366 
val esub_decoded = decode("\\<^esub>") 

367 
val bsup_decoded = decode("\\<^bsup>") 

368 
val esup_decoded = decode("\\<^esup>") 

44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

369 
val bold_decoded = decode("\\<^bold>") 
27918  370 
} 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

371 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

372 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

373 
/* tables */ 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

374 

43696  375 
def names: Map[Symbol, String] = symbols.names 
376 
def abbrevs: Map[Symbol, String] = symbols.abbrevs 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

377 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

378 
def decode(text: String): String = symbols.decode(text) 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

379 
def encode(text: String): String = symbols.encode(text) 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

380 

43696  381 
def fonts: Map[Symbol, String] = symbols.fonts 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

382 
def font_names: List[String] = symbols.font_names 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

383 
def font_index: Map[String, Int] = symbols.font_index 
43696  384 
def lookup_font(sym: Symbol): Option[Int] = symbols.fonts.get(sym).map(font_index(_)) 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

385 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

386 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

387 
/* classification */ 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

388 

43696  389 
def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym) 
390 
def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9' 

391 
def is_quasi(sym: Symbol): Boolean = sym == "_"  sym == "'" 

392 
def is_letdig(sym: Symbol): Boolean = is_letter(sym)  is_digit(sym)  is_quasi(sym) 

393 
def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym) 

394 
def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym) 

395 
def is_symbolic(sym: Symbol): Boolean = 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

396 
sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^") 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

397 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

398 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

399 
/* control symbols */ 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

400 

43696  401 
def is_ctrl(sym: Symbol): Boolean = 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

402 
sym.startsWith("\\<^")  symbols.ctrl_decoded.contains(sym) 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

403 

43696  404 
def is_controllable(sym: Symbol): Boolean = 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

405 
!is_blank(sym) && !is_ctrl(sym) && !is_malformed(sym) 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

406 

44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

407 
def sub_decoded: Symbol = symbols.sub_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

408 
def sup_decoded: Symbol = symbols.sup_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

409 
def isub_decoded: Symbol = symbols.isub_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

410 
def isup_decoded: Symbol = symbols.isup_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

411 
def bsub_decoded: Symbol = symbols.bsub_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

412 
def esub_decoded: Symbol = symbols.esub_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

413 
def bsup_decoded: Symbol = symbols.bsup_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

414 
def esup_decoded: Symbol = symbols.esup_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

415 
def bold_decoded: Symbol = symbols.bold_decoded 
27901  416 
} 