author  wenzelm 
Wed, 03 Mar 2021 21:58:29 +0100  
changeset 73602  ec52a1a6ed31 
parent 73587  0af9e7e4476f 
child 73606  d8a0e996614b 
permissions  rwrr 
27901  1 
/* Title: Pure/General/symbol.scala 
2 
Author: Makarius 

3 

69502  4 
Isabelle text symbols. 
27901  5 
*/ 
6 

7 
package isabelle 

8 

55618  9 

36011
3ff725ac13a4
adapted to Scala 2.8.0 Beta1  with notable changes to scala.collection;
wenzelm
parents:
34316
diff
changeset

10 
import scala.collection.mutable 
31522  11 
import scala.util.matching.Regex 
48922  12 
import scala.annotation.tailrec 
27901  13 

14 

31522  15 
object Symbol 
16 
{ 

43696  17 
type Symbol = String 
18 

55884
f2c0eaedd579
tuned signature  emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset

19 
// counting Isabelle symbols, starting from 1 
f2c0eaedd579
tuned signature  emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset

20 
type Offset = Text.Offset 
f2c0eaedd579
tuned signature  emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset

21 
type Range = Text.Range 
f2c0eaedd579
tuned signature  emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset

22 

43696  23 

61867  24 
/* spaces */ 
25 

71864  26 
val space_char = ' ' 
61867  27 
val space = " " 
28 

29 
private val static_spaces = space * 4000 

30 

31 
def spaces(n: Int): String = 

32 
{ 

73364
c3589f2dff31
more informative errors: simplify diagnosis of spurious failures reported by users;
wenzelm
parents:
73270
diff
changeset

33 
require(n >= 0, "negative spaces") 
61867  34 
if (n < static_spaces.length) static_spaces.substring(0, n) 
35 
else space * n 

36 
} 

37 

38 

43418  39 
/* ASCII characters */ 
40 

71864  41 
def is_ascii_printable(c: Char): Boolean = space_char <= c && c <= '~' 
42 

43418  43 
def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z'  'a' <= c && c <= 'z' 
55497  44 

43418  45 
def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9' 
55497  46 

47 
def is_ascii_hex(c: Char): Boolean = 

48 
'0' <= c && c <= '9'  'A' <= c && c <= 'F'  'a' <= c && c <= 'f' 

49 

43418  50 
def is_ascii_quasi(c: Char): Boolean = c == '_'  c == '\'' 
51 

55497  52 
def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c) 
53 

69458  54 
def is_ascii_line_terminator(c: Char): Boolean = "\r\n".contains(c) 
55 

43418  56 
def is_ascii_letdig(c: Char): Boolean = 
57 
is_ascii_letter(c)  is_ascii_digit(c)  is_ascii_quasi(c) 

58 

59 
def is_ascii_identifier(s: String): Boolean = 

50238
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
wenzelm
parents:
50233
diff
changeset

60 
s.length > 0 && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig) 
43418  61 

62529  62 
def ascii(c: Char): Symbol = 
63 
{ 

64 
if (c > 127) error("NonASCII character: " + quote(c.toString)) 

65 
else char_symbols(c.toInt) 

66 
} 

67 

66919  68 
def is_ascii(s: Symbol): Boolean = s.length == 1 && s(0) < 128 
69 

43418  70 

48775  71 
/* symbol matching */ 
27901  72 

73602  73 
private val symbol_total = 
74 
new Regex("(?xs) [\ud800\udbff][\udc00\udfff] " + 

75 
"""  \r\n  \\ < \^? ([AZaz][AZaz09_']*)? >?  .""") 

27924  76 

48775  77 
private def is_plain(c: Char): Boolean = 
78 
!(c == '\r'  c == '\\'  Character.isHighSurrogate(c)) 

48773
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

79 

0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

80 
def is_malformed(s: Symbol): Boolean = 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

81 
s.length match { 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

82 
case 1 => 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

83 
val c = s(0) 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

84 
Character.isHighSurrogate(c)  Character.isLowSurrogate(c)  c == '\ufffd' 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

85 
case 2 => 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

86 
val c1 = s(0) 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

87 
val c2 = s(1) 
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

88 
!(c1 == '\r' && c2 == '\n'  Character.isSurrogatePair(c1, c2)) 
48774  89 
case _ => !s.endsWith(">")  s == "\\<>"  s == "\\<^>" 
48773
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset

90 
} 
34137  91 

54734
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
wenzelm
parents:
53400
diff
changeset

92 
def is_newline(s: Symbol): Boolean = 
43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

93 
s == "\n"  s == "\r"  s == "\r\n" 
38877  94 

34137  95 
class Matcher(text: CharSequence) 
96 
{ 

48775  97 
private val matcher = symbol_total.pattern.matcher(text) 
34137  98 
def apply(start: Int, end: Int): Int = 
99 
{ 

73364
c3589f2dff31
more informative errors: simplify diagnosis of spurious failures reported by users;
wenzelm
parents:
73270
diff
changeset

100 
require(0 <= start && start < end && end <= text.length, "bad matcher range") 
34316
f879b649ac4c
clarified Symbol.is_plain/is_wellformed  is_closed was rejecting plain backslashes;
wenzelm
parents:
34193
diff
changeset

101 
if (is_plain(text.charAt(start))) 1 
34138  102 
else { 
34137  103 
matcher.region(start, end).lookingAt 
104 
matcher.group.length 

105 
} 

106 
} 

31522  107 
} 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

108 

fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

109 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

110 
/* iterator */ 
33998  111 

43696  112 
private val char_symbols: Array[Symbol] = 
43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

113 
(0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

114 

43696  115 
def iterator(text: CharSequence): Iterator[Symbol] = 
116 
new Iterator[Symbol] 

40522  117 
{ 
43489  118 
private val matcher = new Matcher(text) 
119 
private var i = 0 

71816  120 
def hasNext: Boolean = i < text.length 
73587  121 
def next(): Symbol = 
43489  122 
{ 
123 
val n = matcher(i, text.length) 

43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

124 
val s = 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

125 
if (n == 0) "" 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

126 
else if (n == 1) { 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

127 
val c = text.charAt(i) 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

128 
if (c < char_symbols.length) char_symbols(c) 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

129 
else text.subSequence(i, i + n).toString 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

130 
} 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset

131 
else text.subSequence(i, i + n).toString 
43489  132 
i += n 
133 
s 

134 
} 

33998  135 
} 
43489  136 

44949  137 
def explode(text: CharSequence): List[Symbol] = iterator(text).toList 
138 

64616  139 
def length(text: CharSequence): Int = iterator(text).length 
64618  140 

67435
f83c1842a559
trim blanks  more thoroughly than in update_cartouches (for singleline comments);
wenzelm
parents:
67389
diff
changeset

141 
def trim_blanks(text: CharSequence): String = 
71816  142 
Library.trim(is_blank, explode(text)).mkString 
67435
f83c1842a559
trim blanks  more thoroughly than in update_cartouches (for singleline comments);
wenzelm
parents:
67389
diff
changeset

143 

69328  144 
def all_blank(str: String): Boolean = 
71816  145 
iterator(str).forall(is_blank) 
69328  146 

147 
def trim_blank_lines(text: String): String = 

148 
cat_lines(split_lines(text).dropWhile(all_blank).reverse.dropWhile(all_blank).reverse) 

149 

33998  150 

151 
/* decoding offsets */ 

152 

52507  153 
object Index 
154 
{ 

56471
2293a4350716
more frugal Symbol.Index  no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset

155 
private sealed case class Entry(chr: Int, sym: Int) 
52507  156 

56472  157 
val empty: Index = new Index(Nil) 
56471
2293a4350716
more frugal Symbol.Index  no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset

158 

2293a4350716
more frugal Symbol.Index  no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset

159 
def apply(text: CharSequence): Index = 
31929  160 
{ 
34137  161 
val matcher = new Matcher(text) 
56471
2293a4350716
more frugal Symbol.Index  no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset

162 
val buf = new mutable.ListBuffer[Entry] 
31929  163 
var chr = 0 
164 
var sym = 0 

33998  165 
while (chr < text.length) { 
34137  166 
val n = matcher(chr, text.length) 
167 
chr += n 

31929  168 
sym += 1 
34137  169 
if (n > 1) buf += Entry(chr, sym) 
31929  170 
} 
56472  171 
if (buf.isEmpty) empty else new Index(buf.toList) 
31929  172 
} 
56471
2293a4350716
more frugal Symbol.Index  no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset

173 
} 
55430  174 

56472  175 
final class Index private(entries: List[Index.Entry]) 
56471
2293a4350716
more frugal Symbol.Index  no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset

176 
{ 
56472  177 
private val hash: Int = entries.hashCode 
178 
private val index: Array[Index.Entry] = entries.toArray 

179 

55884
f2c0eaedd579
tuned signature  emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset

180 
def decode(symbol_offset: Offset): Text.Offset = 
31929  181 
{ 
55884
f2c0eaedd579
tuned signature  emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset

182 
val sym = symbol_offset  1 
31929  183 
val end = index.length 
48922  184 
@tailrec def bisect(a: Int, b: Int): Int = 
31929  185 
{ 
186 
if (a < b) { 

187 
val c = (a + b) / 2 

188 
if (sym < index(c).sym) bisect(a, c) 

189 
else if (c + 1 == end  sym < index(c + 1).sym) c 

190 
else bisect(c + 1, b) 

191 
} 

192 
else 1 

193 
} 

194 
val i = bisect(0, end) 

195 
if (i < 0) sym 

196 
else index(i).chr + sym  index(i).sym 

197 
} 

71816  198 
def decode(symbol_range: Range): Text.Range = symbol_range.map(decode) 
56335
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset

199 

56338
f968f4e3d520
proper structural hashCode, which is required for Command.File equals (NB: Array has physical object identity);
wenzelm
parents:
56335
diff
changeset

200 
override def hashCode: Int = hash 
56335
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset

201 
override def equals(that: Any): Boolean = 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset

202 
that match { 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset

203 
case other: Index => index.sameElements(other.index) 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset

204 
case _ => false 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset

205 
} 
31929  206 
} 
207 

208 

64477  209 
/* symbolic text chunks  without actual text */ 
56746  210 

211 
object Text_Chunk 

212 
{ 

213 
sealed abstract class Name 

214 
case object Default extends Name 

215 
case class Id(id: Document_ID.Generic) extends Name 

216 
case class File(name: String) extends Name 

217 

218 
def apply(text: CharSequence): Text_Chunk = 

219 
new Text_Chunk(Text.Range(0, text.length), Index(text)) 

220 
} 

221 

222 
final class Text_Chunk private(val range: Text.Range, private val index: Index) 

223 
{ 

224 
override def hashCode: Int = (range, index).hashCode 

225 
override def equals(that: Any): Boolean = 

226 
that match { 

227 
case other: Text_Chunk => 

228 
range == other.range && 

229 
index == other.index 

230 
case _ => false 

231 
} 

232 

57840  233 
override def toString: String = "Text_Chunk" + range.toString 
234 

56746  235 
def decode(symbol_offset: Offset): Text.Offset = index.decode(symbol_offset) 
236 
def decode(symbol_range: Range): Text.Range = index.decode(symbol_range) 

237 
def incorporate(symbol_range: Range): Option[Text.Range] = 

238 
{ 

239 
def in(r: Range): Option[Text.Range] = 

240 
range.try_restrict(decode(r)) match { 

241 
case Some(r1) if !r1.is_singularity => Some(r1) 

242 
case _ => None 

243 
} 

244 
in(symbol_range) orElse in(symbol_range  1) 

245 
} 

246 
} 

247 

248 

33998  249 
/* recoding text */ 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

250 

31522  251 
private class Recoder(list: List[(String, String)]) 
252 
{ 

253 
private val (min, max) = 

254 
{ 

27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

255 
var min = '\uffff' 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

256 
var max = '\u0000' 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

257 
for ((x, _) < list) { 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

258 
val c = x(0) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

259 
if (c < min) min = c 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

260 
if (c > max) max = c 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

261 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

262 
(min, max) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

263 
} 
40443  264 
private val table = 
265 
{ 

266 
var tab = Map[String, String]() 

267 
for ((x, y) < list) { 

268 
tab.get(x) match { 

269 
case None => tab += (x > y) 

270 
case Some(z) => 

62230  271 
error("Duplicate symbol mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z)) 
40443  272 
} 
273 
} 

274 
tab 

275 
} 

31522  276 
def recode(text: String): String = 
277 
{ 

27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

278 
val len = text.length 
48775  279 
val matcher = symbol_total.pattern.matcher(text) 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

280 
val result = new StringBuilder(len) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

281 
var i = 0 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

282 
while (i < len) { 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

283 
val c = text(i) 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

284 
if (min <= c && c <= max) { 
31929  285 
matcher.region(i, len).lookingAt 
27938  286 
val x = matcher.group 
52888  287 
result.append(table.getOrElse(x, x)) 
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

288 
i = matcher.end 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

289 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

290 
else { result.append(c); i += 1 } 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

291 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

292 
result.toString 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

293 
} 
fdf77e7be01a
more robust pattern: look at longer matches first, added catchall case;
wenzelm
parents:
27935
diff
changeset

294 
} 
27924  295 

27918  296 

27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset

297 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

298 
/** symbol interpretation **/ 
27927  299 

67311  300 
val ARGUMENT_CARTOUCHE = "cartouche" 
301 
val ARGUMENT_SPACE_CARTOUCHE = "space_cartouche" 

302 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

303 
private lazy val symbols = 
61959  304 
{ 
305 
val contents = 

306 
for (path < Path.split(Isabelle_System.getenv("ISABELLE_SYMBOLS")) if path.is_file) 

307 
yield (File.read(path)) 

308 
new Interpretation(cat_lines(contents)) 

309 
} 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

310 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

311 
private class Interpretation(symbols_spec: String) 
29569
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents:
29174
diff
changeset

312 
{ 
31522  313 
/* read symbols */ 
314 

50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

315 
private val No_Decl = new Regex("""(?xs) ^\s* (?: \#.* )? $ """) 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

316 
private val Key = new Regex("""(?xs) (.+): """) 
31522  317 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

318 
private def read_decl(decl: String): (Symbol, Properties.T) = 
31522  319 
{ 
320 
def err() = error("Bad symbol declaration: " + decl) 

321 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

322 
def read_props(props: List[String]): Properties.T = 
31522  323 
{ 
324 
props match { 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

325 
case Nil => Nil 
31522  326 
case _ :: Nil => err() 
61174  327 
case Key(x) :: y :: rest => (x > y.replace('\u2423', ' ')) :: read_props(rest) 
31522  328 
case _ => err() 
329 
} 

330 
} 

331 
decl.split("\\s+").toList match { 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

332 
case sym :: props if sym.length > 1 && !is_malformed(sym) => 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

333 
(sym, read_props(props)) 
34193  334 
case _ => err() 
31522  335 
} 
336 
} 

337 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

338 
private val symbols: List[(Symbol, Properties.T)] = 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

339 
(((List.empty[(Symbol, Properties.T)], Set.empty[Symbol]) /: 
50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

340 
split_lines(symbols_spec).reverse) 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

341 
{ case (res, No_Decl()) => res 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

342 
case ((list, known), decl) => 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

343 
val (sym, props) = read_decl(decl) 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

344 
if (known(sym)) (list, known) 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

345 
else ((sym, props) :: list, known + sym) 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

346 
})._1 
31522  347 

348 

53400  349 
/* basic properties */ 
350 

351 
val properties: Map[Symbol, Properties.T] = Map(symbols: _*) 

31651  352 

67311  353 
val names: Map[Symbol, (String, String)] = 
34134  354 
{ 
67311  355 
val Name = new Regex("""\\<\^?([AZaz][AZaz09_']*)>""") 
356 
val Argument = new Properties.String("argument") 

357 
def argument(sym: Symbol, props: Properties.T): String = 

358 
props match { 

359 
case Argument(arg) => 

360 
if (arg == ARGUMENT_CARTOUCHE  arg == ARGUMENT_SPACE_CARTOUCHE) arg 

361 
else error("Bad argument: " + quote(arg) + " for symbol " + quote(sym)) 

362 
case _ => "" 

363 
} 

364 
Map((for ((sym @ Name(a), props) < symbols) yield sym > (a, argument(sym, props))): _*) 

31651  365 
} 
366 

50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

367 
val groups: List[(String, List[Symbol])] = 
71592  368 
symbols.flatMap({ case (sym, props) => 
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

369 
val gs = for (("group", g) < props) yield g 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

370 
if (gs.isEmpty) List(sym > "unsorted") else gs.map(sym > _) 
71592  371 
}).groupBy(_._2).toList.map({ case (group, list) => (group, list.map(_._1)) }) 
50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

372 
.sortBy(_._1) 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

373 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

374 
val abbrevs: Multi_Map[Symbol, String] = 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

375 
Multi_Map(( 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

376 
for { 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

377 
(sym, props) < symbols 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

378 
("abbrev", a) < props.reverse 
60215  379 
} yield sym > a): _*) 
43488  380 

66051  381 
val codes: List[(Symbol, Int)] = 
61376
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

382 
{ 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

383 
val Code = new Properties.String("code") 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

384 
for { 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

385 
(sym, props) < symbols 
67311  386 
code < 
61376
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

387 
props match { 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

388 
case Code(s) => 
67311  389 
try { Some(Integer.decode(s).intValue) } 
61376
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

390 
catch { case _: NumberFormatException => error("Bad code for symbol " + sym) } 
67311  391 
case _ => None 
61376
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

392 
} 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

393 
} yield { 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

394 
if (code < 128) error("Illegal ASCII code for symbol " + sym) 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

395 
else (sym, code) 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

396 
} 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

397 
} 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

398 

43488  399 

43490  400 
/* recoding */ 
31522  401 

402 
private val (decoder, encoder) = 

403 
{ 

404 
val mapping = 

61376
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

405 
for ((sym, code) < codes) yield (sym, new String(Character.toChars(code))) 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset

406 
(new Recoder(mapping), new Recoder(for ((x, y) < mapping) yield (y, x))) 
31522  407 
} 
27918  408 

34098  409 
def decode(text: String): String = decoder.recode(text) 
410 
def encode(text: String): String = encoder.recode(text) 

34134  411 

43490  412 
private def recode_set(elems: String*): Set[String] = 
413 
{ 

414 
val content = elems.toList 

415 
Set((content ::: content.map(decode)): _*) 

416 
} 

417 

418 
private def recode_map[A](elems: (String, A)*): Map[String, A] = 

419 
{ 

420 
val content = elems.toList 

421 
Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*) 

422 
} 

423 

424 

425 
/* user fonts */ 

426 

53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

427 
private val Font = new Properties.String("font") 
43696  428 
val fonts: Map[Symbol, String] = 
60215  429 
recode_map((for ((sym, Font(font)) < symbols) yield sym > font): _*) 
43490  430 

431 
val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList 

71592  432 
val font_index: Map[String, Int] = Map((font_names zip font_names.indices.toList): _*) 
43490  433 

34134  434 

435 
/* classification */ 

436 

71816  437 
val letters: Set[String] = recode_set( 
34134  438 
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", 
439 
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", 

440 
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", 

441 
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", 

442 

443 
"\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>", 

444 
"\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>", 

445 
"\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>", 

446 
"\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>", 

447 
"\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>", 

448 
"\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>", 

449 
"\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>", 

450 
"\\<x>", "\\<y>", "\\<z>", 

451 

452 
"\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>", 

453 
"\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>", 

454 
"\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>", 

455 
"\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>", 

456 
"\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>", 

457 
"\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>", 

458 
"\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>", 

459 
"\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>", 

460 
"\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>", 

461 

462 
"\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>", 

463 
"\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>", 

464 
"\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>", 

465 
"\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>", 

466 
"\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>", 

467 
"\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>", 

52616
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
wenzelm
parents:
52507
diff
changeset

468 
"\\<Psi>", "\\<Omega>") 
34134  469 

71816  470 
val blanks: Set[String] = recode_set(space, "\t", "\n", "\u000B", "\f", "\r", "\r\n") 
34138  471 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

472 
val sym_chars = 
34138  473 
Set("!", "#", "$", "%", "&", "*", "+", "", "/", "<", "=", ">", "?", "@", "^", "_", "", "~") 
34134  474 

71816  475 
val symbolic: Set[String] = recode_set((for {(sym, _) < symbols; if raw_symbolic(sym)} yield sym): _*) 
44992
aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

476 

43455  477 

63529
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62529
diff
changeset

478 
/* misc symbols */ 
61579  479 

63529
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62529
diff
changeset

480 
val newline_decoded = decode(newline) 
61579  481 
val comment_decoded = decode(comment) 
67438  482 
val cancel_decoded = decode(cancel) 
483 
val latex_decoded = decode(latex) 

70072
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the commandspan;
wenzelm
parents:
70068
diff
changeset

484 
val marker_decoded = decode(marker) 
55033  485 
val open_decoded = decode(open) 
486 
val close_decoded = decode(close) 

487 

488 

43488  489 
/* control symbols */ 
490 

59107  491 
val control_decoded: Set[Symbol] = 
43488  492 
Set((for ((sym, _) < symbols if sym.startsWith("\\<^")) yield decode(sym)): _*) 
493 

62103  494 
val sub_decoded = decode(sub) 
495 
val sup_decoded = decode(sup) 

496 
val bold_decoded = decode(bold) 

62104
fb73c0d7bb37
clarified symbol insertion, depending on buffer encoding;
wenzelm
parents:
62103
diff
changeset

497 
val emph_decoded = decode(emph) 
65999
e3dc9ea67a62
output control symbols like ML version, with optionally hidden source;
wenzelm
parents:
65521
diff
changeset

498 
val bsub_decoded = decode(bsub) 
e3dc9ea67a62
output control symbols like ML version, with optionally hidden source;
wenzelm
parents:
65521
diff
changeset

499 
val esub_decoded = decode(esub) 
e3dc9ea67a62
output control symbols like ML version, with optionally hidden source;
wenzelm
parents:
65521
diff
changeset

500 
val bsup_decoded = decode(bsup) 
e3dc9ea67a62
output control symbols like ML version, with optionally hidden source;
wenzelm
parents:
65521
diff
changeset

501 
val esup_decoded = decode(esup) 
27918  502 
} 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

503 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

504 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

505 
/* tables */ 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

506 

53400  507 
def properties: Map[Symbol, Properties.T] = symbols.properties 
67311  508 
def names: Map[Symbol, (String, String)] = symbols.names 
50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset

509 
def groups: List[(String, List[Symbol])] = symbols.groups 
53316
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents:
53021
diff
changeset

510 
def abbrevs: Multi_Map[Symbol, String] = symbols.abbrevs 
66051  511 
def codes: List[(Symbol, Int)] = symbols.codes 
67389  512 
def groups_code: List[(String, List[Symbol])] = 
513 
{ 

514 
val has_code = codes.iterator.map(_._1).toSet 

515 
groups.flatMap({ case (group, symbols) => 

516 
val symbols1 = symbols.filter(has_code) 

517 
if (symbols1.isEmpty) None else Some((group, symbols1)) 

518 
}) 

519 
} 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

520 

67304
3cf05d7cf174
more robust treatment of conflicts with existing Unicode text;
wenzelm
parents:
67256
diff
changeset

521 
lazy val is_code: Int => Boolean = codes.map(_._2).toSet 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

522 
def decode(text: String): String = symbols.decode(text) 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

523 
def encode(text: String): String = symbols.encode(text) 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

524 

73270  525 
def decode_yxml(text: String, cache: XML.Cache = XML.Cache.none): XML.Body = 
526 
YXML.parse_body(decode(text), cache = cache) 

527 

528 
def decode_yxml_failsafe(text: String, cache: XML.Cache = XML.Cache.none): XML.Body = 

529 
YXML.parse_body_failsafe(decode(text), cache = cache) 

530 

65344
b99283eed13c
clarified YXML vs. symbol encoding: operate on whole message;
wenzelm
parents:
65335
diff
changeset

531 
def encode_yxml(body: XML.Body): String = encode(YXML.string_of_body(body)) 
53337
b3817a0e3211
sort items according to persistent history of frequency of use;
wenzelm
parents:
53316
diff
changeset

532 

50291
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

533 
def decode_strict(text: String): String = 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

534 
{ 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

535 
val decoded = decode(text) 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

536 
if (encode(decoded) == text) decoded 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

537 
else { 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

538 
val bad = new mutable.ListBuffer[Symbol] 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

539 
for (s < iterator(text) if encode(decode(s)) != s && !bad.contains(s)) 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

540 
bad += s 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

541 
error("Bad Unicode symbols in text: " + commas_quote(bad)) 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

542 
} 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

543 
} 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset

544 

73103  545 
def output(unicode_symbols: Boolean, text: String): String = 
546 
if (unicode_symbols) Symbol.decode(text) else Symbol.encode(text) 

547 

43696  548 
def fonts: Map[Symbol, String] = symbols.fonts 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

549 
def font_names: List[String] = symbols.font_names 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

550 
def font_index: Map[String, Int] = symbols.font_index 
43696  551 
def lookup_font(sym: Symbol): Option[Int] = symbols.fonts.get(sym).map(font_index(_)) 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

552 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

553 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

554 
/* classification */ 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

555 

43696  556 
def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym) 
557 
def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9' 

558 
def is_quasi(sym: Symbol): Boolean = sym == "_"  sym == "'" 

559 
def is_letdig(sym: Symbol): Boolean = is_letter(sym)  is_digit(sym)  is_quasi(sym) 

560 
def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym) 

44992
aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

561 

55033  562 

67438  563 
/* symbolic newline */ 
63529
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62529
diff
changeset

564 

0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62529
diff
changeset

565 
val newline: Symbol = "\\<newline>" 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62529
diff
changeset

566 
def newline_decoded: Symbol = symbols.newline_decoded 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62529
diff
changeset

567 

0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62529
diff
changeset

568 
def print_newlines(str: String): String = 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62529
diff
changeset

569 
if (str.contains('\n')) 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62529
diff
changeset

570 
(for (s < iterator(str)) yield { if (s == "\n") newline_decoded else s }).mkString 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62529
diff
changeset

571 
else str 
61579  572 

67438  573 

574 
/* formal comments */ 

575 

61579  576 
val comment: Symbol = "\\<comment>" 
67449  577 
val cancel: Symbol = "\\<^cancel>" 
578 
val latex: Symbol = "\\<^latex>" 

70072
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the commandspan;
wenzelm
parents:
70068
diff
changeset

579 
val marker: Symbol = "\\<^marker>" 
67449  580 

61579  581 
def comment_decoded: Symbol = symbols.comment_decoded 
67438  582 
def cancel_decoded: Symbol = symbols.cancel_decoded 
583 
def latex_decoded: Symbol = symbols.latex_decoded 

70072
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the commandspan;
wenzelm
parents:
70068
diff
changeset

584 
def marker_decoded: Symbol = symbols.marker_decoded 
61579  585 

586 

55033  587 
/* cartouches */ 
588 

61579  589 
val open: Symbol = "\\<open>" 
590 
val close: Symbol = "\\<close>" 

55033  591 

592 
def open_decoded: Symbol = symbols.open_decoded 

593 
def close_decoded: Symbol = symbols.close_decoded 

594 

595 
def is_open(sym: Symbol): Boolean = sym == open_decoded  sym == open 

596 
def is_close(sym: Symbol): Boolean = sym == close_decoded  sym == close 

597 

67131  598 
def cartouche(s: String): String = open + s + close 
599 
def cartouche_decoded(s: String): String = open_decoded + s + close_decoded 

600 

55033  601 

602 
/* symbols for symbolic identifiers */ 

44992
aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

603 

aa34d2d049ce
refined Symbol.is_symbolic  cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset

604 
private def raw_symbolic(sym: Symbol): Boolean = 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

605 
sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^") 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

606 

55033  607 
def is_symbolic(sym: Symbol): Boolean = 
608 
!is_open(sym) && !is_close(sym) && (raw_symbolic(sym)  symbols.symbolic.contains(sym)) 

609 

610 
def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym) 

611 

43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

612 

5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

613 
/* control symbols */ 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

614 

67127
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
wenzelm
parents:
66919
diff
changeset

615 
val control_prefix = "\\<^" 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
wenzelm
parents:
66919
diff
changeset

616 
val control_suffix = ">" 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
wenzelm
parents:
66919
diff
changeset

617 

67256
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
wenzelm
parents:
67131
diff
changeset

618 
def control_name(sym: Symbol): Option[String] = 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
wenzelm
parents:
67131
diff
changeset

619 
if (is_control_encoded(sym)) 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
wenzelm
parents:
67131
diff
changeset

620 
Some(sym.substring(control_prefix.length, sym.length  control_suffix.length)) 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
wenzelm
parents:
67131
diff
changeset

621 
else None 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
wenzelm
parents:
67131
diff
changeset

622 

67127
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
wenzelm
parents:
66919
diff
changeset

623 
def is_control_encoded(sym: Symbol): Boolean = 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
wenzelm
parents:
66919
diff
changeset

624 
sym.startsWith(control_prefix) && sym.endsWith(control_suffix) 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
wenzelm
parents:
66919
diff
changeset

625 

59107  626 
def is_control(sym: Symbol): Boolean = 
67127
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
wenzelm
parents:
66919
diff
changeset

627 
is_control_encoded(sym)  symbols.control_decoded.contains(sym) 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

628 

43696  629 
def is_controllable(sym: Symbol): Boolean = 
66006
cec184536dfd
uniform notion of Symbol.is_controllable (see also 265d9300d523);
wenzelm
parents:
65999
diff
changeset

630 
!is_blank(sym) && !is_control(sym) && !is_open(sym) && !is_close(sym) && 
cec184536dfd
uniform notion of Symbol.is_controllable (see also 265d9300d523);
wenzelm
parents:
65999
diff
changeset

631 
!is_malformed(sym) && sym != "\"" 
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation  reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset

632 

73454  633 
val sub: Symbol = "\\<^sub>" 
634 
val sup: Symbol = "\\<^sup>" 

635 
val bold: Symbol = "\\<^bold>" 

636 
val emph: Symbol = "\\<^emph>" 

637 
val bsub: Symbol = "\\<^bsub>" 

638 
val esub: Symbol = "\\<^esub>" 

639 
val bsup: Symbol = "\\<^bsup>" 

640 
val esup: Symbol = "\\<^esup>" 

62103  641 

44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

642 
def sub_decoded: Symbol = symbols.sub_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

643 
def sup_decoded: Symbol = symbols.sup_decoded 
62103  644 
def bold_decoded: Symbol = symbols.bold_decoded 
645 
def emph_decoded: Symbol = symbols.emph_decoded 

44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

646 
def bsub_decoded: Symbol = symbols.bsub_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

647 
def esub_decoded: Symbol = symbols.esub_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

648 
def bsup_decoded: Symbol = symbols.bsup_decoded 
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset

649 
def esup_decoded: Symbol = symbols.esup_decoded 
71864  650 

651 

652 
/* metric */ 

653 

654 
def is_printable(sym: Symbol): Boolean = 

655 
if (is_ascii(sym)) is_ascii_printable(sym(0)) 

656 
else !is_control(sym) 

657 

658 
object Metric extends Pretty.Metric 

659 
{ 

660 
val unit = 1.0 

661 
def apply(str: String): Double = 

662 
(for (s < iterator(str)) yield { 

663 
val sym = encode(s) 

664 
if (sym.startsWith("\\<long")  sym.startsWith("\\<Long")) 2 

665 
else if (is_printable(sym)) 1 

666 
else 0 

667 
}).sum 

668 
} 

27901  669 
} 