author | wenzelm |
Mon, 21 Aug 2023 15:54:08 +0200 | |
changeset 78556 | 20360824863a |
parent 76235 | 16c12979c132 |
child 78592 | fdfe9b91d96e |
permissions | -rw-r--r-- |
27901 | 1 |
/* Title: Pure/General/symbol.scala |
2 |
Author: Makarius |
|
3 |
||
69490 | 4 |
Isabelle text symbols. |
27901 | 5 |
*/ |
6 |
||
7 |
package isabelle |
|
8 |
||
55618 | 9 |
|
36011
3ff725ac13a4
adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
wenzelm
parents:
34316
diff
changeset
|
10 |
import scala.collection.mutable |
31522 | 11 |
import scala.util.matching.Regex |
48922 | 12 |
import scala.annotation.tailrec |
27901 | 13 |
|
14 |
||
75393 | 15 |
object Symbol { |
43696 | 16 |
type Symbol = String |
17 |
||
55884
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset
|
18 |
// counting Isabelle symbols, starting from 1 |
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset
|
19 |
type Offset = Text.Offset |
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset
|
20 |
type Range = Text.Range |
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset
|
21 |
|
43696 | 22 |
|
61865 | 23 |
/* spaces */ |
24 |
||
71649 | 25 |
val space_char = ' ' |
61865 | 26 |
val space = " " |
27 |
||
28 |
private val static_spaces = space * 4000 |
|
29 |
||
75393 | 30 |
def spaces(n: Int): String = { |
73120
c3589f2dff31
more informative errors: simplify diagnosis of spurious failures reported by users;
wenzelm
parents:
73030
diff
changeset
|
31 |
require(n >= 0, "negative spaces") |
61865 | 32 |
if (n < static_spaces.length) static_spaces.substring(0, n) |
33 |
else space * n |
|
34 |
} |
|
35 |
||
36 |
||
75238
e74d162ddf9f
clarified char symbols: cover most European languages;
wenzelm
parents:
75237
diff
changeset
|
37 |
/* char symbols */ |
e74d162ddf9f
clarified char symbols: cover most European languages;
wenzelm
parents:
75237
diff
changeset
|
38 |
|
e74d162ddf9f
clarified char symbols: cover most European languages;
wenzelm
parents:
75237
diff
changeset
|
39 |
private val char_symbols: Array[Symbol] = |
e74d162ddf9f
clarified char symbols: cover most European languages;
wenzelm
parents:
75237
diff
changeset
|
40 |
(0 until 0x500).iterator.map(i => new String(Array(i.toChar))).toArray |
e74d162ddf9f
clarified char symbols: cover most European languages;
wenzelm
parents:
75237
diff
changeset
|
41 |
|
e74d162ddf9f
clarified char symbols: cover most European languages;
wenzelm
parents:
75237
diff
changeset
|
42 |
def char_symbol(c: Char): String = |
e74d162ddf9f
clarified char symbols: cover most European languages;
wenzelm
parents:
75237
diff
changeset
|
43 |
if (c < char_symbols.length) char_symbols(c) |
e74d162ddf9f
clarified char symbols: cover most European languages;
wenzelm
parents:
75237
diff
changeset
|
44 |
else c.toString |
e74d162ddf9f
clarified char symbols: cover most European languages;
wenzelm
parents:
75237
diff
changeset
|
45 |
|
e74d162ddf9f
clarified char symbols: cover most European languages;
wenzelm
parents:
75237
diff
changeset
|
46 |
|
43418 | 47 |
/* ASCII characters */ |
48 |
||
71649 | 49 |
def is_ascii_printable(c: Char): Boolean = space_char <= c && c <= '~' |
50 |
||
43418 | 51 |
def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' |
55497 | 52 |
|
43418 | 53 |
def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9' |
55497 | 54 |
|
55 |
def is_ascii_hex(c: Char): Boolean = |
|
56 |
'0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' |
|
57 |
||
43418 | 58 |
def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\'' |
59 |
||
55497 | 60 |
def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c) |
61 |
||
69448 | 62 |
def is_ascii_line_terminator(c: Char): Boolean = "\r\n".contains(c) |
63 |
||
43418 | 64 |
def is_ascii_letdig(c: Char): Boolean = |
65 |
is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c) |
|
66 |
||
67 |
def is_ascii_identifier(s: String): Boolean = |
|
75192
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
68 |
s.nonEmpty && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig) |
43418 | 69 |
|
75393 | 70 |
def ascii(c: Char): Symbol = { |
62528 | 71 |
if (c > 127) error("Non-ASCII character: " + quote(c.toString)) |
75238
e74d162ddf9f
clarified char symbols: cover most European languages;
wenzelm
parents:
75237
diff
changeset
|
72 |
else char_symbol(c) |
62528 | 73 |
} |
74 |
||
66919 | 75 |
def is_ascii(s: Symbol): Boolean = s.length == 1 && s(0) < 128 |
76 |
||
43418 | 77 |
|
48775 | 78 |
/* symbol matching */ |
27901 | 79 |
|
48773
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
80 |
def is_malformed(s: Symbol): Boolean = |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
81 |
s.length match { |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
82 |
case 1 => |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
83 |
val c = s(0) |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
84 |
Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd' |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
85 |
case 2 => |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
86 |
val c1 = s(0) |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
87 |
val c2 = s(1) |
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
88 |
!(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2)) |
48774 | 89 |
case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>" |
48773
0e1bab274672
more liberal scanning of potentially malformed symbols;
wenzelm
parents:
48704
diff
changeset
|
90 |
} |
34137 | 91 |
|
54734
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
wenzelm
parents:
53400
diff
changeset
|
92 |
def is_newline(s: Symbol): Boolean = |
43675
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents:
43511
diff
changeset
|
93 |
s == "\n" || s == "\r" || s == "\r\n" |
38877 | 94 |
|
75393 | 95 |
class Matcher(text: CharSequence) { |
75237
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
96 |
private def ok(i: Int): Boolean = 0 <= i && i < text.length |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
97 |
private def char(i: Int): Char = if (ok(i)) text.charAt(i) else 0 |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
98 |
private def maybe_char(c: Char, i: Int): Int = if (char(i) == c) i + 1 else i |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
99 |
|
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
100 |
@tailrec private def many_ascii_letdig(i: Int): Int = |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
101 |
if (is_ascii_letdig(char(i))) many_ascii_letdig(i + 1) else i |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
102 |
private def maybe_ascii_id(i: Int): Int = |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
103 |
if (is_ascii_letter(char(i))) many_ascii_letdig(i + 1) else i |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
104 |
|
75393 | 105 |
def match_length(i: Int): Int = { |
75237
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
106 |
val a = char(i) |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
107 |
val b = char(i + 1) |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
108 |
|
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
109 |
if (Character.isHighSurrogate(a) && Character.isLowSurrogate(b) || a == '\r' && b == '\n') 2 |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
110 |
else if (a == '\\' && b == '<') maybe_char('>', maybe_ascii_id(maybe_char('^', i + 2))) - i |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
111 |
else if (ok(i)) 1 |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
112 |
else 0 |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
113 |
} |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
114 |
|
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
115 |
def match_symbol(i: Int): String = |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
116 |
match_length(i) match { |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
117 |
case 0 => "" |
75238
e74d162ddf9f
clarified char symbols: cover most European languages;
wenzelm
parents:
75237
diff
changeset
|
118 |
case 1 => char_symbol(text.charAt(i)) |
e74d162ddf9f
clarified char symbols: cover most European languages;
wenzelm
parents:
75237
diff
changeset
|
119 |
case n => text.subSequence(i, i + n).toString |
34137 | 120 |
} |
31522 | 121 |
} |
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
122 |
|
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
123 |
|
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
124 |
/* iterator */ |
33998 | 125 |
|
43696 | 126 |
def iterator(text: CharSequence): Iterator[Symbol] = |
75393 | 127 |
new Iterator[Symbol] { |
43489 | 128 |
private val matcher = new Matcher(text) |
129 |
private var i = 0 |
|
71601 | 130 |
def hasNext: Boolean = i < text.length |
75393 | 131 |
def next(): Symbol = { |
75237
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
132 |
val s = matcher.match_symbol(i) |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
133 |
i += s.length |
43489 | 134 |
s |
135 |
} |
|
33998 | 136 |
} |
43489 | 137 |
|
44949 | 138 |
def explode(text: CharSequence): List[Symbol] = iterator(text).toList |
139 |
||
64615 | 140 |
def length(text: CharSequence): Int = iterator(text).length |
64617 | 141 |
|
67435
f83c1842a559
trim blanks -- more thoroughly than in update_cartouches (for single-line comments);
wenzelm
parents:
67389
diff
changeset
|
142 |
def trim_blanks(text: CharSequence): String = |
71601 | 143 |
Library.trim(is_blank, explode(text)).mkString |
67435
f83c1842a559
trim blanks -- more thoroughly than in update_cartouches (for single-line comments);
wenzelm
parents:
67389
diff
changeset
|
144 |
|
69318 | 145 |
def all_blank(str: String): Boolean = |
71601 | 146 |
iterator(str).forall(is_blank) |
69318 | 147 |
|
148 |
def trim_blank_lines(text: String): String = |
|
149 |
cat_lines(split_lines(text).dropWhile(all_blank).reverse.dropWhile(all_blank).reverse) |
|
150 |
||
33998 | 151 |
|
152 |
/* decoding offsets */ |
|
153 |
||
75393 | 154 |
object Index { |
56471
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset
|
155 |
private sealed case class Entry(chr: Int, sym: Int) |
52507 | 156 |
|
56472 | 157 |
val empty: Index = new Index(Nil) |
56471
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset
|
158 |
|
75393 | 159 |
def apply(text: CharSequence): Index = { |
34137 | 160 |
val matcher = new Matcher(text) |
56471
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset
|
161 |
val buf = new mutable.ListBuffer[Entry] |
31929 | 162 |
var chr = 0 |
163 |
var sym = 0 |
|
33998 | 164 |
while (chr < text.length) { |
75237
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
165 |
val n = matcher.match_length(chr) |
34137 | 166 |
chr += n |
31929 | 167 |
sym += 1 |
34137 | 168 |
if (n > 1) buf += Entry(chr, sym) |
31929 | 169 |
} |
56472 | 170 |
if (buf.isEmpty) empty else new Index(buf.toList) |
31929 | 171 |
} |
56471
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
wenzelm
parents:
56338
diff
changeset
|
172 |
} |
55430 | 173 |
|
75393 | 174 |
final class Index private(entries: List[Index.Entry]) { |
56472 | 175 |
private val hash: Int = entries.hashCode |
176 |
private val index: Array[Index.Entry] = entries.toArray |
|
177 |
||
75393 | 178 |
def decode(symbol_offset: Offset): Text.Offset = { |
55884
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents:
55618
diff
changeset
|
179 |
val sym = symbol_offset - 1 |
31929 | 180 |
val end = index.length |
75393 | 181 |
@tailrec def bisect(a: Int, b: Int): Int = { |
31929 | 182 |
if (a < b) { |
183 |
val c = (a + b) / 2 |
|
184 |
if (sym < index(c).sym) bisect(a, c) |
|
185 |
else if (c + 1 == end || sym < index(c + 1).sym) c |
|
186 |
else bisect(c + 1, b) |
|
187 |
} |
|
188 |
else -1 |
|
189 |
} |
|
190 |
val i = bisect(0, end) |
|
191 |
if (i < 0) sym |
|
192 |
else index(i).chr + sym - index(i).sym |
|
193 |
} |
|
71601 | 194 |
def decode(symbol_range: Range): Text.Range = symbol_range.map(decode) |
56335
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset
|
195 |
|
56338
f968f4e3d520
proper structural hashCode, which is required for Command.File equals (NB: Array has physical object identity);
wenzelm
parents:
56335
diff
changeset
|
196 |
override def hashCode: Int = hash |
56335
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset
|
197 |
override def equals(that: Any): Boolean = |
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset
|
198 |
that match { |
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset
|
199 |
case other: Index => index.sameElements(other.index) |
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset
|
200 |
case _ => false |
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
wenzelm
parents:
55884
diff
changeset
|
201 |
} |
31929 | 202 |
} |
203 |
||
204 |
||
64477 | 205 |
/* symbolic text chunks -- without actual text */ |
56746 | 206 |
|
75393 | 207 |
object Text_Chunk { |
56746 | 208 |
sealed abstract class Name |
209 |
case object Default extends Name |
|
210 |
case class Id(id: Document_ID.Generic) extends Name |
|
211 |
case class File(name: String) extends Name |
|
212 |
||
213 |
def apply(text: CharSequence): Text_Chunk = |
|
76235 | 214 |
new Text_Chunk(Text.Range.length(text), Index(text)) |
56746 | 215 |
} |
216 |
||
75393 | 217 |
final class Text_Chunk private(val range: Text.Range, private val index: Index) { |
56746 | 218 |
override def hashCode: Int = (range, index).hashCode |
219 |
override def equals(that: Any): Boolean = |
|
220 |
that match { |
|
221 |
case other: Text_Chunk => |
|
222 |
range == other.range && |
|
223 |
index == other.index |
|
224 |
case _ => false |
|
225 |
} |
|
226 |
||
57840 | 227 |
override def toString: String = "Text_Chunk" + range.toString |
228 |
||
56746 | 229 |
def decode(symbol_offset: Offset): Text.Offset = index.decode(symbol_offset) |
230 |
def decode(symbol_range: Range): Text.Range = index.decode(symbol_range) |
|
75393 | 231 |
def incorporate(symbol_range: Range): Option[Text.Range] = { |
75659
9bd92ac9328f
more robust Scala 3 indentation, for the sake of IntelliJ IDEA;
wenzelm
parents:
75393
diff
changeset
|
232 |
def in(r: Range): Option[Text.Range] = { |
56746 | 233 |
range.try_restrict(decode(r)) match { |
234 |
case Some(r1) if !r1.is_singularity => Some(r1) |
|
235 |
case _ => None |
|
236 |
} |
|
75659
9bd92ac9328f
more robust Scala 3 indentation, for the sake of IntelliJ IDEA;
wenzelm
parents:
75393
diff
changeset
|
237 |
} |
9bd92ac9328f
more robust Scala 3 indentation, for the sake of IntelliJ IDEA;
wenzelm
parents:
75393
diff
changeset
|
238 |
in(symbol_range) orElse in(symbol_range - 1) |
56746 | 239 |
} |
240 |
} |
|
241 |
||
242 |
||
33998 | 243 |
/* recoding text */ |
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
244 |
|
75393 | 245 |
private class Recoder(list: List[(String, String)]) { |
246 |
private val (min, max) = { |
|
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
247 |
var min = '\uffff' |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
248 |
var max = '\u0000' |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
249 |
for ((x, _) <- list) { |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
250 |
val c = x(0) |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
251 |
if (c < min) min = c |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
252 |
if (c > max) max = c |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
253 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
254 |
(min, max) |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
255 |
} |
75393 | 256 |
private val table = { |
40443 | 257 |
var tab = Map[String, String]() |
258 |
for ((x, y) <- list) { |
|
259 |
tab.get(x) match { |
|
260 |
case None => tab += (x -> y) |
|
261 |
case Some(z) => |
|
62230 | 262 |
error("Duplicate symbol mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z)) |
40443 | 263 |
} |
264 |
} |
|
265 |
tab |
|
266 |
} |
|
75393 | 267 |
def recode(text: String): String = { |
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
268 |
val len = text.length |
75237
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
269 |
val matcher = new Symbol.Matcher(text) |
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
270 |
val result = new StringBuilder(len) |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
271 |
var i = 0 |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
272 |
while (i < len) { |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
273 |
val c = text(i) |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
274 |
if (min <= c && c <= max) { |
75237
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
275 |
val s = matcher.match_symbol(i) |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
276 |
result.append(table.getOrElse(s, s)) |
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents:
75199
diff
changeset
|
277 |
i += s.length |
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
278 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
279 |
else { result.append(c); i += 1 } |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
280 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
281 |
result.toString |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
282 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
283 |
} |
27924 | 284 |
|
27918 | 285 |
|
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
286 |
|
75194 | 287 |
/** defined symbols **/ |
27927 | 288 |
|
75393 | 289 |
object Argument extends Enumeration { |
75194 | 290 |
val none, cartouche, space_cartouche = Value |
67311 | 291 |
|
75194 | 292 |
def unapply(s: String): Option[Value] = |
293 |
try { Some(withName(s)) } |
|
294 |
catch { case _: NoSuchElementException => None} |
|
75197 | 295 |
} |
75193 | 296 |
|
75393 | 297 |
object Entry { |
75197 | 298 |
private val Name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""") |
299 |
private val Argument = new Properties.String("argument") |
|
300 |
private val Abbrev = new Properties.String("abbrev") |
|
301 |
private val Code = new Properties.String("code") |
|
302 |
private val Font = new Properties.String("font") |
|
303 |
private val Group = new Properties.String("group") |
|
304 |
||
75393 | 305 |
def apply(symbol: Symbol, props: Properties.T): Entry = { |
75197 | 306 |
def err(msg: String): Nothing = error(msg + " for symbol " + quote(symbol)) |
307 |
||
308 |
val name = |
|
309 |
symbol match { case Name(a) => a case _ => err("Cannot determine name") } |
|
310 |
||
311 |
val argument = |
|
312 |
props match { |
|
313 |
case Argument(arg) => |
|
314 |
Symbol.Argument.unapply(arg) getOrElse |
|
315 |
error("Bad argument: " + quote(arg) + " for symbol " + quote(symbol)) |
|
316 |
case _ => Symbol.Argument.none |
|
317 |
} |
|
318 |
||
319 |
val code = |
|
320 |
props match { |
|
321 |
case Code(s) => |
|
322 |
try { |
|
323 |
val code = Integer.decode(s).intValue |
|
324 |
if (code >= 128) Some(code) else err("Illegal ASCII code") |
|
325 |
} |
|
326 |
catch { case _: NumberFormatException => err("Bad code") } |
|
327 |
case _ => None |
|
328 |
} |
|
329 |
||
330 |
val groups = proper_list(for ((Group.name, a) <- props) yield a).getOrElse(List("unsorted")) |
|
331 |
||
332 |
val abbrevs = for ((Abbrev.name, a) <- props) yield a |
|
333 |
||
334 |
new Entry(symbol, name, argument, code, Font.unapply(props), groups, abbrevs) |
|
335 |
} |
|
336 |
} |
|
337 |
||
338 |
class Entry private( |
|
339 |
val symbol: Symbol, |
|
340 |
val name: String, |
|
341 |
val argument: Symbol.Argument.Value, |
|
342 |
val code: Option[Int], |
|
343 |
val font: Option[String], |
|
344 |
val groups: List[String], |
|
75393 | 345 |
val abbrevs: List[String] |
346 |
) { |
|
75197 | 347 |
override def toString: String = symbol |
348 |
||
349 |
val decode: Option[String] = |
|
350 |
code.map(c => new String(Character.toChars(c))) |
|
75194 | 351 |
} |
352 |
||
75195 | 353 |
lazy val symbols: Symbols = Symbols.load() |
75194 | 354 |
|
75393 | 355 |
object Symbols { |
356 |
def load(static: Boolean = false): Symbols = { |
|
75252
41dfe941c3da
inline Isabelle symbols into source text, so that "isabelle vscode" can start up properly without access to process.env or fs;
wenzelm
parents:
75238
diff
changeset
|
357 |
val paths = |
41dfe941c3da
inline Isabelle symbols into source text, so that "isabelle vscode" can start up properly without access to process.env or fs;
wenzelm
parents:
75238
diff
changeset
|
358 |
if (static) List(Path.explode("~~/etc/symbols")) |
41dfe941c3da
inline Isabelle symbols into source text, so that "isabelle vscode" can start up properly without access to process.env or fs;
wenzelm
parents:
75238
diff
changeset
|
359 |
else Path.split(Isabelle_System.getenv("ISABELLE_SYMBOLS")) |
41dfe941c3da
inline Isabelle symbols into source text, so that "isabelle vscode" can start up properly without access to process.env or fs;
wenzelm
parents:
75238
diff
changeset
|
360 |
make(cat_lines(for (path <- paths if path.is_file) yield File.read(path))) |
75193 | 361 |
} |
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
362 |
|
75393 | 363 |
def make(symbols_spec: String): Symbols = { |
75193 | 364 |
val No_Decl = new Regex("""(?xs) ^\s* (?: \#.* )? $ """) |
365 |
val Key = new Regex("""(?xs) (.+): """) |
|
31522 | 366 |
|
75393 | 367 |
def read_decl(decl: String): (Symbol, Properties.T) = { |
75193 | 368 |
def err() = error("Bad symbol declaration: " + decl) |
31522 | 369 |
|
75393 | 370 |
def read_props(props: List[String]): Properties.T = { |
75193 | 371 |
props match { |
372 |
case Nil => Nil |
|
373 |
case _ :: Nil => err() |
|
374 |
case Key(x) :: y :: rest => (x -> y.replace('\u2423', ' ')) :: read_props(rest) |
|
375 |
case _ => err() |
|
376 |
} |
|
377 |
} |
|
378 |
decl.split("\\s+").toList match { |
|
379 |
case sym :: props if sym.length > 1 && !is_malformed(sym) => |
|
380 |
(sym, read_props(props)) |
|
31522 | 381 |
case _ => err() |
382 |
} |
|
383 |
} |
|
384 |
||
75197 | 385 |
new Symbols( |
75193 | 386 |
split_lines(symbols_spec).reverse. |
75197 | 387 |
foldLeft((List.empty[Entry], Set.empty[Symbol])) { |
75193 | 388 |
case (res, No_Decl()) => res |
389 |
case ((list, known), decl) => |
|
390 |
val (sym, props) = read_decl(decl) |
|
391 |
if (known(sym)) (list, known) |
|
75197 | 392 |
else (Entry(sym, props) :: list, known + sym) |
393 |
}._1) |
|
75193 | 394 |
} |
395 |
} |
|
31522 | 396 |
|
75393 | 397 |
class Symbols(val entries: List[Entry]) { |
75197 | 398 |
override def toString: String = entries.mkString("Symbols(", ", ", ")") |
399 |
||
400 |
||
53400 | 401 |
/* basic properties */ |
402 |
||
75197 | 403 |
private val entries_map: Map[Symbol, Entry] = |
404 |
(for (entry <- entries.iterator) yield entry.symbol -> entry).toMap |
|
31651 | 405 |
|
75197 | 406 |
def get(sym: Symbol): Option[Entry] = entries_map.get(sym) |
407 |
def defined(sym: Symbol): Boolean = entries_map.isDefinedAt(sym) |
|
408 |
||
409 |
def defined_code(sym: Symbol): Boolean = |
|
410 |
get(sym) match { case Some(entry) => entry.code.isDefined case _ => false } |
|
31651 | 411 |
|
75197 | 412 |
val code_defined: Int => Boolean = |
413 |
(for (entry <- entries.iterator; code <- entry.code) yield code).toSet |
|
50136
a96bd08258a2
support for symbol groups, retaining original order of declarations;
wenzelm
parents:
48922
diff
changeset
|
414 |
|
75197 | 415 |
val groups_code: List[(String, List[Symbol])] = |
416 |
(for { |
|
417 |
entry <- entries.iterator if entry.code.isDefined |
|
418 |
group <- entry.groups.iterator |
|
419 |
} yield entry.symbol -> group) |
|
420 |
.toList.groupBy(_._2).toList.map({ case (g, list) => (g, list.map(_._1)) }).sortBy(_._1) |
|
75195 | 421 |
|
75199 | 422 |
def get_abbrevs(sym: Symbol): List[String] = |
423 |
get(sym) match { case Some(entry) => entry.abbrevs case _ => Nil } |
|
75195 | 424 |
|
43488 | 425 |
|
43490 | 426 |
/* recoding */ |
31522 | 427 |
|
75393 | 428 |
private val (decoder, encoder) = { |
31522 | 429 |
val mapping = |
75197 | 430 |
for (entry <- entries; s <- entry.decode) yield entry.symbol -> s |
61376
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents:
61174
diff
changeset
|
431 |
(new Recoder(mapping), new Recoder(for ((x, y) <- mapping) yield (y, x))) |
31522 | 432 |
} |
27918 | 433 |
|
34098 | 434 |
def decode(text: String): String = decoder.recode(text) |
435 |
def encode(text: String): String = encoder.recode(text) |
|
34134 | 436 |
|
75198 | 437 |
private def recode_set(elems: Iterable[String]): Set[String] = |
438 |
(elems.iterator ++ elems.iterator.map(decode)).toSet |
|
43490 | 439 |
|
75198 | 440 |
private def recode_map[A](elems: Iterable[(String, A)]): Map[String, A] = |
441 |
(elems.iterator ++ elems.iterator.map({ case (sym, a) => (decode(sym), a) })).toMap |
|
43490 | 442 |
|
443 |
||
444 |
/* user fonts */ |
|
445 |
||
43696 | 446 |
val fonts: Map[Symbol, String] = |
75198 | 447 |
recode_map(for (entry <- entries; font <- entry.font) yield entry.symbol -> font) |
43490 | 448 |
|
75192
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
449 |
val font_names: List[String] = fonts.iterator.map(_._2).toSet.toList |
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
450 |
val font_index: Map[String, Int] = (font_names zip font_names.indices.toList).toMap |
43490 | 451 |
|
75195 | 452 |
def lookup_font(sym: Symbol): Option[Int] = fonts.get(sym).map(font_index(_)) |
453 |
||
34134 | 454 |
|
455 |
/* classification */ |
|
456 |
||
75198 | 457 |
val letters: Set[String] = recode_set(List( |
34134 | 458 |
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", |
459 |
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", |
|
460 |
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", |
|
461 |
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", |
|
462 |
||
463 |
"\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>", |
|
464 |
"\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>", |
|
465 |
"\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>", |
|
466 |
"\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>", |
|
467 |
"\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>", |
|
468 |
"\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>", |
|
469 |
"\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>", |
|
470 |
"\\<x>", "\\<y>", "\\<z>", |
|
471 |
||
472 |
"\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>", |
|
473 |
"\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>", |
|
474 |
"\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>", |
|
475 |
"\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>", |
|
476 |
"\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>", |
|
477 |
"\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>", |
|
478 |
"\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>", |
|
479 |
"\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>", |
|
480 |
"\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>", |
|
481 |
||
482 |
"\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>", |
|
483 |
"\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>", |
|
484 |
"\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>", |
|
485 |
"\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>", |
|
486 |
"\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>", |
|
487 |
"\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>", |
|
75198 | 488 |
"\\<Psi>", "\\<Omega>")) |
34134 | 489 |
|
75198 | 490 |
val blanks: Set[String] = recode_set(List(space, "\t", "\n", "\u000B", "\f", "\r", "\r\n")) |
34138 | 491 |
|
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
492 |
val sym_chars = |
34138 | 493 |
Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~") |
34134 | 494 |
|
75197 | 495 |
val symbolic: Set[String] = |
75198 | 496 |
recode_set(for (entry <- entries if raw_symbolic(entry.symbol)) yield entry.symbol) |
44992
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset
|
497 |
|
43455 | 498 |
|
63528
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
499 |
/* misc symbols */ |
61579 | 500 |
|
75192
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
501 |
val newline_decoded: Symbol = decode(newline) |
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
502 |
val comment_decoded: Symbol = decode(comment) |
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
503 |
val cancel_decoded: Symbol = decode(cancel) |
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
504 |
val latex_decoded: Symbol = decode(latex) |
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
505 |
val marker_decoded: Symbol = decode(marker) |
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
506 |
val open_decoded: Symbol = decode(open) |
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
507 |
val close_decoded: Symbol = decode(close) |
55033 | 508 |
|
509 |
||
43488 | 510 |
/* control symbols */ |
511 |
||
59107 | 512 |
val control_decoded: Set[Symbol] = |
75197 | 513 |
(for (entry <- entries.iterator if entry.symbol.startsWith("\\<^"); s <- entry.decode) |
514 |
yield s).toSet |
|
43488 | 515 |
|
75192
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
516 |
val sub_decoded: Symbol = decode(sub) |
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
517 |
val sup_decoded: Symbol = decode(sup) |
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
518 |
val bold_decoded: Symbol = decode(bold) |
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
519 |
val emph_decoded: Symbol = decode(emph) |
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
520 |
val bsub_decoded: Symbol = decode(bsub) |
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
521 |
val esub_decoded: Symbol = decode(esub) |
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
522 |
val bsup_decoded: Symbol = decode(bsup) |
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents:
73359
diff
changeset
|
523 |
val esup_decoded: Symbol = decode(esup) |
27918 | 524 |
} |
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
525 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
526 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
527 |
/* tables */ |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
528 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
529 |
def decode(text: String): String = symbols.decode(text) |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
530 |
def encode(text: String): String = symbols.encode(text) |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
531 |
|
73030 | 532 |
def decode_yxml(text: String, cache: XML.Cache = XML.Cache.none): XML.Body = |
533 |
YXML.parse_body(decode(text), cache = cache) |
|
534 |
||
535 |
def decode_yxml_failsafe(text: String, cache: XML.Cache = XML.Cache.none): XML.Body = |
|
536 |
YXML.parse_body_failsafe(decode(text), cache = cache) |
|
537 |
||
65344
b99283eed13c
clarified YXML vs. symbol encoding: operate on whole message;
wenzelm
parents:
65335
diff
changeset
|
538 |
def encode_yxml(body: XML.Body): String = encode(YXML.string_of_body(body)) |
53337
b3817a0e3211
sort items according to persistent history of frequency of use;
wenzelm
parents:
53316
diff
changeset
|
539 |
|
75393 | 540 |
def decode_strict(text: String): String = { |
50291
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
541 |
val decoded = decode(text) |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
542 |
if (encode(decoded) == text) decoded |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
543 |
else { |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
544 |
val bad = new mutable.ListBuffer[Symbol] |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
545 |
for (s <- iterator(text) if encode(decode(s)) != s && !bad.contains(s)) |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
546 |
bad += s |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
547 |
error("Bad Unicode symbols in text: " + commas_quote(bad)) |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
548 |
} |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
549 |
} |
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents:
50238
diff
changeset
|
550 |
|
72866 | 551 |
def output(unicode_symbols: Boolean, text: String): String = |
552 |
if (unicode_symbols) Symbol.decode(text) else Symbol.encode(text) |
|
553 |
||
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
554 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
555 |
/* classification */ |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
556 |
|
43696 | 557 |
def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym) |
558 |
def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9' |
|
559 |
def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'" |
|
560 |
def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym) |
|
561 |
def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym) |
|
44992
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset
|
562 |
|
55033 | 563 |
|
67438 | 564 |
/* symbolic newline */ |
63528
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
565 |
|
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
566 |
val newline: Symbol = "\\<newline>" |
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
567 |
def newline_decoded: Symbol = symbols.newline_decoded |
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
568 |
|
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
569 |
def print_newlines(str: String): String = |
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
570 |
if (str.contains('\n')) |
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
571 |
(for (s <- iterator(str)) yield { if (s == "\n") newline_decoded else s }).mkString |
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
wenzelm
parents:
62528
diff
changeset
|
572 |
else str |
61579 | 573 |
|
67438 | 574 |
|
575 |
/* formal comments */ |
|
576 |
||
61579 | 577 |
val comment: Symbol = "\\<comment>" |
67449 | 578 |
val cancel: Symbol = "\\<^cancel>" |
579 |
val latex: Symbol = "\\<^latex>" |
|
69891
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
wenzelm
parents:
69887
diff
changeset
|
580 |
val marker: Symbol = "\\<^marker>" |
67449 | 581 |
|
61579 | 582 |
def comment_decoded: Symbol = symbols.comment_decoded |
67438 | 583 |
def cancel_decoded: Symbol = symbols.cancel_decoded |
584 |
def latex_decoded: Symbol = symbols.latex_decoded |
|
69891
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
wenzelm
parents:
69887
diff
changeset
|
585 |
def marker_decoded: Symbol = symbols.marker_decoded |
61579 | 586 |
|
587 |
||
55033 | 588 |
/* cartouches */ |
589 |
||
61579 | 590 |
val open: Symbol = "\\<open>" |
591 |
val close: Symbol = "\\<close>" |
|
55033 | 592 |
|
593 |
def open_decoded: Symbol = symbols.open_decoded |
|
594 |
def close_decoded: Symbol = symbols.close_decoded |
|
595 |
||
596 |
def is_open(sym: Symbol): Boolean = sym == open_decoded || sym == open |
|
597 |
def is_close(sym: Symbol): Boolean = sym == close_decoded || sym == close |
|
598 |
||
67131 | 599 |
def cartouche(s: String): String = open + s + close |
600 |
def cartouche_decoded(s: String): String = open_decoded + s + close_decoded |
|
601 |
||
55033 | 602 |
|
603 |
/* symbols for symbolic identifiers */ |
|
44992
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset
|
604 |
|
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents:
44949
diff
changeset
|
605 |
private def raw_symbolic(sym: Symbol): Boolean = |
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
606 |
sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^") |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
607 |
|
55033 | 608 |
def is_symbolic(sym: Symbol): Boolean = |
609 |
!is_open(sym) && !is_close(sym) && (raw_symbolic(sym) || symbols.symbolic.contains(sym)) |
|
610 |
||
611 |
def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym) |
|
612 |
||
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
613 |
|
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
614 |
/* control symbols */ |
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
615 |
|
67127
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
wenzelm
parents:
66919
diff
changeset
|
616 |
val control_prefix = "\\<^" |
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
wenzelm
parents:
66919
diff
changeset
|
617 |
val control_suffix = ">" |
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
wenzelm
parents:
66919
diff
changeset
|
618 |
|
67255
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
wenzelm
parents:
67131
diff
changeset
|
619 |
def control_name(sym: Symbol): Option[String] = |
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
wenzelm
parents:
67131
diff
changeset
|
620 |
if (is_control_encoded(sym)) |
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
wenzelm
parents:
67131
diff
changeset
|
621 |
Some(sym.substring(control_prefix.length, sym.length - control_suffix.length)) |
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
wenzelm
parents:
67131
diff
changeset
|
622 |
else None |
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
wenzelm
parents:
67131
diff
changeset
|
623 |
|
67127
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
wenzelm
parents:
66919
diff
changeset
|
624 |
def is_control_encoded(sym: Symbol): Boolean = |
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
wenzelm
parents:
66919
diff
changeset
|
625 |
sym.startsWith(control_prefix) && sym.endsWith(control_suffix) |
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
wenzelm
parents:
66919
diff
changeset
|
626 |
|
59107 | 627 |
def is_control(sym: Symbol): Boolean = |
67127
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
wenzelm
parents:
66919
diff
changeset
|
628 |
is_control_encoded(sym) || symbols.control_decoded.contains(sym) |
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
629 |
|
43696 | 630 |
def is_controllable(sym: Symbol): Boolean = |
66006
cec184536dfd
uniform notion of Symbol.is_controllable (see also 265d9300d523);
wenzelm
parents:
65997
diff
changeset
|
631 |
!is_blank(sym) && !is_control(sym) && !is_open(sym) && !is_close(sym) && |
cec184536dfd
uniform notion of Symbol.is_controllable (see also 265d9300d523);
wenzelm
parents:
65997
diff
changeset
|
632 |
!is_malformed(sym) && sym != "\"" |
43695
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents:
43675
diff
changeset
|
633 |
|
73208 | 634 |
val sub: Symbol = "\\<^sub>" |
635 |
val sup: Symbol = "\\<^sup>" |
|
636 |
val bold: Symbol = "\\<^bold>" |
|
637 |
val emph: Symbol = "\\<^emph>" |
|
638 |
val bsub: Symbol = "\\<^bsub>" |
|
639 |
val esub: Symbol = "\\<^esub>" |
|
640 |
val bsup: Symbol = "\\<^bsup>" |
|
641 |
val esup: Symbol = "\\<^esup>" |
|
62103 | 642 |
|
44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset
|
643 |
def sub_decoded: Symbol = symbols.sub_decoded |
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset
|
644 |
def sup_decoded: Symbol = symbols.sup_decoded |
62103 | 645 |
def bold_decoded: Symbol = symbols.bold_decoded |
646 |
def emph_decoded: Symbol = symbols.emph_decoded |
|
44238
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset
|
647 |
def bsub_decoded: Symbol = symbols.bsub_decoded |
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset
|
648 |
def esub_decoded: Symbol = symbols.esub_decoded |
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset
|
649 |
def bsup_decoded: Symbol = symbols.bsup_decoded |
36120feb70ed
some convenience actions/shortcuts for control symbols;
wenzelm
parents:
44181
diff
changeset
|
650 |
def esup_decoded: Symbol = symbols.esup_decoded |
71649 | 651 |
|
652 |
||
653 |
/* metric */ |
|
654 |
||
655 |
def is_printable(sym: Symbol): Boolean = |
|
656 |
if (is_ascii(sym)) is_ascii_printable(sym(0)) |
|
657 |
else !is_control(sym) |
|
658 |
||
75393 | 659 |
object Metric extends Pretty.Metric { |
71649 | 660 |
val unit = 1.0 |
661 |
def apply(str: String): Double = |
|
662 |
(for (s <- iterator(str)) yield { |
|
663 |
val sym = encode(s) |
|
664 |
if (sym.startsWith("\\<long") || sym.startsWith("\\<Long")) 2 |
|
665 |
else if (is_printable(sym)) 1 |
|
666 |
else 0 |
|
667 |
}).sum |
|
668 |
} |
|
27901 | 669 |
} |