| author | wenzelm | 
| Tue, 24 Jan 2023 17:16:00 +0100 | |
| changeset 77075 | 973de7855948 | 
| parent 76235 | 16c12979c132 | 
| child 78592 | fdfe9b91d96e | 
| permissions | -rw-r--r-- | 
| 27901 | 1  | 
/* Title: Pure/General/symbol.scala  | 
2  | 
Author: Makarius  | 
|
3  | 
||
| 69490 | 4  | 
Isabelle text symbols.  | 
| 27901 | 5  | 
*/  | 
6  | 
||
7  | 
package isabelle  | 
|
8  | 
||
| 55618 | 9  | 
|
| 
36011
 
3ff725ac13a4
adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
 
wenzelm 
parents: 
34316 
diff
changeset
 | 
10  | 
import scala.collection.mutable  | 
| 31522 | 11  | 
import scala.util.matching.Regex  | 
| 48922 | 12  | 
import scala.annotation.tailrec  | 
| 27901 | 13  | 
|
14  | 
||
| 75393 | 15  | 
object Symbol {
 | 
| 43696 | 16  | 
type Symbol = String  | 
17  | 
||
| 
55884
 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 
wenzelm 
parents: 
55618 
diff
changeset
 | 
18  | 
// counting Isabelle symbols, starting from 1  | 
| 
 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 
wenzelm 
parents: 
55618 
diff
changeset
 | 
19  | 
type Offset = Text.Offset  | 
| 
 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 
wenzelm 
parents: 
55618 
diff
changeset
 | 
20  | 
type Range = Text.Range  | 
| 
 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 
wenzelm 
parents: 
55618 
diff
changeset
 | 
21  | 
|
| 43696 | 22  | 
|
| 61865 | 23  | 
/* spaces */  | 
24  | 
||
| 71649 | 25  | 
val space_char = ' '  | 
| 61865 | 26  | 
val space = " "  | 
27  | 
||
28  | 
private val static_spaces = space * 4000  | 
|
29  | 
||
| 75393 | 30  | 
  def spaces(n: Int): String = {
 | 
| 
73120
 
c3589f2dff31
more informative errors: simplify diagnosis of spurious failures reported by users;
 
wenzelm 
parents: 
73030 
diff
changeset
 | 
31  | 
require(n >= 0, "negative spaces")  | 
| 61865 | 32  | 
if (n < static_spaces.length) static_spaces.substring(0, n)  | 
33  | 
else space * n  | 
|
34  | 
}  | 
|
35  | 
||
36  | 
||
| 
75238
 
e74d162ddf9f
clarified char symbols: cover most European languages;
 
wenzelm 
parents: 
75237 
diff
changeset
 | 
37  | 
/* char symbols */  | 
| 
 
e74d162ddf9f
clarified char symbols: cover most European languages;
 
wenzelm 
parents: 
75237 
diff
changeset
 | 
38  | 
|
| 
 
e74d162ddf9f
clarified char symbols: cover most European languages;
 
wenzelm 
parents: 
75237 
diff
changeset
 | 
39  | 
private val char_symbols: Array[Symbol] =  | 
| 
 
e74d162ddf9f
clarified char symbols: cover most European languages;
 
wenzelm 
parents: 
75237 
diff
changeset
 | 
40  | 
(0 until 0x500).iterator.map(i => new String(Array(i.toChar))).toArray  | 
| 
 
e74d162ddf9f
clarified char symbols: cover most European languages;
 
wenzelm 
parents: 
75237 
diff
changeset
 | 
41  | 
|
| 
 
e74d162ddf9f
clarified char symbols: cover most European languages;
 
wenzelm 
parents: 
75237 
diff
changeset
 | 
42  | 
def char_symbol(c: Char): String =  | 
| 
 
e74d162ddf9f
clarified char symbols: cover most European languages;
 
wenzelm 
parents: 
75237 
diff
changeset
 | 
43  | 
if (c < char_symbols.length) char_symbols(c)  | 
| 
 
e74d162ddf9f
clarified char symbols: cover most European languages;
 
wenzelm 
parents: 
75237 
diff
changeset
 | 
44  | 
else c.toString  | 
| 
 
e74d162ddf9f
clarified char symbols: cover most European languages;
 
wenzelm 
parents: 
75237 
diff
changeset
 | 
45  | 
|
| 
 
e74d162ddf9f
clarified char symbols: cover most European languages;
 
wenzelm 
parents: 
75237 
diff
changeset
 | 
46  | 
|
| 43418 | 47  | 
/* ASCII characters */  | 
48  | 
||
| 71649 | 49  | 
def is_ascii_printable(c: Char): Boolean = space_char <= c && c <= '~'  | 
50  | 
||
| 43418 | 51  | 
def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'  | 
| 55497 | 52  | 
|
| 43418 | 53  | 
def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9'  | 
| 55497 | 54  | 
|
55  | 
def is_ascii_hex(c: Char): Boolean =  | 
|
56  | 
'0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f'  | 
|
57  | 
||
| 43418 | 58  | 
def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\''  | 
59  | 
||
| 55497 | 60  | 
def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c)  | 
61  | 
||
| 69448 | 62  | 
def is_ascii_line_terminator(c: Char): Boolean = "\r\n".contains(c)  | 
63  | 
||
| 43418 | 64  | 
def is_ascii_letdig(c: Char): Boolean =  | 
65  | 
is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)  | 
|
66  | 
||
67  | 
def is_ascii_identifier(s: String): Boolean =  | 
|
| 
75192
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
68  | 
s.nonEmpty && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig)  | 
| 43418 | 69  | 
|
| 75393 | 70  | 
  def ascii(c: Char): Symbol = {
 | 
| 62528 | 71  | 
    if (c > 127) error("Non-ASCII character: " + quote(c.toString))
 | 
| 
75238
 
e74d162ddf9f
clarified char symbols: cover most European languages;
 
wenzelm 
parents: 
75237 
diff
changeset
 | 
72  | 
else char_symbol(c)  | 
| 62528 | 73  | 
}  | 
74  | 
||
| 66919 | 75  | 
def is_ascii(s: Symbol): Boolean = s.length == 1 && s(0) < 128  | 
76  | 
||
| 43418 | 77  | 
|
| 48775 | 78  | 
/* symbol matching */  | 
| 27901 | 79  | 
|
| 
48773
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
80  | 
def is_malformed(s: Symbol): Boolean =  | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
81  | 
    s.length match {
 | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
82  | 
case 1 =>  | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
83  | 
val c = s(0)  | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
84  | 
Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd'  | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
85  | 
case 2 =>  | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
86  | 
val c1 = s(0)  | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
87  | 
val c2 = s(1)  | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
88  | 
!(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2))  | 
| 48774 | 89  | 
      case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
 | 
| 
48773
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
90  | 
}  | 
| 34137 | 91  | 
|
| 
54734
 
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
 
wenzelm 
parents: 
53400 
diff
changeset
 | 
92  | 
def is_newline(s: Symbol): Boolean =  | 
| 
43675
 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 
wenzelm 
parents: 
43511 
diff
changeset
 | 
93  | 
s == "\n" || s == "\r" || s == "\r\n"  | 
| 38877 | 94  | 
|
| 75393 | 95  | 
  class Matcher(text: CharSequence) {
 | 
| 
75237
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
96  | 
private def ok(i: Int): Boolean = 0 <= i && i < text.length  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
97  | 
private def char(i: Int): Char = if (ok(i)) text.charAt(i) else 0  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
98  | 
private def maybe_char(c: Char, i: Int): Int = if (char(i) == c) i + 1 else i  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
99  | 
|
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
100  | 
@tailrec private def many_ascii_letdig(i: Int): Int =  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
101  | 
if (is_ascii_letdig(char(i))) many_ascii_letdig(i + 1) else i  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
102  | 
private def maybe_ascii_id(i: Int): Int =  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
103  | 
if (is_ascii_letter(char(i))) many_ascii_letdig(i + 1) else i  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
104  | 
|
| 75393 | 105  | 
    def match_length(i: Int): Int = {
 | 
| 
75237
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
106  | 
val a = char(i)  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
107  | 
val b = char(i + 1)  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
108  | 
|
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
109  | 
if (Character.isHighSurrogate(a) && Character.isLowSurrogate(b) || a == '\r' && b == '\n') 2  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
110  | 
      else if (a == '\\' && b == '<') maybe_char('>', maybe_ascii_id(maybe_char('^', i + 2))) - i
 | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
111  | 
else if (ok(i)) 1  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
112  | 
else 0  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
113  | 
}  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
114  | 
|
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
115  | 
def match_symbol(i: Int): String =  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
116  | 
      match_length(i) match {
 | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
117  | 
case 0 => ""  | 
| 
75238
 
e74d162ddf9f
clarified char symbols: cover most European languages;
 
wenzelm 
parents: 
75237 
diff
changeset
 | 
118  | 
case 1 => char_symbol(text.charAt(i))  | 
| 
 
e74d162ddf9f
clarified char symbols: cover most European languages;
 
wenzelm 
parents: 
75237 
diff
changeset
 | 
119  | 
case n => text.subSequence(i, i + n).toString  | 
| 34137 | 120  | 
}  | 
| 31522 | 121  | 
}  | 
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
122  | 
|
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
123  | 
|
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
124  | 
/* iterator */  | 
| 33998 | 125  | 
|
| 43696 | 126  | 
def iterator(text: CharSequence): Iterator[Symbol] =  | 
| 75393 | 127  | 
    new Iterator[Symbol] {
 | 
| 43489 | 128  | 
private val matcher = new Matcher(text)  | 
129  | 
private var i = 0  | 
|
| 71601 | 130  | 
def hasNext: Boolean = i < text.length  | 
| 75393 | 131  | 
      def next(): Symbol = {
 | 
| 
75237
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
132  | 
val s = matcher.match_symbol(i)  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
133  | 
i += s.length  | 
| 43489 | 134  | 
s  | 
135  | 
}  | 
|
| 33998 | 136  | 
}  | 
| 43489 | 137  | 
|
| 44949 | 138  | 
def explode(text: CharSequence): List[Symbol] = iterator(text).toList  | 
139  | 
||
| 64615 | 140  | 
def length(text: CharSequence): Int = iterator(text).length  | 
| 64617 | 141  | 
|
| 
67435
 
f83c1842a559
trim blanks -- more thoroughly than in update_cartouches (for single-line comments);
 
wenzelm 
parents: 
67389 
diff
changeset
 | 
142  | 
def trim_blanks(text: CharSequence): String =  | 
| 71601 | 143  | 
Library.trim(is_blank, explode(text)).mkString  | 
| 
67435
 
f83c1842a559
trim blanks -- more thoroughly than in update_cartouches (for single-line comments);
 
wenzelm 
parents: 
67389 
diff
changeset
 | 
144  | 
|
| 69318 | 145  | 
def all_blank(str: String): Boolean =  | 
| 71601 | 146  | 
iterator(str).forall(is_blank)  | 
| 69318 | 147  | 
|
148  | 
def trim_blank_lines(text: String): String =  | 
|
149  | 
cat_lines(split_lines(text).dropWhile(all_blank).reverse.dropWhile(all_blank).reverse)  | 
|
150  | 
||
| 33998 | 151  | 
|
152  | 
/* decoding offsets */  | 
|
153  | 
||
| 75393 | 154  | 
  object Index {
 | 
| 
56471
 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 
wenzelm 
parents: 
56338 
diff
changeset
 | 
155  | 
private sealed case class Entry(chr: Int, sym: Int)  | 
| 52507 | 156  | 
|
| 56472 | 157  | 
val empty: Index = new Index(Nil)  | 
| 
56471
 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 
wenzelm 
parents: 
56338 
diff
changeset
 | 
158  | 
|
| 75393 | 159  | 
    def apply(text: CharSequence): Index = {
 | 
| 34137 | 160  | 
val matcher = new Matcher(text)  | 
| 
56471
 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 
wenzelm 
parents: 
56338 
diff
changeset
 | 
161  | 
val buf = new mutable.ListBuffer[Entry]  | 
| 31929 | 162  | 
var chr = 0  | 
163  | 
var sym = 0  | 
|
| 33998 | 164  | 
      while (chr < text.length) {
 | 
| 
75237
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
165  | 
val n = matcher.match_length(chr)  | 
| 34137 | 166  | 
chr += n  | 
| 31929 | 167  | 
sym += 1  | 
| 34137 | 168  | 
if (n > 1) buf += Entry(chr, sym)  | 
| 31929 | 169  | 
}  | 
| 56472 | 170  | 
if (buf.isEmpty) empty else new Index(buf.toList)  | 
| 31929 | 171  | 
}  | 
| 
56471
 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 
wenzelm 
parents: 
56338 
diff
changeset
 | 
172  | 
}  | 
| 55430 | 173  | 
|
| 75393 | 174  | 
  final class Index private(entries: List[Index.Entry]) {
 | 
| 56472 | 175  | 
private val hash: Int = entries.hashCode  | 
176  | 
private val index: Array[Index.Entry] = entries.toArray  | 
|
177  | 
||
| 75393 | 178  | 
    def decode(symbol_offset: Offset): Text.Offset = {
 | 
| 
55884
 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 
wenzelm 
parents: 
55618 
diff
changeset
 | 
179  | 
val sym = symbol_offset - 1  | 
| 31929 | 180  | 
val end = index.length  | 
| 75393 | 181  | 
      @tailrec def bisect(a: Int, b: Int): Int = {
 | 
| 31929 | 182  | 
        if (a < b) {
 | 
183  | 
val c = (a + b) / 2  | 
|
184  | 
if (sym < index(c).sym) bisect(a, c)  | 
|
185  | 
else if (c + 1 == end || sym < index(c + 1).sym) c  | 
|
186  | 
else bisect(c + 1, b)  | 
|
187  | 
}  | 
|
188  | 
else -1  | 
|
189  | 
}  | 
|
190  | 
val i = bisect(0, end)  | 
|
191  | 
if (i < 0) sym  | 
|
192  | 
else index(i).chr + sym - index(i).sym  | 
|
193  | 
}  | 
|
| 71601 | 194  | 
def decode(symbol_range: Range): Text.Range = symbol_range.map(decode)  | 
| 
56335
 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 
wenzelm 
parents: 
55884 
diff
changeset
 | 
195  | 
|
| 
56338
 
f968f4e3d520
proper structural hashCode, which is required for Command.File equals (NB: Array has physical object identity);
 
wenzelm 
parents: 
56335 
diff
changeset
 | 
196  | 
override def hashCode: Int = hash  | 
| 
56335
 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 
wenzelm 
parents: 
55884 
diff
changeset
 | 
197  | 
override def equals(that: Any): Boolean =  | 
| 
 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 
wenzelm 
parents: 
55884 
diff
changeset
 | 
198  | 
      that match {
 | 
| 
 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 
wenzelm 
parents: 
55884 
diff
changeset
 | 
199  | 
case other: Index => index.sameElements(other.index)  | 
| 
 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 
wenzelm 
parents: 
55884 
diff
changeset
 | 
200  | 
case _ => false  | 
| 
 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 
wenzelm 
parents: 
55884 
diff
changeset
 | 
201  | 
}  | 
| 31929 | 202  | 
}  | 
203  | 
||
204  | 
||
| 64477 | 205  | 
/* symbolic text chunks -- without actual text */  | 
| 56746 | 206  | 
|
| 75393 | 207  | 
  object Text_Chunk {
 | 
| 56746 | 208  | 
sealed abstract class Name  | 
209  | 
case object Default extends Name  | 
|
210  | 
case class Id(id: Document_ID.Generic) extends Name  | 
|
211  | 
case class File(name: String) extends Name  | 
|
212  | 
||
213  | 
def apply(text: CharSequence): Text_Chunk =  | 
|
| 76235 | 214  | 
new Text_Chunk(Text.Range.length(text), Index(text))  | 
| 56746 | 215  | 
}  | 
216  | 
||
| 75393 | 217  | 
  final class Text_Chunk private(val range: Text.Range, private val index: Index) {
 | 
| 56746 | 218  | 
override def hashCode: Int = (range, index).hashCode  | 
219  | 
override def equals(that: Any): Boolean =  | 
|
220  | 
      that match {
 | 
|
221  | 
case other: Text_Chunk =>  | 
|
222  | 
range == other.range &&  | 
|
223  | 
index == other.index  | 
|
224  | 
case _ => false  | 
|
225  | 
}  | 
|
226  | 
||
| 57840 | 227  | 
override def toString: String = "Text_Chunk" + range.toString  | 
228  | 
||
| 56746 | 229  | 
def decode(symbol_offset: Offset): Text.Offset = index.decode(symbol_offset)  | 
230  | 
def decode(symbol_range: Range): Text.Range = index.decode(symbol_range)  | 
|
| 75393 | 231  | 
    def incorporate(symbol_range: Range): Option[Text.Range] = {
 | 
| 
75659
 
9bd92ac9328f
more robust Scala 3 indentation, for the sake of IntelliJ IDEA;
 
wenzelm 
parents: 
75393 
diff
changeset
 | 
232  | 
      def in(r: Range): Option[Text.Range] = {
 | 
| 56746 | 233  | 
        range.try_restrict(decode(r)) match {
 | 
234  | 
case Some(r1) if !r1.is_singularity => Some(r1)  | 
|
235  | 
case _ => None  | 
|
236  | 
}  | 
|
| 
75659
 
9bd92ac9328f
more robust Scala 3 indentation, for the sake of IntelliJ IDEA;
 
wenzelm 
parents: 
75393 
diff
changeset
 | 
237  | 
}  | 
| 
 
9bd92ac9328f
more robust Scala 3 indentation, for the sake of IntelliJ IDEA;
 
wenzelm 
parents: 
75393 
diff
changeset
 | 
238  | 
in(symbol_range) orElse in(symbol_range - 1)  | 
| 56746 | 239  | 
}  | 
240  | 
}  | 
|
241  | 
||
242  | 
||
| 33998 | 243  | 
/* recoding text */  | 
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
244  | 
|
| 75393 | 245  | 
  private class Recoder(list: List[(String, String)]) {
 | 
246  | 
    private val (min, max) = {
 | 
|
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
247  | 
var min = '\uffff'  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
248  | 
var max = '\u0000'  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
249  | 
      for ((x, _) <- list) {
 | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
250  | 
val c = x(0)  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
251  | 
if (c < min) min = c  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
252  | 
if (c > max) max = c  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
253  | 
}  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
254  | 
(min, max)  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
255  | 
}  | 
| 75393 | 256  | 
    private val table = {
 | 
| 40443 | 257  | 
var tab = Map[String, String]()  | 
258  | 
      for ((x, y) <- list) {
 | 
|
259  | 
        tab.get(x) match {
 | 
|
260  | 
case None => tab += (x -> y)  | 
|
261  | 
case Some(z) =>  | 
|
| 62230 | 262  | 
            error("Duplicate symbol mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z))
 | 
| 40443 | 263  | 
}  | 
264  | 
}  | 
|
265  | 
tab  | 
|
266  | 
}  | 
|
| 75393 | 267  | 
    def recode(text: String): String = {
 | 
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
268  | 
val len = text.length  | 
| 
75237
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
269  | 
val matcher = new Symbol.Matcher(text)  | 
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
270  | 
val result = new StringBuilder(len)  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
271  | 
var i = 0  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
272  | 
      while (i < len) {
 | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
273  | 
val c = text(i)  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
274  | 
        if (min <= c && c <= max) {
 | 
| 
75237
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
275  | 
val s = matcher.match_symbol(i)  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
276  | 
result.append(table.getOrElse(s, s))  | 
| 
 
90eaac98b3fa
more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
 
wenzelm 
parents: 
75199 
diff
changeset
 | 
277  | 
i += s.length  | 
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
278  | 
}  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
279  | 
        else { result.append(c); i += 1 }
 | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
280  | 
}  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
281  | 
result.toString  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
282  | 
}  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
283  | 
}  | 
| 27924 | 284  | 
|
| 27918 | 285  | 
|
| 
27923
 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 
wenzelm 
parents: 
27918 
diff
changeset
 | 
286  | 
|
| 75194 | 287  | 
/** defined symbols **/  | 
| 27927 | 288  | 
|
| 75393 | 289  | 
  object Argument extends Enumeration {
 | 
| 75194 | 290  | 
val none, cartouche, space_cartouche = Value  | 
| 67311 | 291  | 
|
| 75194 | 292  | 
def unapply(s: String): Option[Value] =  | 
293  | 
      try { Some(withName(s)) }
 | 
|
294  | 
      catch { case _: NoSuchElementException => None}
 | 
|
| 75197 | 295  | 
}  | 
| 75193 | 296  | 
|
| 75393 | 297  | 
  object Entry {
 | 
| 75197 | 298  | 
    private val Name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
 | 
299  | 
    private val Argument = new Properties.String("argument")
 | 
|
300  | 
    private val Abbrev = new Properties.String("abbrev")
 | 
|
301  | 
    private val Code = new Properties.String("code")
 | 
|
302  | 
    private val Font = new Properties.String("font")
 | 
|
303  | 
    private val Group = new Properties.String("group")
 | 
|
304  | 
||
| 75393 | 305  | 
    def apply(symbol: Symbol, props: Properties.T): Entry = {
 | 
| 75197 | 306  | 
def err(msg: String): Nothing = error(msg + " for symbol " + quote(symbol))  | 
307  | 
||
308  | 
val name =  | 
|
309  | 
        symbol match { case Name(a) => a case _ => err("Cannot determine name") }
 | 
|
310  | 
||
311  | 
val argument =  | 
|
312  | 
        props match {
 | 
|
313  | 
case Argument(arg) =>  | 
|
314  | 
Symbol.Argument.unapply(arg) getOrElse  | 
|
315  | 
              error("Bad argument: " + quote(arg) + " for symbol " + quote(symbol))
 | 
|
316  | 
case _ => Symbol.Argument.none  | 
|
317  | 
}  | 
|
318  | 
||
319  | 
val code =  | 
|
320  | 
        props match {
 | 
|
321  | 
case Code(s) =>  | 
|
322  | 
            try {
 | 
|
323  | 
val code = Integer.decode(s).intValue  | 
|
324  | 
              if (code >= 128) Some(code) else err("Illegal ASCII code")
 | 
|
325  | 
}  | 
|
326  | 
            catch { case _: NumberFormatException => err("Bad code") }
 | 
|
327  | 
case _ => None  | 
|
328  | 
}  | 
|
329  | 
||
330  | 
      val groups = proper_list(for ((Group.name, a) <- props) yield a).getOrElse(List("unsorted"))
 | 
|
331  | 
||
332  | 
val abbrevs = for ((Abbrev.name, a) <- props) yield a  | 
|
333  | 
||
334  | 
new Entry(symbol, name, argument, code, Font.unapply(props), groups, abbrevs)  | 
|
335  | 
}  | 
|
336  | 
}  | 
|
337  | 
||
338  | 
class Entry private(  | 
|
339  | 
val symbol: Symbol,  | 
|
340  | 
val name: String,  | 
|
341  | 
val argument: Symbol.Argument.Value,  | 
|
342  | 
val code: Option[Int],  | 
|
343  | 
val font: Option[String],  | 
|
344  | 
val groups: List[String],  | 
|
| 75393 | 345  | 
val abbrevs: List[String]  | 
346  | 
  ) {
 | 
|
| 75197 | 347  | 
override def toString: String = symbol  | 
348  | 
||
349  | 
val decode: Option[String] =  | 
|
350  | 
code.map(c => new String(Character.toChars(c)))  | 
|
| 75194 | 351  | 
}  | 
352  | 
||
| 75195 | 353  | 
lazy val symbols: Symbols = Symbols.load()  | 
| 75194 | 354  | 
|
| 75393 | 355  | 
  object Symbols {
 | 
356  | 
    def load(static: Boolean = false): Symbols = {
 | 
|
| 
75252
 
41dfe941c3da
inline Isabelle symbols into source text, so that "isabelle vscode" can start up properly without access to process.env or fs;
 
wenzelm 
parents: 
75238 
diff
changeset
 | 
357  | 
val paths =  | 
| 
 
41dfe941c3da
inline Isabelle symbols into source text, so that "isabelle vscode" can start up properly without access to process.env or fs;
 
wenzelm 
parents: 
75238 
diff
changeset
 | 
358  | 
        if (static) List(Path.explode("~~/etc/symbols"))
 | 
| 
 
41dfe941c3da
inline Isabelle symbols into source text, so that "isabelle vscode" can start up properly without access to process.env or fs;
 
wenzelm 
parents: 
75238 
diff
changeset
 | 
359  | 
        else Path.split(Isabelle_System.getenv("ISABELLE_SYMBOLS"))
 | 
| 
 
41dfe941c3da
inline Isabelle symbols into source text, so that "isabelle vscode" can start up properly without access to process.env or fs;
 
wenzelm 
parents: 
75238 
diff
changeset
 | 
360  | 
make(cat_lines(for (path <- paths if path.is_file) yield File.read(path)))  | 
| 75193 | 361  | 
}  | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
362  | 
|
| 75393 | 363  | 
    def make(symbols_spec: String): Symbols = {
 | 
| 75193 | 364  | 
      val No_Decl = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
 | 
365  | 
      val Key = new Regex("""(?xs) (.+): """)
 | 
|
| 31522 | 366  | 
|
| 75393 | 367  | 
      def read_decl(decl: String): (Symbol, Properties.T) = {
 | 
| 75193 | 368  | 
        def err() = error("Bad symbol declaration: " + decl)
 | 
| 31522 | 369  | 
|
| 75393 | 370  | 
        def read_props(props: List[String]): Properties.T = {
 | 
| 75193 | 371  | 
          props match {
 | 
372  | 
case Nil => Nil  | 
|
373  | 
case _ :: Nil => err()  | 
|
374  | 
            case Key(x) :: y :: rest => (x -> y.replace('\u2423', ' ')) :: read_props(rest)
 | 
|
375  | 
case _ => err()  | 
|
376  | 
}  | 
|
377  | 
}  | 
|
378  | 
        decl.split("\\s+").toList match {
 | 
|
379  | 
case sym :: props if sym.length > 1 && !is_malformed(sym) =>  | 
|
380  | 
(sym, read_props(props))  | 
|
| 31522 | 381  | 
case _ => err()  | 
382  | 
}  | 
|
383  | 
}  | 
|
384  | 
||
| 75197 | 385  | 
new Symbols(  | 
| 75193 | 386  | 
split_lines(symbols_spec).reverse.  | 
| 75197 | 387  | 
          foldLeft((List.empty[Entry], Set.empty[Symbol])) {
 | 
| 75193 | 388  | 
case (res, No_Decl()) => res  | 
389  | 
case ((list, known), decl) =>  | 
|
390  | 
val (sym, props) = read_decl(decl)  | 
|
391  | 
if (known(sym)) (list, known)  | 
|
| 75197 | 392  | 
else (Entry(sym, props) :: list, known + sym)  | 
393  | 
}._1)  | 
|
| 75193 | 394  | 
}  | 
395  | 
}  | 
|
| 31522 | 396  | 
|
| 75393 | 397  | 
  class Symbols(val entries: List[Entry]) {
 | 
| 75197 | 398  | 
    override def toString: String = entries.mkString("Symbols(", ", ", ")")
 | 
399  | 
||
400  | 
||
| 53400 | 401  | 
/* basic properties */  | 
402  | 
||
| 75197 | 403  | 
private val entries_map: Map[Symbol, Entry] =  | 
404  | 
(for (entry <- entries.iterator) yield entry.symbol -> entry).toMap  | 
|
| 31651 | 405  | 
|
| 75197 | 406  | 
def get(sym: Symbol): Option[Entry] = entries_map.get(sym)  | 
407  | 
def defined(sym: Symbol): Boolean = entries_map.isDefinedAt(sym)  | 
|
408  | 
||
409  | 
def defined_code(sym: Symbol): Boolean =  | 
|
410  | 
      get(sym) match { case Some(entry) => entry.code.isDefined case _ => false }
 | 
|
| 31651 | 411  | 
|
| 75197 | 412  | 
val code_defined: Int => Boolean =  | 
413  | 
(for (entry <- entries.iterator; code <- entry.code) yield code).toSet  | 
|
| 
50136
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
414  | 
|
| 75197 | 415  | 
val groups_code: List[(String, List[Symbol])] =  | 
416  | 
      (for {
 | 
|
417  | 
entry <- entries.iterator if entry.code.isDefined  | 
|
418  | 
group <- entry.groups.iterator  | 
|
419  | 
} yield entry.symbol -> group)  | 
|
420  | 
        .toList.groupBy(_._2).toList.map({ case (g, list) => (g, list.map(_._1)) }).sortBy(_._1)
 | 
|
| 75195 | 421  | 
|
| 75199 | 422  | 
def get_abbrevs(sym: Symbol): List[String] =  | 
423  | 
      get(sym) match { case Some(entry) => entry.abbrevs case _ => Nil }
 | 
|
| 75195 | 424  | 
|
| 43488 | 425  | 
|
| 43490 | 426  | 
/* recoding */  | 
| 31522 | 427  | 
|
| 75393 | 428  | 
    private val (decoder, encoder) = {
 | 
| 31522 | 429  | 
val mapping =  | 
| 75197 | 430  | 
for (entry <- entries; s <- entry.decode) yield entry.symbol -> s  | 
| 
61376
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
431  | 
(new Recoder(mapping), new Recoder(for ((x, y) <- mapping) yield (y, x)))  | 
| 31522 | 432  | 
}  | 
| 27918 | 433  | 
|
| 34098 | 434  | 
def decode(text: String): String = decoder.recode(text)  | 
435  | 
def encode(text: String): String = encoder.recode(text)  | 
|
| 34134 | 436  | 
|
| 75198 | 437  | 
private def recode_set(elems: Iterable[String]): Set[String] =  | 
438  | 
(elems.iterator ++ elems.iterator.map(decode)).toSet  | 
|
| 43490 | 439  | 
|
| 75198 | 440  | 
private def recode_map[A](elems: Iterable[(String, A)]): Map[String, A] =  | 
441  | 
      (elems.iterator ++ elems.iterator.map({ case (sym, a) => (decode(sym), a) })).toMap
 | 
|
| 43490 | 442  | 
|
443  | 
||
444  | 
/* user fonts */  | 
|
445  | 
||
| 43696 | 446  | 
val fonts: Map[Symbol, String] =  | 
| 75198 | 447  | 
recode_map(for (entry <- entries; font <- entry.font) yield entry.symbol -> font)  | 
| 43490 | 448  | 
|
| 
75192
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
449  | 
val font_names: List[String] = fonts.iterator.map(_._2).toSet.toList  | 
| 
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
450  | 
val font_index: Map[String, Int] = (font_names zip font_names.indices.toList).toMap  | 
| 43490 | 451  | 
|
| 75195 | 452  | 
def lookup_font(sym: Symbol): Option[Int] = fonts.get(sym).map(font_index(_))  | 
453  | 
||
| 34134 | 454  | 
|
455  | 
/* classification */  | 
|
456  | 
||
| 75198 | 457  | 
val letters: Set[String] = recode_set(List(  | 
| 34134 | 458  | 
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",  | 
459  | 
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",  | 
|
460  | 
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",  | 
|
461  | 
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",  | 
|
462  | 
||
463  | 
"\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>",  | 
|
464  | 
"\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>",  | 
|
465  | 
"\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>",  | 
|
466  | 
"\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>",  | 
|
467  | 
"\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>",  | 
|
468  | 
"\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>",  | 
|
469  | 
"\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>",  | 
|
470  | 
"\\<x>", "\\<y>", "\\<z>",  | 
|
471  | 
||
472  | 
"\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>",  | 
|
473  | 
"\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>",  | 
|
474  | 
"\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>",  | 
|
475  | 
"\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>",  | 
|
476  | 
"\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>",  | 
|
477  | 
"\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>",  | 
|
478  | 
"\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>",  | 
|
479  | 
"\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>",  | 
|
480  | 
"\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>",  | 
|
481  | 
||
482  | 
"\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>",  | 
|
483  | 
"\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>",  | 
|
484  | 
"\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>",  | 
|
485  | 
"\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>",  | 
|
486  | 
"\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>",  | 
|
487  | 
"\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>",  | 
|
| 75198 | 488  | 
"\\<Psi>", "\\<Omega>"))  | 
| 34134 | 489  | 
|
| 75198 | 490  | 
val blanks: Set[String] = recode_set(List(space, "\t", "\n", "\u000B", "\f", "\r", "\r\n"))  | 
| 34138 | 491  | 
|
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
492  | 
val sym_chars =  | 
| 34138 | 493  | 
      Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
 | 
| 34134 | 494  | 
|
| 75197 | 495  | 
val symbolic: Set[String] =  | 
| 75198 | 496  | 
recode_set(for (entry <- entries if raw_symbolic(entry.symbol)) yield entry.symbol)  | 
| 
44992
 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 
wenzelm 
parents: 
44949 
diff
changeset
 | 
497  | 
|
| 43455 | 498  | 
|
| 
63528
 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 
wenzelm 
parents: 
62528 
diff
changeset
 | 
499  | 
/* misc symbols */  | 
| 61579 | 500  | 
|
| 
75192
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
501  | 
val newline_decoded: Symbol = decode(newline)  | 
| 
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
502  | 
val comment_decoded: Symbol = decode(comment)  | 
| 
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
503  | 
val cancel_decoded: Symbol = decode(cancel)  | 
| 
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
504  | 
val latex_decoded: Symbol = decode(latex)  | 
| 
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
505  | 
val marker_decoded: Symbol = decode(marker)  | 
| 
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
506  | 
val open_decoded: Symbol = decode(open)  | 
| 
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
507  | 
val close_decoded: Symbol = decode(close)  | 
| 55033 | 508  | 
|
509  | 
||
| 43488 | 510  | 
/* control symbols */  | 
511  | 
||
| 59107 | 512  | 
val control_decoded: Set[Symbol] =  | 
| 75197 | 513  | 
      (for (entry <- entries.iterator if entry.symbol.startsWith("\\<^"); s <- entry.decode)
 | 
514  | 
yield s).toSet  | 
|
| 43488 | 515  | 
|
| 
75192
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
516  | 
val sub_decoded: Symbol = decode(sub)  | 
| 
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
517  | 
val sup_decoded: Symbol = decode(sup)  | 
| 
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
518  | 
val bold_decoded: Symbol = decode(bold)  | 
| 
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
519  | 
val emph_decoded: Symbol = decode(emph)  | 
| 
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
520  | 
val bsub_decoded: Symbol = decode(bsub)  | 
| 
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
521  | 
val esub_decoded: Symbol = decode(esub)  | 
| 
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
522  | 
val bsup_decoded: Symbol = decode(bsup)  | 
| 
 
7d680dcd69b1
misc tuning, based on suggestions by IntelliJ IDEA;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
523  | 
val esup_decoded: Symbol = decode(esup)  | 
| 27918 | 524  | 
}  | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
525  | 
|
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
526  | 
|
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
527  | 
/* tables */  | 
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
528  | 
|
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
529  | 
def decode(text: String): String = symbols.decode(text)  | 
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
530  | 
def encode(text: String): String = symbols.encode(text)  | 
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
531  | 
|
| 73030 | 532  | 
def decode_yxml(text: String, cache: XML.Cache = XML.Cache.none): XML.Body =  | 
533  | 
YXML.parse_body(decode(text), cache = cache)  | 
|
534  | 
||
535  | 
def decode_yxml_failsafe(text: String, cache: XML.Cache = XML.Cache.none): XML.Body =  | 
|
536  | 
YXML.parse_body_failsafe(decode(text), cache = cache)  | 
|
537  | 
||
| 
65344
 
b99283eed13c
clarified YXML vs. symbol encoding: operate on whole message;
 
wenzelm 
parents: 
65335 
diff
changeset
 | 
538  | 
def encode_yxml(body: XML.Body): String = encode(YXML.string_of_body(body))  | 
| 
53337
 
b3817a0e3211
sort items according to persistent history of frequency of use;
 
wenzelm 
parents: 
53316 
diff
changeset
 | 
539  | 
|
| 75393 | 540  | 
  def decode_strict(text: String): String = {
 | 
| 
50291
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
541  | 
val decoded = decode(text)  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
542  | 
if (encode(decoded) == text) decoded  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
543  | 
    else {
 | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
544  | 
val bad = new mutable.ListBuffer[Symbol]  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
545  | 
for (s <- iterator(text) if encode(decode(s)) != s && !bad.contains(s))  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
546  | 
bad += s  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
547  | 
      error("Bad Unicode symbols in text: " + commas_quote(bad))
 | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
548  | 
}  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
549  | 
}  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
550  | 
|
| 72866 | 551  | 
def output(unicode_symbols: Boolean, text: String): String =  | 
552  | 
if (unicode_symbols) Symbol.decode(text) else Symbol.encode(text)  | 
|
553  | 
||
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
554  | 
|
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
555  | 
/* classification */  | 
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
556  | 
|
| 43696 | 557  | 
def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym)  | 
558  | 
def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9'  | 
|
559  | 
def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'"  | 
|
560  | 
def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym)  | 
|
561  | 
def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym)  | 
|
| 
44992
 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 
wenzelm 
parents: 
44949 
diff
changeset
 | 
562  | 
|
| 55033 | 563  | 
|
| 67438 | 564  | 
/* symbolic newline */  | 
| 
63528
 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 
wenzelm 
parents: 
62528 
diff
changeset
 | 
565  | 
|
| 
 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 
wenzelm 
parents: 
62528 
diff
changeset
 | 
566  | 
val newline: Symbol = "\\<newline>"  | 
| 
 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 
wenzelm 
parents: 
62528 
diff
changeset
 | 
567  | 
def newline_decoded: Symbol = symbols.newline_decoded  | 
| 
 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 
wenzelm 
parents: 
62528 
diff
changeset
 | 
568  | 
|
| 
 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 
wenzelm 
parents: 
62528 
diff
changeset
 | 
569  | 
def print_newlines(str: String): String =  | 
| 
 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 
wenzelm 
parents: 
62528 
diff
changeset
 | 
570  | 
    if (str.contains('\n'))
 | 
| 
 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 
wenzelm 
parents: 
62528 
diff
changeset
 | 
571  | 
      (for (s <- iterator(str)) yield { if (s == "\n") newline_decoded else s }).mkString
 | 
| 
 
0f39f59317c1
completion templates for commands involving "begin ... end" blocks;
 
wenzelm 
parents: 
62528 
diff
changeset
 | 
572  | 
else str  | 
| 61579 | 573  | 
|
| 67438 | 574  | 
|
575  | 
/* formal comments */  | 
|
576  | 
||
| 61579 | 577  | 
val comment: Symbol = "\\<comment>"  | 
| 67449 | 578  | 
val cancel: Symbol = "\\<^cancel>"  | 
579  | 
val latex: Symbol = "\\<^latex>"  | 
|
| 
69891
 
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
 
wenzelm 
parents: 
69887 
diff
changeset
 | 
580  | 
val marker: Symbol = "\\<^marker>"  | 
| 67449 | 581  | 
|
| 61579 | 582  | 
def comment_decoded: Symbol = symbols.comment_decoded  | 
| 67438 | 583  | 
def cancel_decoded: Symbol = symbols.cancel_decoded  | 
584  | 
def latex_decoded: Symbol = symbols.latex_decoded  | 
|
| 
69891
 
def3ec9cdb7e
document markers are formal comments, and may thus occur anywhere in the command-span;
 
wenzelm 
parents: 
69887 
diff
changeset
 | 
585  | 
def marker_decoded: Symbol = symbols.marker_decoded  | 
| 61579 | 586  | 
|
587  | 
||
| 55033 | 588  | 
/* cartouches */  | 
589  | 
||
| 61579 | 590  | 
val open: Symbol = "\\<open>"  | 
591  | 
val close: Symbol = "\\<close>"  | 
|
| 55033 | 592  | 
|
593  | 
def open_decoded: Symbol = symbols.open_decoded  | 
|
594  | 
def close_decoded: Symbol = symbols.close_decoded  | 
|
595  | 
||
596  | 
def is_open(sym: Symbol): Boolean = sym == open_decoded || sym == open  | 
|
597  | 
def is_close(sym: Symbol): Boolean = sym == close_decoded || sym == close  | 
|
598  | 
||
| 67131 | 599  | 
def cartouche(s: String): String = open + s + close  | 
600  | 
def cartouche_decoded(s: String): String = open_decoded + s + close_decoded  | 
|
601  | 
||
| 55033 | 602  | 
|
603  | 
/* symbols for symbolic identifiers */  | 
|
| 
44992
 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 
wenzelm 
parents: 
44949 
diff
changeset
 | 
604  | 
|
| 
 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 
wenzelm 
parents: 
44949 
diff
changeset
 | 
605  | 
private def raw_symbolic(sym: Symbol): Boolean =  | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
606  | 
    sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^")
 | 
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
607  | 
|
| 55033 | 608  | 
def is_symbolic(sym: Symbol): Boolean =  | 
609  | 
!is_open(sym) && !is_close(sym) && (raw_symbolic(sym) || symbols.symbolic.contains(sym))  | 
|
610  | 
||
611  | 
def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym)  | 
|
612  | 
||
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
613  | 
|
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
614  | 
/* control symbols */  | 
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
615  | 
|
| 
67127
 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 
wenzelm 
parents: 
66919 
diff
changeset
 | 
616  | 
val control_prefix = "\\<^"  | 
| 
 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 
wenzelm 
parents: 
66919 
diff
changeset
 | 
617  | 
val control_suffix = ">"  | 
| 
 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 
wenzelm 
parents: 
66919 
diff
changeset
 | 
618  | 
|
| 
67255
 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 
wenzelm 
parents: 
67131 
diff
changeset
 | 
619  | 
def control_name(sym: Symbol): Option[String] =  | 
| 
 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 
wenzelm 
parents: 
67131 
diff
changeset
 | 
620  | 
if (is_control_encoded(sym))  | 
| 
 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 
wenzelm 
parents: 
67131 
diff
changeset
 | 
621  | 
Some(sym.substring(control_prefix.length, sym.length - control_suffix.length))  | 
| 
 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 
wenzelm 
parents: 
67131 
diff
changeset
 | 
622  | 
else None  | 
| 
 
f1f983484878
HTML rendering of \<^control> as in Isabelle/jEdit;
 
wenzelm 
parents: 
67131 
diff
changeset
 | 
623  | 
|
| 
67127
 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 
wenzelm 
parents: 
66919 
diff
changeset
 | 
624  | 
def is_control_encoded(sym: Symbol): Boolean =  | 
| 
 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 
wenzelm 
parents: 
66919 
diff
changeset
 | 
625  | 
sym.startsWith(control_prefix) && sym.endsWith(control_suffix)  | 
| 
 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 
wenzelm 
parents: 
66919 
diff
changeset
 | 
626  | 
|
| 59107 | 627  | 
def is_control(sym: Symbol): Boolean =  | 
| 
67127
 
cf111622c9f8
font style for literal control symbols, notably for antiquotations;
 
wenzelm 
parents: 
66919 
diff
changeset
 | 
628  | 
is_control_encoded(sym) || symbols.control_decoded.contains(sym)  | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
629  | 
|
| 43696 | 630  | 
def is_controllable(sym: Symbol): Boolean =  | 
| 
66006
 
cec184536dfd
uniform notion of Symbol.is_controllable (see also 265d9300d523);
 
wenzelm 
parents: 
65997 
diff
changeset
 | 
631  | 
!is_blank(sym) && !is_control(sym) && !is_open(sym) && !is_close(sym) &&  | 
| 
 
cec184536dfd
uniform notion of Symbol.is_controllable (see also 265d9300d523);
 
wenzelm 
parents: 
65997 
diff
changeset
 | 
632  | 
!is_malformed(sym) && sym != "\""  | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
633  | 
|
| 73208 | 634  | 
val sub: Symbol = "\\<^sub>"  | 
635  | 
val sup: Symbol = "\\<^sup>"  | 
|
636  | 
val bold: Symbol = "\\<^bold>"  | 
|
637  | 
val emph: Symbol = "\\<^emph>"  | 
|
638  | 
val bsub: Symbol = "\\<^bsub>"  | 
|
639  | 
val esub: Symbol = "\\<^esub>"  | 
|
640  | 
val bsup: Symbol = "\\<^bsup>"  | 
|
641  | 
val esup: Symbol = "\\<^esup>"  | 
|
| 62103 | 642  | 
|
| 
44238
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
643  | 
def sub_decoded: Symbol = symbols.sub_decoded  | 
| 
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
644  | 
def sup_decoded: Symbol = symbols.sup_decoded  | 
| 62103 | 645  | 
def bold_decoded: Symbol = symbols.bold_decoded  | 
646  | 
def emph_decoded: Symbol = symbols.emph_decoded  | 
|
| 
44238
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
647  | 
def bsub_decoded: Symbol = symbols.bsub_decoded  | 
| 
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
648  | 
def esub_decoded: Symbol = symbols.esub_decoded  | 
| 
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
649  | 
def bsup_decoded: Symbol = symbols.bsup_decoded  | 
| 
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
650  | 
def esup_decoded: Symbol = symbols.esup_decoded  | 
| 71649 | 651  | 
|
652  | 
||
653  | 
/* metric */  | 
|
654  | 
||
655  | 
def is_printable(sym: Symbol): Boolean =  | 
|
656  | 
if (is_ascii(sym)) is_ascii_printable(sym(0))  | 
|
657  | 
else !is_control(sym)  | 
|
658  | 
||
| 75393 | 659  | 
  object Metric extends Pretty.Metric {
 | 
| 71649 | 660  | 
val unit = 1.0  | 
661  | 
def apply(str: String): Double =  | 
|
662  | 
      (for (s <- iterator(str)) yield {
 | 
|
663  | 
val sym = encode(s)  | 
|
664  | 
        if (sym.startsWith("\\<long") || sym.startsWith("\\<Long")) 2
 | 
|
665  | 
else if (is_printable(sym)) 1  | 
|
666  | 
else 0  | 
|
667  | 
}).sum  | 
|
668  | 
}  | 
|
| 27901 | 669  | 
}  |