src/Pure/General/symbol.scala
author wenzelm
Tue, 29 Aug 2023 12:53:28 +0200
changeset 78592 fdfe9b91d96e
parent 76235 16c12979c132
child 78599 1ce1abed5082
permissions -rw-r--r--
misc tuning: support "scalac -source 3.3";
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/General/symbol.scala
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     3
69490
ce85542368b9 more Haskell operations;
wenzelm
parents: 69448
diff changeset
     4
Isabelle text symbols.
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     5
*/
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     6
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     7
package isabelle
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     8
55618
995162143ef4 tuned imports;
wenzelm
parents: 55497
diff changeset
     9
36011
3ff725ac13a4 adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
wenzelm
parents: 34316
diff changeset
    10
import scala.collection.mutable
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    11
import scala.util.matching.Regex
48922
6f3ccfa7818d more precise counting of line/column;
wenzelm
parents: 48775
diff changeset
    12
import scala.annotation.tailrec
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    13
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    14
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
    15
object Symbol {
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
    16
  // an Isabelle symbol is represented as a plain String
  type Symbol = String
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
    17
55884
f2c0eaedd579 tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents: 55618
diff changeset
    18
  // counting Isabelle symbols, starting from 1
f2c0eaedd579 tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents: 55618
diff changeset
    19
  // symbol-based offset: same representation as Text.Offset
  type Offset = Text.Offset
f2c0eaedd579 tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents: 55618
diff changeset
    20
  // symbol-based range: same representation as Text.Range
  type Range = Text.Range
f2c0eaedd579 tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents: 55618
diff changeset
    21
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
    22
61865
6dcc9e4f1aa6 tuned signature;
wenzelm
parents: 61579
diff changeset
    23
  /* spaces */
6dcc9e4f1aa6 tuned signature;
wenzelm
parents: 61579
diff changeset
    24
71649
2acdbb6ee521 pretty formatting as in Isabelle/ML;
wenzelm
parents: 71601
diff changeset
    25
  // the blank as Char
  val space_char = ' '
61865
6dcc9e4f1aa6 tuned signature;
wenzelm
parents: 61579
diff changeset
    26
  // the blank as String
  val space = " "
6dcc9e4f1aa6 tuned signature;
wenzelm
parents: 61579
diff changeset
    27
6dcc9e4f1aa6 tuned signature;
wenzelm
parents: 61579
diff changeset
    28
  // preallocated run of 4000 blanks
  private val static_spaces = space * 4000
6dcc9e4f1aa6 tuned signature;
wenzelm
parents: 61579
diff changeset
    29
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
    30
  // String of n blanks: a prefix of the preallocated static_spaces when n is
  // below its length, otherwise a freshly built string.
  // Fails via require ("negative spaces") for n < 0.
  def spaces(n: Int): String = {
    require(n >= 0, "negative spaces")
    if (n < static_spaces.length) static_spaces.substring(0, n)
    else space * n
  }
6dcc9e4f1aa6 tuned signature;
wenzelm
parents: 61579
diff changeset
    35
6dcc9e4f1aa6 tuned signature;
wenzelm
parents: 61579
diff changeset
    36
75238
e74d162ddf9f clarified char symbols: cover most European languages;
wenzelm
parents: 75237
diff changeset
    37
  /* char symbols */
e74d162ddf9f clarified char symbols: cover most European languages;
wenzelm
parents: 75237
diff changeset
    38
e74d162ddf9f clarified char symbols: cover most European languages;
wenzelm
parents: 75237
diff changeset
    39
  // preallocated one-character strings for all chars below 0x500
  private val char_symbols: Array[Symbol] =
    (0 until 0x500).iterator.map(i => new String(Array(i.toChar))).toArray
e74d162ddf9f clarified char symbols: cover most European languages;
wenzelm
parents: 75237
diff changeset
    41
e74d162ddf9f clarified char symbols: cover most European languages;
wenzelm
parents: 75237
diff changeset
    42
  // One-character string for c: taken from the char_symbols table when c is
  // within its bounds, otherwise allocated via toString.
  def char_symbol(c: Char): String =
    if (c < char_symbols.length) char_symbols(c)
    else c.toString
e74d162ddf9f clarified char symbols: cover most European languages;
wenzelm
parents: 75237
diff changeset
    45
e74d162ddf9f clarified char symbols: cover most European languages;
wenzelm
parents: 75237
diff changeset
    46
43418
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    47
  /* ASCII characters */
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    48
71649
2acdbb6ee521 pretty formatting as in Isabelle/ML;
wenzelm
parents: 71601
diff changeset
    49
  // printable ASCII: from the blank up to '~' (inclusive)
  def is_ascii_printable(c: Char): Boolean =
    c >= space_char && c <= '~'
2acdbb6ee521 pretty formatting as in Isabelle/ML;
wenzelm
parents: 71601
diff changeset
    50
43418
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    51
  // ASCII letters only: A-Z or a-z
  def is_ascii_letter(c: Char): Boolean =
    (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
55497
c0f8aebfb43d lexical syntax for SML (in Scala);
wenzelm
parents: 55430
diff changeset
    52
43418
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    53
  // ASCII decimal digits 0-9
  def is_ascii_digit(c: Char): Boolean =
    c >= '0' && c <= '9'
55497
c0f8aebfb43d lexical syntax for SML (in Scala);
wenzelm
parents: 55430
diff changeset
    54
c0f8aebfb43d lexical syntax for SML (in Scala);
wenzelm
parents: 55430
diff changeset
    55
  // ASCII hexadecimal digits: 0-9, A-F, a-f
  def is_ascii_hex(c: Char): Boolean =
    '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f'
c0f8aebfb43d lexical syntax for SML (in Scala);
wenzelm
parents: 55430
diff changeset
    57
43418
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    58
  // "quasi" letters: underscore and single quote
  def is_ascii_quasi(c: Char): Boolean =
    c match {
      case '_' | '\'' => true
      case _ => false
    }
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    59
55497
c0f8aebfb43d lexical syntax for SML (in Scala);
wenzelm
parents: 55430
diff changeset
    60
  // ASCII blanks: space, tab, newline, vertical tab, form feed, carriage return
  def is_ascii_blank(c: Char): Boolean =
    " \t\n\u000b\f\r".exists(_ == c)
c0f8aebfb43d lexical syntax for SML (in Scala);
wenzelm
parents: 55430
diff changeset
    61
69448
51e696887b81 more uniform multi-language operations;
wenzelm
parents: 69318
diff changeset
    62
  // carriage return or newline
  def is_ascii_line_terminator(c: Char): Boolean =
    "\r\n".exists(_ == c)
51e696887b81 more uniform multi-language operations;
wenzelm
parents: 69318
diff changeset
    63
43418
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    64
  // ASCII letter, digit, or quasi letter (underscore / single quote)
  def is_ascii_letdig(c: Char): Boolean =
    is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    66
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    67
  // Non-empty string that starts with an ASCII letter and consists of
  // is_ascii_letdig characters throughout.
  def is_ascii_identifier(s: String): Boolean =
    s.nonEmpty && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig)
43418
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    69
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
    70
  // Symbol for an ASCII character; signals an error for c > 127.
  def ascii(c: Char): Symbol = {
    if (c > 127) error("Non-ASCII character: " + quote(c.toString))
    else char_symbol(c)
  }
c8c532b22947 clarified ML syntax for strings concerning UTF8;
wenzelm
parents: 62230
diff changeset
    74
66919
1f93e376aeb6 more explicit check;
wenzelm
parents: 66051
diff changeset
    75
  // symbol that consists of exactly one character below 128
  def is_ascii(s: Symbol): Boolean =
    s.length == 1 && s.charAt(0) < 128
1f93e376aeb6 more explicit check;
wenzelm
parents: 66051
diff changeset
    76
43418
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    77
48775
92ceb058391f simplified symbol matching;
wenzelm
parents: 48774
diff changeset
    78
  /* symbol matching */
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    79
48773
0e1bab274672 more liberal scanning of potentially malformed symbols;
wenzelm
parents: 48704
diff changeset
    80
  // Malformed symbols, by length of s:
  //   1: an isolated surrogate or the replacement character U+FFFD
  //   2: anything other than "\r\n" or a proper surrogate pair
  //   otherwise: not terminated by ">", or one of the empty forms \<> and \<^>
  def is_malformed(s: Symbol): Boolean =
    s.length match {
      case 1 =>
        val c = s(0)
        Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd'
      case 2 =>
        val c1 = s(0)
        val c2 = s(1)
        !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2))
      case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
    }
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    91
54734
b91afc3aa3e6 clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
wenzelm
parents: 53400
diff changeset
    92
  // physical newline symbols: LF, CR, or CR LF
  def is_newline(s: Symbol): Boolean =
    s == "\n" || s == "\r" || s == "\r\n"
38877
682c4932b3cc Command.newlines: account for physical newlines;
wenzelm
parents: 38479
diff changeset
    94
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
    95
  // Recognizes the symbol that starts at a given index of text.
  class Matcher(text: CharSequence) {
    // index within the bounds of text
    private def ok(i: Int): Boolean = 0 <= i && i < text.length
    // character at i, or the 0 character when i is out of bounds
    private def char(i: Int): Char = if (ok(i)) text.charAt(i) else 0
    // skip over c at position i, if present
    private def maybe_char(c: Char, i: Int): Int = if (char(i) == c) i + 1 else i

    // position after the run of letdig characters that starts at i
    @tailrec private def many_ascii_letdig(i: Int): Int =
      if (is_ascii_letdig(char(i))) many_ascii_letdig(i + 1) else i
    // skip over an optional identifier: ASCII letter followed by letdig characters
    private def maybe_ascii_id(i: Int): Int =
      if (is_ascii_letter(char(i))) many_ascii_letdig(i + 1) else i

    // Length of the symbol that starts at i:
    //   2 for a surrogate pair or CR LF;
    //   for "\<": the extent of \< with optional ^, optional identifier, optional >;
    //   1 for any other character within bounds;
    //   0 when i is out of bounds.
    def match_length(i: Int): Int = {
      val a = char(i)
      val b = char(i + 1)

      if (Character.isHighSurrogate(a) && Character.isLowSurrogate(b) || a == '\r' && b == '\n') 2
      else if (a == '\\' && b == '<') maybe_char('>', maybe_ascii_id(maybe_char('^', i + 2))) - i
      else if (ok(i)) 1
      else 0
    }

    // Text of the symbol that starts at i; "" when i is out of bounds.
    // Single characters go through char_symbol.
    def match_symbol(i: Int): String =
      match_length(i) match {
        case 0 => ""
        case 1 => char_symbol(text.charAt(i))
        case n => text.subSequence(i, i + n).toString
      }
  }
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   122
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   123
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   124
  /* iterator */
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   125
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   126
  // Iterates over the symbols of text from left to right, using Matcher.
  def iterator(text: CharSequence): Iterator[Symbol] =
    new Iterator[Symbol] {
      private val matcher = new Matcher(text)
      // character position of the next symbol
      private var i = 0
      def hasNext: Boolean = i < text.length
      def next(): Symbol = {
        val s = matcher.match_symbol(i)
        i += s.length
        s
      }
    }
43489
132f99cc0a43 tuned iteration over short symbols;
wenzelm
parents: 43488
diff changeset
   137
44949
b49d7f1066c8 Symbol.explode as in ML;
wenzelm
parents: 44238
diff changeset
   138
  // all symbols of text, in order
  def explode(text: CharSequence): List[Symbol] =
    List.from(iterator(text))
b49d7f1066c8 Symbol.explode as in ML;
wenzelm
parents: 44238
diff changeset
   139
64615
fd0d6de380c6 more systematic text length;
wenzelm
parents: 64612
diff changeset
   140
  // number of symbols in text
  def length(text: CharSequence): Int =
    iterator(text).size
64617
01e50039edc9 more systematic text length wrt. encoding;
wenzelm
parents: 64615
diff changeset
   141
67435
f83c1842a559 trim blanks -- more thoroughly than in update_cartouches (for single-line comments);
wenzelm
parents: 67389
diff changeset
   142
  // Explodes text into symbols, applies Library.trim with is_blank, and
  // concatenates the result.
  // NOTE(review): presumably this strips blank symbols from both ends -- confirm
  // against Library.trim.
  def trim_blanks(text: CharSequence): String =
    Library.trim(is_blank, explode(text)).mkString
67435
f83c1842a559 trim blanks -- more thoroughly than in update_cartouches (for single-line comments);
wenzelm
parents: 67389
diff changeset
   144
69318
f3351bb4390e clarified presentation;
wenzelm
parents: 67449
diff changeset
   145
  // every symbol of str satisfies is_blank (vacuously true for "")
  def all_blank(str: String): Boolean =
    iterator(str).forall(is_blank)
69318
f3351bb4390e clarified presentation;
wenzelm
parents: 67449
diff changeset
   147
f3351bb4390e clarified presentation;
wenzelm
parents: 67449
diff changeset
   148
  // Drops the leading and the trailing all_blank elements from the result of
  // split_lines(text), then rejoins the remainder with cat_lines.
  def trim_blank_lines(text: String): String =
    cat_lines(split_lines(text).dropWhile(all_blank).reverse.dropWhile(all_blank).reverse)
f3351bb4390e clarified presentation;
wenzelm
parents: 67449
diff changeset
   150
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   151
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   152
  /* decoding offsets */
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   153
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   154
  object Index {
    // Checkpoint recorded right after a multi-character symbol:
    // chr = character offset reached, sym = number of symbols seen so far.
    private sealed case class Entry(chr: Int, sym: Int)

    val empty: Index = new Index(Nil)

    // Scans text symbol by symbol; only symbols longer than one character
    // produce an entry. Text without such symbols yields the shared empty index.
    def apply(text: CharSequence): Index = {
      val matcher = new Matcher(text)
      val buf = new mutable.ListBuffer[Entry]
      var chr = 0
      var sym = 0
      while (chr < text.length) {
        val n = matcher.match_length(chr)
        chr += n
        sym += 1
        if (n > 1) buf += Entry(chr, sym)
      }
      if (buf.isEmpty) empty else new Index(buf.toList)
    }
  }
55430
8eb6c740ec1a tuned signature;
wenzelm
parents: 55033
diff changeset
   173
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   174
  // Maps symbol offsets to character offsets, based on the entries produced
  // by Index.apply. Equality and hashCode are structural over the entries.
  final class Index private(entries: List[Index.Entry]) {
    // hash of the entry list, computed once
    private val hash: Int = entries.hashCode
    private val index: Array[Index.Entry] = entries.toArray

    // Character offset for a symbol offset (symbol offsets count from 1).
    def decode(symbol_offset: Offset): Text.Offset = {
      val sym = symbol_offset - 1
      val end = index.length
      // position of the last entry with entry.sym <= sym, or -1 if there is none
      @tailrec def bisect(a: Int, b: Int): Int = {
        if (a < b) {
          val c = (a + b) / 2
          if (sym < index(c).sym) bisect(a, c)
          else if (c + 1 == end || sym < index(c + 1).sym) c
          else bisect(c + 1, b)
        }
        else -1
      }
      val i = bisect(0, end)
      // without a preceding entry, symbols and characters coincide;
      // otherwise continue counting from that entry
      if (i < 0) sym
      else index(i).chr + sym - index(i).sym
    }
    def decode(symbol_range: Range): Text.Range = symbol_range.map(decode)

    override def hashCode: Int = hash
    override def equals(that: Any): Boolean =
      that match {
        case other: Index => index.sameElements(other.index)
        case _ => false
      }
  }
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   203
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   204
64477
8be21ca788ca tuned comment;
wenzelm
parents: 63936
diff changeset
   205
  /* symbolic text chunks -- without actual text */
56746
d37a5d09a277 tuned signature;
wenzelm
parents: 56472
diff changeset
   206
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   207
  object Text_Chunk {
    // names of text chunks: the default chunk, a chunk given by id, or by file name
    sealed abstract class Name
    case object Default extends Name
    case class Id(id: Document_ID.Generic) extends Name
    case class File(name: String) extends Name

    // chunk for text: its range via Text.Range.length and its symbol Index;
    // the text itself is not retained
    def apply(text: CharSequence): Text_Chunk =
      new Text_Chunk(Text.Range.length(text), Index(text))
  }
d37a5d09a277 tuned signature;
wenzelm
parents: 56472
diff changeset
   216
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   217
  // Range of a text together with its symbol Index; equality and hashCode
  // are structural over both components.
  final class Text_Chunk private(val range: Text.Range, private val index: Index) {
    override def hashCode: Int = (range, index).hashCode
    override def equals(that: Any): Boolean =
      that match {
        case other: Text_Chunk =>
          range == other.range &&
          index == other.index
        case _ => false
      }

    override def toString: String = "Text_Chunk" + range.toString

    // decoding of symbol positions, delegated to the index
    def decode(symbol_offset: Offset): Text.Offset = index.decode(symbol_offset)
    def decode(symbol_range: Range): Text.Range = index.decode(symbol_range)

    // Decodes symbol_range and restricts it to the range of this chunk; a
    // failed restriction or a singularity counts as no result. The range is
    // tried as given first, then shifted by -1.
    def incorporate(symbol_range: Range): Option[Text.Range] = {
      def in(r: Range): Option[Text.Range] = {
        range.try_restrict(decode(r)) match {
          case Some(r1) if !r1.is_singularity => Some(r1)
          case _ => None
        }
      }
      in(symbol_range) orElse in(symbol_range - 1)
    }
  }
d37a5d09a277 tuned signature;
wenzelm
parents: 56472
diff changeset
   241
d37a5d09a277 tuned signature;
wenzelm
parents: 56472
diff changeset
   242
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   243
  /* recoding text */
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   244
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   245
  private class Recoder(list: List[(String, String)]) {
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   246
    private val (min, max) = {
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   247
      var min = '\uffff'
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   248
      var max = '\u0000'
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   249
      for ((x, _) <- list) {
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   250
        val c = x(0)
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   251
        if (c < min) min = c
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   252
        if (c > max) max = c
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   253
      }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   254
      (min, max)
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   255
    }
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   256
    private val table = {
40443
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   257
      var tab = Map[String, String]()
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   258
      for ((x, y) <- list) {
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   259
        tab.get(x) match {
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   260
          case None => tab += (x -> y)
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   261
          case Some(z) =>
62230
949d2c9f6ff7 tuned message;
wenzelm
parents: 62104
diff changeset
   262
            error("Duplicate symbol mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z))
40443
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   263
        }
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   264
      }
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   265
      tab
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   266
    }
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   267
    def recode(text: String): String = {
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   268
      val len = text.length
75237
90eaac98b3fa more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents: 75199
diff changeset
   269
      val matcher = new Symbol.Matcher(text)
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   270
      val result = new StringBuilder(len)
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   271
      var i = 0
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   272
      while (i < len) {
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   273
        val c = text(i)
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   274
        if (min <= c && c <= max) {
75237
90eaac98b3fa more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents: 75199
diff changeset
   275
          val s = matcher.match_symbol(i)
90eaac98b3fa more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents: 75199
diff changeset
   276
          result.append(table.getOrElse(s, s))
90eaac98b3fa more elementary Symbol.Matcher without detour via Regex (see also Pure/General/symbol_explode.ML);
wenzelm
parents: 75199
diff changeset
   277
          i += s.length
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   278
        }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   279
        else { result.append(c); i += 1 }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   280
      }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   281
      result.toString
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   282
    }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   283
  }
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
   284
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   285
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   286
75194
5a9932dbaf1f clarified signature;
wenzelm
parents: 75193
diff changeset
   287
  /** defined symbols **/
27927
eb624bb54bc6 tuned Recoder;
wenzelm
parents: 27926
diff changeset
   288
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   289
  object Argument extends Enumeration {
75194
5a9932dbaf1f clarified signature;
wenzelm
parents: 75193
diff changeset
   290
    val none, cartouche, space_cartouche = Value
67311
3869b2400e22 more completion templates;
wenzelm
parents: 67304
diff changeset
   291
75194
5a9932dbaf1f clarified signature;
wenzelm
parents: 75193
diff changeset
   292
    def unapply(s: String): Option[Value] =
5a9932dbaf1f clarified signature;
wenzelm
parents: 75193
diff changeset
   293
      try { Some(withName(s)) }
5a9932dbaf1f clarified signature;
wenzelm
parents: 75193
diff changeset
   294
      catch { case _: NoSuchElementException => None}
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   295
  }
75193
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   296
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   297
  object Entry {
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   298
    private val Name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   299
    private val Argument = new Properties.String("argument")
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   300
    private val Abbrev = new Properties.String("abbrev")
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   301
    private val Code = new Properties.String("code")
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   302
    private val Font = new Properties.String("font")
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   303
    private val Group = new Properties.String("group")
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   304
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   305
    def apply(symbol: Symbol, props: Properties.T): Entry = {
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   306
      def err(msg: String): Nothing = error(msg + " for symbol " + quote(symbol))
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   307
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   308
      val name =
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   309
        symbol match { case Name(a) => a case _ => err("Cannot determine name") }
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   310
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   311
      val argument =
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   312
        props match {
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   313
          case Argument(arg) =>
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   314
            Symbol.Argument.unapply(arg) getOrElse
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   315
              error("Bad argument: " + quote(arg) + " for symbol " + quote(symbol))
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   316
          case _ => Symbol.Argument.none
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   317
        }
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   318
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   319
      val code =
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   320
        props match {
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   321
          case Code(s) =>
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   322
            try {
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   323
              val code = Integer.decode(s).intValue
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   324
              if (code >= 128) Some(code) else err("Illegal ASCII code")
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   325
            }
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   326
            catch { case _: NumberFormatException => err("Bad code") }
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   327
          case _ => None
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   328
        }
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   329
78592
fdfe9b91d96e misc tuning: support "scalac -source 3.3";
wenzelm
parents: 76235
diff changeset
   330
      val groups =
fdfe9b91d96e misc tuning: support "scalac -source 3.3";
wenzelm
parents: 76235
diff changeset
   331
        proper_list(for (case (Group.name, a) <- props) yield a).getOrElse(List("unsorted"))
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   332
78592
fdfe9b91d96e misc tuning: support "scalac -source 3.3";
wenzelm
parents: 76235
diff changeset
   333
      val abbrevs = for (case (Abbrev.name, a) <- props) yield a
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   334
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   335
      new Entry(symbol, name, argument, code, Font.unapply(props), groups, abbrevs)
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   336
    }
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   337
  }
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   338
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   339
  class Entry private(
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   340
    val symbol: Symbol,
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   341
    val name: String,
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   342
    val argument: Symbol.Argument.Value,
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   343
    val code: Option[Int],
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   344
    val font: Option[String],
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   345
    val groups: List[String],
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   346
    val abbrevs: List[String]
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   347
  ) {
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   348
    override def toString: String = symbol
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   349
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   350
    val decode: Option[String] =
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   351
      code.map(c => new String(Character.toChars(c)))
75194
5a9932dbaf1f clarified signature;
wenzelm
parents: 75193
diff changeset
   352
  }
5a9932dbaf1f clarified signature;
wenzelm
parents: 75193
diff changeset
   353
75195
596e77cda169 clarified signature;
wenzelm
parents: 75194
diff changeset
   354
  /* symbol table, produced by Symbols.load() with default arguments on first access */
  lazy val symbols: Symbols = Symbols.load()
75194
5a9932dbaf1f clarified signature;
wenzelm
parents: 75193
diff changeset
   355
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   356
  object Symbols {
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   357
    def load(static: Boolean = false): Symbols = {
75252
41dfe941c3da inline Isabelle symbols into source text, so that "isabelle vscode" can start up properly without access to process.env or fs;
wenzelm
parents: 75238
diff changeset
   358
      val paths =
41dfe941c3da inline Isabelle symbols into source text, so that "isabelle vscode" can start up properly without access to process.env or fs;
wenzelm
parents: 75238
diff changeset
   359
        if (static) List(Path.explode("~~/etc/symbols"))
41dfe941c3da inline Isabelle symbols into source text, so that "isabelle vscode" can start up properly without access to process.env or fs;
wenzelm
parents: 75238
diff changeset
   360
        else Path.split(Isabelle_System.getenv("ISABELLE_SYMBOLS"))
41dfe941c3da inline Isabelle symbols into source text, so that "isabelle vscode" can start up properly without access to process.env or fs;
wenzelm
parents: 75238
diff changeset
   361
      make(cat_lines(for (path <- paths if path.is_file) yield File.read(path)))
75193
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   362
    }
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   363
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   364
    def make(symbols_spec: String): Symbols = {
75193
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   365
      val No_Decl = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   366
      val Key = new Regex("""(?xs) (.+): """)
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   367
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   368
      def read_decl(decl: String): (Symbol, Properties.T) = {
75193
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   369
        def err() = error("Bad symbol declaration: " + decl)
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   370
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   371
        def read_props(props: List[String]): Properties.T = {
75193
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   372
          props match {
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   373
            case Nil => Nil
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   374
            case _ :: Nil => err()
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   375
            case Key(x) :: y :: rest => (x -> y.replace('\u2423', ' ')) :: read_props(rest)
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   376
            case _ => err()
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   377
          }
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   378
        }
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   379
        decl.split("\\s+").toList match {
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   380
          case sym :: props if sym.length > 1 && !is_malformed(sym) =>
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   381
            (sym, read_props(props))
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   382
          case _ => err()
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   383
        }
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   384
      }
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   385
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   386
      new Symbols(
75193
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   387
        split_lines(symbols_spec).reverse.
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   388
          foldLeft((List.empty[Entry], Set.empty[Symbol])) {
75193
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   389
            case (res, No_Decl()) => res
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   390
            case ((list, known), decl) =>
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   391
              val (sym, props) = read_decl(decl)
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   392
              if (known(sym)) (list, known)
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   393
              else (Entry(sym, props) :: list, known + sym)
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   394
          }._1)
75193
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   395
    }
d6aa59dde5b3 tuned signature;
wenzelm
parents: 75192
diff changeset
   396
  }
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   397
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   398
  class Symbols(val entries: List[Entry]) {
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   399
    /* printed form: comma-separated entries wrapped in "Symbols(" ... ")" */
    override def toString: String = "Symbols(" + entries.mkString(", ") + ")"
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   400
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   401
53400
673eb869e6ee expose basic Symbol.properties (uninterpreted);
wenzelm
parents: 53337
diff changeset
   402
    /* basic properties */
673eb869e6ee expose basic Symbol.properties (uninterpreted);
wenzelm
parents: 53337
diff changeset
   403
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   404
    private val entries_map: Map[Symbol, Entry] =
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   405
      (for (entry <- entries.iterator) yield entry.symbol -> entry).toMap
31651
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   406
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   407
    /* entry registered for sym in entries_map, or None if there is none */
    def get(sym: Symbol): Option[Entry] = entries_map.get(sym)
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   408
    /* true iff sym is a key of entries_map */
    def defined(sym: Symbol): Boolean = entries_map.contains(sym)
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   409
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   410
    def defined_code(sym: Symbol): Boolean =
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   411
      get(sym) match { case Some(entry) => entry.code.isDefined case _ => false }
31651
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   412
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   413
    val code_defined: Int => Boolean =
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   414
      (for (entry <- entries.iterator; code <- entry.code) yield code).toSet
50136
a96bd08258a2 support for symbol groups, retaining original order of declarations;
wenzelm
parents: 48922
diff changeset
   415
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   416
    val groups_code: List[(String, List[Symbol])] =
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   417
      (for {
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   418
        entry <- entries.iterator if entry.code.isDefined
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   419
        group <- entry.groups.iterator
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   420
      } yield entry.symbol -> group)
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   421
        .toList.groupBy(_._2).toList.map({ case (g, list) => (g, list.map(_._1)) }).sortBy(_._1)
75195
596e77cda169 clarified signature;
wenzelm
parents: 75194
diff changeset
   422
75199
1ced8ee860e2 clarified signature;
wenzelm
parents: 75198
diff changeset
   423
    def get_abbrevs(sym: Symbol): List[String] =
1ced8ee860e2 clarified signature;
wenzelm
parents: 75198
diff changeset
   424
      get(sym) match { case Some(entry) => entry.abbrevs case _ => Nil }
75195
596e77cda169 clarified signature;
wenzelm
parents: 75194
diff changeset
   425
43488
39035276927c Symbol.is_ctrl: handle decoded version as well;
wenzelm
parents: 43487
diff changeset
   426
43490
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   427
    /* recoding */
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   428
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   429
    private val (decoder, encoder) = {
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   430
      val mapping =
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   431
        for (entry <- entries; s <- entry.decode) yield entry.symbol -> s
61376
93224745477f output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents: 61174
diff changeset
   432
      (new Recoder(mapping), new Recoder(for ((x, y) <- mapping) yield (y, x)))
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   433
    }
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   434
34098
2b9cdf23c188 tuned signature;
wenzelm
parents: 34001
diff changeset
   435
    /* recode text via the decoder table (entry symbol -> its decoded string) */
    def decode(text: String): String = decoder.recode(text)
2b9cdf23c188 tuned signature;
wenzelm
parents: 34001
diff changeset
   436
    /* recode text via the encoder table (decoded string -> entry symbol) */
    def encode(text: String): String = encoder.recode(text)
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   437
75198
19f1d8c074c8 tuned signature;
wenzelm
parents: 75197
diff changeset
   438
    private def recode_set(elems: Iterable[String]): Set[String] =
19f1d8c074c8 tuned signature;
wenzelm
parents: 75197
diff changeset
   439
      (elems.iterator ++ elems.iterator.map(decode)).toSet
43490
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   440
75198
19f1d8c074c8 tuned signature;
wenzelm
parents: 75197
diff changeset
   441
    private def recode_map[A](elems: Iterable[(String, A)]): Map[String, A] =
19f1d8c074c8 tuned signature;
wenzelm
parents: 75197
diff changeset
   442
      (elems.iterator ++ elems.iterator.map({ case (sym, a) => (decode(sym), a) })).toMap
43490
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   443
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   444
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   445
    /* user fonts */
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   446
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   447
    val fonts: Map[Symbol, String] =
75198
19f1d8c074c8 tuned signature;
wenzelm
parents: 75197
diff changeset
   448
      recode_map(for (entry <- entries; font <- entry.font) yield entry.symbol -> font)
43490
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   449
75192
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   450
    /* distinct font names occurring as values of the fonts map */
    val font_names: List[String] =
      fonts.iterator.map({ case (_, font) => font }).toSet.toList
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   451
    /* position of each font name within font_names */
    val font_index: Map[String, Int] = font_names.zipWithIndex.toMap
43490
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   452
75195
596e77cda169 clarified signature;
wenzelm
parents: 75194
diff changeset
   453
    /* index (within font_names) of the font assigned to sym, if any */
    def lookup_font(sym: Symbol): Option[Int] =
      for (font <- fonts.get(sym)) yield font_index(font)
596e77cda169 clarified signature;
wenzelm
parents: 75194
diff changeset
   454
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   455
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   456
    /* classification */
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   457
75198
19f1d8c074c8 tuned signature;
wenzelm
parents: 75197
diff changeset
   458
    val letters: Set[String] = recode_set(List(
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   459
      "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   460
      "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   461
      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   462
      "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   463
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   464
      "\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   465
      "\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   466
      "\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   467
      "\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   468
      "\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   469
      "\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   470
      "\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   471
      "\\<x>", "\\<y>", "\\<z>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   472
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   473
      "\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   474
      "\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   475
      "\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   476
      "\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   477
      "\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   478
      "\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   479
      "\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   480
      "\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   481
      "\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   482
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   483
      "\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   484
      "\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   485
      "\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   486
      "\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   487
      "\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   488
      "\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>",
75198
19f1d8c074c8 tuned signature;
wenzelm
parents: 75197
diff changeset
   489
      "\\<Psi>", "\\<Omega>"))
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   490
75198
19f1d8c074c8 tuned signature;
wenzelm
parents: 75197
diff changeset
   491
    // Blank symbols: space, tab, newline, vertical tab, form feed, carriage return and CR-LF,
    // passed through recode_set (defined outside this view; presumably adds recoded variants).
    val blanks: Set[String] = recode_set(List(space, "\t", "\n", "\u000B", "\f", "\r", "\r\n"))
34138
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   492
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   493
    // Single-character strings accepted by Symbol.is_symbolic_char.
    // Explicit result type, consistent with the neighbouring `blanks` and `symbolic` tables.
    val sym_chars: Set[String] =
      Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   495
75197
29e11ce79a52 clarified signature;
wenzelm
parents: 75195
diff changeset
   496
    // Symbols of all entries whose name satisfies raw_symbolic, passed through recode_set.
    val symbolic: Set[String] =
      recode_set(entries.filter(entry => raw_symbolic(entry.symbol)).map(_.symbol))
44992
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   498
43455
4b4b93672f15 some unicode chars for special control symbols;
wenzelm
parents: 43447
diff changeset
   499
63528
0f39f59317c1 completion templates for commands involving "begin ... end" blocks;
wenzelm
parents: 62528
diff changeset
   500
    /* misc symbols */
61579
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   501
75192
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   502
    // Result of decode applied to `newline`, computed once when this table is built.
    val newline_decoded: Symbol = decode(newline)
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   503
    // Result of decode applied to `comment`, computed once when this table is built.
    val comment_decoded: Symbol = decode(comment)
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   504
    // Result of decode applied to `cancel`, computed once when this table is built.
    val cancel_decoded: Symbol = decode(cancel)
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   505
    // Result of decode applied to `latex`, computed once when this table is built.
    val latex_decoded: Symbol = decode(latex)
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   506
    // Result of decode applied to `marker`, computed once when this table is built.
    val marker_decoded: Symbol = decode(marker)
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   507
    // Result of decode applied to the cartouche `open` symbol, computed once.
    val open_decoded: Symbol = decode(open)
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   508
    // Result of decode applied to the cartouche `close` symbol, computed once.
    val close_decoded: Symbol = decode(close)
55033
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   509
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   510
43488
39035276927c Symbol.is_ctrl: handle decoded version as well;
wenzelm
parents: 43487
diff changeset
   511
    /* control symbols */
39035276927c Symbol.is_ctrl: handle decoded version as well;
wenzelm
parents: 43487
diff changeset
   512
59107
48429ad6d0c8 tuned signature;
wenzelm
parents: 57840
diff changeset
   513
    // Decoded forms of all entries whose symbol starts with the control prefix "\<^";
    // entries without a decoded form contribute nothing.
    val control_decoded: Set[Symbol] =
      entries.iterator
        .filter(_.symbol.startsWith("\\<^"))
        .flatMap(_.decode)
        .toSet
43488
39035276927c Symbol.is_ctrl: handle decoded version as well;
wenzelm
parents: 43487
diff changeset
   516
75192
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   517
    // Result of decode applied to `sub`, computed once when this table is built.
    val sub_decoded: Symbol = decode(sub)
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   518
    // Result of decode applied to `sup`, computed once when this table is built.
    val sup_decoded: Symbol = decode(sup)
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   519
    // Result of decode applied to `bold`, computed once when this table is built.
    val bold_decoded: Symbol = decode(bold)
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   520
    // Result of decode applied to `emph`, computed once when this table is built.
    val emph_decoded: Symbol = decode(emph)
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   521
    // Result of decode applied to `bsub`, computed once when this table is built.
    val bsub_decoded: Symbol = decode(bsub)
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   522
    // Result of decode applied to `esub`, computed once when this table is built.
    val esub_decoded: Symbol = decode(esub)
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   523
    // Result of decode applied to `bsup`, computed once when this table is built.
    val bsup_decoded: Symbol = decode(bsup)
7d680dcd69b1 misc tuning, based on suggestions by IntelliJ IDEA;
wenzelm
parents: 73359
diff changeset
   524
    // Result of decode applied to `esup`, computed once when this table is built.
    val esup_decoded: Symbol = decode(esup)
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   525
  }
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   526
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   527
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   528
  /* tables */
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   529
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   530
  // Decode text by delegating to the `symbols` tables.
  def decode(text: String): String = symbols.decode(text)
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   531
  // Encode text by delegating to the `symbols` tables.
  def encode(text: String): String = symbols.encode(text)
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   532
73030
72a8fdfa185d support more direct hash-consing via XML.Cache;
wenzelm
parents: 72866
diff changeset
   533
  // Decode symbols in the whole text first, then parse the result as a YXML body.
  def decode_yxml(text: String, cache: XML.Cache = XML.Cache.none): XML.Body = {
    val decoded = decode(text)
    YXML.parse_body(decoded, cache = cache)
  }
72a8fdfa185d support more direct hash-consing via XML.Cache;
wenzelm
parents: 72866
diff changeset
   535
72a8fdfa185d support more direct hash-consing via XML.Cache;
wenzelm
parents: 72866
diff changeset
   536
  // Same as decode_yxml, but parses via YXML.parse_body_failsafe.
  def decode_yxml_failsafe(text: String, cache: XML.Cache = XML.Cache.none): XML.Body = {
    val decoded = decode(text)
    YXML.parse_body_failsafe(decoded, cache = cache)
  }
72a8fdfa185d support more direct hash-consing via XML.Cache;
wenzelm
parents: 72866
diff changeset
   538
65344
b99283eed13c clarified YXML vs. symbol encoding: operate on whole message;
wenzelm
parents: 65335
diff changeset
   539
  // Render the body as a YXML string, then encode symbols over the whole result.
  def encode_yxml(body: XML.Body): String = encode(YXML.string_of_body(body))
53337
b3817a0e3211 sort items according to persistent history of frequency of use;
wenzelm
parents: 53316
diff changeset
   540
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   541
  // Decode text, but only accept the result if encoding it reproduces the input exactly.
  // Otherwise raise an error listing each offending symbol once, in order of first occurrence.
  def decode_strict(text: String): String = {
    val decoded = decode(text)
    if (encode(decoded) != text) {
      // symbols that do not survive a decode/encode round trip
      val bad = iterator(text).filter(s => encode(decode(s)) != s).toList.distinct
      error("Bad Unicode symbols in text: " + commas_quote(bad))
    }
    else decoded
  }
674893679352 prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents: 50238
diff changeset
   551
72866
1d21b4c8023d tuned signature;
wenzelm
parents: 72744
diff changeset
   552
  // Produce output text: decoded when unicode_symbols is set, encoded otherwise.
  def output(unicode_symbols: Boolean, text: String): String =
    if (!unicode_symbols) encode(text)
    else decode(text)
1d21b4c8023d tuned signature;
wenzelm
parents: 72744
diff changeset
   554
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   555
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   556
  /* classification */
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   557
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   558
  // True if sym is a member of the `letters` table of `symbols`.
  def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym)
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   559
  // True if sym is exactly one character in the range '0'..'9'.
  def is_digit(sym: Symbol): Boolean =
    sym.length == 1 && {
      val c = sym.charAt(0)
      c >= '0' && c <= '9'
    }
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   560
  // True for the two quasi-letters: underscore and prime.
  def is_quasi(sym: Symbol): Boolean =
    sym match {
      case "_" | "'" => true
      case _ => false
    }
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   561
  // True if sym is a letter, a digit, or a quasi-letter (underscore or prime).
  def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym)
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   562
  // True if sym is a member of the `blanks` table of `symbols`.
  def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym)
44992
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   563
55033
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   564
67438
fdb7b995974d clarified modules;
wenzelm
parents: 67435
diff changeset
   565
  /* symbolic newline */
63528
0f39f59317c1 completion templates for commands involving "begin ... end" blocks;
wenzelm
parents: 62528
diff changeset
   566
0f39f59317c1 completion templates for commands involving "begin ... end" blocks;
wenzelm
parents: 62528
diff changeset
   567
  // Encoded form of the symbolic newline.
  val newline: Symbol = "\\<newline>"
0f39f59317c1 completion templates for commands involving "begin ... end" blocks;
wenzelm
parents: 62528
diff changeset
   568
  // Decoded counterpart of `newline`, taken from the `symbols` tables.
  def newline_decoded: Symbol = symbols.newline_decoded
0f39f59317c1 completion templates for commands involving "begin ... end" blocks;
wenzelm
parents: 62528
diff changeset
   569
0f39f59317c1 completion templates for commands involving "begin ... end" blocks;
wenzelm
parents: 62528
diff changeset
   570
  // Replace every symbol equal to "\n" by the decoded symbolic newline.
  // Text without any '\n' character is returned unchanged.
  def print_newlines(str: String): String =
    if (!str.contains('\n')) str
    else iterator(str).map(s => if (s == "\n") newline_decoded else s).mkString
61579
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   574
67438
fdb7b995974d clarified modules;
wenzelm
parents: 67435
diff changeset
   575
fdb7b995974d clarified modules;
wenzelm
parents: 67435
diff changeset
   576
  /* formal comments */
fdb7b995974d clarified modules;
wenzelm
parents: 67435
diff changeset
   577
61579
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   578
  // Encoded form of the formal comment symbol.
  val comment: Symbol = "\\<comment>"
67449
1caeb087d957 tuned signature;
wenzelm
parents: 67438
diff changeset
   579
  // Encoded form of the control symbol named "cancel".
  val cancel: Symbol = "\\<^cancel>"
1caeb087d957 tuned signature;
wenzelm
parents: 67438
diff changeset
   580
  // Encoded form of the control symbol named "latex".
  val latex: Symbol = "\\<^latex>"
69891
def3ec9cdb7e document markers are formal comments, and may thus occur anywhere in the command-span;
wenzelm
parents: 69887
diff changeset
   581
  // Encoded form of the control symbol named "marker".
  val marker: Symbol = "\\<^marker>"
67449
1caeb087d957 tuned signature;
wenzelm
parents: 67438
diff changeset
   582
61579
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   583
  // Decoded counterpart of `comment`, taken from the `symbols` tables.
  def comment_decoded: Symbol = symbols.comment_decoded
67438
fdb7b995974d clarified modules;
wenzelm
parents: 67435
diff changeset
   584
  // Decoded counterpart of `cancel`, taken from the `symbols` tables.
  def cancel_decoded: Symbol = symbols.cancel_decoded
fdb7b995974d clarified modules;
wenzelm
parents: 67435
diff changeset
   585
  // Decoded counterpart of `latex`, taken from the `symbols` tables.
  def latex_decoded: Symbol = symbols.latex_decoded
69891
def3ec9cdb7e document markers are formal comments, and may thus occur anywhere in the command-span;
wenzelm
parents: 69887
diff changeset
   586
  // Decoded counterpart of `marker`, taken from the `symbols` tables.
  def marker_decoded: Symbol = symbols.marker_decoded
61579
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   587
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   588
55033
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   589
  /* cartouches */
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   590
61579
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   591
  // Encoded form of the symbol that opens a cartouche.
  val open: Symbol = "\\<open>"
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   592
  // Encoded form of the symbol that closes a cartouche.
  val close: Symbol = "\\<close>"
55033
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   593
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   594
  // Decoded counterpart of `open`, taken from the `symbols` tables.
  def open_decoded: Symbol = symbols.open_decoded
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   595
  // Decoded counterpart of `close`, taken from the `symbols` tables.
  def close_decoded: Symbol = symbols.close_decoded
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   596
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   597
  // True if sym is the cartouche open symbol, in either decoded or encoded form.
  def is_open(sym: Symbol): Boolean = sym == open_decoded || sym == open
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   598
  // True if sym is the cartouche close symbol, in either decoded or encoded form.
  def is_close(sym: Symbol): Boolean = sym == close_decoded || sym == close
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   599
67131
85d10959c2e4 tuned signature;
wenzelm
parents: 67127
diff changeset
   600
  // Wrap s in the encoded open/close cartouche symbols; s itself is not modified.
  def cartouche(s: String): String = open + s + close
85d10959c2e4 tuned signature;
wenzelm
parents: 67127
diff changeset
   601
  // Wrap s in the decoded open/close cartouche symbols; s itself is not modified.
  def cartouche_decoded(s: String): String = open_decoded + s + close_decoded
85d10959c2e4 tuned signature;
wenzelm
parents: 67127
diff changeset
   602
55033
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   603
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   604
  /* symbols for symbolic identifiers */
44992
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   605
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   606
  // Purely syntactic test on the encoded form: "\<...>" but not a control symbol "\<^...>".
  private def raw_symbolic(sym: Symbol): Boolean = {
    val bracketed = sym.startsWith("\\<") && sym.endsWith(">")
    bracketed && !sym.startsWith("\\<^")
  }
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   608
55033
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   609
  // True for symbolic symbols, recognised either by encoded syntax or by membership in the
  // `symbolic` table of `symbols`. Cartouche delimiters are always excluded.
  def is_symbolic(sym: Symbol): Boolean =
    if (is_open(sym) || is_close(sym)) false
    else raw_symbolic(sym) || symbols.symbolic(sym)
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   611
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   612
  // True if sym is a member of the `sym_chars` table of `symbols`.
  def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym)
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   613
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   614
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   615
  /* control symbols */
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   616
67127
cf111622c9f8 font style for literal control symbols, notably for antiquotations;
wenzelm
parents: 66919
diff changeset
   617
  // Leading delimiter of an encoded control symbol.
  val control_prefix = "\\<^"
cf111622c9f8 font style for literal control symbols, notably for antiquotations;
wenzelm
parents: 66919
diff changeset
   618
  // Trailing delimiter of an encoded control symbol.
  val control_suffix = ">"
cf111622c9f8 font style for literal control symbols, notably for antiquotations;
wenzelm
parents: 66919
diff changeset
   619
67255
f1f983484878 HTML rendering of \<^control> as in Isabelle/jEdit;
wenzelm
parents: 67131
diff changeset
   620
  // Name of an encoded control symbol, i.e. the text between control_prefix and
  // control_suffix; None if sym is not an encoded control symbol.
  def control_name(sym: Symbol): Option[String] =
    if (!is_control_encoded(sym)) None
    else Some(sym.stripPrefix(control_prefix).stripSuffix(control_suffix))
f1f983484878 HTML rendering of \<^control> as in Isabelle/jEdit;
wenzelm
parents: 67131
diff changeset
   624
67127
cf111622c9f8 font style for literal control symbols, notably for antiquotations;
wenzelm
parents: 66919
diff changeset
   625
  // True if sym has the encoded control shape: starts with control_prefix and ends with
  // control_suffix.
  def is_control_encoded(sym: Symbol): Boolean = {
    val prefixed = sym.startsWith(control_prefix)
    prefixed && sym.endsWith(control_suffix)
  }
cf111622c9f8 font style for literal control symbols, notably for antiquotations;
wenzelm
parents: 66919
diff changeset
   627
59107
48429ad6d0c8 tuned signature;
wenzelm
parents: 57840
diff changeset
   628
  // True for control symbols in encoded form, or in decoded form as recorded in the
  // `control_decoded` table of `symbols`.
  def is_control(sym: Symbol): Boolean =
    is_control_encoded(sym) || symbols.control_decoded(sym)
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   630
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   631
  // True unless sym is blank, a control symbol, a cartouche delimiter, malformed, or the
  // double-quote character.
  def is_controllable(sym: Symbol): Boolean =
    !(is_blank(sym) || is_control(sym) || is_open(sym) || is_close(sym) ||
      is_malformed(sym) || sym == "\"")
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   634
73208
e53f4c5927a1 tuned signature: more types;
wenzelm
parents: 73120
diff changeset
   635
  // Encoded form of the control symbol named "sub".
  val sub: Symbol = "\\<^sub>"
e53f4c5927a1 tuned signature: more types;
wenzelm
parents: 73120
diff changeset
   636
  // Encoded form of the control symbol named "sup".
  val sup: Symbol = "\\<^sup>"
e53f4c5927a1 tuned signature: more types;
wenzelm
parents: 73120
diff changeset
   637
  // Encoded form of the control symbol named "bold".
  val bold: Symbol = "\\<^bold>"
e53f4c5927a1 tuned signature: more types;
wenzelm
parents: 73120
diff changeset
   638
  // Encoded form of the control symbol named "emph".
  val emph: Symbol = "\\<^emph>"
e53f4c5927a1 tuned signature: more types;
wenzelm
parents: 73120
diff changeset
   639
  // Encoded form of the control symbol named "bsub".
  val bsub: Symbol = "\\<^bsub>"
e53f4c5927a1 tuned signature: more types;
wenzelm
parents: 73120
diff changeset
   640
  // Encoded form of the control symbol named "esub".
  val esub: Symbol = "\\<^esub>"
e53f4c5927a1 tuned signature: more types;
wenzelm
parents: 73120
diff changeset
   641
  // Encoded form of the control symbol named "bsup".
  val bsup: Symbol = "\\<^bsup>"
e53f4c5927a1 tuned signature: more types;
wenzelm
parents: 73120
diff changeset
   642
  // Encoded form of the control symbol named "esup".
  val esup: Symbol = "\\<^esup>"
62103
wenzelm
parents: 61959
diff changeset
   643
44238
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   644
  // Decoded counterpart of `sub`, taken from the `symbols` tables.
  def sub_decoded: Symbol = symbols.sub_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   645
  // Decoded counterpart of `sup`, taken from the `symbols` tables.
  def sup_decoded: Symbol = symbols.sup_decoded
62103
wenzelm
parents: 61959
diff changeset
   646
  // Decoded counterpart of `bold`, taken from the `symbols` tables.
  def bold_decoded: Symbol = symbols.bold_decoded
wenzelm
parents: 61959
diff changeset
   647
  // Decoded counterpart of `emph`, taken from the `symbols` tables.
  def emph_decoded: Symbol = symbols.emph_decoded
44238
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   648
  // Decoded counterpart of `bsub`, taken from the `symbols` tables.
  def bsub_decoded: Symbol = symbols.bsub_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   649
  // Decoded counterpart of `esub`, taken from the `symbols` tables.
  def esub_decoded: Symbol = symbols.esub_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   650
  // Decoded counterpart of `bsup`, taken from the `symbols` tables.
  def bsup_decoded: Symbol = symbols.bsup_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   651
  // Decoded counterpart of `esup`, taken from the `symbols` tables.
  def esup_decoded: Symbol = symbols.esup_decoded
71649
2acdbb6ee521 pretty formatting as in Isabelle/ML;
wenzelm
parents: 71601
diff changeset
   652
2acdbb6ee521 pretty formatting as in Isabelle/ML;
wenzelm
parents: 71601
diff changeset
   653
2acdbb6ee521 pretty formatting as in Isabelle/ML;
wenzelm
parents: 71601
diff changeset
   654
  /* metric */
2acdbb6ee521 pretty formatting as in Isabelle/ML;
wenzelm
parents: 71601
diff changeset
   655
2acdbb6ee521 pretty formatting as in Isabelle/ML;
wenzelm
parents: 71601
diff changeset
   656
  // ASCII symbols are printable according to is_ascii_printable on their first character;
  // any other symbol is printable unless it is a control symbol.
  def is_printable(sym: Symbol): Boolean =
    if (!is_ascii(sym)) !is_control(sym)
    else is_ascii_printable(sym(0))
2acdbb6ee521 pretty formatting as in Isabelle/ML;
wenzelm
parents: 71601
diff changeset
   659
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 75252
diff changeset
   660
  // Pretty-printing metric based on symbols: each symbol of the text contributes a width
  // of 2, 1 or 0, and the width of a string is the sum over its symbols.
  object Metric extends Pretty.Metric {
    val unit = 1.0

    def apply(str: String): Double =
      iterator(str).map { s =>
        // classification is done on the encoded form of each symbol
        val encoded = encode(s)
        val is_long = encoded.startsWith("\\<long") || encoded.startsWith("\\<Long")
        if (is_long) 2
        else if (is_printable(encoded)) 1
        else 0
      }.sum
  }
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
   670
}