src/Pure/General/symbol.scala
author wenzelm
Sun, 06 Mar 2016 13:19:19 +0100
changeset 62528 c8c532b22947
parent 62230 949d2c9f6ff7
child 63528 0f39f59317c1
permissions -rw-r--r--
clarified ML syntax for strings concerning UTF8;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/General/symbol.scala
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     3
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
     4
Detecting and recoding Isabelle symbols.
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     5
*/
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     6
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     7
package isabelle
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     8
55618
995162143ef4 tuned imports;
wenzelm
parents: 55497
diff changeset
     9
36011
3ff725ac13a4 adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
wenzelm
parents: 34316
diff changeset
    10
import scala.collection.mutable
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    11
import scala.util.matching.Regex
48922
6f3ccfa7818d more precise counting of line/column;
wenzelm
parents: 48775
diff changeset
    12
import scala.annotation.tailrec
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    13
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    14
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    15
object Symbol
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    16
{
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
    17
  // Isabelle symbols are represented as plain strings (type alias only)
  type Symbol = String
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
    18
55884
f2c0eaedd579 tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents: 55618
diff changeset
    19
  // counting Isabelle symbols, starting from 1
f2c0eaedd579 tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents: 55618
diff changeset
    20
  // alias of Text.Offset; used below for offsets counted in symbols (see decode)
  type Offset = Text.Offset
f2c0eaedd579 tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents: 55618
diff changeset
    21
  // alias of Text.Range; used below for ranges given in symbol offsets (see decode)
  type Range = Text.Range
f2c0eaedd579 tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
wenzelm
parents: 55618
diff changeset
    22
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
    23
61865
6dcc9e4f1aa6 tuned signature;
wenzelm
parents: 61579
diff changeset
    24
  /* spaces */
6dcc9e4f1aa6 tuned signature;
wenzelm
parents: 61579
diff changeset
    25
6dcc9e4f1aa6 tuned signature;
wenzelm
parents: 61579
diff changeset
    26
  // a single blank, as a string
  val space = " "

  // preallocated run of blanks; shorter runs are cut from it by spaces()
  private val static_spaces = space * 4000

  // String of n blanks; n must be non-negative (checked via require).
  def spaces(n: Int): String =
  {
    require(n >= 0)
    val available = static_spaces.length
    if (n >= available) space * n  // too long for the preallocated run
    else static_spaces.substring(0, n)
  }
6dcc9e4f1aa6 tuned signature;
wenzelm
parents: 61579
diff changeset
    36
6dcc9e4f1aa6 tuned signature;
wenzelm
parents: 61579
diff changeset
    37
43418
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    38
  /* ASCII characters */
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    39
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    40
  // true for 'A'..'Z' and 'a'..'z' only
  def is_ascii_letter(c: Char): Boolean =
    (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
55497
c0f8aebfb43d lexical syntax for SML (in Scala);
wenzelm
parents: 55430
diff changeset
    41
43418
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    42
  // true for '0'..'9' only
  def is_ascii_digit(c: Char): Boolean =
    c >= '0' && c <= '9'
55497
c0f8aebfb43d lexical syntax for SML (in Scala);
wenzelm
parents: 55430
diff changeset
    43
c0f8aebfb43d lexical syntax for SML (in Scala);
wenzelm
parents: 55430
diff changeset
    44
  // true for hexadecimal digits: '0'..'9', 'A'..'F', 'a'..'f'
  def is_ascii_hex(c: Char): Boolean =
  {
    val decimal = c >= '0' && c <= '9'
    val upper = c >= 'A' && c <= 'F'
    val lower = c >= 'a' && c <= 'f'
    decimal || upper || lower
  }
c0f8aebfb43d lexical syntax for SML (in Scala);
wenzelm
parents: 55430
diff changeset
    46
43418
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    47
  // "quasi" letters: underscore and single quote
  def is_ascii_quasi(c: Char): Boolean =
    c match {
      case '_' | '\'' => true
      case _ => false
    }
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    48
55497
c0f8aebfb43d lexical syntax for SML (in Scala);
wenzelm
parents: 55430
diff changeset
    49
  // blanks: space, tab, newline, vertical tab, form feed, carriage return
  def is_ascii_blank(c: Char): Boolean =
    c match {
      case ' ' | '\t' | '\n' | '\u000b' | '\f' | '\r' => true
      case _ => false
    }
c0f8aebfb43d lexical syntax for SML (in Scala);
wenzelm
parents: 55430
diff changeset
    50
43418
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    51
  // letter, digit, or quasi letter (underscore / single quote)
  def is_ascii_letdig(c: Char): Boolean =
  {
    val alphanumeric = is_ascii_letter(c) || is_ascii_digit(c)
    alphanumeric || is_ascii_quasi(c)
  }
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    53
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    54
  // non-empty string that starts with an ASCII letter and consists of
  // letters, digits and quasi letters only
  def is_ascii_identifier(s: String): Boolean =
    s.nonEmpty &&
    is_ascii_letter(s.charAt(0)) &&
    s.forall(c => is_ascii_letdig(c))
43418
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    56
62528
c8c532b22947 clarified ML syntax for strings concerning UTF8;
wenzelm
parents: 62230
diff changeset
    57
  // The preallocated one-character symbol for an ASCII character;
  // characters above code 127 are rejected via error(...).
  def ascii(c: Char): Symbol =
  {
    val code = c.toInt
    if (code <= 127) char_symbols(code)
    else error("Non-ASCII character: " + quote(c.toString))
  }
c8c532b22947 clarified ML syntax for strings concerning UTF8;
wenzelm
parents: 62230
diff changeset
    62
43418
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    63
48775
92ceb058391f simplified symbol matching;
wenzelm
parents: 48774
diff changeset
    64
  /* symbol matching */
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    65
48775
92ceb058391f simplified symbol matching;
wenzelm
parents: 48774
diff changeset
    66
  // Total pattern for one symbol at the start of the input, written in
  // extended syntax (x: whitespace in the pattern is ignored, s: '.' matches
  // any character).  Alternatives, in order of preference:
  //   - a surrogate pair (high surrogate followed by low surrogate)
  //   - the two-character newline \r\n
  //   - backslash, '<', then either "^raw:" with restricted raw content, or an
  //     optional '^' and an optional identifier; the closing '>' is optional,
  //     so unterminated (malformed) symbols are still consumed as one unit
  //   - any single character (catch-all)
  private val symbol_total =
  {
    val source = """(?xs)
    [\ud800-\udbff][\udc00-\udfff] | \r\n |
    \\ < (?: \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* | \^? ([A-Za-z][A-Za-z0-9_']*)? ) >? |
    ."""
    new Regex(source)
  }
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    70
48775
92ceb058391f simplified symbol matching;
wenzelm
parents: 48774
diff changeset
    71
  // A plain character cannot start a multi-character symbol: it is neither
  // '\r' (possible \r\n), nor backslash, nor a high surrogate.
  private def is_plain(c: Char): Boolean =
    c != '\r' && c != '\\' && !Character.isHighSurrogate(c)
48773
0e1bab274672 more liberal scanning of potentially malformed symbols;
wenzelm
parents: 48704
diff changeset
    73
0e1bab274672 more liberal scanning of potentially malformed symbols;
wenzelm
parents: 48704
diff changeset
    74
  // Checks whether a symbol (as produced by the symbol pattern) is malformed:
  //   - length 1: isolated surrogate or the replacement character U+FFFD
  //   - length 2: anything other than \r\n or a proper surrogate pair
  //   - otherwise: not terminated by '>', or one of the empty forms \<> and \<^>
  def is_malformed(s: Symbol): Boolean =
  {
    val len = s.length
    if (len == 1) {
      val ch = s(0)
      c_is_bad(ch)
    }
    else if (len == 2) {
      val first = s(0)
      val second = s(1)
      val crlf = first == '\r' && second == '\n'
      !(crlf || Character.isSurrogatePair(first, second))
    }
    else !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
  }

  // single-character case of is_malformed
  private def c_is_bad(c: Char): Boolean =
    Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd'
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    85
54734
b91afc3aa3e6 clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
wenzelm
parents: 53400
diff changeset
    86
  // physical newline symbols: \r\n, \r, or \n
  def is_newline(s: Symbol): Boolean =
  {
    val crlf = s == "\r\n"
    crlf || s == "\r" || s == "\n"
  }
38877
682c4932b3cc Command.newlines: account for physical newlines;
wenzelm
parents: 38479
diff changeset
    88
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    89
  // Determines the length (in chars) of the symbol starting at a given
  // position of the text.
  class Matcher(text: CharSequence)
  {
    // one regex matcher over the text, re-targeted via region() on each call
    private val matcher = symbol_total.pattern.matcher(text)

    // Length of the symbol that starts at `start`, looking no further than
    // `end`; requires 0 <= start < end <= text.length.
    def apply(start: Int, end: Int): Int =
    {
      require(0 <= start && start < end && end <= text.length)
      val first = text.charAt(start)
      if (!is_plain(first)) {
        // potential multi-character symbol: consult the total pattern
        matcher.region(start, end).lookingAt
        matcher.group.length
      }
      else 1
    }
  }
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   102
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   103
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   104
  /* iterator */
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   105
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   106
  // preallocated one-character strings for char codes 0..255
  private val char_symbols: Array[Symbol] =
    Array.tabulate(256)(code => new String(Array(code.toChar)))
8252d51d70e2 simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents: 43511
diff changeset
   108
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   109
  // Iterates over the symbols of the text, from left to right.  Single
  // characters below char_symbols.length are served from the preallocated
  // table; everything else is copied out of the text.
  def iterator(text: CharSequence): Iterator[Symbol] =
    new Iterator[Symbol]
    {
      private val matcher = new Matcher(text)
      private var i = 0  // char offset of the next symbol
      def hasNext = i < text.length
      def next =
      {
        val n = matcher(i, text.length)
        val s =
          n match {
            case 0 => ""
            case 1 =>
              val c = text.charAt(i)
              if (c >= char_symbols.length) text.subSequence(i, i + n).toString
              else char_symbols(c)
            case _ => text.subSequence(i, i + n).toString
          }
        i += n
        s
      }
    }
43489
132f99cc0a43 tuned iteration over short symbols;
wenzelm
parents: 43488
diff changeset
   130
44949
b49d7f1066c8 Symbol.explode as in ML;
wenzelm
parents: 44238
diff changeset
   131
  // the symbols of the text as a list, in the order produced by iterator(text)
  def explode(text: CharSequence): List[Symbol] = iterator(text).toList
b49d7f1066c8 Symbol.explode as in ML;
wenzelm
parents: 44238
diff changeset
   132
48922
6f3ccfa7818d more precise counting of line/column;
wenzelm
parents: 48775
diff changeset
   133
  // Advances a (line, column) position over the symbols of the text: a
  // newline symbol moves to column 1 of the next line, any other symbol
  // moves one column to the right.
  def advance_line_column(pos: (Int, Int), text: CharSequence): (Int, Int) =
    iterator(text).foldLeft(pos) {
      case ((line, column), sym) =>
        if (is_newline(sym)) (line + 1, 1)
        else (line, column + 1)
    }
6f3ccfa7818d more precise counting of line/column;
wenzelm
parents: 48775
diff changeset
   142
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   143
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   144
  /* decoding offsets */
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   145
52507
27925b58d6bd tuned signature;
wenzelm
parents: 50564
diff changeset
   146
  object Index
  {
    // Checkpoint recorded after each multi-character symbol: `chr` is the
    // char offset and `sym` the symbol count just behind that symbol.
    private sealed case class Entry(chr: Int, sym: Int)

    // index without any entries
    val empty: Index = new Index(Nil)

    // Scans the text symbol by symbol and records an entry behind every
    // symbol that is longer than one char; text without such symbols yields
    // the shared empty index.
    def apply(text: CharSequence): Index =
    {
      val matcher = new Matcher(text)
      val entries = new mutable.ListBuffer[Entry]
      var char_pos = 0
      var sym_count = 0
      while (char_pos < text.length) {
        val width = matcher(char_pos, text.length)
        char_pos += width
        sym_count += 1
        if (width > 1) entries += Entry(char_pos, sym_count)
      }
      if (entries.nonEmpty) new Index(entries.toList) else empty
    }
  }
55430
8eb6c740ec1a tuned signature;
wenzelm
parents: 55033
diff changeset
   167
56472
wenzelm
parents: 56471
diff changeset
   168
  // Maps symbol offsets to char offsets of the underlying text, based on
  // the entries recorded by Index.apply.
  final class Index private(entries: List[Index.Entry])
  {
    // structural hash of the entries, computed once (arrays hash by identity)
    private val hash: Int = entries.hashCode
    private val index: Array[Index.Entry] = entries.toArray

    // Converts a symbol offset (counted from 1) into a char offset
    // (counted from 0).
    def decode(symbol_offset: Offset): Text.Offset =
    {
      val sym = symbol_offset - 1
      val end = index.length

      // bisection for the last entry whose sym is <= the wanted position;
      // -1 if there is no such entry
      @tailrec def search(lo: Int, hi: Int): Int =
        if (lo >= hi) -1
        else {
          val mid = (lo + hi) / 2
          if (sym < index(mid).sym) search(lo, mid)
          else if (mid + 1 == end || sym < index(mid + 1).sym) mid
          else search(mid + 1, hi)
        }

      val found = search(0, end)
      if (found >= 0) {
        // continue one char per symbol from the entry found
        index(found).chr + sym - index(found).sym
      }
      else sym
    }

    // converts both ends of a symbol range via decode
    def decode(symbol_range: Range): Text.Range =
      symbol_range.map(offset => decode(offset))

    override def hashCode: Int = hash

    // structural equality on the entries
    override def equals(that: Any): Boolean =
      that match {
        case other: Index => index.sameElements(other.index)
        case _ => false
      }
  }
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   200
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   201
56746
d37a5d09a277 tuned signature;
wenzelm
parents: 56472
diff changeset
   202
  /* text chunks */
d37a5d09a277 tuned signature;
wenzelm
parents: 56472
diff changeset
   203
d37a5d09a277 tuned signature;
wenzelm
parents: 56472
diff changeset
   204
  object Text_Chunk
  {
    // names for text chunks: the default one, by document id, or by file name
    sealed abstract class Name
    case object Default extends Name
    case class Id(id: Document_ID.Generic) extends Name
    case class File(name: String) extends Name

    // chunk for the whole text: range 0..text.length plus its symbol index
    def apply(text: CharSequence): Text_Chunk =
    {
      val whole = Text.Range(0, text.length)
      val symbol_index = Index(text)
      new Text_Chunk(whole, symbol_index)
    }
  }
d37a5d09a277 tuned signature;
wenzelm
parents: 56472
diff changeset
   214
d37a5d09a277 tuned signature;
wenzelm
parents: 56472
diff changeset
   215
  // A text range together with the symbol index used to decode symbol
  // offsets; equality and hash are structural over both components.
  final class Text_Chunk private(val range: Text.Range, private val index: Index)
  {
    override def hashCode: Int = (range, index).hashCode

    override def equals(that: Any): Boolean =
      that match {
        case other: Text_Chunk if range == other.range => index == other.index
        case _ => false
      }

    override def toString: String = "Text_Chunk" + range.toString

    // decoding is delegated to the index
    def decode(symbol_offset: Offset): Text.Offset = index.decode(symbol_offset)
    def decode(symbol_range: Range): Text.Range = index.decode(symbol_range)

    // Decodes the symbol range and restricts it to this chunk; a result that
    // is missing or a singularity is discarded.  If the given range yields
    // nothing, the range shifted by -1 is tried as well.
    def incorporate(symbol_range: Range): Option[Text.Range] =
    {
      def in(r: Range): Option[Text.Range] =
        range.try_restrict(decode(r)).filter(r1 => !r1.is_singularity)

      in(symbol_range).orElse(in(symbol_range - 1))
    }
  }
d37a5d09a277 tuned signature;
wenzelm
parents: 56472
diff changeset
   240
d37a5d09a277 tuned signature;
wenzelm
parents: 56472
diff changeset
   241
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   242
  /* recoding text */
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   243
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   244
  // Table-driven recoder: every symbol of a text (as delimited by
  // symbol_total) that occurs as a key of `list` is replaced by its
  // associated value; all other text is copied unchanged.
  // Construction fails via error() if the same key is mapped twice.
  private class Recoder(list: List[(String, String)])
  {
    // Smallest and largest first character over all keys.  A character
    // outside this range cannot start any key, so recode copies it directly
    // without consulting the matcher.
    private val (min, max) =
      list.foldLeft(('\uffff', '\u0000')) {
        case ((lo, hi), (key, _)) =>
          val first = key(0)
          (if (first < lo) first else lo, if (first > hi) first else hi)
      }

    // Lookup table from key to replacement, with uniqueness check.
    private val table =
      list.foldLeft(Map.empty[String, String]) {
        case (tab, (src, dst)) =>
          tab.get(src) match {
            case Some(prev) =>
              error("Duplicate symbol mapping of " + quote(src) + " to " + quote(dst) + " vs. " + quote(prev))
            case None => tab + (src -> dst)
          }
      }

    // Recode the given text from left to right.
    def recode(text: String): String =
    {
      val length = text.length
      val m = symbol_total.pattern.matcher(text)
      val out = new StringBuilder(length)
      var pos = 0
      while (pos < length) {
        val ch = text(pos)
        if (ch < min || max < ch) {
          // cannot be the start of a key: plain copy
          out.append(ch)
          pos += 1
        }
        else {
          // take the whole symbol starting here and translate it if known
          m.region(pos, length).lookingAt()
          val sym = m.group()
          out.append(table.getOrElse(sym, sym))
          pos = m.end()
        }
      }
      out.toString
    }
  }
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
   288
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   289
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   290
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   291
  /** symbol interpretation **/
27927
eb624bb54bc6 tuned Recoder;
wenzelm
parents: 27926
diff changeset
   292
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   293
  // Symbol interpretation built on first use from the concatenated contents
  // of all existing files listed in the ISABELLE_SYMBOLS setting.
  private lazy val symbols =
  {
    val files = Path.split(Isabelle_System.getenv("ISABELLE_SYMBOLS")).filter(_.is_file)
    val contents = files.map(path => File.read(path))
    new Interpretation(cat_lines(contents))
  }
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   300
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   301
  private class Interpretation(symbols_spec: String)
29569
f3f529b5d8fb more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents: 29174
diff changeset
   302
  {
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   303
    /* read symbols */
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   304
50136
a96bd08258a2 support for symbol groups, retaining original order of declarations;
wenzelm
parents: 48922
diff changeset
   305
    // Matches a line that carries no declaration: optional whitespace,
    // optionally followed by a #-comment (extended and dot-all regex mode).
    private val No_Decl = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
a96bd08258a2 support for symbol groups, retaining original order of declarations;
wenzelm
parents: 48922
diff changeset
   306
    // Matches a property key written as "name:" and captures the name
    // (the blanks inside the pattern are ignored due to extended mode).
    private val Key = new Regex("""(?xs) (.+): """)
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   307
53316
c3e549e0d3c7 allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents: 53021
diff changeset
   308
    // Parse one declaration line: a symbol followed by whitespace-separated
    // "key: value" pairs.  Any malformed input is rejected via error(),
    // always quoting the complete declaration.
    private def read_decl(decl: String): (Symbol, Properties.T) =
    {
      def err() = error("Bad symbol declaration: " + decl)

      // Consume the words pairwise as key and value; within values the
      // character U+2423 stands for a space.
      def read_props(props: List[String]): Properties.T =
        props.grouped(2).map({
          case List(Key(name), value) => name -> value.replace('\u2423', ' ')
          case _ => err()
        }).toList

      decl.split("\\s+").toList match {
        case Nil => err()
        case sym :: props =>
          if (sym.length <= 1 || is_malformed(sym)) err()
          else (sym, read_props(props))
      }
    }
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   327
53316
c3e549e0d3c7 allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents: 53021
diff changeset
   328
    // All declared symbols with their properties, in the order of the
    // specification text.  Lines are visited from last to first, so for a
    // symbol declared more than once only the last declaration is kept.
    private val symbols: List[(Symbol, Properties.T)] =
    {
      val start = (List.empty[(Symbol, Properties.T)], Set.empty[Symbol])
      val (decls, _) =
        split_lines(symbols_spec).reverse.foldLeft(start) {
          case (acc, No_Decl()) => acc
          case (acc @ (result, seen), line) =>
            val (sym, props) = read_decl(line)
            if (seen.contains(sym)) acc
            else ((sym, props) :: result, seen + sym)
        }
      decls
    }
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   337
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   338
53400
673eb869e6ee expose basic Symbol.properties (uninterpreted);
wenzelm
parents: 53337
diff changeset
   339
    /* basic properties */
673eb869e6ee expose basic Symbol.properties (uninterpreted);
wenzelm
parents: 53337
diff changeset
   340
673eb869e6ee expose basic Symbol.properties (uninterpreted);
wenzelm
parents: 53337
diff changeset
   341
    // Uninterpreted properties of each declared symbol.
    val properties: Map[Symbol, Properties.T] = symbols.toMap
31651
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   342
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   343
    // Plain name of each symbol written as \<name> or \<^name>
    // (the name is given without the "^" of control symbols).
    val names: Map[Symbol, String] =
    {
      val Name = """\\<\^?([A-Za-z][A-Za-z0-9_']*)>""".r
      symbols.collect({ case (sym @ Name(a), _) => sym -> a }).toMap
    }
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   348
50136
a96bd08258a2 support for symbol groups, retaining original order of declarations;
wenzelm
parents: 48922
diff changeset
   349
    // Symbols arranged by their "group" properties, sorted by group name.
    // A symbol may belong to several groups; one without any "group"
    // property is placed into the group "unsorted".
    val groups: List[(String, List[Symbol])] =
    {
      val memberships: List[(Symbol, String)] =
        symbols.flatMap({ case (sym, props) =>
          val declared = props.collect({ case ("group", g) => g })
          val effective = if (declared.isEmpty) List("unsorted") else declared
          effective.map(g => sym -> g)
        })
      memberships.groupBy(_._2).toList
        .map({ case (group, members) => group -> members.map(_._1) })
        .sortBy(_._1)
    }
a96bd08258a2 support for symbol groups, retaining original order of declarations;
wenzelm
parents: 48922
diff changeset
   356
53316
c3e549e0d3c7 allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents: 53021
diff changeset
   357
    // All "abbrev" properties per symbol; the properties of each symbol are
    // passed to the multi-map in reverse order of their declaration.
    val abbrevs: Multi_Map[Symbol, String] =
    {
      val entries =
        symbols.flatMap({ case (sym, props) =>
          props.reverse.collect({ case ("abbrev", a) => sym -> a })
        })
      Multi_Map(entries: _*)
    }
43488
39035276927c Symbol.is_ctrl: handle decoded version as well;
wenzelm
parents: 43487
diff changeset
   363
61376
93224745477f output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents: 61174
diff changeset
   364
    // Unicode code point of every declared symbol, in declaration order,
    // taken from its mandatory "code" property (syntax of Integer.decode).
    //
    // Fails via error() if the property is missing or not a number, if the
    // code is below 128, or if it is not a valid Unicode code point.  The
    // last check prevents a bare IllegalArgumentException from
    // Character.toChars when the recoding tables are built from this list.
    val codes: List[(String, Int)] =
    {
      val Code = new Properties.String("code")

      // first pass: parse the "code" property of all symbols
      val parsed =
        symbols.map({ case (sym, props) =>
          val code =
            props match {
              case Code(s) =>
                try { Integer.decode(s).intValue }
                catch { case _: NumberFormatException => error("Bad code for symbol " + sym) }
              case _ => error("Missing code for symbol " + sym)
            }
          (sym, code)
        })

      // second pass: range checks (only after all codes have been parsed,
      // so parse errors take precedence over range errors)
      parsed.map({ case (sym, code) =>
        if (code < 128) error("Illegal ASCII code for symbol " + sym)
        else if (!Character.isValidCodePoint(code))
          error("Illegal Unicode code point for symbol " + sym)
        else (sym, code)
      })
    }
93224745477f output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents: 61174
diff changeset
   381
43488
39035276927c Symbol.is_ctrl: handle decoded version as well;
wenzelm
parents: 43487
diff changeset
   382
43490
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   383
    /* recoding */
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   384
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   385
    // Recoders between symbol notation and the Unicode text given by the
    // symbol codes: decoder maps symbol to text, encoder maps text to symbol.
    private val (decoder, encoder) =
    {
      val sym_to_text =
        codes.map({ case (sym, code) => sym -> new String(Character.toChars(code)) })
      val text_to_sym = sym_to_text.map(_.swap)
      (new Recoder(sym_to_text), new Recoder(text_to_sym))
    }
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   391
34098
2b9cdf23c188 tuned signature;
wenzelm
parents: 34001
diff changeset
   392
    // Replace symbols in the text by the Unicode text of their codes.
    def decode(text: String): String = decoder.recode(text)
2b9cdf23c188 tuned signature;
wenzelm
parents: 34001
diff changeset
   393
    // Replace the Unicode text of symbol codes by the corresponding symbols.
    def encode(text: String): String = encoder.recode(text)
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   394
43490
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   395
    // Set of the given elements together with their decoded versions.
    private def recode_set(elems: String*): Set[String] =
    {
      val plain = elems.toList
      val decoded = plain.map(decode)
      (plain ++ decoded).toSet
    }
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   400
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   401
    // Map of the given entries together with copies of them whose keys
    // have been decoded.
    private def recode_map[A](elems: (String, A)*): Map[String, A] =
    {
      val plain = elems.toList
      val decoded = plain.map({ case (sym, a) => decode(sym) -> a })
      (plain ++ decoded).toMap
    }
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   406
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   407
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   408
    /* user fonts */
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   409
53316
c3e549e0d3c7 allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents: 53021
diff changeset
   410
    // Accessor for the "font" property of a symbol.
    private val Font = new Properties.String("font")
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   411
    // Font name of each symbol that declares a "font" property, keyed by
    // both the symbol and its decoded version.
    val fonts: Map[Symbol, String] =
      recode_map(symbols.collect({ case (sym, Font(font)) => sym -> font }): _*)
43490
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   413
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   414
    // Distinct font names that occur in fonts.
    val font_names: List[String] = fonts.toList.map(_._2).toSet.toList
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   415
    // Position of each font name within font_names.
    val font_index: Map[String, Int] = font_names.zipWithIndex.toMap
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   416
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   417
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   418
    /* classification */
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   419
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   420
    // Symbols classified as letters, together with their decoded versions:
    // the ASCII letters, the symbols \<X> and \<XX> for every ASCII letter X,
    // and a fixed list of Greek letter symbols.
    val letters =
    {
      val ascii = (('A' to 'Z') ++ ('a' to 'z')).map(_.toString)
      val single = ascii.map(c => "\\<" + c + ">")
      val double = ascii.map(c => "\\<" + c + c + ">")
      val greek =
        List(
          "\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>",
          "\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>",
          "\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>",
          "\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>",
          "\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>",
          "\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>",
          "\\<Psi>", "\\<Omega>")
      recode_set((ascii ++ single ++ double ++ greek): _*)
    }
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   452
61865
6dcc9e4f1aa6 tuned signature;
wenzelm
parents: 61579
diff changeset
   453
    // Symbols classified as blank (space, tab, newline, vertical tab, form
    // feed, carriage return, CR-LF), together with their decoded versions.
    val blanks = recode_set(space, "\t", "\n", "\u000B", "\f", "\r", "\r\n")
34138
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   454
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   455
    // The ASCII punctuation characters counted as symbolic characters,
    // each as a one-character string.
    val sym_chars: Set[String] =
      "!#$%&*+-/<=>?@^_|~".map(_.toString).toSet
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   457
44992
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   458
    // Declared symbols accepted by raw_symbolic, together with their
    // decoded versions.
    val symbolic =
      recode_set(symbols.collect({ case (sym, _) if raw_symbolic(sym) => sym }): _*)
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   459
43455
4b4b93672f15 some unicode chars for special control symbols;
wenzelm
parents: 43447
diff changeset
   460
61579
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   461
    /* comment */
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   462
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   463
    // Decoded version of the comment symbol.
    val comment_decoded = decode(comment)
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   464
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   465
55033
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   466
    /* cartouches */
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   467
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   468
    // Decoded versions of the opening and closing cartouche symbols.
    val open_decoded: String = decode(open)
    val close_decoded: String = decode(close)
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   470
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   471
43488
39035276927c Symbol.is_ctrl: handle decoded version as well;
wenzelm
parents: 43487
diff changeset
   472
    /* control symbols */
39035276927c Symbol.is_ctrl: handle decoded version as well;
wenzelm
parents: 43487
diff changeset
   473
59107
48429ad6d0c8 tuned signature;
wenzelm
parents: 57840
diff changeset
   474
    // Decoded versions of all declared symbols that start with "\<^".
    val control_decoded: Set[Symbol] =
      symbols.collect({ case (sym, _) if sym.startsWith("\\<^") => decode(sym) }).toSet
39035276927c Symbol.is_ctrl: handle decoded version as well;
wenzelm
parents: 43487
diff changeset
   476
62103
wenzelm
parents: 61959
diff changeset
   477
    // Decoded versions of individual control symbols.
    val sub_decoded: String = decode(sub)
    val sup_decoded: String = decode(sup)
    val bold_decoded: String = decode(bold)
    val emph_decoded: String = decode(emph)
    val bsub_decoded: String = decode("\\<^bsub>")
    val esub_decoded: String = decode("\\<^esub>")
    val bsup_decoded: String = decode("\\<^bsup>")
    val esup_decoded: String = decode("\\<^esup>")
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   485
  }
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   486
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   487
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   488
  /* tables */
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   489
53400
673eb869e6ee expose basic Symbol.properties (uninterpreted);
wenzelm
parents: 53337
diff changeset
   490
  // Per-symbol property lists; forwards to `symbols.properties`.
  def properties: Map[Symbol, Properties.T] = symbols.properties
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   491
  // Map from symbol to a name string; forwards to `symbols.names`.
  def names: Map[Symbol, String] = symbols.names
50136
a96bd08258a2 support for symbol groups, retaining original order of declarations;
wenzelm
parents: 48922
diff changeset
   492
  // List of (group name, member symbols) pairs; forwards to `symbols.groups`.
  def groups: List[(String, List[Symbol])] = symbols.groups
53316
c3e549e0d3c7 allow multiple symbol properties, notably groups and abbrevs;
wenzelm
parents: 53021
diff changeset
   493
  // Multi-valued map from symbol to strings; forwards to `symbols.abbrevs`.
  def abbrevs: Multi_Map[Symbol, String] = symbols.abbrevs
61376
93224745477f output HTML text according to Isabelle/Scala Symbol.Interpretation;
wenzelm
parents: 61174
diff changeset
   494
  // List of (string, integer code) pairs; forwards to `symbols.codes`.
  def codes: List[(String, Int)] = symbols.codes
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   495
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   496
  // Decode `text` by forwarding to `symbols.decode`.
  def decode(text: String): String = symbols.decode(text)
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   497
  // Encode `text` by forwarding to `symbols.encode`.
  def encode(text: String): String = symbols.encode(text)
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   498
53337
b3817a0e3211 sort items according to persistent history of frequency of use;
wenzelm
parents: 53316
diff changeset
   499
  // XML decoder for strings: first XML.Decode.string, then symbol decoding.
  def decode_string: XML.Decode.T[String] =
    (body => {
      val raw = XML.Decode.string(body)
      decode(raw)
    })
b3817a0e3211 sort items according to persistent history of frequency of use;
wenzelm
parents: 53316
diff changeset
   500
  // XML encoder for strings: first symbol encoding, then XML.Encode.string.
  def encode_string: XML.Encode.T[String] =
    (str => {
      val encoded = encode(str)
      XML.Encode.string(encoded)
    })
b3817a0e3211 sort items according to persistent history of frequency of use;
wenzelm
parents: 53316
diff changeset
   501
50291
674893679352 prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents: 50238
diff changeset
   502
  /* Decode `text`, but insist that the result encodes back to exactly the
     original text.  Otherwise fail via error(), reporting every offending
     symbol once. */
  def decode_strict(text: String): String =
  {
    val decoded = decode(text)
    if (encode(decoded) == text) decoded
    else {
      // symbols that do not survive the decode/encode round trip:
      // duplicates removed, order of first occurrence retained
      val bad =
        iterator(text).filter(s => encode(decode(s)) != s).toList.distinct
      error("Bad Unicode symbols in text: " + commas_quote(bad))
    }
  }
674893679352 prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
wenzelm
parents: 50238
diff changeset
   513
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   514
  // Map from symbol to a font string; forwards to `symbols.fonts`.
  def fonts: Map[Symbol, String] = symbols.fonts
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   515
  // List of font names; forwards to `symbols.font_names`.
  def font_names: List[String] = symbols.font_names
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   516
  // Map from font name to an integer index; forwards to `symbols.font_index`.
  def font_index: Map[String, Int] = symbols.font_index
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   517
  // Index (via font_index) of the font registered for `sym` in symbols.fonts;
  // None if `sym` has no font entry.
  def lookup_font(sym: Symbol): Option[Int] =
    for (font <- symbols.fonts.get(sym)) yield font_index(font)
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   518
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   519
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   520
  /* classification */
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   521
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   522
  // True iff `sym` is a member of `symbols.letters`.
  def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym)
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   523
  // True iff `sym` consists of exactly one character in the range '0'..'9'.
  def is_digit(sym: Symbol): Boolean =
    sym.length == 1 && {
      val c = sym(0)
      '0' <= c && c <= '9'
    }
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   524
  // True iff `sym` is the underscore "_" or the single quote "'".
  def is_quasi(sym: Symbol): Boolean =
    sym match {
      case "_" | "'" => true
      case _ => false
    }
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   525
  // True iff `sym` is a letter, a digit, or a quasi-letter
  // (checked in that order).
  def is_letdig(sym: Symbol): Boolean =
    if (is_letter(sym) || is_digit(sym)) true
    else is_quasi(sym)
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   526
  // True iff `sym` is a member of `symbols.blanks`.
  def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym)
44992
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   527
55033
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   528
61579
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   529
  /* comment */
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   530
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   531
  // Encoded (backslash-angle-bracket) form of the comment symbol.
  val comment: Symbol = "\\<comment>"
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   532
  // Forwards to `symbols.comment_decoded`.
  def comment_decoded: Symbol = symbols.comment_decoded
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   533
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   534
55033
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   535
  /* cartouches */
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   536
61579
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   537
  // Encoded form of the cartouche opening symbol.
  val open: Symbol = "\\<open>"
634cd44bb1d3 symbolic syntax "\<comment> text";
wenzelm
parents: 61483
diff changeset
   538
  // Encoded form of the cartouche closing symbol.
  val close: Symbol = "\\<close>"
55033
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   539
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   540
  // Forwards to `symbols.open_decoded`.
  def open_decoded: Symbol = symbols.open_decoded
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   541
  // Forwards to `symbols.close_decoded`.
  def close_decoded: Symbol = symbols.close_decoded
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   542
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   543
  // True iff `sym` equals the cartouche opening symbol, in either its
  // decoded (open_decoded) or its encoded (open) form.
  def is_open(sym: Symbol): Boolean =
  {
    val decoded = open_decoded
    sym == decoded || sym == open
  }
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   544
  // True iff `sym` equals the cartouche closing symbol, in either its
  // decoded (close_decoded) or its encoded (close) form.
  def is_close(sym: Symbol): Boolean =
  {
    val decoded = close_decoded
    sym == decoded || sym == close
  }
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   545
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   546
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   547
  /* symbols for symbolic identifiers */
44992
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   548
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   549
  // True iff `sym` has the encoded shape "\<...>" but is not of the
  // control form "\<^...>".
  private def raw_symbolic(sym: Symbol): Boolean =
  {
    val bracketed = sym.startsWith("\\<") && sym.endsWith(">")
    bracketed && !sym.startsWith("\\<^")
  }
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   551
55033
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   552
  // Cartouche delimiters are never symbolic; any other symbol qualifies if
  // it has the raw encoded shape or is a member of `symbols.symbolic`.
  def is_symbolic(sym: Symbol): Boolean =
    if (is_open(sym) || is_close(sym)) false
    else raw_symbolic(sym) || symbols.symbolic.contains(sym)
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   554
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   555
  // True iff `sym` is a member of `symbols.sym_chars`.
  def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym)
8e8243975860 support for nested text cartouches;
wenzelm
parents: 54734
diff changeset
   556
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   557
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   558
  /* control symbols */
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   559
59107
48429ad6d0c8 tuned signature;
wenzelm
parents: 57840
diff changeset
   560
  // True iff `sym` has the encoded control shape "\<^...>" or is a member
  // of `symbols.control_decoded`.
  def is_control(sym: Symbol): Boolean =
  {
    val encoded_control = sym.startsWith("\\<^") && sym.endsWith(">")
    encoded_control || symbols.control_decoded.contains(sym)
  }
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   562
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   563
  // True iff `sym` is none of: blank, control symbol, cartouche open/close,
  // malformed (checked in that order).
  def is_controllable(sym: Symbol): Boolean =
    !(is_blank(sym) || is_control(sym) || is_open(sym) || is_close(sym) ||
      is_malformed(sym))
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   565
62103
wenzelm
parents: 61959
diff changeset
   566
  // Encoded control symbol "\<^sub>"; explicitly typed as Symbol, like the
  // other symbol constants of this object (e.g. `open`, `close`).
  val sub: Symbol = "\\<^sub>"
wenzelm
parents: 61959
diff changeset
   567
  // Encoded control symbol "\<^sup>"; explicitly typed as Symbol, like the
  // other symbol constants of this object (e.g. `open`, `close`).
  val sup: Symbol = "\\<^sup>"
wenzelm
parents: 61959
diff changeset
   568
  // Encoded control symbol "\<^bold>"; explicitly typed as Symbol, like the
  // other symbol constants of this object (e.g. `open`, `close`).
  val bold: Symbol = "\\<^bold>"
62104
fb73c0d7bb37 clarified symbol insertion, depending on buffer encoding;
wenzelm
parents: 62103
diff changeset
   569
  // Encoded control symbol "\<^emph>"; explicitly typed as Symbol, like the
  // other symbol constants of this object (e.g. `open`, `close`).
  val emph: Symbol = "\\<^emph>"
62103
wenzelm
parents: 61959
diff changeset
   570
44238
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   571
  // Forwards to `symbols.sub_decoded`.
  def sub_decoded: Symbol = symbols.sub_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   572
  // Forwards to `symbols.sup_decoded`.
  def sup_decoded: Symbol = symbols.sup_decoded
62103
wenzelm
parents: 61959
diff changeset
   573
  // Forwards to `symbols.bold_decoded`.
  def bold_decoded: Symbol = symbols.bold_decoded
wenzelm
parents: 61959
diff changeset
   574
  // Forwards to `symbols.emph_decoded`.
  def emph_decoded: Symbol = symbols.emph_decoded
44238
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   575
  // Forwards to `symbols.bsub_decoded`.
  def bsub_decoded: Symbol = symbols.bsub_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   576
  // Forwards to `symbols.esub_decoded`.
  def esub_decoded: Symbol = symbols.esub_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   577
  // Forwards to `symbols.bsup_decoded`.
  def bsup_decoded: Symbol = symbols.bsup_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   578
  // Forwards to `symbols.esup_decoded`.
  def esup_decoded: Symbol = symbols.esup_decoded
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
   579
}