src/Pure/General/symbol.scala
author wenzelm
Tue, 07 Aug 2012 12:10:26 +0200
changeset 48704 85a3de10567d
parent 48550 97592027a2a8
child 48773 0e1bab274672
permissions -rw-r--r--
tuned signature -- make Pretty less dependent on Symbol;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/General/symbol.scala
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     3
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
     4
Detecting and recoding Isabelle symbols.
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     5
*/
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     6
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     7
package isabelle
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     8
36011
3ff725ac13a4 adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
wenzelm
parents: 34316
diff changeset
     9
import scala.collection.mutable
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    10
import scala.util.matching.Regex
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    11
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    12
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    13
object Symbol
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    14
{
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
    15
  type Symbol = String
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
    16
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
    17
43418
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    18
  /* ASCII characters */
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    19
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    20
  /** True iff `c` is an ASCII letter, i.e. lies within A-Z or a-z. */
  def is_ascii_letter(c: Char): Boolean =
    (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    21
  /** True iff `c` is one of the ASCII decimal digits 0-9. */
  def is_ascii_digit(c: Char): Boolean =
    c >= '0' && c <= '9'
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    22
  /** True iff `c` is a quasi-letter: an underscore or a single quote. */
  def is_ascii_quasi(c: Char): Boolean =
    c match {
      case '_' | '\'' => true
      case _ => false
    }
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    23
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    24
  // True iff c is an ASCII letter, an ASCII digit, or a quasi-letter (underscore or single quote).
  def is_ascii_letdig(c: Char): Boolean =
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    25
    is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    26
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    27
  // True iff s is non-empty, starts with an ASCII letter, and every later char satisfies is_ascii_letdig.
  def is_ascii_identifier(s: String): Boolean =
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    28
    s.length > 0 && is_ascii_letter(s(0)) && s.substring(1).forall(is_ascii_letdig)
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    29
c69e9fbb81a8 recovered markup for non-alphabetic keywords;
wenzelm
parents: 40529
diff changeset
    30
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
    31
  /* Symbol regexps */
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    32
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    33
  // Regex for one plain symbol: either a single char that is not CR, backslash, a surrogate,
  // or the replacement character U+FFFD; or a high surrogate followed by a low surrogate.
  private val plain = new Regex("""(?xs)
40524
6131d7a78ad3 treat Unicode "replacement character" (i.e. decoding error) is malformed;
wenzelm
parents: 40523
diff changeset
    34
      [^\r\\\ud800-\udfff\ufffd] | [\ud800-\udbff][\udc00-\udfff] """)
37556
2bf29095d26f treat alternative newline symbols as in Isabelle/ML;
wenzelm
parents: 36816
diff changeset
    35
40522
wenzelm
parents: 40443
diff changeset
    36
  // newline as it occurs physically in text: LF, CR+LF (tried before a lone CR), or CR
  private val physical_newline = """(?xs) \n | \r\n | \r """.r
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    37
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    38
  // Regex for a well-formed named symbol: backslash and '<', then either an optional '^'
  // followed by an identifier (a letter, then letters, digits, underscores or single quotes),
  // or '^raw:' followed by raw characters other than '.' and '>'; closed by '>'.
  private val symbol = new Regex("""(?xs)
31545
5f1f0a20af4d discontinued escaped symbols such as \\<forall> -- only one backslash should be used;
wenzelm
parents: 31523
diff changeset
    39
      \\ < (?:
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    40
      \^? [A-Za-z][A-Za-z0-9_']* |
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    41
      \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    42
40523
1050315f6ee2 simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents: 40522
diff changeset
    43
  // Regex for malformed input: a high surrogate or the replacement character U+FFFD, or a
  // dangling symbol opener (backslash, '<', optional '^').
  // NOTE(review): alternation binds weaker than concatenation, so the negative lookahead on
  // `symbol` guards only the first alternative -- confirm this is intended.
  private val malformed_symbol = new Regex("(?xs) (?!" + symbol + ")" +
40529
d5fb1f1a5857 proper escape in regex;
wenzelm
parents: 40524
diff changeset
    44
    """ [\ud800-\udbff\ufffd] | \\<\^? """)
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    45
40523
1050315f6ee2 simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents: 40522
diff changeset
    46
  // Total regex: alternatives are tried in the order plain, physical newline, well-formed
  // symbol, malformed symbol, and finally any single char as catch-all.
  val regex_total =
1050315f6ee2 simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents: 40522
diff changeset
    47
    new Regex(plain + "|" + physical_newline + "|" + symbol + "|" + malformed_symbol + "| .")
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    48
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    49
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    50
  /* basic matching */
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    51
37556
2bf29095d26f treat alternative newline symbols as in Isabelle/ML;
wenzelm
parents: 36816
diff changeset
    52
  /**
   * True iff the single character `c` is a complete plain symbol by itself.
   *
   * Excluded are carriage return (it may start a CR+LF newline), backslash
   * (it may start a named symbol), every UTF-16 surrogate, and the Unicode
   * replacement character U+FFFD, which marks a decoding error.  This agrees
   * with the single-char alternative of the `plain` regex, so that U+FFFD is
   * no longer short-circuited as plain and can be recognized as malformed.
   */
  def is_plain(c: Char): Boolean =
    !(c == '\r' || c == '\\' || c == '\ufffd' || '\ud800' <= c && c <= '\udfff')
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    53
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
    54
  // True iff s is exactly LF, CR, or CR followed by LF.
  def is_physical_newline(s: Symbol): Boolean =
43675
8252d51d70e2 simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents: 43511
diff changeset
    55
    s == "\n" || s == "\r" || s == "\r\n"
38877
682c4932b3cc Command.newlines: account for physical newlines;
wenzelm
parents: 38479
diff changeset
    56
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
    57
  // True iff s is not a single plain character (per is_plain) and is matched in full by
  // the malformed_symbol regex.
  def is_malformed(s: Symbol): Boolean =
43675
8252d51d70e2 simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents: 43511
diff changeset
    58
    !(s.length == 1 && is_plain(s(0))) && malformed_symbol.pattern.matcher(s).matches
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    59
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    60
  class Matcher(text: CharSequence)
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    61
  {
40523
1050315f6ee2 simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents: 40522
diff changeset
    62
    private val matcher = regex_total.pattern.matcher(text)
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    63
    def apply(start: Int, end: Int): Int =
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    64
    {
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    65
      require(0 <= start && start < end && end <= text.length)
34316
f879b649ac4c clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
wenzelm
parents: 34193
diff changeset
    66
      if (is_plain(text.charAt(start))) 1
34138
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
    67
      else {
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    68
        matcher.region(start, end).lookingAt
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    69
        matcher.group.length
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    70
      }
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    71
    }
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    72
  }
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    73
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    74
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
    75
  /* iterator */
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
    76
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
    77
  // Preallocated one-character strings for the char codes 0 to 255, indexed by char code;
  // the iterator below returns these instead of allocating a fresh string for such chars.
  private val char_symbols: Array[Symbol] =
43675
8252d51d70e2 simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents: 43511
diff changeset
    78
    (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray
8252d51d70e2 simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents: 43511
diff changeset
    79
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
    80
  def iterator(text: CharSequence): Iterator[Symbol] =
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
    81
    new Iterator[Symbol]
40522
wenzelm
parents: 40443
diff changeset
    82
    {
43489
132f99cc0a43 tuned iteration over short symbols;
wenzelm
parents: 43488
diff changeset
    83
      private val matcher = new Matcher(text)
132f99cc0a43 tuned iteration over short symbols;
wenzelm
parents: 43488
diff changeset
    84
      private var i = 0
132f99cc0a43 tuned iteration over short symbols;
wenzelm
parents: 43488
diff changeset
    85
      def hasNext = i < text.length
132f99cc0a43 tuned iteration over short symbols;
wenzelm
parents: 43488
diff changeset
    86
      def next =
132f99cc0a43 tuned iteration over short symbols;
wenzelm
parents: 43488
diff changeset
    87
      {
132f99cc0a43 tuned iteration over short symbols;
wenzelm
parents: 43488
diff changeset
    88
        val n = matcher(i, text.length)
43675
8252d51d70e2 simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents: 43511
diff changeset
    89
        val s =
8252d51d70e2 simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents: 43511
diff changeset
    90
          if (n == 0) ""
8252d51d70e2 simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents: 43511
diff changeset
    91
          else if (n == 1) {
8252d51d70e2 simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents: 43511
diff changeset
    92
            val c = text.charAt(i)
8252d51d70e2 simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents: 43511
diff changeset
    93
            if (c < char_symbols.length) char_symbols(c)
8252d51d70e2 simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents: 43511
diff changeset
    94
            else text.subSequence(i, i + n).toString
8252d51d70e2 simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents: 43511
diff changeset
    95
          }
8252d51d70e2 simplified Symbol.iterator: produce strings, which are mostly preallocated;
wenzelm
parents: 43511
diff changeset
    96
          else text.subSequence(i, i + n).toString
43489
132f99cc0a43 tuned iteration over short symbols;
wenzelm
parents: 43488
diff changeset
    97
        i += n
132f99cc0a43 tuned iteration over short symbols;
wenzelm
parents: 43488
diff changeset
    98
        s
132f99cc0a43 tuned iteration over short symbols;
wenzelm
parents: 43488
diff changeset
    99
      }
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   100
    }
43489
132f99cc0a43 tuned iteration over short symbols;
wenzelm
parents: 43488
diff changeset
   101
44949
b49d7f1066c8 Symbol.explode as in ML;
wenzelm
parents: 44238
diff changeset
   102
  /** Collects all symbols produced by `iterator(text)` into a list, in order of occurrence. */
  def explode(text: CharSequence): List[Symbol] =
  {
    val syms = iterator(text)
    syms.toList
  }
b49d7f1066c8 Symbol.explode as in ML;
wenzelm
parents: 44238
diff changeset
   103
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   104
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   105
  /* decoding offsets */
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   106
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   107
  class Index(text: CharSequence)
31929
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   108
  {
43714
3749d1e6dde9 tuned signature;
wenzelm
parents: 43696
diff changeset
   109
    // Index entry recorded after each symbol longer than one char: chr is the character
    // offset and sym the number of symbols reached just behind that symbol.
    sealed case class Entry(chr: Int, sym: Int)
31929
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   110
    val index: Array[Entry] =
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   111
    {
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
   112
      val matcher = new Matcher(text)
31929
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   113
      val buf = new mutable.ArrayBuffer[Entry]
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   114
      var chr = 0
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   115
      var sym = 0
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   116
      while (chr < text.length) {
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
   117
        val n = matcher(chr, text.length)
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
   118
        chr += n
31929
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   119
        sym += 1
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
   120
        if (n > 1) buf += Entry(chr, sym)
31929
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   121
      }
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   122
      buf.toArray
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   123
    }
38479
e628da370072 more efficient Markup_Tree, based on branches sorted by quasi-order;
wenzelm
parents: 37556
diff changeset
   124
    def decode(sym1: Int): Int =
31929
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   125
    {
38479
e628da370072 more efficient Markup_Tree, based on branches sorted by quasi-order;
wenzelm
parents: 37556
diff changeset
   126
      val sym = sym1 - 1
31929
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   127
      val end = index.length
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   128
      def bisect(a: Int, b: Int): Int =
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   129
      {
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   130
        if (a < b) {
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   131
          val c = (a + b) / 2
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   132
          if (sym < index(c).sym) bisect(a, c)
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   133
          else if (c + 1 == end || sym < index(c + 1).sym) c
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   134
          else bisect(c + 1, b)
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   135
        }
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   136
        else -1
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   137
      }
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   138
      val i = bisect(0, end)
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   139
      if (i < 0) sym
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   140
      else index(i).chr + sym - index(i).sym
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   141
    }
38479
e628da370072 more efficient Markup_Tree, based on branches sorted by quasi-order;
wenzelm
parents: 37556
diff changeset
   142
    /** Decodes `range` by passing the offset version of `decode` to `range.map`. */
    def decode(range: Text.Range): Text.Range =
      range.map(offset => decode(offset))
31929
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   143
  }
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   144
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   145
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   146
  /* recoding text */
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   147
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   148
  private class Recoder(list: List[(String, String)])
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   149
  {
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   150
    private val (min, max) =
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   151
    {
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   152
      var min = '\uffff'
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   153
      var max = '\u0000'
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   154
      for ((x, _) <- list) {
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   155
        val c = x(0)
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   156
        if (c < min) min = c
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   157
        if (c > max) max = c
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   158
      }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   159
      (min, max)
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   160
    }
40443
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   161
    private val table =
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   162
    {
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   163
      var tab = Map[String, String]()
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   164
      for ((x, y) <- list) {
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   165
        tab.get(x) match {
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   166
          case None => tab += (x -> y)
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   167
          case Some(z) =>
44181
wenzelm
parents: 43714
diff changeset
   168
            error("Duplicate mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z))
40443
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   169
        }
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   170
      }
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   171
      tab
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   172
    }
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   173
    def recode(text: String): String =
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   174
    {
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   175
      val len = text.length
40523
1050315f6ee2 simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents: 40522
diff changeset
   176
      val matcher = regex_total.pattern.matcher(text)
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   177
      val result = new StringBuilder(len)
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   178
      var i = 0
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   179
      while (i < len) {
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   180
        val c = text(i)
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   181
        if (min <= c && c <= max) {
31929
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   182
          matcher.region(i, len).lookingAt
27938
3d5b12f23f15 recode: proper result for unmatched symbols;
wenzelm
parents: 27937
diff changeset
   183
          val x = matcher.group
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   184
          result.append(table.get(x) getOrElse x)
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   185
          i = matcher.end
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   186
        }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   187
        else { result.append(c); i += 1 }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   188
      }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   189
      result.toString
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   190
    }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   191
  }
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
   192
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   193
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   194
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   195
  /** symbol interpretation **/
27927
eb624bb54bc6 tuned Recoder;
wenzelm
parents: 27926
diff changeset
   196
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   197
  // Lazily built symbol interpretation: the ISABELLE_SYMBOLS setting is split into paths,
  // read via File.try_read, and the resulting text is handed to Interpretation.
  private lazy val symbols =
48550
97592027a2a8 tuned signature;
wenzelm
parents: 48277
diff changeset
   198
    new Interpretation(File.try_read(Path.split(Isabelle_System.getenv_strict("ISABELLE_SYMBOLS"))))
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   199
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   200
  private class Interpretation(symbols_spec: String)
29569
f3f529b5d8fb more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents: 29174
diff changeset
   201
  {
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   202
    /* read symbols */
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   203
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   204
    // matches a line that is blank or holds nothing but a '#' comment
    private val empty = """(?xs) ^\s* (?: \#.* )? $ """.r
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   205
    // matches a property key: one or more characters closed by a colon; the group captures the name
    private val key = """(?xs) (.+): """.r
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   206
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   207
    private def read_decl(decl: String): (Symbol, Map[String, String]) =
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   208
    {
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   209
      def err() = error("Bad symbol declaration: " + decl)
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   210
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   211
      def read_props(props: List[String]): Map[String, String] =
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   212
      {
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   213
        props match {
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   214
          case Nil => Map()
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   215
          case _ :: Nil => err()
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   216
          case key(x) :: y :: rest => read_props(rest) + (x -> y)
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   217
          case _ => err()
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   218
        }
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   219
      }
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   220
      decl.split("\\s+").toList match {
40523
1050315f6ee2 simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents: 40522
diff changeset
   221
        case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props))
34193
d3358b909c40 some sanity checks for symbol interpretation;
wenzelm
parents: 34138
diff changeset
   222
        case _ => err()
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   223
      }
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   224
    }
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   225
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   226
    private val symbols: List[(Symbol, Map[String, String])] =
40443
41c32616298c explicitly check uniqueness of symbol recoding;
wenzelm
parents: 38877
diff changeset
   227
      Map((
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   228
        for (decl <- split_lines(symbols_spec) if !empty.pattern.matcher(decl).matches)
47993
135fd6f2dadd less warning in scala-2.10.0-M3;
wenzelm
parents: 47121
diff changeset
   229
          yield read_decl(decl)): _*).toList
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   230
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   231
31651
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   232
    /* misc properties */
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   233
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   234
    val names: Map[Symbol, String] =
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   235
    {
43456
8a6de1a6e1dc names for control symbols without "^", which is relevant for completion;
wenzelm
parents: 43455
diff changeset
   236
      val name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
31651
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   237
      Map((for ((sym @ name(a), _) <- symbols) yield (sym -> a)): _*)
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   238
    }
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   239
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   240
    // Symbol -> abbreviation, for those symbols that declare an "abbrev" property.
    val abbrevs: Map[Symbol, String] =
      symbols.collect({
        case (sym, props) if props.contains("abbrev") => sym -> props("abbrev")
      }).toMap
39035276927c Symbol.is_ctrl: handle decoded version as well;
wenzelm
parents: 43487
diff changeset
   244
39035276927c Symbol.is_ctrl: handle decoded version as well;
wenzelm
parents: 43487
diff changeset
   245
43490
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   246
    /* recoding */
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   247
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   248
    // Recoders between symbols and the characters given by their "code"
    // property: decoder maps symbol -> character, encoder is the inverse table.
    private val (decoder, encoder) =
    {
      // Parses and validates the "code" property of sym.  Every failure is
      // reported via error(...) with the offending symbol: missing property,
      // unparsable number, value outside the Unicode code point range, or an
      // ASCII code (< 128).
      def code_point(sym: Symbol, props: Map[String, String]): Int =
      {
        val code =
          try { Integer.decode(props("code")).intValue }
          catch {
            case _: NoSuchElementException => error("Missing code for symbol " + sym)
            case _: NumberFormatException => error("Bad code for symbol " + sym)
          }
        // Character.toChars would otherwise fail with a bare
        // IllegalArgumentException that does not mention the symbol.
        if (!Character.isValidCodePoint(code)) error("Bad code for symbol " + sym)
        else if (code < 128) error("Illegal ASCII code for symbol " + sym)
        else code
      }

      val mapping =
        for ((sym, props) <- symbols)
          yield (sym, new String(Character.toChars(code_point(sym, props))))

      (new Recoder(mapping),
       new Recoder(mapping map { case (x, y) => (y, x) }))
    }
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   267
34098
2b9cdf23c188 tuned signature;
wenzelm
parents: 34001
diff changeset
   268
    // Recodes text with the decoder (table symbol -> character built above).
    def decode(text: String): String = decoder.recode(text)
2b9cdf23c188 tuned signature;
wenzelm
parents: 34001
diff changeset
   269
    // Recodes text with the encoder (table character -> symbol built above).
    def encode(text: String): String = encoder.recode(text)
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   270
43490
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   271
    // The given elements together with the decode(...) image of each of them.
    private def recode_set(elems: String*): Set[String] =
      elems.toSet ++ elems.map(decode)
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   276
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   277
    // The given entries, plus the same values again under the decode(...)
    // image of each key (decoded entries are added after the original ones).
    private def recode_map[A](elems: (String, A)*): Map[String, A] =
      elems.toMap ++ elems.map({ case (sym, a) => (decode(sym), a) })
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   282
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   283
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   284
    /* user fonts */
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   285
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   286
    // Symbol -> font name for symbols that declare a "font" property; via
    // recode_map the decoded form of each such symbol is a key as well.
    val fonts: Map[Symbol, String] =
      recode_map(symbols.collect({
        case (sym, props) if props.contains("font") => sym -> props("font")
      }): _*)
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   290
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   291
    // The distinct font names occurring as values of fonts.
    val font_names: List[String] = fonts.values.toSet.toList
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   292
    // Font name -> its position within font_names.
    val font_index: Map[String, Int] = font_names.zipWithIndex.toMap
5e6f76cacb93 more uniform treatment of recode_set/recode_map;
wenzelm
parents: 43489
diff changeset
   293
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   294
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   295
    /* classification */
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   296
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   297
    // Symbols classified as letters: ASCII letters, the single- and
    // double-letter symbol forms (e.g. \<A>, \<aa>), Greek letter symbols and
    // the control symbols \<^isub> / \<^isup> -- each also in decoded form
    // (via recode_set).
    // NOTE(review): lowercase lambda is absent from the Greek list while
    // uppercase Lambda is present; presumably intentional -- confirm before
    // "completing" the table.
    val letters =
    {
      val latin = (('A' to 'Z') ++ ('a' to 'z')).map(_.toString).toList
      val greek =
        List(
          "alpha", "beta", "gamma", "delta", "epsilon",
          "zeta", "eta", "theta", "iota", "kappa",
          "mu", "nu", "xi", "pi", "rho", "sigma",
          "tau", "upsilon", "phi", "chi", "psi",
          "omega", "Gamma", "Delta", "Theta", "Lambda",
          "Xi", "Pi", "Sigma", "Upsilon", "Phi",
          "Psi", "Omega")

      def symbol(name: String): String = "\\<" + name + ">"

      recode_set(
        (latin :::
          latin.map(symbol) :::
          latin.map(c => symbol(c + c)) :::
          greek.map(symbol) :::
          List("\\<^isub>", "\\<^isup>")): _*)
    }
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   331
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   332
    // Symbols classified as blank: whitespace strings plus two blank-like
    // symbols, each also in decoded form (via recode_set).
    val blanks =
    {
      val whitespace = List(" ", "\t", "\n", "\u000B", "\f", "\r", "\r\n")
      val blank_symbols = List("\\<spacespace>", "\\<^newline>")
      recode_set((whitespace ::: blank_symbols): _*)
    }
34138
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   334
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   335
    // The ASCII characters classified as symbolic, each as a one-character string.
    val sym_chars =
      "!#$%&*+-/<=>?@^_|~".map(_.toString).toSet
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   337
44992
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   338
    // All declared symbols accepted by raw_symbolic, each also in decoded form.
    val symbolic = recode_set(symbols.map(_._1).filter(raw_symbolic): _*)
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   339
43455
4b4b93672f15 some unicode chars for special control symbols;
wenzelm
parents: 43447
diff changeset
   340
43488
39035276927c Symbol.is_ctrl: handle decoded version as well;
wenzelm
parents: 43487
diff changeset
   341
    /* control symbols */
39035276927c Symbol.is_ctrl: handle decoded version as well;
wenzelm
parents: 43487
diff changeset
   342
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   343
    // Decoded forms of all declared symbols whose name starts with "\<^".
    val ctrl_decoded: Set[Symbol] =
      symbols.collect({ case (sym, _) if sym.startsWith("\\<^") => decode(sym) }).toSet
39035276927c Symbol.is_ctrl: handle decoded version as well;
wenzelm
parents: 43487
diff changeset
   345
44238
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   346
    // Decoded form of the control symbol \<^sub>.
    val sub_decoded = decode("\\<^sub>")
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   347
    // Decoded form of the control symbol \<^sup>.
    val sup_decoded = decode("\\<^sup>")
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   348
    // Decoded form of the control symbol \<^isub>.
    val isub_decoded = decode("\\<^isub>")
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   349
    // Decoded form of the control symbol \<^isup>.
    val isup_decoded = decode("\\<^isup>")
43511
d138e7482a1b clarified decoded control symbols;
wenzelm
parents: 43490
diff changeset
   350
    // Decoded form of the control symbol \<^bsub>.
    val bsub_decoded = decode("\\<^bsub>")
d138e7482a1b clarified decoded control symbols;
wenzelm
parents: 43490
diff changeset
   351
    // Decoded form of the control symbol \<^esub>.
    val esub_decoded = decode("\\<^esub>")
d138e7482a1b clarified decoded control symbols;
wenzelm
parents: 43490
diff changeset
   352
    // Decoded form of the control symbol \<^bsup>.
    val bsup_decoded = decode("\\<^bsup>")
d138e7482a1b clarified decoded control symbols;
wenzelm
parents: 43490
diff changeset
   353
    // Decoded form of the control symbol \<^esup>.
    val esup_decoded = decode("\\<^esup>")
44238
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   354
    // Decoded form of the control symbol \<^bold>.
    val bold_decoded = decode("\\<^bold>")
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   355
  }
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   356
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   357
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   358
  /* tables */
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   359
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   360
  // Symbol -> bare name; delegates to symbols.names.
  def names: Map[Symbol, String] = symbols.names
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   361
  // Symbol -> abbreviation; delegates to symbols.abbrevs.
  def abbrevs: Map[Symbol, String] = symbols.abbrevs
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   362
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   363
  // Recodes text via symbols.decode.
  def decode(text: String): String = symbols.decode(text)
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   364
  // Recodes text via symbols.encode.
  def encode(text: String): String = symbols.encode(text)
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   365
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   366
  // Symbol -> user font name; delegates to symbols.fonts.
  def fonts: Map[Symbol, String] = symbols.fonts
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   367
  // Distinct user font names; delegates to symbols.font_names.
  def font_names: List[String] = symbols.font_names
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   368
  // Font name -> index; delegates to symbols.font_index.
  def font_index: Map[String, Int] = symbols.font_index
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   369
  // Index (as given by font_index) of the font declared for sym, or None if
  // sym has no entry in symbols.fonts.
  def lookup_font(sym: Symbol): Option[Int] =
    for (font <- symbols.fonts.get(sym)) yield font_index(font)
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   370
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   371
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   372
  /* classification */
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   373
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   374
  // True iff sym is a member of symbols.letters.
  def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym)
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   375
  // True iff sym consists of exactly one character in the range '0'..'9'.
  def is_digit(sym: Symbol): Boolean =
    sym.length == 1 && { val c = sym(0); '0' <= c && c <= '9' }
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   376
  // True iff sym is the underscore or the single quote (prime).
  def is_quasi(sym: Symbol): Boolean =
    sym match {
      case "_" | "'" => true
      case _ => false
    }
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   377
  // True iff sym is a letter, a digit, or a quasi-letter ("_" or "'").
  def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym)
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   378
  // True iff sym is a member of symbols.blanks.
  def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym)
44992
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   379
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   380
  // True iff sym is a member of symbols.sym_chars.
  def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym)
44992
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   381
  // True iff sym has the raw symbolic shape (see raw_symbolic) or is a member
  // of symbols.symbolic; the syntactic test is tried first.
  def is_symbolic(sym: Symbol): Boolean = raw_symbolic(sym) || symbols.symbolic.contains(sym)
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   382
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   383
  // Purely syntactic test: sym has the shape "\<...>" but does not start
  // with the control-symbol prefix "\<^".
  private def raw_symbolic(sym: Symbol): Boolean =
    sym.startsWith("\\<") && !sym.startsWith("\\<^") && sym.endsWith(">")
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   385
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   386
44992
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   387
aa34d2d049ce refined Symbol.is_symbolic -- cover recoded versions as well;
wenzelm
parents: 44949
diff changeset
   388
43695
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   389
  /* control symbols */
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   390
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   391
  // True iff sym starts with the control-symbol prefix "\<^" or is a member
  // of symbols.ctrl_decoded; the prefix test is tried first.
  def is_ctrl(sym: Symbol): Boolean =
    sym.startsWith("\\<^") || symbols.ctrl_decoded(sym)
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   393
43696
58bb7ca5c7a2 explicit indication of type Symbol.Symbol;
wenzelm
parents: 43695
diff changeset
   394
  // True iff sym is neither blank, nor a control symbol, nor malformed.
  def is_controllable(sym: Symbol): Boolean =
    !(is_blank(sym) || is_ctrl(sym) || is_malformed(sym))
5130dfe1b7be simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
wenzelm
parents: 43675
diff changeset
   396
44238
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   397
  // Delegates to symbols.sub_decoded.
  def sub_decoded: Symbol = symbols.sub_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   398
  // Delegates to symbols.sup_decoded.
  def sup_decoded: Symbol = symbols.sup_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   399
  // Delegates to symbols.isub_decoded.
  def isub_decoded: Symbol = symbols.isub_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   400
  // Delegates to symbols.isup_decoded.
  def isup_decoded: Symbol = symbols.isup_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   401
  // Delegates to symbols.bsub_decoded.
  def bsub_decoded: Symbol = symbols.bsub_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   402
  // Delegates to symbols.esub_decoded.
  def esub_decoded: Symbol = symbols.esub_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   403
  // Delegates to symbols.bsup_decoded.
  def bsup_decoded: Symbol = symbols.bsup_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   404
  // Delegates to symbols.esup_decoded.
  def esup_decoded: Symbol = symbols.esup_decoded
36120feb70ed some convenience actions/shortcuts for control symbols;
wenzelm
parents: 44181
diff changeset
   405
  // Delegates to symbols.bold_decoded.
  def bold_decoded: Symbol = symbols.bold_decoded
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
   406
}