src/Pure/General/symbol.scala
author wenzelm
Mon, 29 Mar 2010 22:43:56 +0200
changeset 36011 3ff725ac13a4
parent 34316 f879b649ac4c
child 36763 096ebe74aeaf
permissions -rw-r--r--
adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/General/symbol.scala
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     3
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
     4
Detecting and recoding Isabelle symbols.
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     5
*/
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     6
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     7
package isabelle
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     8
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
     9
import scala.io.Source
36011
3ff725ac13a4 adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
wenzelm
parents: 34316
diff changeset
    10
import scala.collection.mutable
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    11
import scala.util.matching.Regex
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    12
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    13
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    14
object Symbol
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    15
{
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
    16
  /* Symbol regexps */
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    17
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    18
  // A "plain" symbol: one character that is neither a backslash nor a
  // surrogate code unit, or else a high surrogate followed by a low surrogate.
  private val plain =
    """(?xs)
    [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """.r
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    20
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    21
  // A well-formed named symbol: backslash, '<', then either an identifier
  // (optionally prefixed by '^') or "^raw:" followed by raw text that
  // contains neither '.' nor '>', and finally the closing '>'.
  private val symbol =
    """(?xs)
      \\ < (?:
      \^? [A-Za-z][A-Za-z0-9_']* |
      \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""".r
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    25
34316
f879b649ac4c clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
wenzelm
parents: 34193
diff changeset
    26
  // FIXME cover bad surrogates!?
f879b649ac4c clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
wenzelm
parents: 34193
diff changeset
    27
  // FIXME check wrt. ML version
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    28
  // A malformed symbol: a backslash-'<' opener at a position where no
  // well-formed symbol starts, extended over the following characters up to
  // (not including) whitespace, a double quote, backquote, backslash, or one
  // of the two-character sequences "(*", "*)", "{*", "*}".
  private val bad_symbol =
    ("(?xs) (?!" + symbol + ")" +
      """ \\ < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""").r
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    30
27939
41b1c0b769bf pattern: proper "." not "[.]"!
wenzelm
parents: 27938
diff changeset
    31
  // total pattern
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
    32
  // Alternatives are tried in order: plain character, well-formed symbol,
  // malformed symbol, and finally any single character as a catch-all.
  val regex = (plain + "|" + symbol + "|" + bad_symbol + "| .").r
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    33
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    34
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    35
  /* basic matching */
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    36
34316
f879b649ac4c clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
wenzelm
parents: 34193
diff changeset
    37
  // A character is plain when it is not a backslash and lies outside the
  // surrogate range \ud800..\udfff.
  def is_plain(c: Char): Boolean = c != '\\' && (c < '\ud800' || '\udfff' < c)
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    38
34316
f879b649ac4c clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
wenzelm
parents: 34193
diff changeset
    39
  // A sequence is wellformed if it is a single plain character, or if it is
  // not matched in full by the bad_symbol pattern.
  def is_wellformed(s: CharSequence): Boolean =
  {
    val single_plain = s.length == 1 && is_plain(s.charAt(0))
    single_plain || !bad_symbol.pattern.matcher(s).matches
  }
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    41
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
    42
  // Determines the length (in chars) of the symbol starting at a given
  // offset of the text.
  class Matcher(text: CharSequence)
  {
    private val matcher = regex.pattern.matcher(text)

    // Length of the symbol that starts at `start`, looking no further than
    // `end`.  Requires 0 <= start < end <= text.length.
    def apply(start: Int, end: Int): Int =
    {
      require(0 <= start && start < end && end <= text.length)
      val first = text.charAt(start)
      // fast path: plain characters need no regex matching
      if (is_plain(first)) 1
      else {
        val m = matcher.region(start, end)
        m.lookingAt
        m.end - m.start
      }
    }
  }
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    55
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    56
36011
3ff725ac13a4 adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
wenzelm
parents: 34316
diff changeset
    57
  /* iterator */
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
    58
36011
3ff725ac13a4 adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
wenzelm
parents: 34316
diff changeset
    59
  // Iterates over the symbols of the text in order; each element is the
  // subsequence of the text covered by one symbol.
  def iterator(text: CharSequence) = new Iterator[CharSequence]
  {
    private val symbol_length = new Matcher(text)
    private var offset = 0
    def hasNext = offset < text.length
    def next = {
      val start = offset
      offset += symbol_length(start, text.length)
      text.subSequence(start, offset)
    }
  }
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
    71
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
    72
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
    73
  /* decoding offsets */
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
    74
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
    75
  // Translates symbol offsets of the text into char offsets.
  class Index(text: CharSequence)
  {
    // chr: char offset just after a multi-char symbol;
    // sym: number of symbols up to and including that symbol
    case class Entry(chr: Int, sym: Int)

    // One entry per symbol that spans more than one char, in text order.
    val index: Array[Entry] =
    {
      val symbol_length = new Matcher(text)
      val entries = new mutable.ArrayBuffer[Entry]
      var char_offset = 0
      var sym_offset = 0
      while (char_offset < text.length) {
        val len = symbol_length(char_offset, text.length)
        char_offset += len
        sym_offset += 1
        if (len > 1) entries += Entry(char_offset, sym_offset)
      }
      entries.toArray
    }

    // Char offset corresponding to the given symbol offset.
    def decode(sym: Int): Int =
    {
      // binary search for the last entry whose symbol offset is <= sym
      var lo = 0
      var hi = index.length
      var found = -1
      while (lo < hi) {
        val mid = (lo + hi) / 2
        if (index(mid).sym <= sym) { found = mid; lo = mid + 1 }
        else hi = mid
      }
      // no multi-char symbol before this offset: both offsets coincide
      if (found < 0) sym
      else index(found).chr + sym - index(found).sym
    }
  }
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   110
ecfc667cac53 is_open: surrogate sequence is High..Low;
wenzelm
parents: 31651
diff changeset
   111
33998
fc56cfc6906e added elements: Interator;
wenzelm
parents: 31929
diff changeset
   112
  /* recoding text */
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
   113
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   114
  // Replaces symbols of a text according to a list of (source, target) pairs.
  private class Recoder(list: List[(String, String)])
  {
    // range of first chars over all source strings; for an empty list the
    // range is empty (min > max), so recode copies its input unchanged
    private val (min, max) =
      list.foldLeft(('\uffff', '\u0000')) {
        case ((lo, hi), (x, _)) =>
          val c = x(0)
          (if (c < lo) c else lo, if (c > hi) c else hi)
      }
    private val table = Map(list: _*)

    // Result of replacing every symbol of the text that occurs as a source
    // in the table by its target; everything else is copied as is.
    def recode(text: String): String =
    {
      val len = text.length
      val matcher = regex.pattern.matcher(text)
      val result = new StringBuilder(len)
      var i = 0
      while (i < len) {
        val c = text.charAt(i)
        // chars outside the range cannot start any source string
        if (c < min || max < c) { result.append(c); i += 1 }
        else {
          matcher.region(i, len).lookingAt
          val x = matcher.group
          result.append(table.getOrElse(x, x))
          i = matcher.end
        }
      }
      result.toString
    }
  }
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
   147
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   148
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   149
27927
eb624bb54bc6 tuned Recoder;
wenzelm
parents: 27926
diff changeset
   150
  /** Symbol interpretation **/
eb624bb54bc6 tuned Recoder;
wenzelm
parents: 27926
diff changeset
   151
34137
6cc9a0cbaf55 refined some Symbol operations/signatures;
wenzelm
parents: 34134
diff changeset
   152
  class Interpretation(symbol_decls: List[String])
29569
f3f529b5d8fb more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents: 29174
diff changeset
   153
  {
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   154
    /* read symbols */
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   155
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   156
    // Matches a line that is blank, or that consists only of optional
    // whitespace followed by a '#' comment.
    private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   157
    // Matches a property key of the form "name:"; when used as an extractor
    // the whole string must match, and the group captures the text before
    // the final colon.
    private val key = new Regex("""(?xs) (.+): """)
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   158
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   159
    // Parses one declaration line: a symbol followed by whitespace-separated
    // "key: value" properties.  Fails via error for a malformed line.
    private def read_decl(decl: String): (String, Map[String, String]) =
    {
      def err() = error("Bad symbol declaration: " + decl)

      // consumes the words pairwise; for a repeated key the earliest
      // occurrence wins, since it is added last
      def read_props(props: List[String]): Map[String, String] =
        props match {
          case key(name) :: value :: more => read_props(more) + (name -> value)
          case Nil => Map()
          case _ => err()
        }

      val words = decl.split("\\s+").toList
      words match {
        case sym :: props if sym.length > 1 && is_wellformed(sym) => (sym, read_props(props))
        case _ => err()
      }
    }
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   177
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   178
    // All declared symbols with their properties; lines matching `empty`
    // are ignored.
    private val symbols: List[(String, Map[String, String])] =
      symbol_decls.filter(decl => !empty.pattern.matcher(decl).matches).map(read_decl)
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   181
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   182
31651
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   183
    /* misc properties */
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   184
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   185
    // Maps each symbol of the form \<ident> to its bare identifier; symbols
    // of any other form are left out.
    val names: Map[String, String] =
    {
      val name = new Regex("""\\<([A-Za-z][A-Za-z0-9_']*)>""")
      val named =
        symbols.flatMap {
          case (sym @ name(a), _) => List(sym -> a)
          case _ => Nil
        }
      Map(named: _*)
    }
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   190
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   191
    // Maps each symbol that declares an "abbrev" property to that property's
    // value.
    val abbrevs: Map[String, String] =
    {
      val abbreviated =
        for {
          (sym, props) <- symbols
          abbrev <- props.get("abbrev").toList
        } yield (sym -> abbrev)
      Map(abbreviated: _*)
    }
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   194
7d6a518b5a2b added names, abbrevs;
wenzelm
parents: 31548
diff changeset
   195
31522
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   196
    /* main recoder methods */
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   197
0466cb17064f more native Scala style;
wenzelm
parents: 29569
diff changeset
   198
    // The decoder maps each declared symbol to the Unicode string given by
    // its "code" property; the encoder is the inverse mapping.  Fails via
    // error if a symbol has no code, or its code is not a number, not a
    // valid Unicode code point, or within the ASCII range.
    private val (decoder, encoder) =
    {
      val mapping =
        for ((sym, props) <- symbols) yield {
          val code =
            try { Integer.decode(props("code")).intValue }
            catch {
              case _: NoSuchElementException => error("Missing code for symbol " + sym)
              case _: NumberFormatException => error("Bad code for symbol " + sym)
            }
          // check the range before Character.toChars, which would otherwise
          // throw a bare IllegalArgumentException that does not name the symbol
          if (!Character.isValidCodePoint(code)) error("Bad code for symbol " + sym)
          else if (code < 128) error("Illegal ASCII code for symbol " + sym)
          else (sym, new String(Character.toChars(code)))
        }
      (new Recoder(mapping),
       new Recoder(mapping map { case (x, y) => (y, x) }))
    }
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   217
34098
2b9cdf23c188 tuned signature;
wenzelm
parents: 34001
diff changeset
   218
    // Replaces each declared symbol in the text by the Unicode string for
    // its code; all other text is copied unchanged.
    def decode(text: String): String = decoder.recode(text)
2b9cdf23c188 tuned signature;
wenzelm
parents: 34001
diff changeset
   219
    // Inverse of decode: replaces each Unicode string that is the code of a
    // declared symbol by that symbol; all other text is copied unchanged.
    def encode(text: String): String = encoder.recode(text)
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   220
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   221
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   222
    /* classification */
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   223
34138
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   224
    // Builds a set that contains every given element both as written and in
    // its decoded form.
    private object Decode_Set
    {
      def apply(elems: String*): Set[String] =
      {
        val decoded = elems.map(decode)
        Set((elems ++ decoded): _*)
      }
    }
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   232
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   233
    private val letters = Decode_Set(
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   234
      "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   235
      "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   236
      "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   237
      "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   238
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   239
      "\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   240
      "\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   241
      "\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   242
      "\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   243
      "\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   244
      "\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   245
      "\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   246
      "\\<x>", "\\<y>", "\\<z>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   247
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   248
      "\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   249
      "\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   250
      "\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   251
      "\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   252
      "\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   253
      "\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   254
      "\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   255
      "\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   256
      "\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   257
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   258
      "\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   259
      "\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   260
      "\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   261
      "\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   262
      "\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   263
      "\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   264
      "\\<Psi>", "\\<Omega>",
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   265
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   266
      "\\<^isub>", "\\<^isup>")
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   267
34138
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   268
    // Symbols accepted by is_blank: space, tab, newline, vertical tab, form feed,
    // carriage return, and the Isabelle symbols \<spacespace> and \<^newline>.
    // NOTE(review): Decode_Set is defined outside this excerpt -- presumably it also
    // adds the decoded form of each element; confirm against its definition.
    private val blanks =
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   269
      Decode_Set(" ", "\t", "\n", "\u000B", "\f", "\r", "\\<spacespace>", "\\<^newline>")
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   270
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   271
    // Single-character ASCII strings accepted by is_symbolic_char.
    // Plain Set (not Decode_Set): only these literal strings are members.
    private val sym_chars =
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   272
      Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   273
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   274
    // True iff sym is a member of the letters set.
    def is_letter(sym: String): Boolean = letters contains sym
34138
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   275
    // True iff sym is exactly one character in the ASCII range '0'..'9'.
    def is_digit(sym: String): Boolean =
      sym.length == 1 && {
        val c = sym.charAt(0)
        c >= '0' && c <= '9'
      }
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   276
    // True iff sym is the underscore or the single-quote (prime) character.
    def is_quasi(sym: String): Boolean =
      sym match {
        case "_" | "'" => true
        case _ => false
      }
34138
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   277
    // True iff sym is a letter, a digit, or a quasi-letter (tested in that order).
    def is_letdig(sym: String): Boolean =
      if (is_letter(sym)) true
      else if (is_digit(sym)) true
      else is_quasi(sym)
34134
d8d9df8407f6 added symbol classification;
wenzelm
parents: 34098
diff changeset
   278
    // True iff sym is a member of the blanks set.
    def is_blank(sym: String): Boolean = blanks contains sym
34138
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   279
    // True iff sym is one of the single ASCII characters listed in sym_chars.
    def is_symbolic_char(sym: String): Boolean = sym_chars(sym)
4008c2f5a46e refined some Symbol operations/signatures;
wenzelm
parents: 34137
diff changeset
   280
    // True iff sym begins with the symbol prefix (backslash, '<') but not with the
    // control-symbol prefix (backslash, '<', '^').  Only the prefix is inspected.
    def is_symbolic(sym: String): Boolean =
      if (sym.startsWith("\\<^")) false
      else sym.startsWith("\\<")
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   281
  }
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
   282
}