src/Pure/General/symbol.scala
author blanchet
Tue, 17 Feb 2009 14:01:54 +0100
changeset 29956 62f931b257b7
parent 29569 f3f529b5d8fb
child 31522 0466cb17064f
permissions -rw-r--r--
Reintroduce set_interpreter for Collect and op :. I removed them by accident when removing old code that dealt with the "set" type. Incidentally, there is still some broken "set" code in Refute that should be fixed (see TODO in refute.ML).
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/General/symbol.scala
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     3
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
     4
Detecting and recoding Isabelle symbols.
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     5
*/
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     6
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     7
package isabelle
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     8
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
     9
import java.util.regex.Pattern
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
    10
import java.io.File
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
    11
import scala.io.Source
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    12
import scala.collection.jcl.HashMap
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    13
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    14
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    15
object Symbol {
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    16
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    17
  /** Symbol regexps **/
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    18
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    19
  private def compile(s: String) =
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    20
    Pattern.compile(s, Pattern.COMMENTS | Pattern.DOTALL)
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    21
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    22
  private val plain_pattern = compile(""" [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    23
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    24
  private val symbol_pattern = compile(""" \\ \\? < (?:
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    25
      \^? [A-Za-z][A-Za-z0-9_']* |
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    26
      \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    27
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    28
  private val bad_symbol_pattern = compile("(?!" + symbol_pattern + ")" +
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    29
    """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    30
27939
41b1c0b769bf pattern: proper "." not "[.]"!
wenzelm
parents: 27938
diff changeset
    31
  // total pattern
41b1c0b769bf pattern: proper "." not "[.]"!
wenzelm
parents: 27938
diff changeset
    32
  val pattern = compile(plain_pattern + "|" + symbol_pattern + "|" + bad_symbol_pattern + "| .")
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    33
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    34
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    35
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    36
  /** Recoding **/
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    37
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    38
  private class Recoder(list: List[(String, String)]) {
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    39
    private val (min, max) = {
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    40
      var min = '\uffff'
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    41
      var max = '\u0000'
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    42
      for ((x, _) <- list) {
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    43
        val c = x(0)
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    44
        if (c < min) min = c
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    45
        if (c > max) max = c
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    46
      }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    47
      (min, max)
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    48
    }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    49
    private val table = {
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    50
      val table = new HashMap[String, String]
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    51
      for ((x, y) <- list) table + (x -> y)
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    52
      table
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    53
    }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    54
    def recode(text: String) = {
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    55
      val len = text.length
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    56
      val matcher = pattern.matcher(text)
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    57
      val result = new StringBuilder(len)
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    58
      var i = 0
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    59
      while (i < len) {
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    60
        val c = text(i)
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    61
        if (min <= c && c <= max) {
27939
41b1c0b769bf pattern: proper "." not "[.]"!
wenzelm
parents: 27938
diff changeset
    62
          matcher.region(i, len)
41b1c0b769bf pattern: proper "." not "[.]"!
wenzelm
parents: 27938
diff changeset
    63
          matcher.lookingAt
27938
3d5b12f23f15 recode: proper result for unmatched symbols;
wenzelm
parents: 27937
diff changeset
    64
          val x = matcher.group
3d5b12f23f15 recode: proper result for unmatched symbols;
wenzelm
parents: 27937
diff changeset
    65
          table.get(x) match {
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    66
            case Some(y) => result.append(y)
27938
3d5b12f23f15 recode: proper result for unmatched symbols;
wenzelm
parents: 27937
diff changeset
    67
            case None => result.append(x)
27937
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    68
          }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    69
          i = matcher.end
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    70
        }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    71
        else { result.append(c); i += 1 }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    72
      }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    73
      result.toString
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    74
    }
fdf77e7be01a more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents: 27935
diff changeset
    75
  }
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    76
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
    77
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    78
27927
eb624bb54bc6 tuned Recoder;
wenzelm
parents: 27926
diff changeset
    79
  /** Symbol interpretation **/
eb624bb54bc6 tuned Recoder;
wenzelm
parents: 27926
diff changeset
    80
29569
f3f529b5d8fb more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents: 29174
diff changeset
    81
  class Interpretation(symbol_decls: Iterator[String])
f3f529b5d8fb more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents: 29174
diff changeset
    82
  {
f3f529b5d8fb more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents: 29174
diff changeset
    83
    private val symbols = new HashMap[String, HashMap[String, String]]
27926
308be7332e25 more private fields;
wenzelm
parents: 27924
diff changeset
    84
    private var decoder: Recoder = null
308be7332e25 more private fields;
wenzelm
parents: 27924
diff changeset
    85
    private var encoder: Recoder = null
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
    86
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    87
    def decode(text: String) = decoder.recode(text)
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    88
    def encode(text: String) = encoder.recode(text)
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    89
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
    90
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    91
    /* read symbols */
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    92
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    93
    private val empty_pattern = compile(""" ^\s* (?: \#.* )? $ """)
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    94
    private val blank_pattern = compile(""" \s+ """)
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    95
    private val key_pattern = compile(""" (.+): """)
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
    96
29569
f3f529b5d8fb more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents: 29174
diff changeset
    97
    private def read_decl(decl: String) = {
f3f529b5d8fb more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents: 29174
diff changeset
    98
      def err() = error("Bad symbol declaration: " + decl)
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
    99
29569
f3f529b5d8fb more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents: 29174
diff changeset
   100
      def read_props(props: List[String], tab: HashMap[String, String])
f3f529b5d8fb more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents: 29174
diff changeset
   101
      {
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   102
        props match {
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   103
          case Nil => ()
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   104
          case _ :: Nil => err()
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   105
          case key :: value :: rest => {
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   106
            val key_matcher = key_pattern.matcher(key)
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   107
            if (key_matcher.matches) {
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   108
              tab + (key_matcher.group(1) -> value)
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   109
              read_props(rest, tab)
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   110
            }
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   111
            else err ()
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   112
          }
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   113
        }
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   114
      }
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   115
29569
f3f529b5d8fb more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents: 29174
diff changeset
   116
      if (!empty_pattern.matcher(decl).matches) {
f3f529b5d8fb more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents: 29174
diff changeset
   117
        blank_pattern.split(decl).toList match {
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   118
          case Nil => err()
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   119
          case symbol :: props => {
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   120
            val tab = new HashMap[String, String]
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   121
            read_props(props, tab)
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   122
            symbols + (symbol -> tab)
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   123
          }
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   124
        }
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   125
      }
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   126
    }
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   127
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   128
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   129
    /* init tables */
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   130
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
   131
    private def get_code(entry: (String, HashMap[String, String])) = {
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
   132
      val (symbol, props) = entry
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
   133
      val code =
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
   134
        try { Integer.decode(props("code")).intValue }
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
   135
        catch {
27993
6dd90ef9f927 simplified exceptions: use plain error function / RuntimeException;
wenzelm
parents: 27939
diff changeset
   136
          case _: NoSuchElementException => error("Missing code for symbol " + symbol)
6dd90ef9f927 simplified exceptions: use plain error function / RuntimeException;
wenzelm
parents: 27939
diff changeset
   137
          case _: NumberFormatException => error("Bad code for symbol " + symbol)
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   138
        }
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
   139
      (symbol, new String(Character.toChars(code)))
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
   140
    }
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   141
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
   142
    private def init_recoders() = {
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
   143
      val list = symbols.elements.toList.map(get_code)
28007
2d0c93291293 tuned append;
wenzelm
parents: 27993
diff changeset
   144
      decoder = new Recoder(list ++ (for ((x, y) <- list) yield ("\\" + x, y)))
27928
b1d0d1351ed9 decode escaped symbols as well;
wenzelm
parents: 27927
diff changeset
   145
      encoder = new Recoder(for ((x, y) <- list) yield (y, x))
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   146
    }
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   147
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   148
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   149
    /* constructor */
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
   150
29569
f3f529b5d8fb more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
wenzelm
parents: 29174
diff changeset
   151
    symbol_decls.foreach(read_decl)
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
   152
    init_recoders()
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   153
  }
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
   154
}