src/Pure/General/symbol.scala
author wenzelm
Sat, 16 Aug 2008 23:12:23 +0200
changeset 27924 8dd8b564faf5
parent 27923 7ebe9d38743a
child 27926 308be7332e25
permissions -rw-r--r--
tuned comments; simplified symbol pattern presentation: no need to keep source strings, canonical ofString does the job; auxiliary class Recoder; proper implementation of Interpretation.decode/encode;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     1
/*  Title:      Pure/General/symbol.scala
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     2
    ID:         $Id$
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     3
    Author:     Makarius
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     4
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
     5
Detecting and recoding Isabelle symbols.
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     6
*/
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     7
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     8
package isabelle
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
     9
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    10
import java.util.regex.{Pattern, Matcher}
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
    11
import java.io.File
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
    12
import scala.io.Source
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    13
import scala.collection.jcl.HashMap
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    14
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    15
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    16
object Symbol {
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    17
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    18
  /** Symbol regexps **/
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    19
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    20
  /* Compile a regex in free-spacing mode (whitespace and #-comments in the
     source are ignored), with "." also matching line terminators. */
  private def compile(s: String): Pattern = {
    val flags = Pattern.COMMENTS | Pattern.DOTALL
    Pattern.compile(s, flags)
  }
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
    22
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    23
  /* A single Unicode character: either one non-surrogate char, or a
     high surrogate followed by a low surrogate. */
  val char_pattern = compile(""" [^\ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)

  /* A well-formed symbol: backslash (optionally doubled), then "<", then either
     an identifier with optional "^" prefix or a "^raw:" payload, then ">". */
  val symbol_pattern = compile(""" \\ \\? < (?:
      \^? [A-Za-z][A-Za-z0-9_']* |
      \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")

  /* A symbol opening that is NOT a well-formed symbol; it extends up to (but
     excluding) whitespace, a quote, backquote or backslash, or one of the
     delimiters "(*", "*)", "{*", "*}". */
  val bad_symbol_pattern = compile("(?!" + symbol_pattern.pattern + ")" +
    """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")

  /* Any symbol, bad symbol, or plain character.  Alternation in java.util.regex
     is ordered, so the plain character case must come LAST: otherwise the
     leading backslash of a symbol is consumed as a single character by
     find/lookingAt and the symbol alternatives are never reached.  The set of
     strings accepted by a whole-input match is unaffected by the order. */
  val pattern = compile(
    symbol_pattern.pattern + "|" + bad_symbol_pattern.pattern + "|" + char_pattern.pattern)
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    33
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
    34
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    35
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    36
  /** Recoder tables **/
27918
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
    37
27924
8dd8b564faf5 tuned comments;
wenzelm
parents: 27923
diff changeset
    38
  /* Simultaneous literal string substitution: every occurrence of a first
     component of `list` in the input is replaced by the corresponding second
     component.  If a first component occurs more than once in `list`, the last
     replacement given for it is used. */
  class Recoder(list: List[(String, String)]) {

    /* source string -> replacement, already escaped for Matcher.appendReplacement */
    var table = new HashMap[String, String]
    for ((x, y) <- list) table(x) = Matcher.quoteReplacement(y)

    /* alternation of all source strings (quoted as literals), in list order */
    var pattern: Pattern = compile(list.map(p => Pattern.quote(p._1)).mkString("|"))

    /* Replace all table entries occurring in `text`; other text is copied as is. */
    def recode(text: String): String = {
      // An empty table yields the empty regex, which matches at every position
      // and would make the table lookup below fail: nothing to replace anyway.
      if (table.isEmpty) text
      else {
        val output = new StringBuffer(text.length)
        val matcher = pattern.matcher(text)
        while (matcher.find) matcher.appendReplacement(output, table(matcher.group))
        matcher.appendTail(output)
        output.toString
      }
    }
  }
27923
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    62
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    63
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    64
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    65
  /** Symbol interpretation **/
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    66
7ebe9d38743a use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents: 27918
diff changeset
    67
  /* Symbol interpretation tables, read from "etc/symbols" below ISABELLE_HOME
     and then ISABELLE_HOME_USER; an entry read later replaces an earlier entry
     for the same symbol.  Provides decode (symbol -> Unicode character) and
     encode (Unicode character -> symbol) on plain text. */
  class Interpretation {

    /* Malformed table entry; msg is the offending line or symbol.  The message
       is also handed to Exception, so getMessage and stack traces show it. */
    class BadSymbol(val msg: String) extends Exception(msg)

    /* symbol -> its properties (key -> value) */
    private val symbols = new HashMap[String, HashMap[String, String]]
    var decoder: Recoder = null
    var encoder: Recoder = null

    def decode(text: String) = decoder.recode(text)
    def encode(text: String) = encoder.recode(text)


    /* read symbols */

    private val empty_pattern = compile(""" ^\s* (?: \#.* )? $ """)
    private val blank_pattern = compile(""" \s+ """)
    private val key_pattern = compile(""" (.+): """)

    /* Parse one line of the form "symbol key1: value1 key2: value2 ..." and
       store it in the symbols table; blank and #-comment lines are skipped.
       Throws BadSymbol (carrying the line) on a key without value or a key
       that does not end with ":". */
    private def read_line(line: String) = {
      def err() = throw new BadSymbol(line)

      def read_props(props: List[String], tab: HashMap[String, String]): Unit = {
        props match {
          case Nil => ()
          case _ :: Nil => err()
          case key :: value :: rest => {
            val key_matcher = key_pattern.matcher(key)
            if (key_matcher.matches) {
              tab(key_matcher.group(1)) = value
              read_props(rest, tab)
            }
            else err()
          }
        }
      }

      if (!empty_pattern.matcher(line).matches) {
        blank_pattern.split(line).toList match {
          case Nil => err()
          case symbol :: props => {
            val tab = new HashMap[String, String]
            read_props(props, tab)
            symbols(symbol) = tab
          }
        }
      }
    }

    /* Read base/etc/symbols line by line; a missing or unreadable file is ignored. */
    private def read_symbols(base: String) = {
      val file = new File(base + File.separator + "etc" + File.separator + "symbols")
      if (file.canRead) {
        for (line <- Source.fromFile(file).getLines) read_line(line)
      }
    }


    /* init tables */

    /* Pair a symbol with the string for its "code" property.  Throws BadSymbol
       (carrying the symbol) if the property is missing, is not a number, or is
       not a valid Unicode code point. */
    private def get_code(entry: (String, HashMap[String, String])) = {
      val (symbol, props) = entry
      val str =
        try { new String(Character.toChars(Integer.decode(props("code")).intValue)) }
        catch {
          case e: NoSuchElementException => throw new BadSymbol(symbol)
          // covers NumberFormatException (a subclass) from Integer.decode as
          // well as an invalid code point rejected by Character.toChars
          case e: IllegalArgumentException => throw new BadSymbol(symbol)
        }
      (symbol, str)
    }

    private def init_recoders() = {
      val list = symbols.elements.toList.map(get_code)
      decoder = new Recoder(list)
      encoder = new Recoder(list.map((p: (String, String)) => (p._2, p._1)))
    }


    /* constructor */

    read_symbols(IsabelleSystem.ISABELLE_HOME)
    read_symbols(IsabelleSystem.ISABELLE_HOME_USER)
    init_recoders()
  }
85942d2036a0 reading symbol interpretation tables;
wenzelm
parents: 27905
diff changeset
   149
27901
28083e9f8d1d Basic support for Isabelle symbols.
wenzelm
parents:
diff changeset
   150
}