use scala.collection.jcl.HashMap, which seems to be more efficient;
authorwenzelm
Sat, 16 Aug 2008 21:23:03 +0200
changeset 27923 7ebe9d38743a
parent 27922 ddf74e16ab01
child 27924 8dd8b564faf5
use scala.collection.jcl.HashMap, which seems to be more efficient; char_pattern: proper matching of surrogate unicode characters, those outside the Basic Multilingual Plane; class Interpretation: misc reorganization, more serious preparation of patterns and tables;
src/Pure/General/symbol.scala
--- a/src/Pure/General/symbol.scala	Sat Aug 16 21:23:01 2008 +0200
+++ b/src/Pure/General/symbol.scala	Sat Aug 16 21:23:03 2008 +0200
@@ -10,18 +10,21 @@
 import java.util.regex.Pattern
 import java.io.File
 import scala.io.Source
-import scala.collection.mutable.HashMap
+import scala.collection.jcl.HashMap
 
 
 object Symbol {
 
-  /* Regexp utils */
+  /** Regexp utils **/
 
   private def compile(s: String) =
     Pattern.compile(s, Pattern.COMMENTS | Pattern.DOTALL)
 
 
-  /* Symbol regexps */
+
+  /** Symbol regexps **/
+
+  private val char_src = """ [^\ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """
 
   private val symbol_src = """ \\ \\? < (?:
       \^? [A-Za-z][A-Za-z0-9_']* |
@@ -30,31 +33,58 @@
   private val bad_symbol_src = "(?!" + symbol_src + ")" +
     """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*"""
 
+  val char_pattern = compile(char_src)
   val symbol_pattern = compile(symbol_src)
   val bad_symbol_pattern = compile(bad_symbol_src)
-  val pattern = compile(symbol_src + "|" + bad_symbol_src + "| .")
+  val pattern = compile(char_pattern + "|" + symbol_src + "|" + bad_symbol_src)
+
+
+
+  /** Symbol interpretation **/
+
+  class Interpretation {
+
+    class BadSymbol(val msg: String) extends Exception
+
+    var symbols = new HashMap[String, HashMap[String, String]]
+
+    var decode_pattern: Pattern = null
+    var decode_table = new HashMap[String, String]
+
+    var encode_pattern: Pattern = null
+    var encode_table = new HashMap[String, String]
 
 
-  /* Symbol interpretation tables */
+    /* recode */
 
-  private val empty_pattern = compile(""" ^\s* (?: \#.* )? $ """)
-  private val blank_pattern = compile(""" \s+ """)
-  private val key_pattern = compile(""" (.+): """)
+    private def recode(pattern: Pattern, table: HashMap[String, String], text: String) = {
+      // FIXME
+      text
+    }
 
-  class Interpretation extends HashMap[String, List[(String, String)]] {
+    def decode(text: String) = recode(decode_pattern, decode_table, text)
+    def encode(text: String) = recode(encode_pattern, encode_table, text)
+
 
-    class BadEntry(val line: String) extends Exception
+    /* read symbols */
+
+    private val empty_pattern = compile(""" ^\s* (?: \#.* )? $ """)
+    private val blank_pattern = compile(""" \s+ """)
+    private val key_pattern = compile(""" (.+): """)
 
-    def read_line(line: String) = {
-      def err() = throw new BadEntry ("Bad symbol specification: " + line)
+    private def read_line(line: String) = {
+      def err() = throw new BadSymbol(line)
 
-      def read_pairs(props: List[String]): List[(String, String)] = {
+      def read_props(props: List[String], tab: HashMap[String, String]): Unit = {
         props match {
-          case Nil => Nil
+          case Nil => ()
           case _ :: Nil => err()
           case key :: value :: rest => {
             val key_matcher = key_pattern.matcher(key)
-            if (key_matcher.matches) { (key_matcher.group(1) -> value) :: read_pairs(rest) }
+            if (key_matcher.matches) {
+              tab + (key_matcher.group(1) -> value)
+              read_props(rest, tab)
+            }
             else err ()
           }
         }
@@ -63,17 +93,61 @@
       if (!empty_pattern.matcher(line).matches) {
         blank_pattern.split(line).toList match {
           case Nil => err()
-          case symbol :: props => this + (symbol -> read_pairs(props))
+          case symbol :: props => {
+            val tab = new HashMap[String, String]
+            read_props(props, tab)
+            symbols + (symbol -> tab)
+          }
         }
       }
     }
 
-    for (base <- List(IsabelleSystem.ISABELLE_HOME, IsabelleSystem.ISABELLE_HOME_USER)) {
+    private def read_symbols(base: String) = {
       val file = new File(base + File.separator + "etc" + File.separator + "symbols")
       if (file.canRead) {
         for (line <- Source.fromFile(file).getLines) read_line(line)
       }
     }
+
+
+    /* init tables */
+
+    private def init_tables() = {
+      val decode_pat = new StringBuilder
+      val encode_pat = new StringBuilder
+
+      val syms = symbols.elements
+      for ((symbol, props) <- syms) {
+        val code = {
+          try { Integer.decode(props("code")) }
+          catch {
+            case e: NoSuchElementException => throw new BadSymbol(symbol)
+            case e: NumberFormatException => throw new BadSymbol(symbol)
+          }
+        }
+        val code_str = new String(Character.toChars(code.intValue))
+
+        decode_pat.append(Pattern.quote(symbol))
+        encode_pat.append(Pattern.quote(code_str))
+        if (syms.hasNext) {
+          decode_pat.append("|")
+          encode_pat.append("|")
+        }
+
+        decode_table + (symbol -> code_str)
+        encode_table + (code_str -> symbol)
+      }
+
+      decode_pattern = compile(decode_pat.toString)
+      encode_pattern = compile(encode_pat.toString)
+    }
+
+
+    /* constructor */
+
+    read_symbols(IsabelleSystem.ISABELLE_HOME)
+    read_symbols(IsabelleSystem.ISABELLE_HOME_USER)
+    init_tables()
   }
 
 }