tuned comments;
simplified symbol pattern presentation: no need to keep source strings, canonical ofString does the job;
auxiliary class Recoder;
proper implementation of Interpretation.decode/encode;
--- a/src/Pure/General/symbol.scala Sat Aug 16 21:23:03 2008 +0200
+++ b/src/Pure/General/symbol.scala Sat Aug 16 23:12:23 2008 +0200
@@ -2,12 +2,12 @@
ID: $Id$
Author: Makarius
-Basic support for Isabelle symbols.
+Detecting and recoding Isabelle symbols.
*/
package isabelle
-import java.util.regex.Pattern
+import java.util.regex.{Pattern, Matcher}
import java.io.File
import scala.io.Source
import scala.collection.jcl.HashMap
@@ -15,28 +15,50 @@
object Symbol {
- /** Regexp utils **/
+ /** Symbol regexps **/
private def compile(s: String) =
Pattern.compile(s, Pattern.COMMENTS | Pattern.DOTALL)
+ val char_pattern = compile(""" [^\ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)
+
+ val symbol_pattern = compile(""" \\ \\? < (?:
+ \^? [A-Za-z][A-Za-z0-9_']* |
+ \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")
+
+ val bad_symbol_pattern = compile("(?!" + symbol_pattern + ")" +
+ """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
+
+ val pattern = compile(char_pattern + "|" + symbol_pattern + "|" + bad_symbol_pattern)
+
- /** Symbol regexps **/
-
- private val char_src = """ [^\ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """
+ /** Recoder tables **/
- private val symbol_src = """ \\ \\? < (?:
- \^? [A-Za-z][A-Za-z0-9_']* |
- \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >"""
+ class Recoder(list: List[(String, String)]) {
+ var pattern: Pattern = null
+ var table = new HashMap[String, String]
- private val bad_symbol_src = "(?!" + symbol_src + ")" +
- """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*"""
+ def recode(text: String) = {
+ val output = new StringBuffer(text.length)
+ val matcher = pattern.matcher(text)
+ while(matcher.find) matcher.appendReplacement(output, table(matcher.group))
+ matcher.appendTail(output)
+ output.toString
+ }
- val char_pattern = compile(char_src)
- val symbol_pattern = compile(symbol_src)
- val bad_symbol_pattern = compile(bad_symbol_src)
- val pattern = compile(char_pattern + "|" + symbol_src + "|" + bad_symbol_src)
+ /* constructor */
+ {
+ val pat = new StringBuilder(500)
+ val elems = list.elements
+ for ((x, y) <- elems) {
+ pat.append(Pattern.quote(x))
+ if (elems.hasNext) pat.append("|")
+ table + (x -> Matcher.quoteReplacement(y))
+ }
+ pattern = compile(pat.toString)
+ }
+ }
@@ -46,24 +68,12 @@
class BadSymbol(val msg: String) extends Exception
- var symbols = new HashMap[String, HashMap[String, String]]
-
- var decode_pattern: Pattern = null
- var decode_table = new HashMap[String, String]
-
- var encode_pattern: Pattern = null
- var encode_table = new HashMap[String, String]
-
+ private var symbols = new HashMap[String, HashMap[String, String]]
+ var decoder: Recoder = null
+ var encoder: Recoder = null
- /* recode */
-
- private def recode(pattern: Pattern, table: HashMap[String, String], text: String) = {
- // FIXME
- text
- }
-
- def decode(text: String) = recode(decode_pattern, decode_table, text)
- def encode(text: String) = recode(encode_pattern, encode_table, text)
+ def decode(text: String) = decoder.recode(text)
+ def encode(text: String) = encoder.recode(text)
/* read symbols */
@@ -112,34 +122,21 @@
/* init tables */
- private def init_tables() = {
- val decode_pat = new StringBuilder
- val encode_pat = new StringBuilder
-
- val syms = symbols.elements
- for ((symbol, props) <- syms) {
- val code = {
- try { Integer.decode(props("code")) }
- catch {
- case e: NoSuchElementException => throw new BadSymbol(symbol)
- case e: NumberFormatException => throw new BadSymbol(symbol)
- }
+ private def get_code(entry: (String, HashMap[String, String])) = {
+ val (symbol, props) = entry
+ val code =
+ try { Integer.decode(props("code")).intValue }
+ catch {
+ case e: NoSuchElementException => throw new BadSymbol(symbol)
+ case e: NumberFormatException => throw new BadSymbol(symbol)
}
- val code_str = new String(Character.toChars(code.intValue))
+ (symbol, new String(Character.toChars(code)))
+ }
- decode_pat.append(Pattern.quote(symbol))
- encode_pat.append(Pattern.quote(code_str))
- if (syms.hasNext) {
- decode_pat.append("|")
- encode_pat.append("|")
- }
-
- decode_table + (symbol -> code_str)
- encode_table + (code_str -> symbol)
- }
-
- decode_pattern = compile(decode_pat.toString)
- encode_pattern = compile(encode_pat.toString)
+ private def init_recoders() = {
+ val list = symbols.elements.toList.map(get_code)
+ decoder = new Recoder(list)
+ encoder = new Recoder(list.map((p: (String, String)) => (p._2, p._1)))
}
@@ -147,7 +144,7 @@
read_symbols(IsabelleSystem.ISABELLE_HOME)
read_symbols(IsabelleSystem.ISABELLE_HOME_USER)
- init_tables()
+ init_recoders()
}
}