| author | wenzelm | 
| Wed, 27 Aug 2008 16:32:48 +0200 | |
| changeset 28023 | 92dd3ad302b7 | 
| parent 28007 | 2d0c93291293 | 
| child 29140 | e7ac5bb20aed | 
| permissions | -rw-r--r-- | 
| 27901 | 1 | /* Title: Pure/General/symbol.scala | 
| 2 | ID: $Id$ | |
| 3 | Author: Makarius | |
| 4 | ||
| 27924 | 5 | Detecting and recoding Isabelle symbols. | 
| 27901 | 6 | */ | 
| 7 | ||
| 8 | package isabelle | |
| 9 | ||
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 10 | import java.util.regex.Pattern | 
| 27918 | 11 | import java.io.File | 
| 12 | import scala.io.Source | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 13 | import scala.collection.jcl.HashMap | 
| 27901 | 14 | |
| 15 | ||
| 16 | object Symbol {
 | |
| 17 | ||
| 27924 | 18 | /** Symbol regexps **/ | 
| 27901 | 19 | |
| 20 | private def compile(s: String) = | |
| 21 | Pattern.compile(s, Pattern.COMMENTS | Pattern.DOTALL) | |
| 22 | ||
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 23 |   private val plain_pattern = compile(""" [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)
 | 
| 27924 | 24 | |
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 25 |   private val symbol_pattern = compile(""" \\ \\? < (?:
 | 
| 27924 | 26 | \^? [A-Za-z][A-Za-z0-9_']* | | 
| 27 | \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""") | |
| 28 | ||
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 29 |   private val bad_symbol_pattern = compile("(?!" + symbol_pattern + ")" +
 | 
| 27924 | 30 |     """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
 | 
| 31 | ||
| 27939 | 32 | // total pattern | 
| 33 | val pattern = compile(plain_pattern + "|" + symbol_pattern + "|" + bad_symbol_pattern + "| .") | |
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 34 | |
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 35 | |
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 36 | |
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 37 | /** Recoding **/ | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 38 | |
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 39 |   private class Recoder(list: List[(String, String)]) {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 40 |     private val (min, max) = {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 41 | var min = '\uffff' | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 42 | var max = '\u0000' | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 43 |       for ((x, _) <- list) {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 44 | val c = x(0) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 45 | if (c < min) min = c | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 46 | if (c > max) max = c | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 47 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 48 | (min, max) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 49 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 50 |     private val table = {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 51 | val table = new HashMap[String, String] | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 52 | for ((x, y) <- list) table + (x -> y) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 53 | table | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 54 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 55 |     def recode(text: String) = {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 56 | val len = text.length | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 57 | val matcher = pattern.matcher(text) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 58 | val result = new StringBuilder(len) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 59 | var i = 0 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 60 |       while (i < len) {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 61 | val c = text(i) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 62 |         if (min <= c && c <= max) {
 | 
| 27939 | 63 | matcher.region(i, len) | 
| 64 | matcher.lookingAt | |
| 27938 | 65 | val x = matcher.group | 
| 66 |           table.get(x) match {
 | |
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 67 | case Some(y) => result.append(y) | 
| 27938 | 68 | case None => result.append(x) | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 69 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 70 | i = matcher.end | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 71 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 72 |         else { result.append(c); i += 1 }
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 73 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 74 | result.toString | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 75 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 76 | } | 
| 27924 | 77 | |
| 27918 | 78 | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 79 | |
| 27927 | 80 | /** Symbol interpretation **/ | 
| 81 | ||
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 82 |   class Interpretation {
 | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 83 | |
| 27924 | 84 | private var symbols = new HashMap[String, HashMap[String, String]] | 
| 27926 | 85 | private var decoder: Recoder = null | 
| 86 | private var encoder: Recoder = null | |
| 27918 | 87 | |
| 27924 | 88 | def decode(text: String) = decoder.recode(text) | 
| 89 | def encode(text: String) = encoder.recode(text) | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 90 | |
| 27918 | 91 | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 92 | /* read symbols */ | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 93 | |
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 94 |     private val empty_pattern = compile(""" ^\s* (?: \#.* )? $ """)
 | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 95 |     private val blank_pattern = compile(""" \s+ """)
 | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 96 |     private val key_pattern = compile(""" (.+): """)
 | 
| 27918 | 97 | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 98 |     private def read_line(line: String) = {
 | 
| 27993 
6dd90ef9f927
simplified exceptions: use plain error function / RuntimeException;
 wenzelm parents: 
27939diff
changeset | 99 |       def err() = error("Bad symbol specification (line " + line + ")")
 | 
| 27918 | 100 | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 101 |       def read_props(props: List[String], tab: HashMap[String, String]): Unit = {
 | 
| 27918 | 102 |         props match {
 | 
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 103 | case Nil => () | 
| 27918 | 104 | case _ :: Nil => err() | 
| 105 |           case key :: value :: rest => {
 | |
| 106 | val key_matcher = key_pattern.matcher(key) | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 107 |             if (key_matcher.matches) {
 | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 108 | tab + (key_matcher.group(1) -> value) | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 109 | read_props(rest, tab) | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 110 | } | 
| 27918 | 111 | else err () | 
| 112 | } | |
| 113 | } | |
| 114 | } | |
| 115 | ||
| 116 |       if (!empty_pattern.matcher(line).matches) {
 | |
| 117 |         blank_pattern.split(line).toList match {
 | |
| 118 | case Nil => err() | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 119 |           case symbol :: props => {
 | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 120 | val tab = new HashMap[String, String] | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 121 | read_props(props, tab) | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 122 | symbols + (symbol -> tab) | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 123 | } | 
| 27918 | 124 | } | 
| 125 | } | |
| 126 | } | |
| 127 | ||
| 27935 
68d40072e9e7
read_symbols: proper IsabelleSystem.platform_path;
 wenzelm parents: 
27928diff
changeset | 128 |     private def read_symbols(path: String) = {
 | 
| 
68d40072e9e7
read_symbols: proper IsabelleSystem.platform_path;
 wenzelm parents: 
27928diff
changeset | 129 | val file = new File(IsabelleSystem.platform_path(path)) | 
| 27918 | 130 |       if (file.canRead) {
 | 
| 131 | for (line <- Source.fromFile(file).getLines) read_line(line) | |
| 132 | } | |
| 133 | } | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 134 | |
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 135 | |
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 136 | /* init tables */ | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 137 | |
| 27924 | 138 |     private def get_code(entry: (String, HashMap[String, String])) = {
 | 
| 139 | val (symbol, props) = entry | |
| 140 | val code = | |
| 141 |         try { Integer.decode(props("code")).intValue }
 | |
| 142 |         catch {
 | |
| 27993 
6dd90ef9f927
simplified exceptions: use plain error function / RuntimeException;
 wenzelm parents: 
27939diff
changeset | 143 |           case _: NoSuchElementException => error("Missing code for symbol " + symbol)
 | 
| 
6dd90ef9f927
simplified exceptions: use plain error function / RuntimeException;
 wenzelm parents: 
27939diff
changeset | 144 |           case _: NumberFormatException => error("Bad code for symbol " + symbol)
 | 
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 145 | } | 
| 27924 | 146 | (symbol, new String(Character.toChars(code))) | 
| 147 | } | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 148 | |
| 27924 | 149 |     private def init_recoders() = {
 | 
| 150 | val list = symbols.elements.toList.map(get_code) | |
| 28007 | 151 |       decoder = new Recoder(list ++ (for ((x, y) <- list) yield ("\\" + x, y)))
 | 
| 27928 | 152 | encoder = new Recoder(for ((x, y) <- list) yield (y, x)) | 
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 153 | } | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 154 | |
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 155 | |
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 156 | /* constructor */ | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 157 | |
| 27935 
68d40072e9e7
read_symbols: proper IsabelleSystem.platform_path;
 wenzelm parents: 
27928diff
changeset | 158 |     read_symbols("$ISABELLE_HOME/etc/symbols")
 | 
| 
68d40072e9e7
read_symbols: proper IsabelleSystem.platform_path;
 wenzelm parents: 
27928diff
changeset | 159 |     read_symbols("$ISABELLE_HOME_USER/etc/symbols")
 | 
| 27924 | 160 | init_recoders() | 
| 27918 | 161 | } | 
| 162 | ||
| 27901 | 163 | } |