| author | haftmann | 
| Thu, 22 Jan 2009 09:04:46 +0100 | |
| changeset 29614 | 1f7b1b0df292 | 
| parent 29569 | f3f529b5d8fb | 
| child 31522 | 0466cb17064f | 
| permissions | -rw-r--r-- | 
| 27901 | 1 | /* Title: Pure/General/symbol.scala | 
| 2 | Author: Makarius | |
| 3 | ||
| 27924 | 4 | Detecting and recoding Isabelle symbols. | 
| 27901 | 5 | */ | 
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 9 | import java.util.regex.Pattern | 
| 27918 | 10 | import java.io.File | 
| 11 | import scala.io.Source | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 12 | import scala.collection.jcl.HashMap | 
| 27901 | 13 | |
| 14 | ||
| 15 | object Symbol {
 | |
| 16 | ||
| 27924 | 17 | /** Symbol regexps **/ | 
| 27901 | 18 | |
| 19 | private def compile(s: String) = | |
| 20 | Pattern.compile(s, Pattern.COMMENTS | Pattern.DOTALL) | |
| 21 | ||
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 22 |   private val plain_pattern = compile(""" [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)
 | 
| 27924 | 23 | |
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 24 |   private val symbol_pattern = compile(""" \\ \\? < (?:
 | 
| 27924 | 25 | \^? [A-Za-z][A-Za-z0-9_']* | | 
| 26 | \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""") | |
| 27 | ||
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 28 |   private val bad_symbol_pattern = compile("(?!" + symbol_pattern + ")" +
 | 
| 27924 | 29 |     """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")
 | 
| 30 | ||
| 27939 | 31 | // total pattern | 
| 32 | val pattern = compile(plain_pattern + "|" + symbol_pattern + "|" + bad_symbol_pattern + "| .") | |
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 33 | |
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 34 | |
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 35 | |
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 36 | /** Recoding **/ | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 37 | |
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 38 |   private class Recoder(list: List[(String, String)]) {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 39 |     private val (min, max) = {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 40 | var min = '\uffff' | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 41 | var max = '\u0000' | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 42 |       for ((x, _) <- list) {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 43 | val c = x(0) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 44 | if (c < min) min = c | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 45 | if (c > max) max = c | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 46 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 47 | (min, max) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 48 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 49 |     private val table = {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 50 | val table = new HashMap[String, String] | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 51 | for ((x, y) <- list) table + (x -> y) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 52 | table | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 53 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 54 |     def recode(text: String) = {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 55 | val len = text.length | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 56 | val matcher = pattern.matcher(text) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 57 | val result = new StringBuilder(len) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 58 | var i = 0 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 59 |       while (i < len) {
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 60 | val c = text(i) | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 61 |         if (min <= c && c <= max) {
 | 
| 27939 | 62 | matcher.region(i, len) | 
| 63 | matcher.lookingAt | |
| 27938 | 64 | val x = matcher.group | 
| 65 |           table.get(x) match {
 | |
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 66 | case Some(y) => result.append(y) | 
| 27938 | 67 | case None => result.append(x) | 
| 27937 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 68 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 69 | i = matcher.end | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 70 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 71 |         else { result.append(c); i += 1 }
 | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 72 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 73 | result.toString | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 74 | } | 
| 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 wenzelm parents: 
27935diff
changeset | 75 | } | 
| 27924 | 76 | |
| 27918 | 77 | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 78 | |
| 27927 | 79 | /** Symbol interpretation **/ | 
| 80 | ||
| 29569 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 wenzelm parents: 
29174diff
changeset | 81 | class Interpretation(symbol_decls: Iterator[String]) | 
| 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 wenzelm parents: 
29174diff
changeset | 82 |   {
 | 
| 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 wenzelm parents: 
29174diff
changeset | 83 | private val symbols = new HashMap[String, HashMap[String, String]] | 
| 27926 | 84 | private var decoder: Recoder = null | 
| 85 | private var encoder: Recoder = null | |
| 27918 | 86 | |
| 27924 | 87 | def decode(text: String) = decoder.recode(text) | 
| 88 | def encode(text: String) = encoder.recode(text) | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 89 | |
| 27918 | 90 | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 91 | /* read symbols */ | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 92 | |
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 93 |     private val empty_pattern = compile(""" ^\s* (?: \#.* )? $ """)
 | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 94 |     private val blank_pattern = compile(""" \s+ """)
 | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 95 |     private val key_pattern = compile(""" (.+): """)
 | 
| 27918 | 96 | |
| 29569 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 wenzelm parents: 
29174diff
changeset | 97 |     private def read_decl(decl: String) = {
 | 
| 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 wenzelm parents: 
29174diff
changeset | 98 |       def err() = error("Bad symbol declaration: " + decl)
 | 
| 27918 | 99 | |
| 29569 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 wenzelm parents: 
29174diff
changeset | 100 | def read_props(props: List[String], tab: HashMap[String, String]) | 
| 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 wenzelm parents: 
29174diff
changeset | 101 |       {
 | 
| 27918 | 102 |         props match {
 | 
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 103 | case Nil => () | 
| 27918 | 104 | case _ :: Nil => err() | 
| 105 |           case key :: value :: rest => {
 | |
| 106 | val key_matcher = key_pattern.matcher(key) | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 107 |             if (key_matcher.matches) {
 | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 108 | tab + (key_matcher.group(1) -> value) | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 109 | read_props(rest, tab) | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 110 | } | 
| 27918 | 111 | else err () | 
| 112 | } | |
| 113 | } | |
| 114 | } | |
| 115 | ||
| 29569 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 wenzelm parents: 
29174diff
changeset | 116 |       if (!empty_pattern.matcher(decl).matches) {
 | 
| 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 wenzelm parents: 
29174diff
changeset | 117 |         blank_pattern.split(decl).toList match {
 | 
| 27918 | 118 | case Nil => err() | 
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 119 |           case symbol :: props => {
 | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 120 | val tab = new HashMap[String, String] | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 121 | read_props(props, tab) | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 122 | symbols + (symbol -> tab) | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 123 | } | 
| 27918 | 124 | } | 
| 125 | } | |
| 126 | } | |
| 127 | ||
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 128 | |
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 129 | /* init tables */ | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 130 | |
| 27924 | 131 |     private def get_code(entry: (String, HashMap[String, String])) = {
 | 
| 132 | val (symbol, props) = entry | |
| 133 | val code = | |
| 134 |         try { Integer.decode(props("code")).intValue }
 | |
| 135 |         catch {
 | |
| 27993 
6dd90ef9f927
simplified exceptions: use plain error function / RuntimeException;
 wenzelm parents: 
27939diff
changeset | 136 |           case _: NoSuchElementException => error("Missing code for symbol " + symbol)
 | 
| 
6dd90ef9f927
simplified exceptions: use plain error function / RuntimeException;
 wenzelm parents: 
27939diff
changeset | 137 |           case _: NumberFormatException => error("Bad code for symbol " + symbol)
 | 
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 138 | } | 
| 27924 | 139 | (symbol, new String(Character.toChars(code))) | 
| 140 | } | |
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 141 | |
| 27924 | 142 |     private def init_recoders() = {
 | 
| 143 | val list = symbols.elements.toList.map(get_code) | |
| 28007 | 144 |       decoder = new Recoder(list ++ (for ((x, y) <- list) yield ("\\" + x, y)))
 | 
| 27928 | 145 | encoder = new Recoder(for ((x, y) <- list) yield (y, x)) | 
| 27923 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 146 | } | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 147 | |
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 148 | |
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 149 | /* constructor */ | 
| 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 wenzelm parents: 
27918diff
changeset | 150 | |
| 29569 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 wenzelm parents: 
29174diff
changeset | 151 | symbol_decls.foreach(read_decl) | 
| 27924 | 152 | init_recoders() | 
| 27918 | 153 | } | 
| 27901 | 154 | } |