author | wenzelm |
Mon, 25 Aug 2008 20:01:17 +0200 | |
changeset 27993 | 6dd90ef9f927 |
parent 27939 | 41b1c0b769bf |
child 28007 | 2d0c93291293 |
permissions | -rw-r--r-- |
27901 | 1 |
/* Title: Pure/General/symbol.scala |
2 |
ID: $Id$ |
|
3 |
Author: Makarius |
|
4 |
||
27924 | 5 |
Detecting and recoding Isabelle symbols. |
27901 | 6 |
*/ |
7 |
||
8 |
package isabelle |
|
9 |
||
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
10 |
import java.util.regex.Pattern |
27918 | 11 |
import java.io.File |
12 |
import scala.io.Source |
|
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
13 |
import scala.collection.jcl.HashMap |
27901 | 14 |
|
15 |
||
16 |
object Symbol { |
|
17 |
||
27924 | 18 |
/** Symbol regexps **/ |
27901 | 19 |
|
20 |
private def compile(s: String) = |
|
21 |
Pattern.compile(s, Pattern.COMMENTS | Pattern.DOTALL) |
|
22 |
||
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
23 |
private val plain_pattern = compile(""" [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """) |
27924 | 24 |
|
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
25 |
private val symbol_pattern = compile(""" \\ \\? < (?: |
27924 | 26 |
\^? [A-Za-z][A-Za-z0-9_']* | |
27 |
\^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""") |
|
28 |
||
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
29 |
private val bad_symbol_pattern = compile("(?!" + symbol_pattern + ")" + |
27924 | 30 |
""" \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""") |
31 |
||
27939 | 32 |
// total pattern |
33 |
val pattern = compile(plain_pattern + "|" + symbol_pattern + "|" + bad_symbol_pattern + "| .") |
|
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
34 |
|
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
35 |
|
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
36 |
|
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
37 |
/** Recoding **/ |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
38 |
|
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
39 |
private class Recoder(list: List[(String, String)]) { |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
40 |
private val (min, max) = { |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
41 |
var min = '\uffff' |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
42 |
var max = '\u0000' |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
43 |
for ((x, _) <- list) { |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
44 |
val c = x(0) |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
45 |
if (c < min) min = c |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
46 |
if (c > max) max = c |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
47 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
48 |
(min, max) |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
49 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
50 |
private val table = { |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
51 |
val table = new HashMap[String, String] |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
52 |
for ((x, y) <- list) table + (x -> y) |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
53 |
table |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
54 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
55 |
def recode(text: String) = { |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
56 |
val len = text.length |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
57 |
val matcher = pattern.matcher(text) |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
58 |
val result = new StringBuilder(len) |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
59 |
var i = 0 |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
60 |
while (i < len) { |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
61 |
val c = text(i) |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
62 |
if (min <= c && c <= max) { |
27939 | 63 |
matcher.region(i, len) |
64 |
matcher.lookingAt |
|
27938 | 65 |
val x = matcher.group |
66 |
table.get(x) match { |
|
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
67 |
case Some(y) => result.append(y) |
27938 | 68 |
case None => result.append(x) |
27937
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
69 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
70 |
i = matcher.end |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
71 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
72 |
else { result.append(c); i += 1 } |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
73 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
74 |
result.toString |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
75 |
} |
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
wenzelm
parents:
27935
diff
changeset
|
76 |
} |
27924 | 77 |
|
27918 | 78 |
|
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
79 |
|
27927 | 80 |
/** Symbol interpretation **/ |
81 |
||
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
82 |
class Interpretation { |
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
83 |
|
27924 | 84 |
private var symbols = new HashMap[String, HashMap[String, String]] |
27926 | 85 |
private var decoder: Recoder = null |
86 |
private var encoder: Recoder = null |
|
27918 | 87 |
|
27924 | 88 |
def decode(text: String) = decoder.recode(text) |
89 |
def encode(text: String) = encoder.recode(text) |
|
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
90 |
|
27918 | 91 |
|
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
92 |
/* read symbols */ |
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
93 |
|
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
94 |
private val empty_pattern = compile(""" ^\s* (?: \#.* )? $ """) |
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
95 |
private val blank_pattern = compile(""" \s+ """) |
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
96 |
private val key_pattern = compile(""" (.+): """) |
27918 | 97 |
|
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
98 |
private def read_line(line: String) = { |
27993
6dd90ef9f927
simplified exceptions: use plain error function / RuntimeException;
wenzelm
parents:
27939
diff
changeset
|
99 |
def err() = error("Bad symbol specification (line " + line + ")") |
27918 | 100 |
|
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
101 |
def read_props(props: List[String], tab: HashMap[String, String]): Unit = { |
27918 | 102 |
props match { |
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
103 |
case Nil => () |
27918 | 104 |
case _ :: Nil => err() |
105 |
case key :: value :: rest => { |
|
106 |
val key_matcher = key_pattern.matcher(key) |
|
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
107 |
if (key_matcher.matches) { |
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
108 |
tab + (key_matcher.group(1) -> value) |
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
109 |
read_props(rest, tab) |
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
110 |
} |
27918 | 111 |
else err () |
112 |
} |
|
113 |
} |
|
114 |
} |
|
115 |
||
116 |
if (!empty_pattern.matcher(line).matches) { |
|
117 |
blank_pattern.split(line).toList match { |
|
118 |
case Nil => err() |
|
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
119 |
case symbol :: props => { |
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
120 |
val tab = new HashMap[String, String] |
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
121 |
read_props(props, tab) |
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
122 |
symbols + (symbol -> tab) |
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
123 |
} |
27918 | 124 |
} |
125 |
} |
|
126 |
} |
|
127 |
||
27935
68d40072e9e7
read_symbols: proper IsabelleSystem.platform_path;
wenzelm
parents:
27928
diff
changeset
|
128 |
private def read_symbols(path: String) = { |
68d40072e9e7
read_symbols: proper IsabelleSystem.platform_path;
wenzelm
parents:
27928
diff
changeset
|
129 |
val file = new File(IsabelleSystem.platform_path(path)) |
27918 | 130 |
if (file.canRead) { |
131 |
for (line <- Source.fromFile(file).getLines) read_line(line) |
|
132 |
} |
|
133 |
} |
|
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
134 |
|
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
135 |
|
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
136 |
/* init tables */ |
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
137 |
|
27924 | 138 |
private def get_code(entry: (String, HashMap[String, String])) = { |
139 |
val (symbol, props) = entry |
|
140 |
val code = |
|
141 |
try { Integer.decode(props("code")).intValue } |
|
142 |
catch { |
|
27993
6dd90ef9f927
simplified exceptions: use plain error function / RuntimeException;
wenzelm
parents:
27939
diff
changeset
|
143 |
case _: NoSuchElementException => error("Missing code for symbol " + symbol) |
6dd90ef9f927
simplified exceptions: use plain error function / RuntimeException;
wenzelm
parents:
27939
diff
changeset
|
144 |
case _: NumberFormatException => error("Bad code for symbol " + symbol) |
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
145 |
} |
27924 | 146 |
(symbol, new String(Character.toChars(code))) |
147 |
} |
|
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
148 |
|
27924 | 149 |
private def init_recoders() = { |
150 |
val list = symbols.elements.toList.map(get_code) |
|
27928 | 151 |
decoder = new Recoder(list ::: (for ((x, y) <- list) yield ("\\" + x, y))) |
152 |
encoder = new Recoder(for ((x, y) <- list) yield (y, x)) |
|
27923
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
153 |
} |
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
154 |
|
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
155 |
|
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
156 |
/* constructor */ |
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
wenzelm
parents:
27918
diff
changeset
|
157 |
|
27935
68d40072e9e7
read_symbols: proper IsabelleSystem.platform_path;
wenzelm
parents:
27928
diff
changeset
|
158 |
read_symbols("$ISABELLE_HOME/etc/symbols") |
68d40072e9e7
read_symbols: proper IsabelleSystem.platform_path;
wenzelm
parents:
27928
diff
changeset
|
159 |
read_symbols("$ISABELLE_HOME_USER/etc/symbols") |
27924 | 160 |
init_recoders() |
27918 | 161 |
} |
162 |
||
27901 | 163 |
} |