| author | krauss | 
| Wed, 02 Feb 2011 08:47:45 +0100 | |
| changeset 41686 | d8efc2490b8e | 
| parent 40529 | d5fb1f1a5857 | 
| child 43418 | c69e9fbb81a8 | 
| permissions | -rw-r--r-- | 
| 27901 | 1  | 
/* Title: Pure/General/symbol.scala  | 
2  | 
Author: Makarius  | 
|
3  | 
||
| 27924 | 4  | 
Detecting and recoding Isabelle symbols.  | 
| 27901 | 5  | 
*/  | 
6  | 
||
7  | 
package isabelle  | 
|
8  | 
||
| 27918 | 9  | 
import scala.io.Source  | 
| 
36011
 
3ff725ac13a4
adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
 
wenzelm 
parents: 
34316 
diff
changeset
 | 
10  | 
import scala.collection.mutable  | 
| 31522 | 11  | 
import scala.util.matching.Regex  | 
| 27901 | 12  | 
|
13  | 
||
| 31522 | 14  | 
object Symbol  | 
15  | 
{
 | 
|
| 36763 | 16  | 
/* spaces */  | 
17  | 
||
| 36816 | 18  | 
val spc = ' '  | 
19  | 
val space = " "  | 
|
20  | 
||
21  | 
private val static_spaces = space * 4000  | 
|
| 36763 | 22  | 
|
23  | 
def spaces(k: Int): String =  | 
|
24  | 
  {
 | 
|
25  | 
require(k >= 0)  | 
|
26  | 
if (k < static_spaces.length) static_spaces.substring(0, k)  | 
|
| 36816 | 27  | 
else space * k  | 
| 36763 | 28  | 
}  | 
29  | 
||
30  | 
||
| 33998 | 31  | 
/* Symbol regexps */  | 
| 27901 | 32  | 
|
| 31522 | 33  | 
  private val plain = new Regex("""(?xs)
 | 
| 
40524
 
6131d7a78ad3
treat Unicode "replacement character" (i.e. decoding error) is malformed;
 
wenzelm 
parents: 
40523 
diff
changeset
 | 
34  | 
[^\r\\\ud800-\udfff\ufffd] | [\ud800-\udbff][\udc00-\udfff] """)  | 
| 
37556
 
2bf29095d26f
treat alternative newline symbols as in Isabelle/ML;
 
wenzelm 
parents: 
36816 
diff
changeset
 | 
35  | 
|
| 40522 | 36  | 
  private val physical_newline = new Regex("""(?xs) \n | \r\n | \r """)
 | 
| 27901 | 37  | 
|
| 31522 | 38  | 
  private val symbol = new Regex("""(?xs)
 | 
| 
31545
 
5f1f0a20af4d
discontinued escaped symbols such as \\<forall> -- only one backslash should be used;
 
wenzelm 
parents: 
31523 
diff
changeset
 | 
39  | 
\\ < (?:  | 
| 27924 | 40  | 
\^? [A-Za-z][A-Za-z0-9_']* |  | 
41  | 
\^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")  | 
|
42  | 
||
| 
40523
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40522 
diff
changeset
 | 
43  | 
  private val malformed_symbol = new Regex("(?xs) (?!" + symbol + ")" +
 | 
| 40529 | 44  | 
""" [\ud800-\udbff\ufffd] | \\<\^? """)  | 
| 27924 | 45  | 
|
| 
40523
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40522 
diff
changeset
 | 
46  | 
val regex_total =  | 
| 
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40522 
diff
changeset
 | 
47  | 
new Regex(plain + "|" + physical_newline + "|" + symbol + "|" + malformed_symbol + "| .")  | 
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
48  | 
|
| 34137 | 49  | 
|
50  | 
/* basic matching */  | 
|
51  | 
||
| 
37556
 
2bf29095d26f
treat alternative newline symbols as in Isabelle/ML;
 
wenzelm 
parents: 
36816 
diff
changeset
 | 
52  | 
def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || '\ud800' <= c && c <= '\udfff')  | 
| 34137 | 53  | 
|
| 38877 | 54  | 
def is_physical_newline(s: CharSequence): Boolean =  | 
55  | 
"\n".contentEquals(s) || "\r".contentEquals(s) || "\r\n".contentEquals(s)  | 
|
56  | 
||
| 
40523
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40522 
diff
changeset
 | 
57  | 
def is_malformed(s: CharSequence): Boolean =  | 
| 
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40522 
diff
changeset
 | 
58  | 
!(s.length == 1 && is_plain(s.charAt(0))) && malformed_symbol.pattern.matcher(s).matches  | 
| 34137 | 59  | 
|
60  | 
class Matcher(text: CharSequence)  | 
|
61  | 
  {
 | 
|
| 
40523
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40522 
diff
changeset
 | 
62  | 
private val matcher = regex_total.pattern.matcher(text)  | 
| 34137 | 63  | 
def apply(start: Int, end: Int): Int =  | 
64  | 
    {
 | 
|
65  | 
require(0 <= start && start < end && end <= text.length)  | 
|
| 
34316
 
f879b649ac4c
clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
 
wenzelm 
parents: 
34193 
diff
changeset
 | 
66  | 
if (is_plain(text.charAt(start))) 1  | 
| 34138 | 67  | 
      else {
 | 
| 34137 | 68  | 
matcher.region(start, end).lookingAt  | 
69  | 
matcher.group.length  | 
|
70  | 
}  | 
|
71  | 
}  | 
|
| 31522 | 72  | 
}  | 
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
73  | 
|
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
74  | 
|
| 
36011
 
3ff725ac13a4
adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
 
wenzelm 
parents: 
34316 
diff
changeset
 | 
75  | 
/* iterator */  | 
| 33998 | 76  | 
|
| 
36011
 
3ff725ac13a4
adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
 
wenzelm 
parents: 
34316 
diff
changeset
 | 
77  | 
def iterator(text: CharSequence) = new Iterator[CharSequence]  | 
| 34134 | 78  | 
  {
 | 
| 34137 | 79  | 
private val matcher = new Matcher(text)  | 
| 33998 | 80  | 
private var i = 0  | 
81  | 
def hasNext = i < text.length  | 
|
| 40522 | 82  | 
def next =  | 
83  | 
    {
 | 
|
| 34137 | 84  | 
val n = matcher(i, text.length)  | 
85  | 
val s = text.subSequence(i, i + n)  | 
|
86  | 
i += n  | 
|
87  | 
s  | 
|
| 33998 | 88  | 
}  | 
89  | 
}  | 
|
90  | 
||
91  | 
||
92  | 
/* decoding offsets */  | 
|
93  | 
||
94  | 
class Index(text: CharSequence)  | 
|
| 31929 | 95  | 
  {
 | 
96  | 
case class Entry(chr: Int, sym: Int)  | 
|
97  | 
val index: Array[Entry] =  | 
|
98  | 
    {
 | 
|
| 34137 | 99  | 
val matcher = new Matcher(text)  | 
| 31929 | 100  | 
val buf = new mutable.ArrayBuffer[Entry]  | 
101  | 
var chr = 0  | 
|
102  | 
var sym = 0  | 
|
| 33998 | 103  | 
      while (chr < text.length) {
 | 
| 34137 | 104  | 
val n = matcher(chr, text.length)  | 
105  | 
chr += n  | 
|
| 31929 | 106  | 
sym += 1  | 
| 34137 | 107  | 
if (n > 1) buf += Entry(chr, sym)  | 
| 31929 | 108  | 
}  | 
109  | 
buf.toArray  | 
|
110  | 
}  | 
|
| 
38479
 
e628da370072
more efficient Markup_Tree, based on branches sorted by quasi-order;
 
wenzelm 
parents: 
37556 
diff
changeset
 | 
111  | 
def decode(sym1: Int): Int =  | 
| 31929 | 112  | 
    {
 | 
| 
38479
 
e628da370072
more efficient Markup_Tree, based on branches sorted by quasi-order;
 
wenzelm 
parents: 
37556 
diff
changeset
 | 
113  | 
val sym = sym1 - 1  | 
| 31929 | 114  | 
val end = index.length  | 
115  | 
def bisect(a: Int, b: Int): Int =  | 
|
116  | 
      {
 | 
|
117  | 
        if (a < b) {
 | 
|
118  | 
val c = (a + b) / 2  | 
|
119  | 
if (sym < index(c).sym) bisect(a, c)  | 
|
120  | 
else if (c + 1 == end || sym < index(c + 1).sym) c  | 
|
121  | 
else bisect(c + 1, b)  | 
|
122  | 
}  | 
|
123  | 
else -1  | 
|
124  | 
}  | 
|
125  | 
val i = bisect(0, end)  | 
|
126  | 
if (i < 0) sym  | 
|
127  | 
else index(i).chr + sym - index(i).sym  | 
|
128  | 
}  | 
|
| 
38479
 
e628da370072
more efficient Markup_Tree, based on branches sorted by quasi-order;
 
wenzelm 
parents: 
37556 
diff
changeset
 | 
129  | 
def decode(range: Text.Range): Text.Range = range.map(decode(_))  | 
| 31929 | 130  | 
}  | 
131  | 
||
132  | 
||
| 33998 | 133  | 
/* recoding text */  | 
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
134  | 
|
| 31522 | 135  | 
private class Recoder(list: List[(String, String)])  | 
136  | 
  {
 | 
|
137  | 
private val (min, max) =  | 
|
138  | 
    {
 | 
|
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
139  | 
var min = '\uffff'  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
140  | 
var max = '\u0000'  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
141  | 
      for ((x, _) <- list) {
 | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
142  | 
val c = x(0)  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
143  | 
if (c < min) min = c  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
144  | 
if (c > max) max = c  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
145  | 
}  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
146  | 
(min, max)  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
147  | 
}  | 
| 40443 | 148  | 
private val table =  | 
149  | 
    {
 | 
|
150  | 
var tab = Map[String, String]()  | 
|
151  | 
      for ((x, y) <- list) {
 | 
|
152  | 
        tab.get(x) match {
 | 
|
153  | 
case None => tab += (x -> y)  | 
|
154  | 
case Some(z) =>  | 
|
155  | 
            error("Duplicate mapping of \"" + x + "\" to \"" + y + "\" vs. \"" + z + "\"")
 | 
|
156  | 
}  | 
|
157  | 
}  | 
|
158  | 
tab  | 
|
159  | 
}  | 
|
| 31522 | 160  | 
def recode(text: String): String =  | 
161  | 
    {
 | 
|
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
162  | 
val len = text.length  | 
| 
40523
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40522 
diff
changeset
 | 
163  | 
val matcher = regex_total.pattern.matcher(text)  | 
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
164  | 
val result = new StringBuilder(len)  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
165  | 
var i = 0  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
166  | 
      while (i < len) {
 | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
167  | 
val c = text(i)  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
168  | 
        if (min <= c && c <= max) {
 | 
| 31929 | 169  | 
matcher.region(i, len).lookingAt  | 
| 27938 | 170  | 
val x = matcher.group  | 
| 31522 | 171  | 
result.append(table.get(x) getOrElse x)  | 
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
172  | 
i = matcher.end  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
173  | 
}  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
174  | 
        else { result.append(c); i += 1 }
 | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
175  | 
}  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
176  | 
result.toString  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
177  | 
}  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
178  | 
}  | 
| 27924 | 179  | 
|
| 27918 | 180  | 
|
| 
27923
 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 
wenzelm 
parents: 
27918 
diff
changeset
 | 
181  | 
|
| 27927 | 182  | 
/** Symbol interpretation **/  | 
183  | 
||
| 34137 | 184  | 
class Interpretation(symbol_decls: List[String])  | 
| 
29569
 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 
wenzelm 
parents: 
29174 
diff
changeset
 | 
185  | 
  {
 | 
| 31522 | 186  | 
/* read symbols */  | 
187  | 
||
188  | 
    private val empty = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
 | 
|
189  | 
    private val key = new Regex("""(?xs) (.+): """)
 | 
|
190  | 
||
191  | 
private def read_decl(decl: String): (String, Map[String, String]) =  | 
|
192  | 
    {
 | 
|
193  | 
      def err() = error("Bad symbol declaration: " + decl)
 | 
|
194  | 
||
195  | 
def read_props(props: List[String]): Map[String, String] =  | 
|
196  | 
      {
 | 
|
197  | 
        props match {
 | 
|
198  | 
case Nil => Map()  | 
|
199  | 
case _ :: Nil => err()  | 
|
200  | 
case key(x) :: y :: rest => read_props(rest) + (x -> y)  | 
|
201  | 
case _ => err()  | 
|
202  | 
}  | 
|
203  | 
}  | 
|
204  | 
      decl.split("\\s+").toList match {
 | 
|
| 
40523
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40522 
diff
changeset
 | 
205  | 
case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props))  | 
| 34193 | 206  | 
case _ => err()  | 
| 31522 | 207  | 
}  | 
208  | 
}  | 
|
209  | 
||
210  | 
private val symbols: List[(String, Map[String, String])] =  | 
|
| 40443 | 211  | 
Map((  | 
212  | 
for (decl <- symbol_decls if !empty.pattern.matcher(decl).matches)  | 
|
213  | 
yield read_decl(decl)): _*) toList  | 
|
| 31522 | 214  | 
|
215  | 
||
| 31651 | 216  | 
/* misc properties */  | 
217  | 
||
| 34134 | 218  | 
val names: Map[String, String] =  | 
219  | 
    {
 | 
|
| 31651 | 220  | 
      val name = new Regex("""\\<([A-Za-z][A-Za-z0-9_']*)>""")
 | 
221  | 
Map((for ((sym @ name(a), _) <- symbols) yield (sym -> a)): _*)  | 
|
222  | 
}  | 
|
223  | 
||
| 40443 | 224  | 
val abbrevs: Map[String, String] =  | 
225  | 
Map((  | 
|
226  | 
        for ((sym, props) <- symbols if props.isDefinedAt("abbrev"))
 | 
|
227  | 
          yield (sym -> props("abbrev"))): _*)
 | 
|
| 31651 | 228  | 
|
229  | 
||
| 31522 | 230  | 
/* main recoder methods */  | 
231  | 
||
232  | 
private val (decoder, encoder) =  | 
|
233  | 
    {
 | 
|
234  | 
val mapping =  | 
|
235  | 
        for {
 | 
|
236  | 
(sym, props) <- symbols  | 
|
237  | 
val code =  | 
|
238  | 
            try { Integer.decode(props("code")).intValue }
 | 
|
239  | 
            catch {
 | 
|
240  | 
              case _: NoSuchElementException => error("Missing code for symbol " + sym)
 | 
|
241  | 
              case _: NumberFormatException => error("Bad code for symbol " + sym)
 | 
|
242  | 
}  | 
|
243  | 
val ch = new String(Character.toChars(code))  | 
|
| 34193 | 244  | 
        } yield {
 | 
245  | 
          if (code < 128) error("Illegal ASCII code for symbol " + sym)
 | 
|
246  | 
else (sym, ch)  | 
|
247  | 
}  | 
|
| 
31545
 
5f1f0a20af4d
discontinued escaped symbols such as \\<forall> -- only one backslash should be used;
 
wenzelm 
parents: 
31523 
diff
changeset
 | 
248  | 
(new Recoder(mapping),  | 
| 31548 | 249  | 
       new Recoder(mapping map { case (x, y) => (y, x) }))
 | 
| 31522 | 250  | 
}  | 
| 27918 | 251  | 
|
| 34098 | 252  | 
def decode(text: String): String = decoder.recode(text)  | 
253  | 
def encode(text: String): String = encoder.recode(text)  | 
|
| 34134 | 254  | 
|
255  | 
||
256  | 
/* classification */  | 
|
257  | 
||
| 34138 | 258  | 
private object Decode_Set  | 
259  | 
    {
 | 
|
260  | 
def apply(elems: String*): Set[String] =  | 
|
261  | 
      {
 | 
|
262  | 
val content = elems.toList  | 
|
263  | 
Set((content ::: content.map(decode)): _*)  | 
|
264  | 
}  | 
|
265  | 
}  | 
|
266  | 
||
267  | 
private val letters = Decode_Set(  | 
|
| 34134 | 268  | 
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",  | 
269  | 
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",  | 
|
270  | 
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",  | 
|
271  | 
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",  | 
|
272  | 
||
273  | 
"\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>",  | 
|
274  | 
"\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>",  | 
|
275  | 
"\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>",  | 
|
276  | 
"\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>",  | 
|
277  | 
"\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>",  | 
|
278  | 
"\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>",  | 
|
279  | 
"\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>",  | 
|
280  | 
"\\<x>", "\\<y>", "\\<z>",  | 
|
281  | 
||
282  | 
"\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>",  | 
|
283  | 
"\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>",  | 
|
284  | 
"\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>",  | 
|
285  | 
"\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>",  | 
|
286  | 
"\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>",  | 
|
287  | 
"\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>",  | 
|
288  | 
"\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>",  | 
|
289  | 
"\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>",  | 
|
290  | 
"\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>",  | 
|
291  | 
||
292  | 
"\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>",  | 
|
293  | 
"\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>",  | 
|
294  | 
"\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>",  | 
|
295  | 
"\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>",  | 
|
296  | 
"\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>",  | 
|
297  | 
"\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>",  | 
|
298  | 
"\\<Psi>", "\\<Omega>",  | 
|
299  | 
||
300  | 
"\\<^isub>", "\\<^isup>")  | 
|
301  | 
||
| 34138 | 302  | 
private val blanks =  | 
| 36816 | 303  | 
Decode_Set(space, "\t", "\n", "\u000B", "\f", "\r", "\\<spacespace>", "\\<^newline>")  | 
| 34138 | 304  | 
|
305  | 
private val sym_chars =  | 
|
306  | 
      Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
 | 
|
| 34134 | 307  | 
|
308  | 
def is_letter(sym: String): Boolean = letters.contains(sym)  | 
|
| 34138 | 309  | 
def is_digit(sym: String): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9'  | 
| 34134 | 310  | 
def is_quasi(sym: String): Boolean = sym == "_" || sym == "'"  | 
| 34138 | 311  | 
def is_letdig(sym: String): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym)  | 
| 34134 | 312  | 
def is_blank(sym: String): Boolean = blanks.contains(sym)  | 
| 34138 | 313  | 
def is_symbolic_char(sym: String): Boolean = sym_chars.contains(sym)  | 
| 
40523
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40522 
diff
changeset
 | 
314  | 
def is_symbolic(sym: String): Boolean =  | 
| 
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40522 
diff
changeset
 | 
315  | 
      sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^")
 | 
| 27918 | 316  | 
}  | 
| 27901 | 317  | 
}  |