| author | nipkow | 
| Mon, 16 Nov 2015 15:59:47 +0100 | |
| changeset 61688 | d04b1b4fb015 | 
| parent 61579 | 634cd44bb1d3 | 
| child 61865 | 6dcc9e4f1aa6 | 
| permissions | -rw-r--r-- | 
| 27901 | 1  | 
/* Title: Pure/General/symbol.scala  | 
2  | 
Author: Makarius  | 
|
3  | 
||
| 27924 | 4  | 
Detecting and recoding Isabelle symbols.  | 
| 27901 | 5  | 
*/  | 
6  | 
||
7  | 
package isabelle  | 
|
8  | 
||
| 55618 | 9  | 
|
| 
36011
 
3ff725ac13a4
adapted to Scala 2.8.0 Beta1 -- with notable changes to scala.collection;
 
wenzelm 
parents: 
34316 
diff
changeset
 | 
10  | 
import scala.collection.mutable  | 
| 31522 | 11  | 
import scala.util.matching.Regex  | 
| 48922 | 12  | 
import scala.annotation.tailrec  | 
| 27901 | 13  | 
|
14  | 
||
| 31522 | 15  | 
object Symbol  | 
16  | 
{
 | 
|
| 43696 | 17  | 
type Symbol = String  | 
18  | 
||
| 
55884
 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 
wenzelm 
parents: 
55618 
diff
changeset
 | 
19  | 
// counting Isabelle symbols, starting from 1  | 
| 
 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 
wenzelm 
parents: 
55618 
diff
changeset
 | 
20  | 
type Offset = Text.Offset  | 
| 
 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 
wenzelm 
parents: 
55618 
diff
changeset
 | 
21  | 
type Range = Text.Range  | 
| 
 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 
wenzelm 
parents: 
55618 
diff
changeset
 | 
22  | 
|
| 43696 | 23  | 
|
| 43418 | 24  | 
/* ASCII characters */  | 
25  | 
||
26  | 
def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'  | 
|
| 55497 | 27  | 
|
| 43418 | 28  | 
def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9'  | 
| 55497 | 29  | 
|
30  | 
def is_ascii_hex(c: Char): Boolean =  | 
|
31  | 
'0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f'  | 
|
32  | 
||
| 43418 | 33  | 
def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\''  | 
34  | 
||
| 55497 | 35  | 
def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c)  | 
36  | 
||
| 43418 | 37  | 
def is_ascii_letdig(c: Char): Boolean =  | 
38  | 
is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c)  | 
|
39  | 
||
40  | 
def is_ascii_identifier(s: String): Boolean =  | 
|
| 
50238
 
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
 
wenzelm 
parents: 
50233 
diff
changeset
 | 
41  | 
s.length > 0 && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig)  | 
| 43418 | 42  | 
|
43  | 
||
| 48775 | 44  | 
/* symbol matching */  | 
| 27901 | 45  | 
|
| 48775 | 46  | 
  private val symbol_total = new Regex("""(?xs)
 | 
47  | 
[\ud800-\udbff][\udc00-\udfff] | \r\n |  | 
|
48  | 
\\ < (?: \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* | \^? ([A-Za-z][A-Za-z0-9_']*)? ) >? |  | 
|
49  | 
.""")  | 
|
| 27924 | 50  | 
|
| 48775 | 51  | 
private def is_plain(c: Char): Boolean =  | 
52  | 
!(c == '\r' || c == '\\' || Character.isHighSurrogate(c))  | 
|
| 
48773
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
53  | 
|
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
54  | 
def is_malformed(s: Symbol): Boolean =  | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
55  | 
    s.length match {
 | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
56  | 
case 1 =>  | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
57  | 
val c = s(0)  | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
58  | 
Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd'  | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
59  | 
case 2 =>  | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
60  | 
val c1 = s(0)  | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
61  | 
val c2 = s(1)  | 
| 
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
62  | 
!(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2))  | 
| 48774 | 63  | 
      case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>"
 | 
| 
48773
 
0e1bab274672
more liberal scanning of potentially malformed symbols;
 
wenzelm 
parents: 
48704 
diff
changeset
 | 
64  | 
}  | 
| 34137 | 65  | 
|
| 
54734
 
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
 
wenzelm 
parents: 
53400 
diff
changeset
 | 
66  | 
def is_newline(s: Symbol): Boolean =  | 
| 
43675
 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 
wenzelm 
parents: 
43511 
diff
changeset
 | 
67  | 
s == "\n" || s == "\r" || s == "\r\n"  | 
| 38877 | 68  | 
|
| 34137 | 69  | 
class Matcher(text: CharSequence)  | 
70  | 
  {
 | 
|
| 48775 | 71  | 
private val matcher = symbol_total.pattern.matcher(text)  | 
| 34137 | 72  | 
def apply(start: Int, end: Int): Int =  | 
73  | 
    {
 | 
|
74  | 
require(0 <= start && start < end && end <= text.length)  | 
|
| 
34316
 
f879b649ac4c
clarified Symbol.is_plain/is_wellformed -- is_closed was rejecting plain backslashes;
 
wenzelm 
parents: 
34193 
diff
changeset
 | 
75  | 
if (is_plain(text.charAt(start))) 1  | 
| 34138 | 76  | 
      else {
 | 
| 34137 | 77  | 
matcher.region(start, end).lookingAt  | 
78  | 
matcher.group.length  | 
|
79  | 
}  | 
|
80  | 
}  | 
|
| 31522 | 81  | 
}  | 
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
82  | 
|
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
83  | 
|
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
84  | 
/* iterator */  | 
| 33998 | 85  | 
|
| 43696 | 86  | 
private val char_symbols: Array[Symbol] =  | 
| 
43675
 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 
wenzelm 
parents: 
43511 
diff
changeset
 | 
87  | 
(0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray  | 
| 
 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 
wenzelm 
parents: 
43511 
diff
changeset
 | 
88  | 
|
| 43696 | 89  | 
def iterator(text: CharSequence): Iterator[Symbol] =  | 
90  | 
new Iterator[Symbol]  | 
|
| 40522 | 91  | 
    {
 | 
| 43489 | 92  | 
private val matcher = new Matcher(text)  | 
93  | 
private var i = 0  | 
|
94  | 
def hasNext = i < text.length  | 
|
95  | 
def next =  | 
|
96  | 
      {
 | 
|
97  | 
val n = matcher(i, text.length)  | 
|
| 
43675
 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 
wenzelm 
parents: 
43511 
diff
changeset
 | 
98  | 
val s =  | 
| 
 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 
wenzelm 
parents: 
43511 
diff
changeset
 | 
99  | 
if (n == 0) ""  | 
| 
 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 
wenzelm 
parents: 
43511 
diff
changeset
 | 
100  | 
          else if (n == 1) {
 | 
| 
 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 
wenzelm 
parents: 
43511 
diff
changeset
 | 
101  | 
val c = text.charAt(i)  | 
| 
 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 
wenzelm 
parents: 
43511 
diff
changeset
 | 
102  | 
if (c < char_symbols.length) char_symbols(c)  | 
| 
 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 
wenzelm 
parents: 
43511 
diff
changeset
 | 
103  | 
else text.subSequence(i, i + n).toString  | 
| 
 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 
wenzelm 
parents: 
43511 
diff
changeset
 | 
104  | 
}  | 
| 
 
8252d51d70e2
simplified Symbol.iterator: produce strings, which are mostly preallocated;
 
wenzelm 
parents: 
43511 
diff
changeset
 | 
105  | 
else text.subSequence(i, i + n).toString  | 
| 43489 | 106  | 
i += n  | 
107  | 
s  | 
|
108  | 
}  | 
|
| 33998 | 109  | 
}  | 
| 43489 | 110  | 
|
| 44949 | 111  | 
def explode(text: CharSequence): List[Symbol] = iterator(text).toList  | 
112  | 
||
| 48922 | 113  | 
def advance_line_column(pos: (Int, Int), text: CharSequence): (Int, Int) =  | 
114  | 
  {
 | 
|
115  | 
var (line, column) = pos  | 
|
116  | 
    for (sym <- iterator(text)) {
 | 
|
| 
54734
 
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
 
wenzelm 
parents: 
53400 
diff
changeset
 | 
117  | 
      if (is_newline(sym)) { line += 1; column = 1 }
 | 
| 48922 | 118  | 
else column += 1  | 
119  | 
}  | 
|
120  | 
(line, column)  | 
|
121  | 
}  | 
|
122  | 
||
| 33998 | 123  | 
|
124  | 
/* decoding offsets */  | 
|
125  | 
||
| 52507 | 126  | 
object Index  | 
127  | 
  {
 | 
|
| 
56471
 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 
wenzelm 
parents: 
56338 
diff
changeset
 | 
128  | 
private sealed case class Entry(chr: Int, sym: Int)  | 
| 52507 | 129  | 
|
| 56472 | 130  | 
val empty: Index = new Index(Nil)  | 
| 
56471
 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 
wenzelm 
parents: 
56338 
diff
changeset
 | 
131  | 
|
| 
 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 
wenzelm 
parents: 
56338 
diff
changeset
 | 
132  | 
def apply(text: CharSequence): Index =  | 
| 31929 | 133  | 
    {
 | 
| 34137 | 134  | 
val matcher = new Matcher(text)  | 
| 
56471
 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 
wenzelm 
parents: 
56338 
diff
changeset
 | 
135  | 
val buf = new mutable.ListBuffer[Entry]  | 
| 31929 | 136  | 
var chr = 0  | 
137  | 
var sym = 0  | 
|
| 33998 | 138  | 
      while (chr < text.length) {
 | 
| 34137 | 139  | 
val n = matcher(chr, text.length)  | 
140  | 
chr += n  | 
|
| 31929 | 141  | 
sym += 1  | 
| 34137 | 142  | 
if (n > 1) buf += Entry(chr, sym)  | 
| 31929 | 143  | 
}  | 
| 56472 | 144  | 
if (buf.isEmpty) empty else new Index(buf.toList)  | 
| 31929 | 145  | 
}  | 
| 
56471
 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 
wenzelm 
parents: 
56338 
diff
changeset
 | 
146  | 
}  | 
| 55430 | 147  | 
|
| 56472 | 148  | 
final class Index private(entries: List[Index.Entry])  | 
| 
56471
 
2293a4350716
more frugal Symbol.Index -- no need to waste space on mostly empty arrays;
 
wenzelm 
parents: 
56338 
diff
changeset
 | 
149  | 
  {
 | 
| 56472 | 150  | 
private val hash: Int = entries.hashCode  | 
151  | 
private val index: Array[Index.Entry] = entries.toArray  | 
|
152  | 
||
| 
55884
 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 
wenzelm 
parents: 
55618 
diff
changeset
 | 
153  | 
def decode(symbol_offset: Offset): Text.Offset =  | 
| 31929 | 154  | 
    {
 | 
| 
55884
 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 
wenzelm 
parents: 
55618 
diff
changeset
 | 
155  | 
val sym = symbol_offset - 1  | 
| 31929 | 156  | 
val end = index.length  | 
| 48922 | 157  | 
@tailrec def bisect(a: Int, b: Int): Int =  | 
| 31929 | 158  | 
      {
 | 
159  | 
        if (a < b) {
 | 
|
160  | 
val c = (a + b) / 2  | 
|
161  | 
if (sym < index(c).sym) bisect(a, c)  | 
|
162  | 
else if (c + 1 == end || sym < index(c + 1).sym) c  | 
|
163  | 
else bisect(c + 1, b)  | 
|
164  | 
}  | 
|
165  | 
else -1  | 
|
166  | 
}  | 
|
167  | 
val i = bisect(0, end)  | 
|
168  | 
if (i < 0) sym  | 
|
169  | 
else index(i).chr + sym - index(i).sym  | 
|
170  | 
}  | 
|
| 
55884
 
f2c0eaedd579
tuned signature -- emphasize symbol positions (prover) vs. decoded text offsets (editor);
 
wenzelm 
parents: 
55618 
diff
changeset
 | 
171  | 
def decode(symbol_range: Range): Text.Range = symbol_range.map(decode(_))  | 
| 
56335
 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 
wenzelm 
parents: 
55884 
diff
changeset
 | 
172  | 
|
| 
56338
 
f968f4e3d520
proper structural hashCode, which is required for Command.File equals (NB: Array has physical object identity);
 
wenzelm 
parents: 
56335 
diff
changeset
 | 
173  | 
override def hashCode: Int = hash  | 
| 
56335
 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 
wenzelm 
parents: 
55884 
diff
changeset
 | 
174  | 
override def equals(that: Any): Boolean =  | 
| 
 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 
wenzelm 
parents: 
55884 
diff
changeset
 | 
175  | 
      that match {
 | 
| 
 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 
wenzelm 
parents: 
55884 
diff
changeset
 | 
176  | 
case other: Index => index.sameElements(other.index)  | 
| 
 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 
wenzelm 
parents: 
55884 
diff
changeset
 | 
177  | 
case _ => false  | 
| 
 
8953d4cc060a
store blob content within document node: aux. files that were once open are made persistent;
 
wenzelm 
parents: 
55884 
diff
changeset
 | 
178  | 
}  | 
| 31929 | 179  | 
}  | 
180  | 
||
181  | 
||
| 56746 | 182  | 
/* text chunks */  | 
183  | 
||
184  | 
object Text_Chunk  | 
|
185  | 
  {
 | 
|
186  | 
sealed abstract class Name  | 
|
187  | 
case object Default extends Name  | 
|
188  | 
case class Id(id: Document_ID.Generic) extends Name  | 
|
189  | 
case class File(name: String) extends Name  | 
|
190  | 
||
191  | 
def apply(text: CharSequence): Text_Chunk =  | 
|
192  | 
new Text_Chunk(Text.Range(0, text.length), Index(text))  | 
|
193  | 
}  | 
|
194  | 
||
195  | 
final class Text_Chunk private(val range: Text.Range, private val index: Index)  | 
|
196  | 
  {
 | 
|
197  | 
override def hashCode: Int = (range, index).hashCode  | 
|
198  | 
override def equals(that: Any): Boolean =  | 
|
199  | 
      that match {
 | 
|
200  | 
case other: Text_Chunk =>  | 
|
201  | 
range == other.range &&  | 
|
202  | 
index == other.index  | 
|
203  | 
case _ => false  | 
|
204  | 
}  | 
|
205  | 
||
| 57840 | 206  | 
override def toString: String = "Text_Chunk" + range.toString  | 
207  | 
||
| 56746 | 208  | 
def decode(symbol_offset: Offset): Text.Offset = index.decode(symbol_offset)  | 
209  | 
def decode(symbol_range: Range): Text.Range = index.decode(symbol_range)  | 
|
210  | 
def incorporate(symbol_range: Range): Option[Text.Range] =  | 
|
211  | 
    {
 | 
|
212  | 
def in(r: Range): Option[Text.Range] =  | 
|
213  | 
        range.try_restrict(decode(r)) match {
 | 
|
214  | 
case Some(r1) if !r1.is_singularity => Some(r1)  | 
|
215  | 
case _ => None  | 
|
216  | 
}  | 
|
217  | 
in(symbol_range) orElse in(symbol_range - 1)  | 
|
218  | 
}  | 
|
219  | 
}  | 
|
220  | 
||
221  | 
||
| 33998 | 222  | 
/* recoding text */  | 
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
223  | 
|
| 31522 | 224  | 
private class Recoder(list: List[(String, String)])  | 
225  | 
  {
 | 
|
226  | 
private val (min, max) =  | 
|
227  | 
    {
 | 
|
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
228  | 
var min = '\uffff'  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
229  | 
var max = '\u0000'  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
230  | 
      for ((x, _) <- list) {
 | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
231  | 
val c = x(0)  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
232  | 
if (c < min) min = c  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
233  | 
if (c > max) max = c  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
234  | 
}  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
235  | 
(min, max)  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
236  | 
}  | 
| 40443 | 237  | 
private val table =  | 
238  | 
    {
 | 
|
239  | 
var tab = Map[String, String]()  | 
|
240  | 
      for ((x, y) <- list) {
 | 
|
241  | 
        tab.get(x) match {
 | 
|
242  | 
case None => tab += (x -> y)  | 
|
243  | 
case Some(z) =>  | 
|
| 44181 | 244  | 
            error("Duplicate mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z))
 | 
| 40443 | 245  | 
}  | 
246  | 
}  | 
|
247  | 
tab  | 
|
248  | 
}  | 
|
| 31522 | 249  | 
def recode(text: String): String =  | 
250  | 
    {
 | 
|
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
251  | 
val len = text.length  | 
| 48775 | 252  | 
val matcher = symbol_total.pattern.matcher(text)  | 
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
253  | 
val result = new StringBuilder(len)  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
254  | 
var i = 0  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
255  | 
      while (i < len) {
 | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
256  | 
val c = text(i)  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
257  | 
        if (min <= c && c <= max) {
 | 
| 31929 | 258  | 
matcher.region(i, len).lookingAt  | 
| 27938 | 259  | 
val x = matcher.group  | 
| 52888 | 260  | 
result.append(table.getOrElse(x, x))  | 
| 
27937
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
261  | 
i = matcher.end  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
262  | 
}  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
263  | 
        else { result.append(c); i += 1 }
 | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
264  | 
}  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
265  | 
result.toString  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
266  | 
}  | 
| 
 
fdf77e7be01a
more robust pattern: look at longer matches first, added catch-all case;
 
wenzelm 
parents: 
27935 
diff
changeset
 | 
267  | 
}  | 
| 27924 | 268  | 
|
| 27918 | 269  | 
|
| 
27923
 
7ebe9d38743a
use scala.collection.jcl.HashMap, which seems to be more efficient;
 
wenzelm 
parents: 
27918 
diff
changeset
 | 
270  | 
|
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
271  | 
/** symbol interpretation **/  | 
| 27927 | 272  | 
|
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
273  | 
private lazy val symbols =  | 
| 
50564
 
c6fde2fc4217
allow to suppress ISABELLE_SYMBOLS for experiments;
 
wenzelm 
parents: 
50291 
diff
changeset
 | 
274  | 
    new Interpretation(File.try_read(Path.split(Isabelle_System.getenv("ISABELLE_SYMBOLS"))))
 | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
275  | 
|
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
276  | 
private class Interpretation(symbols_spec: String)  | 
| 
29569
 
f3f529b5d8fb
more general init of Symbol.Interpretation, independent of IsabelleSystem instance;
 
wenzelm 
parents: 
29174 
diff
changeset
 | 
277  | 
  {
 | 
| 31522 | 278  | 
/* read symbols */  | 
279  | 
||
| 
50136
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
280  | 
    private val No_Decl = new Regex("""(?xs) ^\s* (?: \#.* )? $ """)
 | 
| 
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
281  | 
    private val Key = new Regex("""(?xs) (.+): """)
 | 
| 31522 | 282  | 
|
| 
53316
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
283  | 
private def read_decl(decl: String): (Symbol, Properties.T) =  | 
| 31522 | 284  | 
    {
 | 
285  | 
      def err() = error("Bad symbol declaration: " + decl)
 | 
|
286  | 
||
| 
53316
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
287  | 
def read_props(props: List[String]): Properties.T =  | 
| 31522 | 288  | 
      {
 | 
289  | 
        props match {
 | 
|
| 
53316
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
290  | 
case Nil => Nil  | 
| 31522 | 291  | 
case _ :: Nil => err()  | 
| 61174 | 292  | 
          case Key(x) :: y :: rest => (x -> y.replace('\u2423', ' ')) :: read_props(rest)
 | 
| 31522 | 293  | 
case _ => err()  | 
294  | 
}  | 
|
295  | 
}  | 
|
296  | 
      decl.split("\\s+").toList match {
 | 
|
| 
53316
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
297  | 
case sym :: props if sym.length > 1 && !is_malformed(sym) =>  | 
| 
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
298  | 
(sym, read_props(props))  | 
| 34193 | 299  | 
case _ => err()  | 
| 31522 | 300  | 
}  | 
301  | 
}  | 
|
302  | 
||
| 
53316
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
303  | 
private val symbols: List[(Symbol, Properties.T)] =  | 
| 
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
304  | 
(((List.empty[(Symbol, Properties.T)], Set.empty[Symbol]) /:  | 
| 
50136
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
305  | 
split_lines(symbols_spec).reverse)  | 
| 
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
306  | 
        { case (res, No_Decl()) => res
 | 
| 
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
307  | 
case ((list, known), decl) =>  | 
| 
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
308  | 
val (sym, props) = read_decl(decl)  | 
| 
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
309  | 
if (known(sym)) (list, known)  | 
| 
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
310  | 
else ((sym, props) :: list, known + sym)  | 
| 
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
311  | 
})._1  | 
| 31522 | 312  | 
|
313  | 
||
| 53400 | 314  | 
/* basic properties */  | 
315  | 
||
316  | 
val properties: Map[Symbol, Properties.T] = Map(symbols: _*)  | 
|
| 31651 | 317  | 
|
| 43696 | 318  | 
val names: Map[Symbol, String] =  | 
| 34134 | 319  | 
    {
 | 
| 
43456
 
8a6de1a6e1dc
names for control symbols without "^", which is relevant for completion;
 
wenzelm 
parents: 
43455 
diff
changeset
 | 
320  | 
      val name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""")
 | 
| 60215 | 321  | 
Map((for ((sym @ name(a), _) <- symbols) yield sym -> a): _*)  | 
| 31651 | 322  | 
}  | 
323  | 
||
| 
50136
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
324  | 
val groups: List[(String, List[Symbol])] =  | 
| 
53316
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
325  | 
      symbols.map({ case (sym, props) =>
 | 
| 
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
326  | 
        val gs = for (("group", g) <- props) yield g
 | 
| 
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
327  | 
if (gs.isEmpty) List(sym -> "unsorted") else gs.map(sym -> _)  | 
| 
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
328  | 
}).flatten  | 
| 
50136
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
329  | 
        .groupBy(_._2).toList.map({ case (group, list) => (group, list.map(_._1)) })
 | 
| 
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
330  | 
.sortBy(_._1)  | 
| 
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
331  | 
|
| 
53316
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
332  | 
val abbrevs: Multi_Map[Symbol, String] =  | 
| 
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
333  | 
Multi_Map((  | 
| 
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
334  | 
        for {
 | 
| 
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
335  | 
(sym, props) <- symbols  | 
| 
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
336  | 
          ("abbrev", a) <- props.reverse
 | 
| 60215 | 337  | 
} yield sym -> a): _*)  | 
| 43488 | 338  | 
|
| 
61376
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
339  | 
val codes: List[(String, Int)] =  | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
340  | 
    {
 | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
341  | 
      val Code = new Properties.String("code")
 | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
342  | 
      for {
 | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
343  | 
(sym, props) <- symbols  | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
344  | 
code =  | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
345  | 
          props match {
 | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
346  | 
case Code(s) =>  | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
347  | 
              try { Integer.decode(s).intValue }
 | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
348  | 
              catch { case _: NumberFormatException => error("Bad code for symbol " + sym) }
 | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
349  | 
            case _ => error("Missing code for symbol " + sym)
 | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
350  | 
}  | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
351  | 
      } yield {
 | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
352  | 
        if (code < 128) error("Illegal ASCII code for symbol " + sym)
 | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
353  | 
else (sym, code)  | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
354  | 
}  | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
355  | 
}  | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
356  | 
|
| 43488 | 357  | 
|
| 43490 | 358  | 
/* recoding */  | 
| 31522 | 359  | 
|
360  | 
private val (decoder, encoder) =  | 
|
361  | 
    {
 | 
|
362  | 
val mapping =  | 
|
| 
61376
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
363  | 
for ((sym, code) <- codes) yield (sym, new String(Character.toChars(code)))  | 
| 
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
364  | 
(new Recoder(mapping), new Recoder(for ((x, y) <- mapping) yield (y, x)))  | 
| 31522 | 365  | 
}  | 
| 27918 | 366  | 
|
| 34098 | 367  | 
def decode(text: String): String = decoder.recode(text)  | 
368  | 
def encode(text: String): String = encoder.recode(text)  | 
|
| 34134 | 369  | 
|
| 43490 | 370  | 
private def recode_set(elems: String*): Set[String] =  | 
371  | 
    {
 | 
|
372  | 
val content = elems.toList  | 
|
373  | 
Set((content ::: content.map(decode)): _*)  | 
|
374  | 
}  | 
|
375  | 
||
376  | 
private def recode_map[A](elems: (String, A)*): Map[String, A] =  | 
|
377  | 
    {
 | 
|
378  | 
val content = elems.toList  | 
|
379  | 
      Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*)
 | 
|
380  | 
}  | 
|
381  | 
||
382  | 
||
383  | 
/* user fonts */  | 
|
384  | 
||
| 
53316
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
385  | 
    private val Font = new Properties.String("font")
 | 
| 43696 | 386  | 
val fonts: Map[Symbol, String] =  | 
| 60215 | 387  | 
recode_map((for ((sym, Font(font)) <- symbols) yield sym -> font): _*)  | 
| 43490 | 388  | 
|
389  | 
val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList  | 
|
390  | 
val font_index: Map[String, Int] = Map((font_names zip (0 until font_names.length).toList): _*)  | 
|
391  | 
||
| 34134 | 392  | 
|
393  | 
/* classification */  | 
|
394  | 
||
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
395  | 
val letters = recode_set(  | 
| 34134 | 396  | 
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",  | 
397  | 
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",  | 
|
398  | 
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",  | 
|
399  | 
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",  | 
|
400  | 
||
401  | 
"\\<A>", "\\<B>", "\\<C>", "\\<D>", "\\<E>", "\\<F>", "\\<G>",  | 
|
402  | 
"\\<H>", "\\<I>", "\\<J>", "\\<K>", "\\<L>", "\\<M>", "\\<N>",  | 
|
403  | 
"\\<O>", "\\<P>", "\\<Q>", "\\<R>", "\\<S>", "\\<T>", "\\<U>",  | 
|
404  | 
"\\<V>", "\\<W>", "\\<X>", "\\<Y>", "\\<Z>", "\\<a>", "\\<b>",  | 
|
405  | 
"\\<c>", "\\<d>", "\\<e>", "\\<f>", "\\<g>", "\\<h>", "\\<i>",  | 
|
406  | 
"\\<j>", "\\<k>", "\\<l>", "\\<m>", "\\<n>", "\\<o>", "\\<p>",  | 
|
407  | 
"\\<q>", "\\<r>", "\\<s>", "\\<t>", "\\<u>", "\\<v>", "\\<w>",  | 
|
408  | 
"\\<x>", "\\<y>", "\\<z>",  | 
|
409  | 
||
410  | 
"\\<AA>", "\\<BB>", "\\<CC>", "\\<DD>", "\\<EE>", "\\<FF>",  | 
|
411  | 
"\\<GG>", "\\<HH>", "\\<II>", "\\<JJ>", "\\<KK>", "\\<LL>",  | 
|
412  | 
"\\<MM>", "\\<NN>", "\\<OO>", "\\<PP>", "\\<QQ>", "\\<RR>",  | 
|
413  | 
"\\<SS>", "\\<TT>", "\\<UU>", "\\<VV>", "\\<WW>", "\\<XX>",  | 
|
414  | 
"\\<YY>", "\\<ZZ>", "\\<aa>", "\\<bb>", "\\<cc>", "\\<dd>",  | 
|
415  | 
"\\<ee>", "\\<ff>", "\\<gg>", "\\<hh>", "\\<ii>", "\\<jj>",  | 
|
416  | 
"\\<kk>", "\\<ll>", "\\<mm>", "\\<nn>", "\\<oo>", "\\<pp>",  | 
|
417  | 
"\\<qq>", "\\<rr>", "\\<ss>", "\\<tt>", "\\<uu>", "\\<vv>",  | 
|
418  | 
"\\<ww>", "\\<xx>", "\\<yy>", "\\<zz>",  | 
|
419  | 
||
420  | 
"\\<alpha>", "\\<beta>", "\\<gamma>", "\\<delta>", "\\<epsilon>",  | 
|
421  | 
"\\<zeta>", "\\<eta>", "\\<theta>", "\\<iota>", "\\<kappa>",  | 
|
422  | 
"\\<mu>", "\\<nu>", "\\<xi>", "\\<pi>", "\\<rho>", "\\<sigma>",  | 
|
423  | 
"\\<tau>", "\\<upsilon>", "\\<phi>", "\\<chi>", "\\<psi>",  | 
|
424  | 
"\\<omega>", "\\<Gamma>", "\\<Delta>", "\\<Theta>", "\\<Lambda>",  | 
|
425  | 
"\\<Xi>", "\\<Pi>", "\\<Sigma>", "\\<Upsilon>", "\\<Phi>",  | 
|
| 
52616
 
3ac2878764f9
more robust identifier syntax: sub/superscript counts as modifier of LETDIG part instead of LETTER, both isub/isup and sub/sup are allowed;
 
wenzelm 
parents: 
52507 
diff
changeset
 | 
426  | 
"\\<Psi>", "\\<Omega>")  | 
| 34134 | 427  | 
|
| 
54734
 
b91afc3aa3e6
clarified Proof General legacy: special treatment of \<^newline> only in TTY mode;
 
wenzelm 
parents: 
53400 
diff
changeset
 | 
428  | 
    val blanks = recode_set(" ", "\t", "\n", "\u000B", "\f", "\r", "\r\n")
 | 
| 34138 | 429  | 
|
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
430  | 
val sym_chars =  | 
| 34138 | 431  | 
      Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~")
 | 
| 34134 | 432  | 
|
| 
44992
 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 
wenzelm 
parents: 
44949 
diff
changeset
 | 
433  | 
    val symbolic = recode_set((for { (sym, _) <- symbols; if raw_symbolic(sym) } yield sym): _*)
 | 
| 
 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 
wenzelm 
parents: 
44949 
diff
changeset
 | 
434  | 
|
| 43455 | 435  | 
|
| 61579 | 436  | 
/* comment */  | 
437  | 
||
438  | 
val comment_decoded = decode(comment)  | 
|
439  | 
||
440  | 
||
| 55033 | 441  | 
/* cartouches */  | 
442  | 
||
443  | 
val open_decoded = decode(open)  | 
|
444  | 
val close_decoded = decode(close)  | 
|
445  | 
||
446  | 
||
| 43488 | 447  | 
/* control symbols */  | 
448  | 
||
| 59107 | 449  | 
val control_decoded: Set[Symbol] =  | 
| 43488 | 450  | 
      Set((for ((sym, _) <- symbols if sym.startsWith("\\<^")) yield decode(sym)): _*)
 | 
451  | 
||
| 
44238
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
452  | 
    val sub_decoded = decode("\\<^sub>")
 | 
| 
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
453  | 
    val sup_decoded = decode("\\<^sup>")
 | 
| 43511 | 454  | 
    val bsub_decoded = decode("\\<^bsub>")
 | 
455  | 
    val esub_decoded = decode("\\<^esub>")
 | 
|
456  | 
    val bsup_decoded = decode("\\<^bsup>")
 | 
|
457  | 
    val esup_decoded = decode("\\<^esup>")
 | 
|
| 
44238
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
458  | 
    val bold_decoded = decode("\\<^bold>")
 | 
| 61483 | 459  | 
    val emph_decoded = decode("\\<^emph>")
 | 
| 27918 | 460  | 
}  | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
461  | 
|
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
462  | 
|
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
463  | 
/* tables */  | 
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
464  | 
|
| 53400 | 465  | 
def properties: Map[Symbol, Properties.T] = symbols.properties  | 
| 43696 | 466  | 
def names: Map[Symbol, String] = symbols.names  | 
| 
50136
 
a96bd08258a2
support for symbol groups, retaining original order of declarations;
 
wenzelm 
parents: 
48922 
diff
changeset
 | 
467  | 
def groups: List[(String, List[Symbol])] = symbols.groups  | 
| 
53316
 
c3e549e0d3c7
allow multiple symbol properties, notably groups and abbrevs;
 
wenzelm 
parents: 
53021 
diff
changeset
 | 
468  | 
def abbrevs: Multi_Map[Symbol, String] = symbols.abbrevs  | 
| 
61376
 
93224745477f
output HTML text according to Isabelle/Scala Symbol.Interpretation;
 
wenzelm 
parents: 
61174 
diff
changeset
 | 
469  | 
def codes: List[(String, Int)] = symbols.codes  | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
470  | 
|
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
471  | 
def decode(text: String): String = symbols.decode(text)  | 
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
472  | 
def encode(text: String): String = symbols.encode(text)  | 
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
473  | 
|
| 
53337
 
b3817a0e3211
sort items according to persistent history of frequency of use;
 
wenzelm 
parents: 
53316 
diff
changeset
 | 
474  | 
def decode_string: XML.Decode.T[String] = (x => decode(XML.Decode.string(x)))  | 
| 
 
b3817a0e3211
sort items according to persistent history of frequency of use;
 
wenzelm 
parents: 
53316 
diff
changeset
 | 
475  | 
def encode_string: XML.Encode.T[String] = (x => XML.Encode.string(encode(x)))  | 
| 
 
b3817a0e3211
sort items according to persistent history of frequency of use;
 
wenzelm 
parents: 
53316 
diff
changeset
 | 
476  | 
|
| 
50291
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
477  | 
def decode_strict(text: String): String =  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
478  | 
  {
 | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
479  | 
val decoded = decode(text)  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
480  | 
if (encode(decoded) == text) decoded  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
481  | 
    else {
 | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
482  | 
val bad = new mutable.ListBuffer[Symbol]  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
483  | 
for (s <- iterator(text) if encode(decode(s)) != s && !bad.contains(s))  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
484  | 
bad += s  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
485  | 
      error("Bad Unicode symbols in text: " + commas_quote(bad))
 | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
486  | 
}  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
487  | 
}  | 
| 
 
674893679352
prefer Symbol.decode_strict in batch mode, to avoid files with spurious Unicode symbols that clash with Isabelle symbol interpretation;
 
wenzelm 
parents: 
50238 
diff
changeset
 | 
488  | 
|
| 43696 | 489  | 
def fonts: Map[Symbol, String] = symbols.fonts  | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
490  | 
def font_names: List[String] = symbols.font_names  | 
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
491  | 
def font_index: Map[String, Int] = symbols.font_index  | 
| 43696 | 492  | 
def lookup_font(sym: Symbol): Option[Int] = symbols.fonts.get(sym).map(font_index(_))  | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
493  | 
|
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
494  | 
|
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
495  | 
/* classification */  | 
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
496  | 
|
| 43696 | 497  | 
def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym)  | 
498  | 
def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9'  | 
|
499  | 
def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'"  | 
|
500  | 
def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym)  | 
|
501  | 
def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym)  | 
|
| 
44992
 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 
wenzelm 
parents: 
44949 
diff
changeset
 | 
502  | 
|
| 55033 | 503  | 
|
| 61579 | 504  | 
/* comment */  | 
505  | 
||
506  | 
val comment: Symbol = "\\<comment>"  | 
|
507  | 
def comment_decoded: Symbol = symbols.comment_decoded  | 
|
508  | 
||
509  | 
||
| 55033 | 510  | 
/* cartouches */  | 
511  | 
||
| 61579 | 512  | 
val open: Symbol = "\\<open>"  | 
513  | 
val close: Symbol = "\\<close>"  | 
|
| 55033 | 514  | 
|
515  | 
def open_decoded: Symbol = symbols.open_decoded  | 
|
516  | 
def close_decoded: Symbol = symbols.close_decoded  | 
|
517  | 
||
518  | 
def is_open(sym: Symbol): Boolean = sym == open_decoded || sym == open  | 
|
519  | 
def is_close(sym: Symbol): Boolean = sym == close_decoded || sym == close  | 
|
520  | 
||
521  | 
||
522  | 
/* symbols for symbolic identifiers */  | 
|
| 
44992
 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 
wenzelm 
parents: 
44949 
diff
changeset
 | 
523  | 
|
| 
 
aa34d2d049ce
refined Symbol.is_symbolic -- cover recoded versions as well;
 
wenzelm 
parents: 
44949 
diff
changeset
 | 
524  | 
private def raw_symbolic(sym: Symbol): Boolean =  | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
525  | 
    sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^")
 | 
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
526  | 
|
| 55033 | 527  | 
def is_symbolic(sym: Symbol): Boolean =  | 
528  | 
!is_open(sym) && !is_close(sym) && (raw_symbolic(sym) || symbols.symbolic.contains(sym))  | 
|
529  | 
||
530  | 
def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym)  | 
|
531  | 
||
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
532  | 
|
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
533  | 
/* control symbols */  | 
| 
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
534  | 
|
| 59107 | 535  | 
def is_control(sym: Symbol): Boolean =  | 
| 61470 | 536  | 
    (sym.startsWith("\\<^") && sym.endsWith(">")) || symbols.control_decoded.contains(sym)
 | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
537  | 
|
| 43696 | 538  | 
def is_controllable(sym: Symbol): Boolean =  | 
| 59107 | 539  | 
!is_blank(sym) && !is_control(sym) && !is_open(sym) && !is_close(sym) && !is_malformed(sym)  | 
| 
43695
 
5130dfe1b7be
simplified Symbol based on lazy Symbol.Interpretation -- reduced odd "functorial style";
 
wenzelm 
parents: 
43675 
diff
changeset
 | 
540  | 
|
| 
44238
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
541  | 
def sub_decoded: Symbol = symbols.sub_decoded  | 
| 
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
542  | 
def sup_decoded: Symbol = symbols.sup_decoded  | 
| 
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
543  | 
def bsub_decoded: Symbol = symbols.bsub_decoded  | 
| 
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
544  | 
def esub_decoded: Symbol = symbols.esub_decoded  | 
| 
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
545  | 
def bsup_decoded: Symbol = symbols.bsup_decoded  | 
| 
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
546  | 
def esup_decoded: Symbol = symbols.esup_decoded  | 
| 
 
36120feb70ed
some convenience actions/shortcuts for control symbols;
 
wenzelm 
parents: 
44181 
diff
changeset
 | 
547  | 
def bold_decoded: Symbol = symbols.bold_decoded  | 
| 61483 | 548  | 
def emph_decoded: Symbol = symbols.emph_decoded  | 
| 27901 | 549  | 
}  |