refined some Symbol operations/signatures;
added Symbol.Matcher;
flexible Scan.Lexicon.symbols, with one/many/many1 variants;
--- a/src/Pure/General/scan.scala Sat Dec 19 16:02:26 2009 +0100
+++ b/src/Pure/General/scan.scala Sat Dec 19 16:51:32 2009 +0100
@@ -103,12 +103,15 @@
def ++ (elems: Iterator[String]): Lexicon = (this /: elems) ((s, elem) => s + elem)
- /* RegexParsers methods */
+ /** RegexParsers methods **/
override val whiteSpace = "".r
type Result = (String, Boolean)
+
+ /* keywords from lexicon */
+
def keyword: Parser[Result] = new Parser[Result]
{
def apply(in: Input) =
@@ -134,18 +137,38 @@
}
}.named("keyword")
- def symbol: Parser[String] = new Parser[String]
- {
- private val symbol_regex = regex(Symbol.regex)
- def apply(in: Input) =
+
+ /* symbols */
+
+ def symbols(pred: String => Boolean, min_count: Int, max_count: Int): Parser[CharSequence] =
+ new Parser[CharSequence]
{
- val source = in.source
- val offset = in.offset
- if (offset >= source.length) Failure("input expected", in)
- else if (Symbol.could_open(source.charAt(offset))) symbol_regex(in)
- else Success(source.subSequence(offset, offset + 1).toString, in.drop(1))
- }
- }.named("symbol")
+ def apply(in: Input) =
+ {
+ val start = in.offset
+ val end = in.source.length
+ val matcher = new Symbol.Matcher(in.source)
+
+ var i = start
+ var count = 0
+ var finished = false
+ while (!finished) {
+ if (i < end && count < max_count) {
+ val n = matcher(i, end)
+ val sym = in.source.subSequence(i, i + n).toString
+ if (pred(sym)) { i += n; count += 1 }
+ else finished = true
+ }
+ else finished = true
+ }
+ if (count < min_count) Failure("bad symbols", in)
+ else Success(in.source.subSequence(start, i), in.drop(i - start))
+ }
+ }.named("symbols")
+
+ def one(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 1, 1)
+ def many(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 0, Integer.MAX_VALUE)
+ def many1(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 1, Integer.MAX_VALUE)
}
}
--- a/src/Pure/General/symbol.scala Sat Dec 19 16:02:26 2009 +0100
+++ b/src/Pure/General/symbol.scala Sat Dec 19 16:51:32 2009 +0100
@@ -29,37 +29,45 @@
// total pattern
val regex = new Regex(plain + "|" + symbol + "|" + bad_symbol + "| .")
- // prefix of another symbol
+
+ /* basic matching */
+
+ def is_open(c: Char): Boolean =
+ c == '\\' || Character.isHighSurrogate(c)
+
def is_open(s: CharSequence): Boolean =
{
- val len = s.length
- len == 1 && Character.isHighSurrogate(s.charAt(0)) ||
- s == "\\" ||
- s == "\\<" ||
- len > 2 && s.charAt(len - 1) != '>' // FIXME bad_symbol !??
+ if (s.length == 1) is_open(s.charAt(0))
+ else bad_symbol.pattern.matcher(s).matches
+ }
+
+ class Matcher(text: CharSequence)
+ {
+ private val matcher = regex.pattern.matcher(text)
+ def apply(start: Int, end: Int): Int =
+ {
+ require(0 <= start && start < end && end <= text.length)
+ if (is_open(text.charAt(start))) {
+ matcher.region(start, end).lookingAt
+ matcher.group.length
+ }
+ else 1
+ }
}
/* elements */
- def could_open(c: Char): Boolean =
- c == '\\' || Character.isHighSurrogate(c)
-
- def elements(text: CharSequence) = new Iterator[String]
+ def elements(text: CharSequence) = new Iterator[CharSequence]
{
- private val matcher = regex.pattern.matcher(text)
+ private val matcher = new Matcher(text)
private var i = 0
def hasNext = i < text.length
def next = {
- val len =
- if (could_open(text.charAt(i))) {
- matcher.region(i, text.length).lookingAt
- matcher.group.length
- }
- else 1
- val s = text.subSequence(i, i + len)
- i += len
- s.toString
+ val n = matcher(i, text.length)
+ val s = text.subSequence(i, i + n)
+ i += n
+ s
}
}
@@ -71,20 +79,15 @@
case class Entry(chr: Int, sym: Int)
val index: Array[Entry] =
{
- val matcher = regex.pattern.matcher(text)
+ val matcher = new Matcher(text)
val buf = new mutable.ArrayBuffer[Entry]
var chr = 0
var sym = 0
while (chr < text.length) {
- val len =
- if (could_open(text.charAt(chr))) {
- matcher.region(chr, text.length).lookingAt
- matcher.group.length
- }
- else 1
- chr += len
+ val n = matcher(chr, text.length)
+ chr += n
sym += 1
- if (len > 1) buf += Entry(chr, sym)
+ if (n > 1) buf += Entry(chr, sym)
}
buf.toArray
}
@@ -153,7 +156,7 @@
/** Symbol interpretation **/
- class Interpretation(symbol_decls: Iterator[String])
+ class Interpretation(symbol_decls: List[String])
{
/* read symbols */
@@ -180,7 +183,7 @@
}
private val symbols: List[(String, Map[String, String])] =
- for (decl <- symbol_decls.toList if !empty.pattern.matcher(decl).matches)
+ for (decl <- symbol_decls if !empty.pattern.matcher(decl).matches)
yield read_decl(decl)
--- a/src/Pure/System/isabelle_system.scala Sat Dec 19 16:02:26 2009 +0100
+++ b/src/Pure/System/isabelle_system.scala Sat Dec 19 16:51:32 2009 +0100
@@ -316,14 +316,14 @@
/* symbols */
- private def read_symbols(path: String): Iterator[String] =
+ private def read_symbols(path: String): List[String] =
{
val file = new File(platform_path(path))
- if (file.isFile) Source.fromFile(file).getLines
- else Iterator.empty
+ if (file.isFile) Source.fromFile(file).getLines.toList
+ else Nil
}
val symbols = new Symbol.Interpretation(
- read_symbols("$ISABELLE_HOME/etc/symbols") ++
+ read_symbols("$ISABELLE_HOME/etc/symbols") :::
read_symbols("$ISABELLE_HOME_USER/etc/symbols"))
--- a/src/Pure/Thy/html.scala Sat Dec 19 16:02:26 2009 +0100
+++ b/src/Pure/Thy/html.scala Sat Dec 19 16:51:32 2009 +0100
@@ -49,7 +49,7 @@
flush()
ts + elem
}
- val syms = Symbol.elements(txt)
+ val syms = Symbol.elements(txt).map(_.toString)
while (syms.hasNext) {
val s1 = syms.next
def s2() = if (syms.hasNext) syms.next else ""