# HG changeset patch # User wenzelm # Date 1261237892 -3600 # Node ID 6cc9a0cbaf55724670f477de249b47c51d31597b # Parent 3dcb46ae61857993dcad7c8f29292d51b2b1ed81 refined some Symbol operations/signatures; added Symbol.Matcher; flexible Scan.Lexicon.symbols, with one/many/many1 variants; diff -r 3dcb46ae6185 -r 6cc9a0cbaf55 src/Pure/General/scan.scala --- a/src/Pure/General/scan.scala Sat Dec 19 16:02:26 2009 +0100 +++ b/src/Pure/General/scan.scala Sat Dec 19 16:51:32 2009 +0100 @@ -103,12 +103,15 @@ def ++ (elems: Iterator[String]): Lexicon = (this /: elems) ((s, elem) => s + elem) - /* RegexParsers methods */ + /** RegexParsers methods **/ override val whiteSpace = "".r type Result = (String, Boolean) + + /* keywords from lexicon */ + def keyword: Parser[Result] = new Parser[Result] { def apply(in: Input) = @@ -134,18 +137,38 @@ } }.named("keyword") - def symbol: Parser[String] = new Parser[String] - { - private val symbol_regex = regex(Symbol.regex) - def apply(in: Input) = + + /* symbols */ + + def symbols(pred: String => Boolean, min_count: Int, max_count: Int): Parser[CharSequence] = + new Parser[CharSequence] { - val source = in.source - val offset = in.offset - if (offset >= source.length) Failure("input expected", in) - else if (Symbol.could_open(source.charAt(offset))) symbol_regex(in) - else Success(source.subSequence(offset, offset + 1).toString, in.drop(1)) - } - }.named("symbol") + def apply(in: Input) = + { + val start = in.offset + val end = in.source.length + val matcher = new Symbol.Matcher(in.source) + + var i = start + var count = 0 + var finished = false + while (!finished) { + if (i < end && count < max_count) { + val n = matcher(i, end) + val sym = in.source.subSequence(i, i + n).toString + if (pred(sym)) { i += n; count += 1 } + else finished = true + } + else finished = true + } + if (count < min_count) Failure("bad symbols", in) + else Success(in.source.subSequence(start, i), in.drop(i - start)) + } + }.named("symbols") + + def one(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 1, 1) + def many(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 0, Integer.MAX_VALUE) + def many1(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 1, Integer.MAX_VALUE) } } diff -r 3dcb46ae6185 -r 6cc9a0cbaf55 src/Pure/General/symbol.scala --- a/src/Pure/General/symbol.scala Sat Dec 19 16:02:26 2009 +0100 +++ b/src/Pure/General/symbol.scala Sat Dec 19 16:51:32 2009 +0100 @@ -29,37 +29,45 @@ // total pattern val regex = new Regex(plain + "|" + symbol + "|" + bad_symbol + "| .") - // prefix of another symbol + + /* basic matching */ + + def is_open(c: Char): Boolean = + c == '\\' || Character.isHighSurrogate(c) + def is_open(s: CharSequence): Boolean = { - val len = s.length - len == 1 && Character.isHighSurrogate(s.charAt(0)) || - s == "\\" || - s == "\\<" || - len > 2 && s.charAt(len - 1) != '>' // FIXME bad_symbol !?? + if (s.length == 1) is_open(s.charAt(0)) + else bad_symbol.pattern.matcher(s).matches + } + + class Matcher(text: CharSequence) + { + private val matcher = regex.pattern.matcher(text) + def apply(start: Int, end: Int): Int = + { + require(0 <= start && start < end && end <= text.length) + if (is_open(text.charAt(start))) { + matcher.region(start, end).lookingAt + matcher.group.length + } + else 1 + } } /* elements */ - def could_open(c: Char): Boolean = - c == '\\' || Character.isHighSurrogate(c) - - def elements(text: CharSequence) = new Iterator[String] + def elements(text: CharSequence) = new Iterator[CharSequence] { - private val matcher = regex.pattern.matcher(text) + private val matcher = new Matcher(text) private var i = 0 def hasNext = i < text.length def next = { - val len = - if (could_open(text.charAt(i))) { - matcher.region(i, text.length).lookingAt - matcher.group.length - } - else 1 - val s = text.subSequence(i, i + len) - i += len - s.toString + val n = matcher(i, text.length) + val s = text.subSequence(i, i + n) + i += n + s } } @@ -71,20 +79,15 @@ case class Entry(chr: Int, sym: Int) val index: Array[Entry] = { - val matcher = regex.pattern.matcher(text) + val matcher = new Matcher(text) val buf = new mutable.ArrayBuffer[Entry] var chr = 0 var sym = 0 while (chr < text.length) { - val len = - if (could_open(text.charAt(chr))) { - matcher.region(chr, text.length).lookingAt - matcher.group.length - } - else 1 - chr += len + val n = matcher(chr, text.length) + chr += n sym += 1 - if (len > 1) buf += Entry(chr, sym) + if (n > 1) buf += Entry(chr, sym) } buf.toArray } @@ -153,7 +156,7 @@ /** Symbol interpretation **/ - class Interpretation(symbol_decls: Iterator[String]) + class Interpretation(symbol_decls: List[String]) { /* read symbols */ @@ -180,7 +183,7 @@ } private val symbols: List[(String, Map[String, String])] = - for (decl <- symbol_decls.toList if !empty.pattern.matcher(decl).matches) + for (decl <- symbol_decls if !empty.pattern.matcher(decl).matches) yield read_decl(decl) diff -r 3dcb46ae6185 -r 6cc9a0cbaf55 src/Pure/System/isabelle_system.scala --- a/src/Pure/System/isabelle_system.scala Sat Dec 19 16:02:26 2009 +0100 +++ b/src/Pure/System/isabelle_system.scala Sat Dec 19 16:51:32 2009 +0100 @@ -316,14 +316,14 @@ /* symbols */ - private def read_symbols(path: String): Iterator[String] = + private def read_symbols(path: String): List[String] = { val file = new File(platform_path(path)) - if (file.isFile) Source.fromFile(file).getLines - else Iterator.empty + if (file.isFile) Source.fromFile(file).getLines.toList + else Nil } val symbols = new Symbol.Interpretation( - read_symbols("$ISABELLE_HOME/etc/symbols") ++ + read_symbols("$ISABELLE_HOME/etc/symbols") ::: read_symbols("$ISABELLE_HOME_USER/etc/symbols")) diff -r 3dcb46ae6185 -r 6cc9a0cbaf55 src/Pure/Thy/html.scala --- a/src/Pure/Thy/html.scala Sat Dec 19 16:02:26 2009 +0100 +++ b/src/Pure/Thy/html.scala Sat Dec 19 16:51:32 2009 +0100 @@ -49,7 +49,7 @@ flush() ts + elem } - val syms = Symbol.elements(txt) + val syms = Symbol.elements(txt).map(_.toString) while (syms.hasNext) { val s1 = syms.next def s2() = if (syms.hasNext) syms.next else ""