refined some Symbol operations/signatures;
authorwenzelm
Sat, 19 Dec 2009 16:51:32 +0100
changeset 34137 6cc9a0cbaf55
parent 34136 3dcb46ae6185
child 34138 4008c2f5a46e
refined some Symbol operations/signatures; added Symbol.Matcher; flexible Scan.Lexicon.symbols, with one/many/many1 variants;
src/Pure/General/scan.scala
src/Pure/General/symbol.scala
src/Pure/System/isabelle_system.scala
src/Pure/Thy/html.scala
--- a/src/Pure/General/scan.scala	Sat Dec 19 16:02:26 2009 +0100
+++ b/src/Pure/General/scan.scala	Sat Dec 19 16:51:32 2009 +0100
@@ -103,12 +103,15 @@
     def ++ (elems: Iterator[String]): Lexicon = (this /: elems) ((s, elem) => s + elem)
 
 
-    /* RegexParsers methods */
+    /** RegexParsers methods **/
 
     override val whiteSpace = "".r
 
     type Result = (String, Boolean)
 
+
+    /* keywords from lexicon */
+
     def keyword: Parser[Result] = new Parser[Result]
     {
       def apply(in: Input) =
@@ -134,18 +137,38 @@
       }
     }.named("keyword")
 
-    def symbol: Parser[String] = new Parser[String]
-    {
-      private val symbol_regex = regex(Symbol.regex)
-      def apply(in: Input) =
+
+    /* symbols */
+
+    def symbols(pred: String => Boolean, min_count: Int, max_count: Int): Parser[CharSequence] =
+      new Parser[CharSequence]
       {
-        val source = in.source
-        val offset = in.offset
-        if (offset >= source.length) Failure("input expected", in)
-        else if (Symbol.could_open(source.charAt(offset))) symbol_regex(in)
-        else Success(source.subSequence(offset, offset + 1).toString, in.drop(1))
-      }
-    }.named("symbol")
+        def apply(in: Input) =
+        {
+          val start = in.offset
+          val end = in.source.length
+          val matcher = new Symbol.Matcher(in.source)
+
+          var i = start
+          var count = 0
+          var finished = false
+          while (!finished) {
+            if (i < end && count < max_count) {
+              val n = matcher(i, end)
+              val sym = in.source.subSequence(i, i + n).toString
+              if (pred(sym)) { i += n; count += 1 }
+              else finished = true
+            }
+            else finished = true
+          }
+          if (count < min_count) Failure("bad symbols", in)
+          else Success(in.source.subSequence(start, i), in.drop(i - start))
+        }
+      }.named("symbols")
+
+    def one(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 1, 1)
+    def many(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 0, Integer.MAX_VALUE)
+    def many1(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 1, Integer.MAX_VALUE)
   }
 }
 
--- a/src/Pure/General/symbol.scala	Sat Dec 19 16:02:26 2009 +0100
+++ b/src/Pure/General/symbol.scala	Sat Dec 19 16:51:32 2009 +0100
@@ -29,37 +29,45 @@
   // total pattern
   val regex = new Regex(plain + "|" + symbol + "|" + bad_symbol + "| .")
 
-  // prefix of another symbol
+
+  /* basic matching */
+
+  def is_open(c: Char): Boolean =
+    c == '\\' || Character.isHighSurrogate(c)
+
   def is_open(s: CharSequence): Boolean =
   {
-    val len = s.length
-    len == 1 && Character.isHighSurrogate(s.charAt(0)) ||
-    s == "\\" ||
-    s == "\\<" ||
-    len > 2 && s.charAt(len - 1) != '>'   // FIXME bad_symbol !??
+    if (s.length == 1) is_open(s.charAt(0))
+    else bad_symbol.pattern.matcher(s).matches
+  }
+
+  class Matcher(text: CharSequence)
+  {
+    private val matcher = regex.pattern.matcher(text)
+    def apply(start: Int, end: Int): Int =
+    {
+      require(0 <= start && start < end && end <= text.length)
+      if (is_open(text.charAt(start))) {
+        matcher.region(start, end).lookingAt
+        matcher.group.length
+      }
+      else 1
+    }
   }
 
 
   /* elements */
 
-  def could_open(c: Char): Boolean =
-    c == '\\' || Character.isHighSurrogate(c)
-
-  def elements(text: CharSequence) = new Iterator[String]
+  def elements(text: CharSequence) = new Iterator[CharSequence]
   {
-    private val matcher = regex.pattern.matcher(text)
+    private val matcher = new Matcher(text)
     private var i = 0
     def hasNext = i < text.length
     def next = {
-      val len =
-        if (could_open(text.charAt(i))) {
-          matcher.region(i, text.length).lookingAt
-          matcher.group.length
-        }
-        else 1
-      val s = text.subSequence(i, i + len)
-      i += len
-      s.toString
+      val n = matcher(i, text.length)
+      val s = text.subSequence(i, i + n)
+      i += n
+      s
     }
   }
 
@@ -71,20 +79,15 @@
     case class Entry(chr: Int, sym: Int)
     val index: Array[Entry] =
     {
-      val matcher = regex.pattern.matcher(text)
+      val matcher = new Matcher(text)
       val buf = new mutable.ArrayBuffer[Entry]
       var chr = 0
       var sym = 0
       while (chr < text.length) {
-        val len =
-          if (could_open(text.charAt(chr))) {
-            matcher.region(chr, text.length).lookingAt
-            matcher.group.length
-          }
-          else 1
-        chr += len
+        val n = matcher(chr, text.length)
+        chr += n
         sym += 1
-        if (len > 1) buf += Entry(chr, sym)
+        if (n > 1) buf += Entry(chr, sym)
       }
       buf.toArray
     }
@@ -153,7 +156,7 @@
 
   /** Symbol interpretation **/
 
-  class Interpretation(symbol_decls: Iterator[String])
+  class Interpretation(symbol_decls: List[String])
   {
     /* read symbols */
 
@@ -180,7 +183,7 @@
     }
 
     private val symbols: List[(String, Map[String, String])] =
-      for (decl <- symbol_decls.toList if !empty.pattern.matcher(decl).matches)
+      for (decl <- symbol_decls if !empty.pattern.matcher(decl).matches)
         yield read_decl(decl)
 
 
--- a/src/Pure/System/isabelle_system.scala	Sat Dec 19 16:02:26 2009 +0100
+++ b/src/Pure/System/isabelle_system.scala	Sat Dec 19 16:51:32 2009 +0100
@@ -316,14 +316,14 @@
 
   /* symbols */
 
-  private def read_symbols(path: String): Iterator[String] =
+  private def read_symbols(path: String): List[String] =
   {
     val file = new File(platform_path(path))
-    if (file.isFile) Source.fromFile(file).getLines
-    else Iterator.empty
+    if (file.isFile) Source.fromFile(file).getLines.toList
+    else Nil
   }
   val symbols = new Symbol.Interpretation(
-    read_symbols("$ISABELLE_HOME/etc/symbols") ++
+    read_symbols("$ISABELLE_HOME/etc/symbols") :::
     read_symbols("$ISABELLE_HOME_USER/etc/symbols"))
 
 
--- a/src/Pure/Thy/html.scala	Sat Dec 19 16:02:26 2009 +0100
+++ b/src/Pure/Thy/html.scala	Sat Dec 19 16:51:32 2009 +0100
@@ -49,7 +49,7 @@
           flush()
           ts + elem
         }
-        val syms = Symbol.elements(txt)
+        val syms = Symbol.elements(txt).map(_.toString)
         while (syms.hasNext) {
           val s1 = syms.next
           def s2() = if (syms.hasNext) syms.next else ""