refined some Symbol operations/signatures;
authorwenzelm
Sat Dec 19 16:51:32 2009 +0100 (2009-12-19 ago)
changeset 341376cc9a0cbaf55
parent 34136 3dcb46ae6185
child 34138 4008c2f5a46e
refined some Symbol operations/signatures;
added Symbol.Matcher;
flexible Scan.Lexicon.symbols, with one/many/many1 variants;
src/Pure/General/scan.scala
src/Pure/General/symbol.scala
src/Pure/System/isabelle_system.scala
src/Pure/Thy/html.scala
     1.1 --- a/src/Pure/General/scan.scala	Sat Dec 19 16:02:26 2009 +0100
     1.2 +++ b/src/Pure/General/scan.scala	Sat Dec 19 16:51:32 2009 +0100
     1.3 @@ -103,12 +103,15 @@
     1.4      def ++ (elems: Iterator[String]): Lexicon = (this /: elems) ((s, elem) => s + elem)
     1.5  
     1.6  
     1.7 -    /* RegexParsers methods */
     1.8 +    /** RegexParsers methods **/
     1.9  
    1.10      override val whiteSpace = "".r
    1.11  
    1.12      type Result = (String, Boolean)
    1.13  
    1.14 +
    1.15 +    /* keywords from lexicon */
    1.16 +
    1.17      def keyword: Parser[Result] = new Parser[Result]
    1.18      {
    1.19        def apply(in: Input) =
    1.20 @@ -134,18 +137,38 @@
    1.21        }
    1.22      }.named("keyword")
    1.23  
    1.24 -    def symbol: Parser[String] = new Parser[String]
    1.25 -    {
    1.26 -      private val symbol_regex = regex(Symbol.regex)
    1.27 -      def apply(in: Input) =
    1.28 +
    1.29 +    /* symbols */
    1.30 +
    1.31 +    def symbols(pred: String => Boolean, min_count: Int, max_count: Int): Parser[CharSequence] =
    1.32 +      new Parser[CharSequence]
    1.33        {
    1.34 -        val source = in.source
    1.35 -        val offset = in.offset
    1.36 -        if (offset >= source.length) Failure("input expected", in)
    1.37 -        else if (Symbol.could_open(source.charAt(offset))) symbol_regex(in)
    1.38 -        else Success(source.subSequence(offset, offset + 1).toString, in.drop(1))
    1.39 -      }
    1.40 -    }.named("symbol")
    1.41 +        def apply(in: Input) =
    1.42 +        {
    1.43 +          val start = in.offset
    1.44 +          val end = in.source.length
    1.45 +          val matcher = new Symbol.Matcher(in.source)
    1.46 +
    1.47 +          var i = start
    1.48 +          var count = 0
    1.49 +          var finished = false
    1.50 +          while (!finished) {
    1.51 +            if (i < end && count < max_count) {
    1.52 +              val n = matcher(i, end)
    1.53 +              val sym = in.source.subSequence(i, i + n).toString
    1.54 +              if (pred(sym)) { i += n; count += 1 }
    1.55 +              else finished = true
    1.56 +            }
    1.57 +            else finished = true
    1.58 +          }
    1.59 +          if (count < min_count) Failure("bad symbols", in)
    1.60 +          else Success(in.source.subSequence(start, i), in.drop(i - start))
    1.61 +        }
    1.62 +      }.named("symbols")
    1.63 +
    1.64 +    def one(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 1, 1)
    1.65 +    def many(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 0, Integer.MAX_VALUE)
    1.66 +    def many1(pred: String => Boolean): Parser[CharSequence] = symbols(pred, 1, Integer.MAX_VALUE)
    1.67    }
    1.68  }
    1.69  
     2.1 --- a/src/Pure/General/symbol.scala	Sat Dec 19 16:02:26 2009 +0100
     2.2 +++ b/src/Pure/General/symbol.scala	Sat Dec 19 16:51:32 2009 +0100
     2.3 @@ -29,37 +29,45 @@
     2.4    // total pattern
     2.5    val regex = new Regex(plain + "|" + symbol + "|" + bad_symbol + "| .")
     2.6  
     2.7 -  // prefix of another symbol
     2.8 +
     2.9 +  /* basic matching */
    2.10 +
    2.11 +  def is_open(c: Char): Boolean =
    2.12 +    c == '\\' || Character.isHighSurrogate(c)
    2.13 +
    2.14    def is_open(s: CharSequence): Boolean =
    2.15    {
    2.16 -    val len = s.length
    2.17 -    len == 1 && Character.isHighSurrogate(s.charAt(0)) ||
    2.18 -    s == "\\" ||
    2.19 -    s == "\\<" ||
    2.20 -    len > 2 && s.charAt(len - 1) != '>'   // FIXME bad_symbol !??
    2.21 +    if (s.length == 1) is_open(s.charAt(0))
    2.22 +    else bad_symbol.pattern.matcher(s).matches
    2.23 +  }
    2.24 +
    2.25 +  class Matcher(text: CharSequence)
    2.26 +  {
    2.27 +    private val matcher = regex.pattern.matcher(text)
    2.28 +    def apply(start: Int, end: Int): Int =
    2.29 +    {
    2.30 +      require(0 <= start && start < end && end <= text.length)
    2.31 +      if (is_open(text.charAt(start))) {
    2.32 +        matcher.region(start, end).lookingAt
    2.33 +        matcher.group.length
    2.34 +      }
    2.35 +      else 1
    2.36 +    }
    2.37    }
    2.38  
    2.39  
    2.40    /* elements */
    2.41  
    2.42 -  def could_open(c: Char): Boolean =
    2.43 -    c == '\\' || Character.isHighSurrogate(c)
    2.44 -
    2.45 -  def elements(text: CharSequence) = new Iterator[String]
    2.46 +  def elements(text: CharSequence) = new Iterator[CharSequence]
    2.47    {
    2.48 -    private val matcher = regex.pattern.matcher(text)
    2.49 +    private val matcher = new Matcher(text)
    2.50      private var i = 0
    2.51      def hasNext = i < text.length
    2.52      def next = {
    2.53 -      val len =
    2.54 -        if (could_open(text.charAt(i))) {
    2.55 -          matcher.region(i, text.length).lookingAt
    2.56 -          matcher.group.length
    2.57 -        }
    2.58 -        else 1
    2.59 -      val s = text.subSequence(i, i + len)
    2.60 -      i += len
    2.61 -      s.toString
    2.62 +      val n = matcher(i, text.length)
    2.63 +      val s = text.subSequence(i, i + n)
    2.64 +      i += n
    2.65 +      s
    2.66      }
    2.67    }
    2.68  
    2.69 @@ -71,20 +79,15 @@
    2.70      case class Entry(chr: Int, sym: Int)
    2.71      val index: Array[Entry] =
    2.72      {
    2.73 -      val matcher = regex.pattern.matcher(text)
    2.74 +      val matcher = new Matcher(text)
    2.75        val buf = new mutable.ArrayBuffer[Entry]
    2.76        var chr = 0
    2.77        var sym = 0
    2.78        while (chr < text.length) {
    2.79 -        val len =
    2.80 -          if (could_open(text.charAt(chr))) {
    2.81 -            matcher.region(chr, text.length).lookingAt
    2.82 -            matcher.group.length
    2.83 -          }
    2.84 -          else 1
    2.85 -        chr += len
    2.86 +        val n = matcher(chr, text.length)
    2.87 +        chr += n
    2.88          sym += 1
    2.89 -        if (len > 1) buf += Entry(chr, sym)
    2.90 +        if (n > 1) buf += Entry(chr, sym)
    2.91        }
    2.92        buf.toArray
    2.93      }
    2.94 @@ -153,7 +156,7 @@
    2.95  
    2.96    /** Symbol interpretation **/
    2.97  
    2.98 -  class Interpretation(symbol_decls: Iterator[String])
    2.99 +  class Interpretation(symbol_decls: List[String])
   2.100    {
   2.101      /* read symbols */
   2.102  
   2.103 @@ -180,7 +183,7 @@
   2.104      }
   2.105  
   2.106      private val symbols: List[(String, Map[String, String])] =
   2.107 -      for (decl <- symbol_decls.toList if !empty.pattern.matcher(decl).matches)
   2.108 +      for (decl <- symbol_decls if !empty.pattern.matcher(decl).matches)
   2.109          yield read_decl(decl)
   2.110  
   2.111  
     3.1 --- a/src/Pure/System/isabelle_system.scala	Sat Dec 19 16:02:26 2009 +0100
     3.2 +++ b/src/Pure/System/isabelle_system.scala	Sat Dec 19 16:51:32 2009 +0100
     3.3 @@ -316,14 +316,14 @@
     3.4  
     3.5    /* symbols */
     3.6  
     3.7 -  private def read_symbols(path: String): Iterator[String] =
     3.8 +  private def read_symbols(path: String): List[String] =
     3.9    {
    3.10      val file = new File(platform_path(path))
    3.11 -    if (file.isFile) Source.fromFile(file).getLines
    3.12 -    else Iterator.empty
    3.13 +    if (file.isFile) Source.fromFile(file).getLines.toList
    3.14 +    else Nil
    3.15    }
    3.16    val symbols = new Symbol.Interpretation(
    3.17 -    read_symbols("$ISABELLE_HOME/etc/symbols") ++
    3.18 +    read_symbols("$ISABELLE_HOME/etc/symbols") :::
    3.19      read_symbols("$ISABELLE_HOME_USER/etc/symbols"))
    3.20  
    3.21  
     4.1 --- a/src/Pure/Thy/html.scala	Sat Dec 19 16:02:26 2009 +0100
     4.2 +++ b/src/Pure/Thy/html.scala	Sat Dec 19 16:51:32 2009 +0100
     4.3 @@ -49,7 +49,7 @@
     4.4            flush()
     4.5            ts + elem
     4.6          }
     4.7 -        val syms = Symbol.elements(txt)
     4.8 +        val syms = Symbol.elements(txt).map(_.toString)
     4.9          while (syms.hasNext) {
    4.10            val s1 = syms.next
    4.11            def s2() = if (syms.hasNext) syms.next else ""