src/Pure/General/symbol.scala
author blanchet
Tue, 17 Feb 2009 14:01:54 +0100
changeset 29956 62f931b257b7
parent 29569 f3f529b5d8fb
child 31522 0466cb17064f
permissions -rw-r--r--
Reintroduce set_interpreter for Collect and op :. I removed them by accident when removing old code that dealt with the "set" type. Incidentally, there is still some broken "set" code in Refute that should be fixed (see TODO in refute.ML).

/*  Title:      Pure/General/symbol.scala
    Author:     Makarius

Detecting and recoding Isabelle symbols.
*/

package isabelle

import java.util.regex.Pattern
import java.io.File
import scala.io.Source
import scala.collection.jcl.HashMap


object Symbol {

  /** Symbol regexps **/

  private def compile(s: String) =
    Pattern.compile(s, Pattern.COMMENTS | Pattern.DOTALL)

  private val plain_pattern = compile(""" [^\\ \ud800-\udfff] | [\ud800-\udbff][\udc00-\udfff] """)

  private val symbol_pattern = compile(""" \\ \\? < (?:
      \^? [A-Za-z][A-Za-z0-9_']* |
      \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* ) >""")

  private val bad_symbol_pattern = compile("(?!" + symbol_pattern + ")" +
    """ \\ \\? < (?: (?! \s | [\"`\\] | \(\* | \*\) | \{\* | \*\} ) . )*""")

  // total pattern
  val pattern = compile(plain_pattern + "|" + symbol_pattern + "|" + bad_symbol_pattern + "| .")



  /** Recoding **/

  private class Recoder(list: List[(String, String)]) {
    private val (min, max) = {
      var min = '\uffff'
      var max = '\u0000'
      for ((x, _) <- list) {
        val c = x(0)
        if (c < min) min = c
        if (c > max) max = c
      }
      (min, max)
    }
    private val table = {
      val table = new HashMap[String, String]
      for ((x, y) <- list) table + (x -> y)
      table
    }
    def recode(text: String) = {
      val len = text.length
      val matcher = pattern.matcher(text)
      val result = new StringBuilder(len)
      var i = 0
      while (i < len) {
        val c = text(i)
        if (min <= c && c <= max) {
          matcher.region(i, len)
          matcher.lookingAt
          val x = matcher.group
          table.get(x) match {
            case Some(y) => result.append(y)
            case None => result.append(x)
          }
          i = matcher.end
        }
        else { result.append(c); i += 1 }
      }
      result.toString
    }
  }



  /** Symbol interpretation **/

  class Interpretation(symbol_decls: Iterator[String])
  {
    private val symbols = new HashMap[String, HashMap[String, String]]
    private var decoder: Recoder = null
    private var encoder: Recoder = null

    def decode(text: String) = decoder.recode(text)
    def encode(text: String) = encoder.recode(text)


    /* read symbols */

    private val empty_pattern = compile(""" ^\s* (?: \#.* )? $ """)
    private val blank_pattern = compile(""" \s+ """)
    private val key_pattern = compile(""" (.+): """)

    private def read_decl(decl: String) = {
      def err() = error("Bad symbol declaration: " + decl)

      def read_props(props: List[String], tab: HashMap[String, String])
      {
        props match {
          case Nil => ()
          case _ :: Nil => err()
          case key :: value :: rest => {
            val key_matcher = key_pattern.matcher(key)
            if (key_matcher.matches) {
              tab + (key_matcher.group(1) -> value)
              read_props(rest, tab)
            }
            else err ()
          }
        }
      }

      if (!empty_pattern.matcher(decl).matches) {
        blank_pattern.split(decl).toList match {
          case Nil => err()
          case symbol :: props => {
            val tab = new HashMap[String, String]
            read_props(props, tab)
            symbols + (symbol -> tab)
          }
        }
      }
    }


    /* init tables */

    private def get_code(entry: (String, HashMap[String, String])) = {
      val (symbol, props) = entry
      val code =
        try { Integer.decode(props("code")).intValue }
        catch {
          case _: NoSuchElementException => error("Missing code for symbol " + symbol)
          case _: NumberFormatException => error("Bad code for symbol " + symbol)
        }
      (symbol, new String(Character.toChars(code)))
    }

    private def init_recoders() = {
      val list = symbols.elements.toList.map(get_code)
      decoder = new Recoder(list ++ (for ((x, y) <- list) yield ("\\" + x, y)))
      encoder = new Recoder(for ((x, y) <- list) yield (y, x))
    }


    /* constructor */

    symbol_decls.foreach(read_decl)
    init_recoders()
  }
}