src/Pure/General/xml.scala
author wenzelm
Mon Aug 16 18:20:36 2010 +0200 (2010-08-16 ago)
changeset 38446 9d59dab38fef
parent 38268 beb86b805590
child 38484 9c1fde4e2487
permissions -rw-r--r--
XML.Cache: pipe-lined (thread-safe) version using actor;
tuned Isabelle_Process.pid handling;
wenzelm@27931
     1
/*  Title:      Pure/General/xml.scala
wenzelm@27931
     2
    Author:     Makarius
wenzelm@27931
     3
wenzelm@27947
     4
Simple XML tree values.
wenzelm@27931
     5
*/
wenzelm@27931
     6
wenzelm@27931
     7
package isabelle
wenzelm@27931
     8
wenzelm@34108
     9
import java.util.WeakHashMap
wenzelm@34108
    10
import java.lang.ref.WeakReference
wenzelm@34108
    11
import javax.xml.parsers.DocumentBuilderFactory
wenzelm@34108
    12
wenzelm@38446
    13
import scala.actors.Actor._
wenzelm@38446
    14
wenzelm@27947
    15
wenzelm@29203
    16
object XML
wenzelm@29203
    17
{
wenzelm@27947
    18
  /* datatype representation */
wenzelm@27947
    19
wenzelm@27931
    20
  // Element attributes as (name, value) pairs.
  type Attributes = List[(String, String)]

  // XML tree node: either an Elem or a Text; prints via string_of_tree.
  sealed abstract class Tree
  {
    override def toString = string_of_tree(this)
  }

  // Element: markup (name and properties) plus child trees.
  case class Elem(markup: Markup, body: List[Tree]) extends Tree

  // Plain text node.
  case class Text(content: String) extends Tree

  // Element with the given name and children, without properties.
  def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body)

  // Element with the given name, without properties or children.
  def elem(name: String) = Elem(Markup(name, Nil), Nil)

  // Sequence of sibling trees.
  type Body = List[Tree]
wenzelm@38267
    30
wenzelm@29203
    31
wenzelm@29203
    32
  /* string representation */
wenzelm@29203
    33
wenzelm@38268
    34
  // Serialize a body (list of trees) as XML text.  Elements with an empty
  // body are written as <name .../>, all others as <name ...>children</name>;
  // text nodes and attribute values are escaped.
  def string_of_body(body: Body): String =
  {
    val buf = new StringBuilder

    // Append str with < > & " ' replaced by character entities; a null
    // reference is handed to the builder unchanged.
    def escaped(str: String): Unit =
      if (str != null) {
        str.foreach {
          case '<' => buf ++= "&lt;"
          case '>' => buf ++= "&gt;"
          case '&' => buf ++= "&amp;"
          case '"' => buf ++= "&quot;"
          case '\'' => buf ++= "&apos;"
          case ch => buf += ch
        }
      }
      else buf ++= str

    // Append one property as: space, name, ="escaped value"
    def attribute(prop: (String, String)): Unit =
    {
      buf ++= " "
      buf ++= prop._1
      buf ++= "=\""
      escaped(prop._2)
      buf ++= "\""
    }

    // Append the element name followed by all of its properties.
    def open_tag(markup: Markup): Unit =
    {
      buf ++= markup.name
      markup.properties.foreach(attribute)
    }

    // Append a single tree, recursively.
    def render(t: Tree): Unit =
      t match {
        case Text(txt) => escaped(txt)
        case Elem(markup, Nil) =>
          buf ++= "<"; open_tag(markup); buf ++= "/>"
        case Elem(markup, children) =>
          buf ++= "<"; open_tag(markup); buf ++= ">"
          children.foreach(render)
          buf ++= "</"; buf ++= markup.name; buf ++= ">"
      }

    body.foreach(render)
    buf.toString
  }
wenzelm@29203
    66
wenzelm@38268
    67
  // Serialize a single tree by treating it as a one-element body.
  def string_of_tree(tree: XML.Tree): String = string_of_body(tree :: Nil)
wenzelm@27941
    68
wenzelm@27941
    69
wenzelm@27942
    70
  /* iterate over content */
wenzelm@27941
    71
wenzelm@27941
    72
  // Iteration state: the next text chunk together with the trees that still
  // have to be visited, or None when no text is left.
  private type State = Option[(String, List[Tree])]

  // First text chunk reachable from tree: a text node yields its own content,
  // an element delegates to its body.
  private def get_next(tree: Tree): State =
    tree match {
      case Text(content) => Some((content, Nil))
      case Elem(_, body) => get_nexts(body)
    }
wenzelm@27941
    78
  // First text chunk within a list of trees, scanning left to right.  The
  // trees after the one that produced the chunk are appended to the pending
  // remainder, so iteration can resume there.
  private def get_nexts(trees: List[Tree]): State =
    if (trees.isEmpty) None
    else {
      val rest = trees.tail
      get_next(trees.head) match {
        case Some((str, pending)) => Some((str, pending ++ rest))
        case None => get_nexts(rest)
      }
    }
wenzelm@27941
    85
wenzelm@27941
    86
  // Iterator over the text chunks of tree, visiting nodes depth-first from
  // left to right.
  def content(tree: Tree) = new Iterator[String] {
    // next chunk plus the trees still to be visited; None when exhausted
    private var pending = get_next(tree)

    def hasNext() = !pending.isEmpty

    def next() = {
      val (str, rest) =
        pending.getOrElse(throw new NoSuchElementException("next on empty iterator"))
      pending = get_nexts(rest)
      str
    }
  }
wenzelm@27941
    94
wenzelm@27947
    95
wenzelm@38446
    96
  /* pipe-lined cache for partial sharing */
wenzelm@34108
    97
wenzelm@34108
    98
  // Cache for partial sharing of strings, markup and XML trees.
  //
  // All table access happens inside a single actor: clients send a request
  // together with a continuation f, and the actor applies f to the cached
  // (shared) value.  Note that f is invoked from the actor's message handler.
  //
  // initial_size: initial capacity of the underlying weak hash table.
  class Cache(initial_size: Int)
  {
    private val cache_actor = actor
    {
      // weak keys and weak values: entries do not keep cached values alive
      val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)

      // cached value equal to x, if present and not yet reclaimed
      def lookup[A](x: A): Option[A] =
      {
        val ref = table.get(x)
        if (ref == null) None
        else {
          val y = ref.asInstanceOf[WeakReference[A]].get
          if (y == null) None
          else Some(y)
        }
      }

      // register x as the shared representative for values equal to it
      def store[A](x: A): A =
      {
        table.put(x, new WeakReference[Any](x))
        x
      }

      def cache_string(x: String): String =
        lookup(x) match {
          case Some(y) => y
          case None => store(new String(x.toCharArray))  // trim string value
        }

      def cache_props(x: List[(String, String)]): List[(String, String)] =
        if (x.isEmpty) x
        else
          lookup(x) match {
            case Some(y) => y
            case None => store(x.map(p => (cache_string(p._1), cache_string(p._2))))
          }

      def cache_markup(x: Markup): Markup =
        lookup(x) match {
          case Some(y) => y
          case None =>
            x match {
              case Markup(name, props) =>
                store(Markup(cache_string(name), cache_props(props)))
            }
        }

      def cache_tree(x: XML.Tree): XML.Tree =
        lookup(x) match {
          case Some(y) => y
          case None =>
            x match {
              case XML.Elem(markup, body) =>
                store(XML.Elem(cache_markup(markup), cache_body(body)))
              case XML.Text(text) => store(XML.Text(cache_string(text)))
            }
        }

      def cache_body(x: XML.Body): XML.Body =
        if (x.isEmpty) x
        else
          lookup(x) match {
            case Some(y) => y
            // Store the rebuilt list, as cache_props does: without this no
            // body list is ever entered into the table, so the lookup above
            // cannot succeed and equal bodies are never shared.
            case None => store(x.map(cache_tree(_)))
          }

      // main loop
      loop {
        react {
          case Cache_String(x, f) => f(cache_string(x))
          case Cache_Markup(x, f) => f(cache_markup(x))
          case Cache_Tree(x, f) => f(cache_tree(x))
          case Cache_Body(x, f) => f(cache_body(x))
          case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad)
        }
      }
    }

    // request messages: value to be cached plus continuation for the result
    private case class Cache_String(x: String, f: String => Unit)
    private case class Cache_Markup(x: Markup, f: Markup => Unit)
    private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit)
    private case class Cache_Body(x: XML.Body, f: XML.Body => Unit)

    // main methods: asynchronous requests, the result is passed to f
    def cache_string(x: String)(f: String => Unit) { cache_actor ! Cache_String(x, f) }
    def cache_markup(x: Markup)(f: Markup => Unit) { cache_actor ! Cache_Markup(x, f) }
    def cache_tree(x: XML.Tree)(f: XML.Tree => Unit) { cache_actor ! Cache_Tree(x, f) }
    def cache_body(x: XML.Body)(f: XML.Body => Unit) { cache_actor ! Cache_Body(x, f) }
  }
wenzelm@34108
   182
wenzelm@34108
   183
wenzelm@33953
   184
  /* document object model (W3C DOM) */
wenzelm@27948
   185
wenzelm@34871
   186
  // Retrieve the XML tree attached to a DOM node as user data under the key
  // Markup.Data.name; None if nothing is attached or the value is not a tree.
  def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
  {
    val data = node.getUserData(Markup.Data.name)
    data match {
      case tree: XML.Tree => Some(tree)
      case _ => None
    }
  }
wenzelm@34047
   191
wenzelm@34871
   192
  // Convert tree into a DOM node created via doc.
  //
  // An element with markup Markup.Data and exactly two children (data, t) is
  // not turned into an element of its own: t is converted and data is attached
  // to the resulting node as user data under the key Markup.Data.name.  Any
  // other element named Markup.Data.name is rejected as malformed.
  def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
  {
    def DOM(tr: Tree): org.w3c.dom.Node =
      tr match {
        case Text(txt) => doc.createTextNode(txt)
        case Elem(Markup.Data, List(data, t)) =>
          val result = DOM(t)
          result.setUserData(Markup.Data.name, data, null)
          result
        case Elem(Markup(name, atts), ts) =>
          if (name == Markup.Data.name)
            error("Malformed data element: " + tr.toString)
          val element = doc.createElement(name)
          for ((att, value) <- atts) element.setAttribute(att, value)
          for (child <- ts) element.appendChild(DOM(child))
          element
      }

    DOM(tree)
  }
wenzelm@27931
   210
}