src/Pure/General/xml.scala
author wenzelm
Mon Jul 11 10:27:50 2011 +0200 (2011-07-11 ago)
changeset 43745 562e35bc351e
parent 43520 cec9b95fa35d
child 43747 74a9e9c8d5e8
permissions -rw-r--r--
tuned XML.Cache parameters;
wenzelm@27931
     1
/*  Title:      Pure/General/xml.scala
wenzelm@27931
     2
    Author:     Makarius
wenzelm@27931
     3
wenzelm@27947
     4
Simple XML tree values.
wenzelm@27931
     5
*/
wenzelm@27931
     6
wenzelm@27931
     7
package isabelle
wenzelm@27931
     8
wenzelm@43520
     9
import java.lang.System
wenzelm@34108
    10
import java.util.WeakHashMap
wenzelm@34108
    11
import java.lang.ref.WeakReference
wenzelm@34108
    12
import javax.xml.parsers.DocumentBuilderFactory
wenzelm@34108
    13
wenzelm@38446
    14
import scala.actors.Actor._
wenzelm@38446
    15
wenzelm@27947
    16
wenzelm@29203
    17
object XML
wenzelm@29203
    18
{
wenzelm@27947
    19
  /* datatype representation */
wenzelm@27947
    20
wenzelm@27931
    21
  // Attribute list: (name, value) pairs.
  type Attributes = List[(String, String)]

  // XML tree nodes: either an element (markup plus child trees) or plain text.
  // toString renders the node via string_of_tree.
  sealed abstract class Tree { override def toString: String = string_of_tree(this) }
  case class Elem(markup: Markup, body: List[Tree]) extends Tree
  case class Text(content: String) extends Tree

  // Convenience constructors for elements whose markup has no properties.
  def elem(name: String, body: List[Tree]): Elem = Elem(Markup(name, Nil), body)
  def elem(name: String): Elem = elem(name, Nil)

  // A sequence of sibling trees.
  type Body = List[Tree]
wenzelm@38267
    31
wenzelm@29203
    32
wenzelm@29203
    33
  /* string representation */
wenzelm@29203
    34
wenzelm@38268
    35
  // Render a list of trees as XML source text, concatenated in order.
  // Text content and attribute values have the characters < > & " ' replaced
  // by the corresponding entities; element and attribute names are emitted as-is.
  // An element with an empty body is written in the short form <name .../>.
  def string_of_body(body: Body): String =
  {
    val buf = new StringBuilder

    // Append character data with the five special characters escaped.
    def text(txt: String): Unit =
    {
      if (txt == null) {
        // NOTE(review): a null string is handed to the builder unchanged, which
        // presumably renders it as the literal text "null" -- kept as in the original.
        buf ++= txt
      }
      else {
        txt.iterator.foreach {
          case '<' => buf ++= "&lt;"
          case '>' => buf ++= "&gt;"
          case '&' => buf ++= "&amp;"
          case '"' => buf ++= "&quot;"
          case '\'' => buf ++= "&apos;"
          case other => buf += other
        }
      }
    }

    // Append one attribute as: space, name, ="escaped value".
    def attribute(entry: (String, String)): Unit =
    {
      buf ++= " "
      buf ++= entry._1
      buf ++= "=\""
      text(entry._2)
      buf ++= "\""
    }

    // Append the element name followed by all of its attributes.
    def header(markup: Markup): Unit =
    {
      buf ++= markup.name
      markup.properties.foreach(attribute)
    }

    // Append a single tree, recursing into the children of elements.
    def tree(t: Tree): Unit =
      t match {
        case Text(txt) => text(txt)
        case Elem(markup, children) =>
          buf ++= "<"
          header(markup)
          if (children.isEmpty) buf ++= "/>"
          else {
            buf ++= ">"
            children.foreach(tree)
            buf ++= "</"
            buf ++= markup.name
            buf ++= ">"
          }
      }

    body.foreach(tree)
    buf.toString
  }
wenzelm@29203
    67
wenzelm@38268
    68
  // Render a single tree as XML source text (a one-element body).
  def string_of_tree(tree: XML.Tree): String = string_of_body(tree :: Nil)
wenzelm@27941
    69
wenzelm@27941
    70
wenzelm@38484
    71
  /* text content */
wenzelm@27941
    72
wenzelm@38484
    73
  // Stream of the Text contents found in a tree, in document order.
  // A Text node contributes its content; an element contributes the
  // concatenated streams of its children (markup itself is dropped).
  def content_stream(tree: Tree): Stream[String] =
    tree match {
      case Text(content) => Stream(content)
      case Elem(_, body) =>
        val children = body.toStream
        children.flatten((child: Tree) => content_stream(child))
    }
wenzelm@27941
    78
wenzelm@38484
    79
  // Iterator over the text fragments of `tree`, as produced by content_stream(tree).
  def content(tree: Tree): Iterator[String] = content_stream(tree).iterator
wenzelm@27941
    80
wenzelm@27947
    81
wenzelm@38446
    82
  /* pipe-lined cache for partial sharing */
wenzelm@34108
    83
wenzelm@43745
    84
  // Cache that maps structurally equal strings, markups and trees to one shared
  // instance.  All table access happens inside a single actor; clients send a
  // value together with a continuation `f`, and the actor's message handler
  // calls `f` with the cached representative.
  //
  //   initial_size: initial capacity of the underlying WeakHashMap
  //   max_string:   strings longer than this are copied but never stored
  class Cache(initial_size: Int = 131071, max_string: Int = 100)
  {
    // request messages: value to be cached plus continuation for the result
    private case class Cache_String(x: String, f: String => Unit)
    private case class Cache_Markup(x: Markup, f: Markup => Unit)
    private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit)
    private case class Cache_Body(x: XML.Body, f: XML.Body => Unit)

    private val cache_actor = actor
    {
      // Each entry maps a value to a weak reference to itself, so the table
      // does not keep otherwise unreachable values alive.
      val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)

      // Representative previously stored for a value equal to x, if still present.
      def lookup[A](x: A): Option[A] =
        table.get(x) match {
          case null => None
          case ref => Option(ref.asInstanceOf[WeakReference[A]].get)
        }

      // Register x as the representative of its equality class and return it.
      def store[A](x: A): A =
      {
        val ref = new WeakReference[Any](x)
        table.put(x, ref)
        x
      }

      // Fresh copy of s built from its characters.
      // NOTE(review): presumably meant to detach s from a larger shared backing
      // array -- confirm against the targeted JVM's String implementation.
      def trim_bytes(s: String): String = new String(s.toCharArray)

      def cache_string(x: String): String =
        lookup(x) getOrElse {
          val copy = trim_bytes(x)
          // long strings are returned as a copy without entering the table
          if (copy.length <= max_string) store(copy) else copy
        }

      def cache_props(x: List[(String, String)]): List[(String, String)] =
        if (x.isEmpty) x
        else
          lookup(x) getOrElse {
            // property names are interned, property values go through the string cache
            store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2))))
          }

      def cache_markup(x: Markup): Markup =
        lookup(x) getOrElse {
          val Markup(name, props) = x
          store(Markup(cache_string(name), cache_props(props)))
        }

      def cache_tree(x: XML.Tree): XML.Tree =
        lookup(x) getOrElse {
          x match {
            case XML.Text(text) => store(XML.Text(cache_string(text)))
            case XML.Elem(markup, body) =>
              store(XML.Elem(cache_markup(markup), cache_body(body)))
          }
        }

      // Bodies are rebuilt from cached trees; the resulting list itself is not stored.
      def cache_body(x: XML.Body): XML.Body =
        if (x.isEmpty) x
        else lookup(x).getOrElse(x.map(cache_tree(_)))

      // main loop
      loop {
        react {
          case Cache_String(x, f) => f(cache_string(x))
          case Cache_Markup(x, f) => f(cache_markup(x))
          case Cache_Tree(x, f) => f(cache_tree(x))
          case Cache_Body(x, f) => f(cache_body(x))
          case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad)
        }
      }
    }

    // main methods: send the request to the actor; the result is passed to f later
    def cache_string(x: String)(f: String => Unit): Unit = { cache_actor ! Cache_String(x, f) }
    def cache_markup(x: Markup)(f: Markup => Unit): Unit = { cache_actor ! Cache_Markup(x, f) }
    def cache_tree(x: XML.Tree)(f: XML.Tree => Unit): Unit = { cache_actor ! Cache_Tree(x, f) }
    def cache_body(x: XML.Body)(f: XML.Body => Unit): Unit = { cache_actor ! Cache_Body(x, f) }
  }
wenzelm@34108
   172
wenzelm@34108
   173
wenzelm@33953
   174
  /* document object model (W3C DOM) */
wenzelm@27948
   175
wenzelm@34871
   176
  // XML tree attached to a DOM node as user data under the key Markup.Data.name.
  // Yields None when no user data is present or when it is not an XML.Tree.
  def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
    Option(node.getUserData(Markup.Data.name)).collect { case tree: XML.Tree => tree }
wenzelm@34047
   181
wenzelm@34871
   182
  // Build a DOM node owned by `doc` from an XML tree.
  //   - Text becomes a DOM text node.
  //   - Elem(Markup.Data, List(data, t)) becomes the node for `t`, with `data`
  //     attached as user data under the key Markup.Data.name.
  //   - Any other element named Markup.Data.name is rejected via error(...).
  //   - Other elements become DOM elements with their attributes and children.
  def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
  {
    def convert(tr: Tree): org.w3c.dom.Node = tr match {
      case Text(txt) => doc.createTextNode(txt)
      case Elem(Markup.Data, List(data, t)) =>
        val wrapped = convert(t)
        wrapped.setUserData(Markup.Data.name, data, null)
        wrapped
      case Elem(Markup(name, atts), ts) =>
        // a data element that did not match the two-child shape above
        if (name == Markup.Data.name)
          error("Malformed data element: " + tr.toString)
        val element = doc.createElement(name)
        for ((key, value) <- atts) element.setAttribute(key, value)
        ts.foreach(child => element.appendChild(convert(child)))
        element
    }
    convert(tree)
  }
wenzelm@27931
   200
}