src/Pure/General/xml.scala
author wenzelm
Thu Aug 19 14:52:25 2010 +0200 (2010-08-19 ago)
changeset 38484 9c1fde4e2487
parent 38446 9d59dab38fef
child 38844 f3221fd64426
permissions -rw-r--r--
tuned XML.content: Stream based iteration is supposed to be declarative *and* efficient;
wenzelm@27931
     1
/*  Title:      Pure/General/xml.scala
wenzelm@27931
     2
    Author:     Makarius
wenzelm@27931
     3
wenzelm@27947
     4
Simple XML tree values.
wenzelm@27931
     5
*/
wenzelm@27931
     6
wenzelm@27931
     7
package isabelle
wenzelm@27931
     8
wenzelm@34108
     9
import java.util.WeakHashMap
wenzelm@34108
    10
import java.lang.ref.WeakReference
wenzelm@34108
    11
import javax.xml.parsers.DocumentBuilderFactory
wenzelm@34108
    12
wenzelm@38446
    13
import scala.actors.Actor._
wenzelm@38446
    14
wenzelm@27947
    15
wenzelm@29203
    16
object XML
wenzelm@29203
    17
{
wenzelm@27947
    18
  /* datatype representation */
wenzelm@27947
    19
wenzelm@27931
    20
  // Element attributes as an ordered list of (name, value) pairs.
  type Attributes = List[(String, String)]

  // XML tree node: either an element or a text node.
  // toString renders the node via string_of_tree.
  sealed abstract class Tree { override def toString: String = string_of_tree(this) }
  // Element node: markup (name and properties) plus child nodes.
  case class Elem(markup: Markup, body: List[Tree]) extends Tree
  // Text node carrying raw (unescaped) character data.
  case class Text(content: String) extends Tree

  // Element without properties, with the given children.
  def elem(name: String, body: List[Tree]): Elem = Elem(Markup(name, Nil), body)
  // Element without properties and without children.
  def elem(name: String): Elem = Elem(Markup(name, Nil), Nil)

  // A sequence of sibling trees.
  type Body = List[Tree]
wenzelm@38267
    30
wenzelm@29203
    31
wenzelm@29203
    32
  /* string representation */
wenzelm@29203
    33
wenzelm@38268
    34
  // Render a list of trees as one XML source string.  Text nodes and
  // attribute values are escaped; elements without children are written in
  // the self-closing form.
  def string_of_body(body: Body): String =
  {
    val out = new StringBuilder

    // append character data, replacing the five XML special characters
    def escaped(str: String): Unit =
      if (str == null) out ++= str  // null is handed to the builder unchanged
      else
        str.foreach {
          case '<' => out ++= "&lt;"
          case '>' => out ++= "&gt;"
          case '&' => out ++= "&amp;"
          case '"' => out ++= "&quot;"
          case '\'' => out ++= "&apos;"
          case ch => out += ch
        }

    // append the tag name followed by all name="value" properties
    def open_tag(markup: Markup): Unit =
    {
      out ++= markup.name
      markup.properties.foreach { case (a, v) =>
        out ++= " "
        out ++= a
        out ++= "=\""
        escaped(v)
        out ++= "\""
      }
    }

    // append one tree, recursively
    def emit(t: Tree): Unit =
      t match {
        case Text(str) => escaped(str)
        case Elem(markup, Nil) =>
          out ++= "<"
          open_tag(markup)
          out ++= "/>"
        case Elem(markup, children) =>
          out ++= "<"
          open_tag(markup)
          out ++= ">"
          children.foreach(emit)
          out ++= "</"
          out ++= markup.name
          out ++= ">"
      }

    body.foreach(emit)
    out.toString
  }
wenzelm@29203
    66
wenzelm@38268
    67
  // Render a single tree as XML source, by delegating to string_of_body.
  def string_of_tree(tree: XML.Tree): String = string_of_body(tree :: Nil)
wenzelm@27941
    68
wenzelm@27941
    69
wenzelm@38484
    70
  /* text content */
wenzelm@27941
    71
wenzelm@38484
    72
  // Text chunks of a tree in document order: a text node yields its content,
  // an element yields the concatenated chunks of its children.
  def content_stream(tree: Tree): Stream[String] =
    tree match {
      case Text(txt) => Stream(txt)
      case Elem(_, children) => children.toStream.flatMap(content_stream(_))
    }
wenzelm@27941
    77
wenzelm@38484
    78
  // Iterator over the text chunks produced by content_stream.
  def content(tree: Tree): Iterator[String] =
  {
    val chunks = content_stream(tree)
    chunks.iterator
  }
wenzelm@27941
    79
wenzelm@27947
    80
wenzelm@38446
    81
  /* pipe-lined cache for partial sharing */
wenzelm@34108
    82
wenzelm@34108
    83
  // Cache that maps strings, markup and XML trees to previously seen equal
  // values.  The table lives inside the body of one actor; clients send a
  // value together with a continuation, and the actor applies that
  // continuation to the cached representative.
  class Cache(initial_size: Int)
  {
    private val cache_actor = actor
    {
      // keys are held weakly by the map; each value is a weak reference to its own key
      val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)

      // representative equal to x, if one is present and not yet collected
      def lookup[A](x: A): Option[A] =
        Option(table.get(x)).flatMap(ref => Option(ref.get.asInstanceOf[A]))

      // register x as the representative of its equivalence class
      def store[A](x: A): A =
      {
        table.put(x, new WeakReference[Any](x))
        x
      }

      def cache_string(x: String): String =
        lookup(x) match {
          case Some(y) => y
          case None => store(new String(x.toCharArray))  // trim string value
        }
      def cache_props(x: List[(String, String)]): List[(String, String)] =
        if (x.isEmpty) x
        else
          lookup(x) match {
            case Some(y) => y
            case None => store(x.map(p => (cache_string(p._1), cache_string(p._2))))
          }
      def cache_markup(x: Markup): Markup =
        lookup(x) match {
          case Some(y) => y
          case None =>
            x match {
              case Markup(name, props) =>
                store(Markup(cache_string(name), cache_props(props)))
            }
        }
      def cache_tree(x: XML.Tree): XML.Tree =
        lookup(x) match {
          case Some(y) => y
          case None =>
            x match {
              case XML.Elem(markup, body) =>
                store(XML.Elem(cache_markup(markup), cache_body(body)))
              case XML.Text(text) => store(XML.Text(cache_string(text)))
            }
        }
      def cache_body(x: XML.Body): XML.Body =
        if (x.isEmpty) x
        else
          lookup(x) match {
            case Some(y) => y
            // store the rebuilt list (as cache_props does); otherwise the
            // lookup above could never succeed for a body
            case None => store(x.map(cache_tree(_)))
          }

      // main loop
      loop {
        react {
          case Cache_String(x, f) => f(cache_string(x))
          case Cache_Markup(x, f) => f(cache_markup(x))
          case Cache_Tree(x, f) => f(cache_tree(x))
          case Cache_Body(x, f) => f(cache_body(x))
          case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad)
        }
      }
    }

    // request messages: value to be cached plus continuation for the result
    private case class Cache_String(x: String, f: String => Unit)
    private case class Cache_Markup(x: Markup, f: Markup => Unit)
    private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit)
    private case class Cache_Body(x: XML.Body, f: XML.Body => Unit)

    // main methods: send the request and return immediately; f is applied
    // later, inside the cache actor
    def cache_string(x: String)(f: String => Unit): Unit = { cache_actor ! Cache_String(x, f) }
    def cache_markup(x: Markup)(f: Markup => Unit): Unit = { cache_actor ! Cache_Markup(x, f) }
    def cache_tree(x: XML.Tree)(f: XML.Tree => Unit): Unit = { cache_actor ! Cache_Tree(x, f) }
    def cache_body(x: XML.Body)(f: XML.Body => Unit): Unit = { cache_actor ! Cache_Body(x, f) }
  }
wenzelm@34108
   167
wenzelm@34108
   168
wenzelm@33953
   169
  /* document object model (W3C DOM) */
wenzelm@27948
   170
wenzelm@34871
   171
  // User data stored on a DOM node under the key Markup.Data.name, provided
  // it is an XML tree; None when absent or of any other type.
  def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
    Option(node.getUserData(Markup.Data.name)).collect { case tree: XML.Tree => tree }
wenzelm@34047
   176
wenzelm@34871
   177
  // Convert an XML tree into a DOM node owned by doc.  A Markup.Data element
  // with exactly two children (data, t) is not turned into a DOM element:
  // t is converted and data is attached to the result as user data.  Any
  // other element named Markup.Data.name is rejected as malformed.
  def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
  {
    def convert(current: Tree): org.w3c.dom.Node =
      current match {
        case Text(txt) => doc.createTextNode(txt)
        case Elem(Markup.Data, List(data, child)) =>
          val result = convert(child)
          result.setUserData(Markup.Data.name, data, null)
          result
        case Elem(Markup(tag, attributes), children) =>
          if (tag == Markup.Data.name)
            error("Malformed data element: " + current.toString)
          val element = doc.createElement(tag)
          attributes.foreach { case (a, v) => element.setAttribute(a, v) }
          children.foreach(c => element.appendChild(convert(c)))
          element
      }
    convert(tree)
  }
wenzelm@27931
   195
}