src/Pure/General/xml.scala
author wenzelm
Tue Aug 10 23:03:48 2010 +0200 (2010-08-10 ago)
changeset 38267 e50c283dd125
parent 38263 11c2b8d1fde0
child 38268 beb86b805590
permissions -rw-r--r--
type XML.Body as basic data representation language (Scala version);
tuned;
wenzelm@27931
     1
/*  Title:      Pure/General/xml.scala
wenzelm@27931
     2
    Author:     Makarius
wenzelm@27931
     3
wenzelm@27947
     4
Simple XML tree values.
wenzelm@27931
     5
*/
wenzelm@27931
     6
wenzelm@27931
     7
package isabelle
wenzelm@27931
     8
wenzelm@34108
     9
import java.util.WeakHashMap
wenzelm@34108
    10
import java.lang.ref.WeakReference
wenzelm@34108
    11
import javax.xml.parsers.DocumentBuilderFactory
wenzelm@34108
    12
wenzelm@27947
    13
wenzelm@29203
    14
object XML
wenzelm@29203
    15
{
wenzelm@27947
    16
  /* datatype representation */
wenzelm@27947
    17
wenzelm@27931
    18
  type Attributes = List[(String, String)]
wenzelm@27931
    19
wenzelm@34046
    20
  sealed abstract class Tree {
wenzelm@29204
    21
    override def toString = {
wenzelm@29204
    22
      val s = new StringBuilder
wenzelm@29204
    23
      append_tree(this, s)
wenzelm@29204
    24
      s.toString
wenzelm@29204
    25
    }
wenzelm@29203
    26
  }
wenzelm@38230
    27
  case class Elem(markup: Markup, body: List[Tree]) extends Tree
wenzelm@29204
    28
  case class Text(content: String) extends Tree
wenzelm@29203
    29
wenzelm@38230
    30
  def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body)
wenzelm@38230
    31
  def elem(name: String) = Elem(Markup(name, Nil), Nil)
wenzelm@33999
    32
wenzelm@38267
    33
  type Body = List[Tree]
wenzelm@38267
    34
wenzelm@29203
    35
wenzelm@29203
    36
  /* string representation */
wenzelm@29203
    37
wenzelm@29203
    38
  private def append_text(text: String, s: StringBuilder) {
wenzelm@36016
    39
    if (text == null) s ++= text
wenzelm@34005
    40
    else {
wenzelm@36011
    41
      for (c <- text.iterator) c match {
wenzelm@36016
    42
        case '<' => s ++= "&lt;"
wenzelm@36016
    43
        case '>' => s ++= "&gt;"
wenzelm@36016
    44
        case '&' => s ++= "&amp;"
wenzelm@36016
    45
        case '"' => s ++= "&quot;"
wenzelm@36016
    46
        case '\'' => s ++= "&apos;"
wenzelm@36016
    47
        case _ => s += c
wenzelm@34005
    48
      }
wenzelm@29203
    49
    }
wenzelm@29203
    50
  }
wenzelm@29203
    51
wenzelm@38263
    52
  private def append_elem(name: String, atts: Attributes, s: StringBuilder)
wenzelm@38263
    53
  {
wenzelm@36016
    54
    s ++= name
wenzelm@29203
    55
    for ((a, x) <- atts) {
wenzelm@36016
    56
      s ++= " "; s ++= a; s ++= "=\""; append_text(x, s); s ++= "\""
wenzelm@29203
    57
    }
wenzelm@29203
    58
  }
wenzelm@29203
    59
wenzelm@38263
    60
  private def append_tree(tree: Tree, s: StringBuilder)
wenzelm@38263
    61
  {
wenzelm@29203
    62
    tree match {
wenzelm@38230
    63
      case Elem(Markup(name, atts), Nil) =>
wenzelm@36016
    64
        s ++= "<"; append_elem(name, atts, s); s ++= "/>"
wenzelm@38230
    65
      case Elem(Markup(name, atts), ts) =>
wenzelm@36016
    66
        s ++= "<"; append_elem(name, atts, s); s ++= ">"
wenzelm@29203
    67
        for (t <- ts) append_tree(t, s)
wenzelm@36016
    68
        s ++= "</"; s ++= name; s ++= ">"
wenzelm@29203
    69
      case Text(text) => append_text(text, s)
wenzelm@29203
    70
    }
wenzelm@29203
    71
  }
wenzelm@27941
    72
wenzelm@27941
    73
wenzelm@27942
    74
  /* iterate over content */
wenzelm@27941
    75
wenzelm@27941
    76
  private type State = Option[(String, List[Tree])]
wenzelm@27941
    77
wenzelm@27941
    78
  private def get_next(tree: Tree): State = tree match {
wenzelm@38230
    79
    case Elem(_, body) => get_nexts(body)
wenzelm@27941
    80
    case Text(content) => Some(content, Nil)
wenzelm@27941
    81
  }
wenzelm@27941
    82
  private def get_nexts(trees: List[Tree]): State = trees match {
wenzelm@27941
    83
    case Nil => None
wenzelm@27941
    84
    case t :: ts => get_next(t) match {
wenzelm@27941
    85
      case None => get_nexts(ts)
wenzelm@28007
    86
      case Some((s, r)) => Some((s, r ++ ts))
wenzelm@27941
    87
    }
wenzelm@27941
    88
  }
wenzelm@27941
    89
wenzelm@27941
    90
  def content(tree: Tree) = new Iterator[String] {
wenzelm@27941
    91
    private var state = get_next(tree)
wenzelm@27941
    92
    def hasNext() = state.isDefined
wenzelm@27941
    93
    def next() = state match {
wenzelm@27941
    94
      case Some((s, rest)) => { state = get_nexts(rest); s }
wenzelm@27941
    95
      case None => throw new NoSuchElementException("next on empty iterator")
wenzelm@27941
    96
    }
wenzelm@27941
    97
  }
wenzelm@27941
    98
wenzelm@27947
    99
wenzelm@34108
   100
  /* cache for partial sharing -- NOT THREAD SAFE */
wenzelm@34108
   101
wenzelm@34108
   102
  class Cache(initial_size: Int)
wenzelm@34108
   103
  {
wenzelm@34108
   104
    private val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)
wenzelm@34108
   105
wenzelm@34108
   106
    private def lookup[A](x: A): Option[A] =
wenzelm@34108
   107
    {
wenzelm@34108
   108
      val ref = table.get(x)
wenzelm@34108
   109
      if (ref == null) None
wenzelm@34108
   110
      else {
wenzelm@34108
   111
        val y = ref.asInstanceOf[WeakReference[A]].get
wenzelm@34108
   112
        if (y == null) None
wenzelm@34108
   113
        else Some(y)
wenzelm@34108
   114
      }
wenzelm@34108
   115
    }
wenzelm@34108
   116
    private def store[A](x: A): A =
wenzelm@34108
   117
    {
wenzelm@34108
   118
      table.put(x, new WeakReference[Any](x))
wenzelm@34108
   119
      x
wenzelm@34108
   120
    }
wenzelm@34108
   121
wenzelm@34108
   122
    def cache_string(x: String): String =
wenzelm@34108
   123
      lookup(x) match {
wenzelm@34108
   124
        case Some(y) => y
wenzelm@38233
   125
        case None => store(new String(x.toCharArray))  // trim string value
wenzelm@34108
   126
      }
wenzelm@34108
   127
    def cache_props(x: List[(String, String)]): List[(String, String)] =
wenzelm@34133
   128
      if (x.isEmpty) x
wenzelm@34133
   129
      else
wenzelm@34133
   130
        lookup(x) match {
wenzelm@34133
   131
          case Some(y) => y
wenzelm@34133
   132
          case None => store(x.map(p => (cache_string(p._1), cache_string(p._2))))
wenzelm@34133
   133
        }
wenzelm@38230
   134
    def cache_markup(x: Markup): Markup =
wenzelm@38230
   135
      lookup(x) match {
wenzelm@38230
   136
        case Some(y) => y
wenzelm@38230
   137
        case None =>
wenzelm@38230
   138
          x match {
wenzelm@38230
   139
            case Markup(name, props) =>
wenzelm@38230
   140
              store(Markup(cache_string(name), cache_props(props)))
wenzelm@38230
   141
          }
wenzelm@38230
   142
      }
wenzelm@34117
   143
    def cache_tree(x: XML.Tree): XML.Tree =
wenzelm@34108
   144
      lookup(x) match {
wenzelm@34108
   145
        case Some(y) => y
wenzelm@34108
   146
        case None =>
wenzelm@34108
   147
          x match {
wenzelm@38230
   148
            case XML.Elem(markup, body) =>
wenzelm@38230
   149
              store(XML.Elem(cache_markup(markup), cache_trees(body)))
wenzelm@38230
   150
            case XML.Text(text) => store(XML.Text(cache_string(text)))
wenzelm@34108
   151
          }
wenzelm@34108
   152
      }
wenzelm@34117
   153
    def cache_trees(x: List[XML.Tree]): List[XML.Tree] =
wenzelm@34133
   154
      if (x.isEmpty) x
wenzelm@34133
   155
      else
wenzelm@34133
   156
        lookup(x) match {
wenzelm@34133
   157
          case Some(y) => y
wenzelm@34133
   158
          case None => x.map(cache_tree(_))
wenzelm@34133
   159
        }
wenzelm@34108
   160
  }
wenzelm@34108
   161
wenzelm@34108
   162
wenzelm@33953
   163
  /* document object model (W3C DOM) */
wenzelm@27948
   164
wenzelm@34871
   165
  def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
wenzelm@38231
   166
    node.getUserData(Markup.Data.name) match {
wenzelm@34047
   167
      case tree: XML.Tree => Some(tree)
wenzelm@34047
   168
      case _ => None
wenzelm@34047
   169
    }
wenzelm@34047
   170
wenzelm@34871
   171
  def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
wenzelm@33953
   172
  {
wenzelm@34871
   173
    def DOM(tr: Tree): org.w3c.dom.Node = tr match {
wenzelm@38231
   174
      case Elem(Markup.Data, List(data, t)) =>
wenzelm@34046
   175
        val node = DOM(t)
wenzelm@38231
   176
        node.setUserData(Markup.Data.name, data, null)
wenzelm@34046
   177
        node
wenzelm@38230
   178
      case Elem(Markup(name, atts), ts) =>
wenzelm@38231
   179
        if (name == Markup.Data.name)
wenzelm@34046
   180
          error("Malformed data element: " + tr.toString)
wenzelm@27947
   181
        val node = doc.createElement(name)
wenzelm@27947
   182
        for ((name, value) <- atts) node.setAttribute(name, value)
wenzelm@27952
   183
        for (t <- ts) node.appendChild(DOM(t))
wenzelm@27947
   184
        node
wenzelm@27947
   185
      case Text(txt) => doc.createTextNode(txt)
wenzelm@27947
   186
    }
wenzelm@33953
   187
    DOM(tree)
wenzelm@33953
   188
  }
wenzelm@27931
   189
}