src/Pure/General/xml.scala
author wenzelm
Mon Jul 11 10:27:50 2011 +0200 (2011-07-11 ago)
changeset 43745 562e35bc351e
parent 43520 cec9b95fa35d
child 43747 74a9e9c8d5e8
permissions -rw-r--r--
tuned XML.Cache parameters;
     1 /*  Title:      Pure/General/xml.scala
     2     Author:     Makarius
     3 
     4 Simple XML tree values.
     5 */
     6 
     7 package isabelle
     8 
     9 import java.lang.System
    10 import java.util.WeakHashMap
    11 import java.lang.ref.WeakReference
    12 import javax.xml.parsers.DocumentBuilderFactory
    13 
    14 import scala.actors.Actor._
    15 
    16 
    17 object XML
    18 {
    19   /* datatype representation */
    20 
    21   type Attributes = List[(String, String)]
    22 
    23   sealed abstract class Tree { override def toString = string_of_tree(this) }
    24   case class Elem(markup: Markup, body: List[Tree]) extends Tree
    25   case class Text(content: String) extends Tree
    26 
    27   def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body)
    28   def elem(name: String) = Elem(Markup(name, Nil), Nil)
    29 
    30   type Body = List[Tree]
    31 
    32 
    33   /* string representation */
    34 
    35   def string_of_body(body: Body): String =
    36   {
    37     val s = new StringBuilder
    38 
    39     def text(txt: String) {
    40       if (txt == null) s ++= txt
    41       else {
    42         for (c <- txt.iterator) c match {
    43           case '<' => s ++= "&lt;"
    44           case '>' => s ++= "&gt;"
    45           case '&' => s ++= "&amp;"
    46           case '"' => s ++= "&quot;"
    47           case '\'' => s ++= "&apos;"
    48           case _ => s += c
    49         }
    50       }
    51     }
    52     def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" }
    53     def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) }
    54     def tree(t: Tree): Unit =
    55       t match {
    56         case Elem(markup, Nil) =>
    57           s ++= "<"; elem(markup); s ++= "/>"
    58         case Elem(markup, ts) =>
    59           s ++= "<"; elem(markup); s ++= ">"
    60           ts.foreach(tree)
    61           s ++= "</"; s ++= markup.name; s ++= ">"
    62         case Text(txt) => text(txt)
    63       }
    64     body.foreach(tree)
    65     s.toString
    66   }
    67 
    68   def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
    69 
    70 
    71   /* text content */
    72 
    73   def content_stream(tree: Tree): Stream[String] =
    74     tree match {
    75       case Elem(_, body) => body.toStream.flatten(content_stream(_))
    76       case Text(content) => Stream(content)
    77     }
    78 
    79   def content(tree: Tree): Iterator[String] = content_stream(tree).iterator
    80 
    81 
    82   /* pipe-lined cache for partial sharing */
    83 
    84   class Cache(initial_size: Int = 131071, max_string: Int = 100)
    85   {
    86     private val cache_actor = actor
    87     {
    88       val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)
    89 
    90       def lookup[A](x: A): Option[A] =
    91       {
    92         val ref = table.get(x)
    93         if (ref == null) None
    94         else {
    95           val y = ref.asInstanceOf[WeakReference[A]].get
    96           if (y == null) None
    97           else Some(y)
    98         }
    99       }
   100       def store[A](x: A): A =
   101       {
   102         table.put(x, new WeakReference[Any](x))
   103         x
   104       }
   105 
   106       def trim_bytes(s: String): String = new String(s.toCharArray)
   107 
   108       def cache_string(x: String): String =
   109         lookup(x) match {
   110           case Some(y) => y
   111           case None =>
   112             val z = trim_bytes(x)
   113             if (z.length > max_string) z else store(z)
   114         }
   115       def cache_props(x: List[(String, String)]): List[(String, String)] =
   116         if (x.isEmpty) x
   117         else
   118           lookup(x) match {
   119             case Some(y) => y
   120             case None => store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2))))
   121           }
   122       def cache_markup(x: Markup): Markup =
   123         lookup(x) match {
   124           case Some(y) => y
   125           case None =>
   126             x match {
   127               case Markup(name, props) =>
   128                 store(Markup(cache_string(name), cache_props(props)))
   129             }
   130         }
   131       def cache_tree(x: XML.Tree): XML.Tree =
   132         lookup(x) match {
   133           case Some(y) => y
   134           case None =>
   135             x match {
   136               case XML.Elem(markup, body) =>
   137                 store(XML.Elem(cache_markup(markup), cache_body(body)))
   138               case XML.Text(text) => store(XML.Text(cache_string(text)))
   139             }
   140         }
   141       def cache_body(x: XML.Body): XML.Body =
   142         if (x.isEmpty) x
   143         else
   144           lookup(x) match {
   145             case Some(y) => y
   146             case None => x.map(cache_tree(_))
   147           }
   148 
   149       // main loop
   150       loop {
   151         react {
   152           case Cache_String(x, f) => f(cache_string(x))
   153           case Cache_Markup(x, f) => f(cache_markup(x))
   154           case Cache_Tree(x, f) => f(cache_tree(x))
   155           case Cache_Body(x, f) => f(cache_body(x))
   156           case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad)
   157         }
   158       }
   159     }
   160 
   161     private case class Cache_String(x: String, f: String => Unit)
   162     private case class Cache_Markup(x: Markup, f: Markup => Unit)
   163     private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit)
   164     private case class Cache_Body(x: XML.Body, f: XML.Body => Unit)
   165 
   166     // main methods
   167     def cache_string(x: String)(f: String => Unit) { cache_actor ! Cache_String(x, f) }
   168     def cache_markup(x: Markup)(f: Markup => Unit) { cache_actor ! Cache_Markup(x, f) }
   169     def cache_tree(x: XML.Tree)(f: XML.Tree => Unit) { cache_actor ! Cache_Tree(x, f) }
   170     def cache_body(x: XML.Body)(f: XML.Body => Unit) { cache_actor ! Cache_Body(x, f) }
   171   }
   172 
   173 
   174   /* document object model (W3C DOM) */
   175 
   176   def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
   177     node.getUserData(Markup.Data.name) match {
   178       case tree: XML.Tree => Some(tree)
   179       case _ => None
   180     }
   181 
   182   def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
   183   {
   184     def DOM(tr: Tree): org.w3c.dom.Node = tr match {
   185       case Elem(Markup.Data, List(data, t)) =>
   186         val node = DOM(t)
   187         node.setUserData(Markup.Data.name, data, null)
   188         node
   189       case Elem(Markup(name, atts), ts) =>
   190         if (name == Markup.Data.name)
   191           error("Malformed data element: " + tr.toString)
   192         val node = doc.createElement(name)
   193         for ((name, value) <- atts) node.setAttribute(name, value)
   194         for (t <- ts) node.appendChild(DOM(t))
   195         node
   196       case Text(txt) => doc.createTextNode(txt)
   197     }
   198     DOM(tree)
   199   }
   200 }