src/Pure/General/xml.scala
author wenzelm
Tue Jul 12 10:44:30 2011 +0200 (2011-07-12)
changeset 43767 e0219ef7f84c
parent 43747 74a9e9c8d5e8
child 43768 d52ab827d62b
permissions -rw-r--r--
tuned XML modules;
     1 /*  Title:      Pure/General/xml.scala
     2     Author:     Makarius
     3 
     4 Simple XML tree values.
     5 */
     6 
     7 package isabelle
     8 
     9 import java.lang.System
    10 import java.util.WeakHashMap
    11 import java.lang.ref.WeakReference
    12 import javax.xml.parsers.DocumentBuilderFactory
    13 
    14 import scala.actors.Actor._
    15 
    16 
    17 object XML
    18 {
    19   /** XML trees **/
    20 
    21   /* datatype representation */
    22 
    23   type Attributes = List[(String, String)]
    24 
    25   sealed abstract class Tree { override def toString = string_of_tree(this) }
    26   case class Elem(markup: Markup, body: List[Tree]) extends Tree
    27   case class Text(content: String) extends Tree
    28 
    29   def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body)
    30   def elem(name: String) = Elem(Markup(name, Nil), Nil)
    31 
    32   type Body = List[Tree]
    33 
    34 
    35   /* string representation */
    36 
    37   def string_of_body(body: Body): String =
    38   {
    39     val s = new StringBuilder
    40 
    41     def text(txt: String) {
    42       if (txt == null) s ++= txt
    43       else {
    44         for (c <- txt.iterator) c match {
    45           case '<' => s ++= "&lt;"
    46           case '>' => s ++= "&gt;"
    47           case '&' => s ++= "&amp;"
    48           case '"' => s ++= "&quot;"
    49           case '\'' => s ++= "&apos;"
    50           case _ => s += c
    51         }
    52       }
    53     }
    54     def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" }
    55     def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) }
    56     def tree(t: Tree): Unit =
    57       t match {
    58         case Elem(markup, Nil) =>
    59           s ++= "<"; elem(markup); s ++= "/>"
    60         case Elem(markup, ts) =>
    61           s ++= "<"; elem(markup); s ++= ">"
    62           ts.foreach(tree)
    63           s ++= "</"; s ++= markup.name; s ++= ">"
    64         case Text(txt) => text(txt)
    65       }
    66     body.foreach(tree)
    67     s.toString
    68   }
    69 
    70   def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
    71 
    72 
    73   /* text content */
    74 
    75   def content_stream(tree: Tree): Stream[String] =
    76     tree match {
    77       case Elem(_, body) => content_stream(body)
    78       case Text(content) => Stream(content)
    79     }
    80   def content_stream(body: Body): Stream[String] =
    81     body.toStream.flatten(content_stream(_))
    82 
    83   def content(tree: Tree): Iterator[String] = content_stream(tree).iterator
    84   def content(body: Body): Iterator[String] = content_stream(body).iterator
    85 
    86 
    87   /* pipe-lined cache for partial sharing */
    88 
    89   class Cache(initial_size: Int = 131071, max_string: Int = 100)
    90   {
    91     private val cache_actor = actor
    92     {
    93       val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)
    94 
    95       def lookup[A](x: A): Option[A] =
    96       {
    97         val ref = table.get(x)
    98         if (ref == null) None
    99         else {
   100           val y = ref.asInstanceOf[WeakReference[A]].get
   101           if (y == null) None
   102           else Some(y)
   103         }
   104       }
   105       def store[A](x: A): A =
   106       {
   107         table.put(x, new WeakReference[Any](x))
   108         x
   109       }
   110 
   111       def trim_bytes(s: String): String = new String(s.toCharArray)
   112 
   113       def cache_string(x: String): String =
   114         lookup(x) match {
   115           case Some(y) => y
   116           case None =>
   117             val z = trim_bytes(x)
   118             if (z.length > max_string) z else store(z)
   119         }
   120       def cache_props(x: List[(String, String)]): List[(String, String)] =
   121         if (x.isEmpty) x
   122         else
   123           lookup(x) match {
   124             case Some(y) => y
   125             case None => store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2))))
   126           }
   127       def cache_markup(x: Markup): Markup =
   128         lookup(x) match {
   129           case Some(y) => y
   130           case None =>
   131             x match {
   132               case Markup(name, props) =>
   133                 store(Markup(cache_string(name), cache_props(props)))
   134             }
   135         }
   136       def cache_tree(x: XML.Tree): XML.Tree =
   137         lookup(x) match {
   138           case Some(y) => y
   139           case None =>
   140             x match {
   141               case XML.Elem(markup, body) =>
   142                 store(XML.Elem(cache_markup(markup), cache_body(body)))
   143               case XML.Text(text) => store(XML.Text(cache_string(text)))
   144             }
   145         }
   146       def cache_body(x: XML.Body): XML.Body =
   147         if (x.isEmpty) x
   148         else
   149           lookup(x) match {
   150             case Some(y) => y
   151             case None => x.map(cache_tree(_))
   152           }
   153 
   154       // main loop
   155       loop {
   156         react {
   157           case Cache_String(x, f) => f(cache_string(x))
   158           case Cache_Markup(x, f) => f(cache_markup(x))
   159           case Cache_Tree(x, f) => f(cache_tree(x))
   160           case Cache_Body(x, f) => f(cache_body(x))
   161           case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad)
   162         }
   163       }
   164     }
   165 
   166     private case class Cache_String(x: String, f: String => Unit)
   167     private case class Cache_Markup(x: Markup, f: Markup => Unit)
   168     private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit)
   169     private case class Cache_Body(x: XML.Body, f: XML.Body => Unit)
   170 
   171     // main methods
   172     def cache_string(x: String)(f: String => Unit) { cache_actor ! Cache_String(x, f) }
   173     def cache_markup(x: Markup)(f: Markup => Unit) { cache_actor ! Cache_Markup(x, f) }
   174     def cache_tree(x: XML.Tree)(f: XML.Tree => Unit) { cache_actor ! Cache_Tree(x, f) }
   175     def cache_body(x: XML.Body)(f: XML.Body => Unit) { cache_actor ! Cache_Body(x, f) }
   176   }
   177 
   178 
   179 
   180   /** document object model (W3C DOM) **/
   181 
   182   def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
   183     node.getUserData(Markup.Data.name) match {
   184       case tree: XML.Tree => Some(tree)
   185       case _ => None
   186     }
   187 
   188   def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
   189   {
   190     def DOM(tr: Tree): org.w3c.dom.Node = tr match {
   191       case Elem(Markup.Data, List(data, t)) =>
   192         val node = DOM(t)
   193         node.setUserData(Markup.Data.name, data, null)
   194         node
   195       case Elem(Markup(name, atts), ts) =>
   196         if (name == Markup.Data.name)
   197           error("Malformed data element: " + tr.toString)
   198         val node = doc.createElement(name)
   199         for ((name, value) <- atts) node.setAttribute(name, value)
   200         for (t <- ts) node.appendChild(DOM(t))
   201         node
   202       case Text(txt) => doc.createTextNode(txt)
   203     }
   204     DOM(tree)
   205   }
   206 
   207 
   208 
   209   /** XML as data representation language **/
   210 
   211   class XML_Atom(s: String) extends Exception(s)
   212   class XML_Body(body: XML.Body) extends Exception
   213 
   214   object Encode
   215   {
   216     type T[A] = A => XML.Body
   217 
   218 
   219     /* basic values */
   220 
   221     private def long_atom(i: Long): String = i.toString
   222 
   223     private def int_atom(i: Int): String = i.toString
   224 
   225     private def bool_atom(b: Boolean): String = if (b) "1" else "0"
   226 
   227     private def unit_atom(u: Unit) = ""
   228 
   229 
   230     /* structural nodes */
   231 
   232     private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)
   233 
   234     private def tagged(tag: Int, ts: XML.Body): XML.Tree =
   235       XML.Elem(Markup(int_atom(tag), Nil), ts)
   236 
   237 
   238     /* representation of standard types */
   239 
   240     val properties: T[List[(String, String)]] =
   241       (props => List(XML.Elem(Markup(":", props), Nil)))
   242 
   243     val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s)))
   244 
   245     val long: T[Long] = (x => string(long_atom(x)))
   246 
   247     val int: T[Int] = (x => string(int_atom(x)))
   248 
   249     val bool: T[Boolean] = (x => string(bool_atom(x)))
   250 
   251     val unit: T[Unit] = (x => string(unit_atom(x)))
   252 
   253     def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
   254       (x => List(node(f(x._1)), node(g(x._2))))
   255 
   256     def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
   257       (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3))))
   258 
   259     def list[A](f: T[A]): T[List[A]] =
   260       (xs => xs.map((x: A) => node(f(x))))
   261 
   262     def option[A](f: T[A]): T[Option[A]] =
   263     {
   264       case None => Nil
   265       case Some(x) => List(node(f(x)))
   266     }
   267 
   268     def variant[A](fs: List[PartialFunction[A, XML.Body]]): T[A] =
   269     {
   270       case x =>
   271         val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get
   272         List(tagged(tag, f(x)))
   273     }
   274   }
   275 
   276   object Decode
   277   {
   278     type T[A] = XML.Body => A
   279 
   280 
   281      /* basic values */
   282 
   283     private def long_atom(s: String): Long =
   284       try { java.lang.Long.parseLong(s) }
   285       catch { case e: NumberFormatException => throw new XML_Atom(s) }
   286 
   287     private def int_atom(s: String): Int =
   288       try { Integer.parseInt(s) }
   289       catch { case e: NumberFormatException => throw new XML_Atom(s) }
   290 
   291     private def bool_atom(s: String): Boolean =
   292       if (s == "1") true
   293       else if (s == "0") false
   294       else throw new XML_Atom(s)
   295 
   296     private def unit_atom(s: String): Unit =
   297       if (s == "") () else throw new XML_Atom(s)
   298 
   299 
   300     /* structural nodes */
   301 
   302     private def node(t: XML.Tree): XML.Body =
   303       t match {
   304         case XML.Elem(Markup(":", Nil), ts) => ts
   305         case _ => throw new XML_Body(List(t))
   306       }
   307 
   308     private def tagged(t: XML.Tree): (Int, XML.Body) =
   309       t match {
   310         case XML.Elem(Markup(s, Nil), ts) => (int_atom(s), ts)
   311         case _ => throw new XML_Body(List(t))
   312       }
   313 
   314 
   315     /* representation of standard types */
   316 
   317     val properties: T[List[(String, String)]] =
   318     {
   319       case List(XML.Elem(Markup(":", props), Nil)) => props
   320       case ts => throw new XML_Body(ts)
   321     }
   322 
   323     val string: T[String] =
   324     {
   325       case Nil => ""
   326       case List(XML.Text(s)) => s
   327       case ts => throw new XML_Body(ts)
   328     }
   329 
   330     val long: T[Long] = (x => long_atom(string(x)))
   331 
   332     val int: T[Int] = (x => int_atom(string(x)))
   333 
   334     val bool: T[Boolean] = (x => bool_atom(string(x)))
   335 
   336     val unit: T[Unit] = (x => unit_atom(string(x)))
   337 
   338     def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
   339     {
   340       case List(t1, t2) => (f(node(t1)), g(node(t2)))
   341       case ts => throw new XML_Body(ts)
   342     }
   343 
   344     def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
   345     {
   346       case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3)))
   347       case ts => throw new XML_Body(ts)
   348     }
   349 
   350     def list[A](f: T[A]): T[List[A]] =
   351       (ts => ts.map(t => f(node(t))))
   352 
   353     def option[A](f: T[A]): T[Option[A]] =
   354     {
   355       case Nil => None
   356       case List(t) => Some(f(node(t)))
   357       case ts => throw new XML_Body(ts)
   358     }
   359 
   360     def variant[A](fs: List[T[A]]): T[A] =
   361     {
   362       case List(t) =>
   363         val (tag, ts) = tagged(t)
   364         fs(tag)(ts)
   365       case ts => throw new XML_Body(ts)
   366     }
   367   }
   368 }