src/Pure/PIDE/xml.scala
author wenzelm
Sun Sep 04 15:21:50 2011 +0200 (2011-09-04 ago)
changeset 44698 0385292321a0
parent 44697 src/Pure/General/xml.scala@b99dfee76538
child 44704 528d635ef6f0
permissions -rw-r--r--
moved XML/YXML to src/Pure/PIDE;
tuned comments;
     1 /*  Title:      Pure/PIDE/xml.scala
     2     Author:     Makarius
     3 
     4 Untyped XML trees and basic data representation.
     5 */
     6 
     7 package isabelle
     8 
     9 import java.lang.System
    10 import java.util.WeakHashMap
    11 import java.lang.ref.WeakReference
    12 import javax.xml.parsers.DocumentBuilderFactory
    13 
    14 import scala.actors.Actor._
    15 import scala.collection.mutable
    16 
    17 
    18 object XML
    19 {
    20   /** XML trees **/
    21 
    22   /* datatype representation */
    23 
    24   type Attributes = Properties.T
    25 
    26   sealed abstract class Tree { override def toString = string_of_tree(this) }
    27   case class Elem(markup: Markup, body: List[Tree]) extends Tree
    28   case class Text(content: String) extends Tree
    29 
    30   def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body)
    31   def elem(name: String) = Elem(Markup(name, Nil), Nil)
    32 
    33   type Body = List[Tree]
    34 
    35 
    36   /* string representation */
    37 
    38   def string_of_body(body: Body): String =
    39   {
    40     val s = new StringBuilder
    41 
    42     def text(txt: String) {
    43       if (txt == null) s ++= txt
    44       else {
    45         for (c <- txt.iterator) c match {
    46           case '<' => s ++= "&lt;"
    47           case '>' => s ++= "&gt;"
    48           case '&' => s ++= "&amp;"
    49           case '"' => s ++= "&quot;"
    50           case '\'' => s ++= "&apos;"
    51           case _ => s += c
    52         }
    53       }
    54     }
    55     def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" }
    56     def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) }
    57     def tree(t: Tree): Unit =
    58       t match {
    59         case Elem(markup, Nil) =>
    60           s ++= "<"; elem(markup); s ++= "/>"
    61         case Elem(markup, ts) =>
    62           s ++= "<"; elem(markup); s ++= ">"
    63           ts.foreach(tree)
    64           s ++= "</"; s ++= markup.name; s ++= ">"
    65         case Text(txt) => text(txt)
    66       }
    67     body.foreach(tree)
    68     s.toString
    69   }
    70 
    71   def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
    72 
    73 
    74   /* text content */
    75 
    76   def content_stream(tree: Tree): Stream[String] =
    77     tree match {
    78       case Elem(_, body) => content_stream(body)
    79       case Text(content) => Stream(content)
    80     }
    81   def content_stream(body: Body): Stream[String] =
    82     body.toStream.flatten(content_stream(_))
    83 
    84   def content(tree: Tree): Iterator[String] = content_stream(tree).iterator
    85   def content(body: Body): Iterator[String] = content_stream(body).iterator
    86 
    87 
    88   /* pipe-lined cache for partial sharing */
    89 
    90   class Cache(initial_size: Int = 131071, max_string: Int = 100)
    91   {
    92     private val cache_actor = actor
    93     {
    94       val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)
    95 
    96       def lookup[A](x: A): Option[A] =
    97       {
    98         val ref = table.get(x)
    99         if (ref == null) None
   100         else {
   101           val y = ref.asInstanceOf[WeakReference[A]].get
   102           if (y == null) None
   103           else Some(y)
   104         }
   105       }
   106       def store[A](x: A): A =
   107       {
   108         table.put(x, new WeakReference[Any](x))
   109         x
   110       }
   111 
   112       def trim_bytes(s: String): String = new String(s.toCharArray)
   113 
   114       def cache_string(x: String): String =
   115         lookup(x) match {
   116           case Some(y) => y
   117           case None =>
   118             val z = trim_bytes(x)
   119             if (z.length > max_string) z else store(z)
   120         }
   121       def cache_props(x: Properties.T): Properties.T =
   122         if (x.isEmpty) x
   123         else
   124           lookup(x) match {
   125             case Some(y) => y
   126             case None => store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2))))
   127           }
   128       def cache_markup(x: Markup): Markup =
   129         lookup(x) match {
   130           case Some(y) => y
   131           case None =>
   132             x match {
   133               case Markup(name, props) =>
   134                 store(Markup(cache_string(name), cache_props(props)))
   135             }
   136         }
   137       def cache_tree(x: XML.Tree): XML.Tree =
   138         lookup(x) match {
   139           case Some(y) => y
   140           case None =>
   141             x match {
   142               case XML.Elem(markup, body) =>
   143                 store(XML.Elem(cache_markup(markup), cache_body(body)))
   144               case XML.Text(text) => store(XML.Text(cache_string(text)))
   145             }
   146         }
   147       def cache_body(x: XML.Body): XML.Body =
   148         if (x.isEmpty) x
   149         else
   150           lookup(x) match {
   151             case Some(y) => y
   152             case None => x.map(cache_tree(_))
   153           }
   154 
   155       // main loop
   156       loop {
   157         react {
   158           case Cache_String(x, f) => f(cache_string(x))
   159           case Cache_Markup(x, f) => f(cache_markup(x))
   160           case Cache_Tree(x, f) => f(cache_tree(x))
   161           case Cache_Body(x, f) => f(cache_body(x))
   162           case Cache_Ignore(x, f) => f(x)
   163           case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad)
   164         }
   165       }
   166     }
   167 
   168     private case class Cache_String(x: String, f: String => Unit)
   169     private case class Cache_Markup(x: Markup, f: Markup => Unit)
   170     private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit)
   171     private case class Cache_Body(x: XML.Body, f: XML.Body => Unit)
   172     private case class Cache_Ignore[A](x: A, f: A => Unit)
   173 
   174     // main methods
   175     def cache_string(x: String)(f: String => Unit) { cache_actor ! Cache_String(x, f) }
   176     def cache_markup(x: Markup)(f: Markup => Unit) { cache_actor ! Cache_Markup(x, f) }
   177     def cache_tree(x: XML.Tree)(f: XML.Tree => Unit) { cache_actor ! Cache_Tree(x, f) }
   178     def cache_body(x: XML.Body)(f: XML.Body => Unit) { cache_actor ! Cache_Body(x, f) }
   179     def cache_ignore[A](x: A)(f: A => Unit) { cache_actor ! Cache_Ignore(x, f) }
   180   }
   181 
   182 
   183 
   184   /** document object model (W3C DOM) **/
   185 
   186   def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
   187     node.getUserData(Markup.Data.name) match {
   188       case tree: XML.Tree => Some(tree)
   189       case _ => None
   190     }
   191 
   192   def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
   193   {
   194     def DOM(tr: Tree): org.w3c.dom.Node = tr match {
   195       case Elem(Markup.Data, List(data, t)) =>
   196         val node = DOM(t)
   197         node.setUserData(Markup.Data.name, data, null)
   198         node
   199       case Elem(Markup(name, atts), ts) =>
   200         if (name == Markup.Data.name)
   201           error("Malformed data element: " + tr.toString)
   202         val node = doc.createElement(name)
   203         for ((name, value) <- atts) node.setAttribute(name, value)
   204         for (t <- ts) node.appendChild(DOM(t))
   205         node
   206       case Text(txt) => doc.createTextNode(txt)
   207     }
   208     DOM(tree)
   209   }
   210 
   211 
   212 
   213   /** XML as data representation language **/
   214 
   215   class XML_Atom(s: String) extends Exception(s)
   216   class XML_Body(body: XML.Body) extends Exception
   217 
   218   object Encode
   219   {
   220     type T[A] = A => XML.Body
   221 
   222 
   223     /* atomic values */
   224 
   225     def long_atom(i: Long): String = i.toString
   226 
   227     def int_atom(i: Int): String = i.toString
   228 
   229     def bool_atom(b: Boolean): String = if (b) "1" else "0"
   230 
   231     def unit_atom(u: Unit) = ""
   232 
   233 
   234     /* structural nodes */
   235 
   236     private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)
   237 
   238     private def vector(xs: List[String]): XML.Attributes =
   239       xs.zipWithIndex.map(p => (int_atom(p._2), p._1))
   240 
   241     private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
   242       XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2)
   243 
   244 
   245     /* representation of standard types */
   246 
   247     val properties: T[Properties.T] =
   248       (props => List(XML.Elem(Markup(":", props), Nil)))
   249 
   250     val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s)))
   251 
   252     val long: T[Long] = (x => string(long_atom(x)))
   253 
   254     val int: T[Int] = (x => string(int_atom(x)))
   255 
   256     val bool: T[Boolean] = (x => string(bool_atom(x)))
   257 
   258     val unit: T[Unit] = (x => string(unit_atom(x)))
   259 
   260     def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
   261       (x => List(node(f(x._1)), node(g(x._2))))
   262 
   263     def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
   264       (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3))))
   265 
   266     def list[A](f: T[A]): T[List[A]] =
   267       (xs => xs.map((x: A) => node(f(x))))
   268 
   269     def option[A](f: T[A]): T[Option[A]] =
   270     {
   271       case None => Nil
   272       case Some(x) => List(node(f(x)))
   273     }
   274 
   275     def variant[A](fs: List[PartialFunction[A, (List[String], XML.Body)]]): T[A] =
   276     {
   277       case x =>
   278         val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get
   279         List(tagged(tag, f(x)))
   280     }
   281   }
   282 
   283   object Decode
   284   {
   285     type T[A] = XML.Body => A
   286     type V[A] = (List[String], XML.Body) => A
   287 
   288 
   289     /* atomic values */
   290 
   291     def long_atom(s: String): Long =
   292       try { java.lang.Long.parseLong(s) }
   293       catch { case e: NumberFormatException => throw new XML_Atom(s) }
   294 
   295     def int_atom(s: String): Int =
   296       try { Integer.parseInt(s) }
   297       catch { case e: NumberFormatException => throw new XML_Atom(s) }
   298 
   299     def bool_atom(s: String): Boolean =
   300       if (s == "1") true
   301       else if (s == "0") false
   302       else throw new XML_Atom(s)
   303 
   304     def unit_atom(s: String): Unit =
   305       if (s == "") () else throw new XML_Atom(s)
   306 
   307 
   308     /* structural nodes */
   309 
   310     private def node(t: XML.Tree): XML.Body =
   311       t match {
   312         case XML.Elem(Markup(":", Nil), ts) => ts
   313         case _ => throw new XML_Body(List(t))
   314       }
   315 
   316     private def vector(atts: XML.Attributes): List[String] =
   317     {
   318       val xs = new mutable.ListBuffer[String]
   319       var i = 0
   320       for ((a, x) <- atts) {
   321         if (int_atom(a) == i) { xs += x; i = i + 1 }
   322         else throw new XML_Atom(a)
   323       }
   324       xs.toList
   325     }
   326 
   327     private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
   328       t match {
   329         case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts))
   330         case _ => throw new XML_Body(List(t))
   331       }
   332 
   333 
   334     /* representation of standard types */
   335 
   336     val properties: T[Properties.T] =
   337     {
   338       case List(XML.Elem(Markup(":", props), Nil)) => props
   339       case ts => throw new XML_Body(ts)
   340     }
   341 
   342     val string: T[String] =
   343     {
   344       case Nil => ""
   345       case List(XML.Text(s)) => s
   346       case ts => throw new XML_Body(ts)
   347     }
   348 
   349     val long: T[Long] = (x => long_atom(string(x)))
   350 
   351     val int: T[Int] = (x => int_atom(string(x)))
   352 
   353     val bool: T[Boolean] = (x => bool_atom(string(x)))
   354 
   355     val unit: T[Unit] = (x => unit_atom(string(x)))
   356 
   357     def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
   358     {
   359       case List(t1, t2) => (f(node(t1)), g(node(t2)))
   360       case ts => throw new XML_Body(ts)
   361     }
   362 
   363     def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
   364     {
   365       case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3)))
   366       case ts => throw new XML_Body(ts)
   367     }
   368 
   369     def list[A](f: T[A]): T[List[A]] =
   370       (ts => ts.map(t => f(node(t))))
   371 
   372     def option[A](f: T[A]): T[Option[A]] =
   373     {
   374       case Nil => None
   375       case List(t) => Some(f(node(t)))
   376       case ts => throw new XML_Body(ts)
   377     }
   378 
   379     def variant[A](fs: List[V[A]]): T[A] =
   380     {
   381       case List(t) =>
   382         val (tag, (xs, ts)) = tagged(t)
   383         val f =
   384           try { fs(tag) }
   385           catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) }
   386         f(xs, ts)
   387       case ts => throw new XML_Body(ts)
   388     }
   389   }
   390 }