src/Pure/General/xml.scala
changeset 44713 8c3d4063bf52
parent 44712 1e490e891c88
parent 44706 fe319b45315c
child 44716 d37afb90c23e
equal deleted inserted replaced
44712:1e490e891c88 44713:8c3d4063bf52
     1 /*  Title:      Pure/General/xml.scala
       
     2     Author:     Makarius
       
     3 
       
     4 Untyped XML trees.
       
     5 */
       
     6 
       
     7 package isabelle
       
     8 
       
     9 import java.lang.System
       
    10 import java.util.WeakHashMap
       
    11 import java.lang.ref.WeakReference
       
    12 import javax.xml.parsers.DocumentBuilderFactory
       
    13 
       
    14 import scala.actors.Actor._
       
    15 import scala.collection.mutable
       
    16 
       
    17 
       
    18 object XML
       
    19 {
       
    20   /** XML trees **/
       
    21 
       
    22   /* datatype representation */
       
    23 
       
    24   type Attributes = Properties.T
       
    25 
       
    26   sealed abstract class Tree { override def toString = string_of_tree(this) }
       
    27   case class Elem(markup: Markup, body: List[Tree]) extends Tree
       
    28   case class Text(content: String) extends Tree
       
    29 
       
    30   def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body)
       
    31   def elem(name: String) = Elem(Markup(name, Nil), Nil)
       
    32 
       
    33   type Body = List[Tree]
       
    34 
       
    35 
       
    36   /* string representation */
       
    37 
       
    38   def string_of_body(body: Body): String =
       
    39   {
       
    40     val s = new StringBuilder
       
    41 
       
    42     def text(txt: String) {
       
    43       if (txt == null) s ++= txt
       
    44       else {
       
    45         for (c <- txt.iterator) c match {
       
    46           case '<' => s ++= "&lt;"
       
    47           case '>' => s ++= "&gt;"
       
    48           case '&' => s ++= "&amp;"
       
    49           case '"' => s ++= "&quot;"
       
    50           case '\'' => s ++= "&apos;"
       
    51           case _ => s += c
       
    52         }
       
    53       }
       
    54     }
       
    55     def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" }
       
    56     def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) }
       
    57     def tree(t: Tree): Unit =
       
    58       t match {
       
    59         case Elem(markup, Nil) =>
       
    60           s ++= "<"; elem(markup); s ++= "/>"
       
    61         case Elem(markup, ts) =>
       
    62           s ++= "<"; elem(markup); s ++= ">"
       
    63           ts.foreach(tree)
       
    64           s ++= "</"; s ++= markup.name; s ++= ">"
       
    65         case Text(txt) => text(txt)
       
    66       }
       
    67     body.foreach(tree)
       
    68     s.toString
       
    69   }
       
    70 
       
    71   def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
       
    72 
       
    73 
       
    74   /* text content */
       
    75 
       
    76   def content_stream(tree: Tree): Stream[String] =
       
    77     tree match {
       
    78       case Elem(_, body) => content_stream(body)
       
    79       case Text(content) => Stream(content)
       
    80     }
       
    81   def content_stream(body: Body): Stream[String] =
       
    82     body.toStream.flatten(content_stream(_))
       
    83 
       
    84   def content(tree: Tree): Iterator[String] = content_stream(tree).iterator
       
    85   def content(body: Body): Iterator[String] = content_stream(body).iterator
       
    86 
       
    87 
       
    88   /* pipe-lined cache for partial sharing */
       
    89 
       
    90   class Cache(initial_size: Int = 131071, max_string: Int = 100)
       
    91   {
       
    92     private val cache_actor = actor
       
    93     {
       
    94       val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)
       
    95 
       
    96       def lookup[A](x: A): Option[A] =
       
    97       {
       
    98         val ref = table.get(x)
       
    99         if (ref == null) None
       
   100         else {
       
   101           val y = ref.asInstanceOf[WeakReference[A]].get
       
   102           if (y == null) None
       
   103           else Some(y)
       
   104         }
       
   105       }
       
   106       def store[A](x: A): A =
       
   107       {
       
   108         table.put(x, new WeakReference[Any](x))
       
   109         x
       
   110       }
       
   111 
       
   112       def trim_bytes(s: String): String = new String(s.toCharArray)
       
   113 
       
   114       def cache_string(x: String): String =
       
   115         lookup(x) match {
       
   116           case Some(y) => y
       
   117           case None =>
       
   118             val z = trim_bytes(x)
       
   119             if (z.length > max_string) z else store(z)
       
   120         }
       
   121       def cache_props(x: Properties.T): Properties.T =
       
   122         if (x.isEmpty) x
       
   123         else
       
   124           lookup(x) match {
       
   125             case Some(y) => y
       
   126             case None => store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2))))
       
   127           }
       
   128       def cache_markup(x: Markup): Markup =
       
   129         lookup(x) match {
       
   130           case Some(y) => y
       
   131           case None =>
       
   132             x match {
       
   133               case Markup(name, props) =>
       
   134                 store(Markup(cache_string(name), cache_props(props)))
       
   135             }
       
   136         }
       
   137       def cache_tree(x: XML.Tree): XML.Tree =
       
   138         lookup(x) match {
       
   139           case Some(y) => y
       
   140           case None =>
       
   141             x match {
       
   142               case XML.Elem(markup, body) =>
       
   143                 store(XML.Elem(cache_markup(markup), cache_body(body)))
       
   144               case XML.Text(text) => store(XML.Text(cache_string(text)))
       
   145             }
       
   146         }
       
   147       def cache_body(x: XML.Body): XML.Body =
       
   148         if (x.isEmpty) x
       
   149         else
       
   150           lookup(x) match {
       
   151             case Some(y) => y
       
   152             case None => x.map(cache_tree(_))
       
   153           }
       
   154 
       
   155       // main loop
       
   156       loop {
       
   157         react {
       
   158           case Cache_String(x, f) => f(cache_string(x))
       
   159           case Cache_Markup(x, f) => f(cache_markup(x))
       
   160           case Cache_Tree(x, f) => f(cache_tree(x))
       
   161           case Cache_Body(x, f) => f(cache_body(x))
       
   162           case Cache_Ignore(x, f) => f(x)
       
   163           case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad)
       
   164         }
       
   165       }
       
   166     }
       
   167 
       
   168     private case class Cache_String(x: String, f: String => Unit)
       
   169     private case class Cache_Markup(x: Markup, f: Markup => Unit)
       
   170     private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit)
       
   171     private case class Cache_Body(x: XML.Body, f: XML.Body => Unit)
       
   172     private case class Cache_Ignore[A](x: A, f: A => Unit)
       
   173 
       
   174     // main methods
       
   175     def cache_string(x: String)(f: String => Unit) { cache_actor ! Cache_String(x, f) }
       
   176     def cache_markup(x: Markup)(f: Markup => Unit) { cache_actor ! Cache_Markup(x, f) }
       
   177     def cache_tree(x: XML.Tree)(f: XML.Tree => Unit) { cache_actor ! Cache_Tree(x, f) }
       
   178     def cache_body(x: XML.Body)(f: XML.Body => Unit) { cache_actor ! Cache_Body(x, f) }
       
   179     def cache_ignore[A](x: A)(f: A => Unit) { cache_actor ! Cache_Ignore(x, f) }
       
   180   }
       
   181 
       
   182 
       
   183 
       
   184   /** document object model (W3C DOM) **/
       
   185 
       
   186   def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
       
   187     node.getUserData(Markup.Data.name) match {
       
   188       case tree: XML.Tree => Some(tree)
       
   189       case _ => None
       
   190     }
       
   191 
       
   192   def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
       
   193   {
       
   194     def DOM(tr: Tree): org.w3c.dom.Node = tr match {
       
   195       case Elem(Markup.Data, List(data, t)) =>
       
   196         val node = DOM(t)
       
   197         node.setUserData(Markup.Data.name, data, null)
       
   198         node
       
   199       case Elem(Markup(name, atts), ts) =>
       
   200         if (name == Markup.Data.name)
       
   201           error("Malformed data element: " + tr.toString)
       
   202         val node = doc.createElement(name)
       
   203         for ((name, value) <- atts) node.setAttribute(name, value)
       
   204         for (t <- ts) node.appendChild(DOM(t))
       
   205         node
       
   206       case Text(txt) => doc.createTextNode(txt)
       
   207     }
       
   208     DOM(tree)
       
   209   }
       
   210 
       
   211 
       
   212 
       
   213   /** XML as data representation language **/
       
   214 
       
   215   class XML_Atom(s: String) extends Exception(s)
       
   216   class XML_Body(body: XML.Body) extends Exception
       
   217 
       
   218   object Encode
       
   219   {
       
   220     type T[A] = A => XML.Body
       
   221 
       
   222 
       
   223     /* atomic values */
       
   224 
       
   225     def long_atom(i: Long): String = i.toString
       
   226 
       
   227     def int_atom(i: Int): String = i.toString
       
   228 
       
   229     def bool_atom(b: Boolean): String = if (b) "1" else "0"
       
   230 
       
   231     def unit_atom(u: Unit) = ""
       
   232 
       
   233 
       
   234     /* structural nodes */
       
   235 
       
   236     private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)
       
   237 
       
   238     private def vector(xs: List[String]): XML.Attributes =
       
   239       xs.zipWithIndex.map(p => (int_atom(p._2), p._1))
       
   240 
       
   241     private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
       
   242       XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2)
       
   243 
       
   244 
       
   245     /* representation of standard types */
       
   246 
       
   247     val properties: T[Properties.T] =
       
   248       (props => List(XML.Elem(Markup(":", props), Nil)))
       
   249 
       
   250     val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s)))
       
   251 
       
   252     val long: T[Long] = (x => string(long_atom(x)))
       
   253 
       
   254     val int: T[Int] = (x => string(int_atom(x)))
       
   255 
       
   256     val bool: T[Boolean] = (x => string(bool_atom(x)))
       
   257 
       
   258     val unit: T[Unit] = (x => string(unit_atom(x)))
       
   259 
       
   260     def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
       
   261       (x => List(node(f(x._1)), node(g(x._2))))
       
   262 
       
   263     def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
       
   264       (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3))))
       
   265 
       
   266     def list[A](f: T[A]): T[List[A]] =
       
   267       (xs => xs.map((x: A) => node(f(x))))
       
   268 
       
   269     def option[A](f: T[A]): T[Option[A]] =
       
   270     {
       
   271       case None => Nil
       
   272       case Some(x) => List(node(f(x)))
       
   273     }
       
   274 
       
   275     def variant[A](fs: List[PartialFunction[A, (List[String], XML.Body)]]): T[A] =
       
   276     {
       
   277       case x =>
       
   278         val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get
       
   279         List(tagged(tag, f(x)))
       
   280     }
       
   281   }
       
   282 
       
   283   object Decode
       
   284   {
       
   285     type T[A] = XML.Body => A
       
   286     type V[A] = (List[String], XML.Body) => A
       
   287 
       
   288 
       
   289     /* atomic values */
       
   290 
       
   291     def long_atom(s: String): Long =
       
   292       try { java.lang.Long.parseLong(s) }
       
   293       catch { case e: NumberFormatException => throw new XML_Atom(s) }
       
   294 
       
   295     def int_atom(s: String): Int =
       
   296       try { Integer.parseInt(s) }
       
   297       catch { case e: NumberFormatException => throw new XML_Atom(s) }
       
   298 
       
   299     def bool_atom(s: String): Boolean =
       
   300       if (s == "1") true
       
   301       else if (s == "0") false
       
   302       else throw new XML_Atom(s)
       
   303 
       
   304     def unit_atom(s: String): Unit =
       
   305       if (s == "") () else throw new XML_Atom(s)
       
   306 
       
   307 
       
   308     /* structural nodes */
       
   309 
       
   310     private def node(t: XML.Tree): XML.Body =
       
   311       t match {
       
   312         case XML.Elem(Markup(":", Nil), ts) => ts
       
   313         case _ => throw new XML_Body(List(t))
       
   314       }
       
   315 
       
   316     private def vector(atts: XML.Attributes): List[String] =
       
   317     {
       
   318       val xs = new mutable.ListBuffer[String]
       
   319       var i = 0
       
   320       for ((a, x) <- atts) {
       
   321         if (int_atom(a) == i) { xs += x; i = i + 1 }
       
   322         else throw new XML_Atom(a)
       
   323       }
       
   324       xs.toList
       
   325     }
       
   326 
       
   327     private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
       
   328       t match {
       
   329         case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts))
       
   330         case _ => throw new XML_Body(List(t))
       
   331       }
       
   332 
       
   333 
       
   334     /* representation of standard types */
       
   335 
       
   336     val properties: T[Properties.T] =
       
   337     {
       
   338       case List(XML.Elem(Markup(":", props), Nil)) => props
       
   339       case ts => throw new XML_Body(ts)
       
   340     }
       
   341 
       
   342     val string: T[String] =
       
   343     {
       
   344       case Nil => ""
       
   345       case List(XML.Text(s)) => s
       
   346       case ts => throw new XML_Body(ts)
       
   347     }
       
   348 
       
   349     val long: T[Long] = (x => long_atom(string(x)))
       
   350 
       
   351     val int: T[Int] = (x => int_atom(string(x)))
       
   352 
       
   353     val bool: T[Boolean] = (x => bool_atom(string(x)))
       
   354 
       
   355     val unit: T[Unit] = (x => unit_atom(string(x)))
       
   356 
       
   357     def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
       
   358     {
       
   359       case List(t1, t2) => (f(node(t1)), g(node(t2)))
       
   360       case ts => throw new XML_Body(ts)
       
   361     }
       
   362 
       
   363     def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
       
   364     {
       
   365       case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3)))
       
   366       case ts => throw new XML_Body(ts)
       
   367     }
       
   368 
       
   369     def list[A](f: T[A]): T[List[A]] =
       
   370       (ts => ts.map(t => f(node(t))))
       
   371 
       
   372     def option[A](f: T[A]): T[Option[A]] =
       
   373     {
       
   374       case Nil => None
       
   375       case List(t) => Some(f(node(t)))
       
   376       case ts => throw new XML_Body(ts)
       
   377     }
       
   378 
       
   379     def variant[A](fs: List[V[A]]): T[A] =
       
   380     {
       
   381       case List(t) =>
       
   382         val (tag, (xs, ts)) = tagged(t)
       
   383         val f =
       
   384           try { fs(tag) }
       
   385           catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) }
       
   386         f(xs, ts)
       
   387       case ts => throw new XML_Body(ts)
       
   388     }
       
   389   }
       
   390 }