src/Pure/PIDE/xml.scala
author wenzelm
Sun May 13 16:37:36 2018 +0200 (13 months ago)
changeset 68169 395432e7516e
parent 67827 b027c97c77c9
child 68265 f0899dad4877
permissions -rw-r--r--
tuned signature;
     1 /*  Title:      Pure/PIDE/xml.scala
     2     Author:     Makarius
     3 
     4 Untyped XML trees and basic data representation.
     5 */
     6 
     7 package isabelle
     8 
     9 
    10 import java.util.{Collections, WeakHashMap}
    11 import java.lang.ref.WeakReference
    12 import javax.xml.parsers.DocumentBuilderFactory
    13 
    14 
    15 object XML
    16 {
    17   /** XML trees **/
    18 
    19   /* datatype representation */
    20 
    21   type Attribute = Properties.Entry
    22   type Attributes = Properties.T
    23 
    24   sealed abstract class Tree { override def toString: String = string_of_tree(this) }
    25   type Body = List[Tree]
    26   case class Elem(markup: Markup, body: Body) extends Tree
    27   {
    28     def name: String = markup.name
    29 
    30     def update_attributes(more_attributes: Attributes): Elem =
    31       if (more_attributes.isEmpty) this
    32       else Elem(markup.update_properties(more_attributes), body)
    33 
    34     def + (att: Attribute): Elem = Elem(markup + att, body)
    35   }
    36   case class Text(content: String) extends Tree
    37 
    38   def elem(markup: Markup): XML.Elem = XML.Elem(markup, Nil)
    39   def elem(name: String, body: Body): XML.Elem = XML.Elem(Markup(name, Nil), body)
    40   def elem(name: String): XML.Elem = XML.Elem(Markup(name, Nil), Nil)
    41 
    42 
    43   /* wrapped elements */
    44 
    45   val XML_ELEM = "xml_elem"
    46   val XML_NAME = "xml_name"
    47   val XML_BODY = "xml_body"
    48 
    49   object Wrapped_Elem
    50   {
    51     def apply(markup: Markup, body1: Body, body2: Body): XML.Elem =
    52       XML.Elem(Markup(XML_ELEM, (XML_NAME, markup.name) :: markup.properties),
    53         XML.Elem(Markup(XML_BODY, Nil), body1) :: body2)
    54 
    55     def unapply(tree: Tree): Option[(Markup, Body, Body)] =
    56       tree match {
    57         case
    58           XML.Elem(Markup(XML_ELEM, (XML_NAME, name) :: props),
    59             XML.Elem(Markup(XML_BODY, Nil), body1) :: body2) =>
    60           Some(Markup(name, props), body1, body2)
    61         case _ => None
    62       }
    63   }
    64 
    65   object Root_Elem
    66   {
    67     def apply(body: Body): XML.Elem = XML.Elem(Markup(XML_ELEM, Nil), body)
    68     def unapply(tree: Tree): Option[Body] =
    69       tree match {
    70         case XML.Elem(Markup(XML_ELEM, Nil), body) => Some(body)
    71         case _ => None
    72       }
    73   }
    74 
    75 
    76   /* traverse text */
    77 
    78   def traverse_text[A](body: Body)(a: A)(op: (A, String) => A): A =
    79   {
    80     def traverse(x: A, t: Tree): A =
    81       t match {
    82         case XML.Wrapped_Elem(_, _, ts) => (x /: ts)(traverse)
    83         case XML.Elem(_, ts) => (x /: ts)(traverse)
    84         case XML.Text(s) => op(x, s)
    85       }
    86     (a /: body)(traverse)
    87   }
    88 
    89   def text_length(body: Body): Int = traverse_text(body)(0) { case (n, s) => n + s.length }
    90 
    91 
    92   /* text content */
    93 
    94   def content(body: Body): String =
    95   {
    96     val text = new StringBuilder(text_length(body))
    97     traverse_text(body)(()) { case (_, s) => text.append(s) }
    98     text.toString
    99   }
   100 
   101   def content(tree: Tree): String = content(List(tree))
   102 
   103 
   104 
   105   /** string representation **/
   106 
   107   def output_char(c: Char, s: StringBuilder)
   108   {
   109     c match {
   110       case '<' => s ++= "&lt;"
   111       case '>' => s ++= "&gt;"
   112       case '&' => s ++= "&amp;"
   113       case '"' => s ++= "&quot;"
   114       case '\'' => s ++= "&apos;"
   115       case _ => s += c
   116     }
   117   }
   118 
   119   def output_string(str: String, s: StringBuilder)
   120   {
   121     if (str == null) s ++= str
   122     else str.iterator.foreach(c => output_char(c, s))
   123   }
   124 
   125   def string_of_body(body: Body): String =
   126   {
   127     val s = new StringBuilder
   128 
   129     def text(txt: String) { output_string(txt, s) }
   130     def elem(markup: Markup)
   131     {
   132       s ++= markup.name
   133       for ((a, b) <- markup.properties) {
   134         s += ' '; s ++= a; s += '='; s += '"'; text(b); s += '"'
   135       }
   136     }
   137     def tree(t: Tree): Unit =
   138       t match {
   139         case XML.Elem(markup, Nil) =>
   140           s += '<'; elem(markup); s ++= "/>"
   141         case XML.Elem(markup, ts) =>
   142           s += '<'; elem(markup); s += '>'
   143           ts.foreach(tree)
   144           s ++= "</"; s ++= markup.name; s += '>'
   145         case XML.Text(txt) => text(txt)
   146       }
   147     body.foreach(tree)
   148     s.toString
   149   }
   150 
   151   def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
   152 
   153 
   154 
   155   /** cache for partial sharing (weak table) **/
   156 
   157   def make_cache(initial_size: Int = 131071, max_string: Int = 100): Cache =
   158     new Cache(initial_size, max_string)
   159 
   160   class Cache private[XML](initial_size: Int, max_string: Int)
   161   {
   162     private val table =
   163       Collections.synchronizedMap(new WeakHashMap[Any, WeakReference[Any]](initial_size))
   164 
   165     def size: Int = table.size
   166 
   167     private def lookup[A](x: A): Option[A] =
   168     {
   169       val ref = table.get(x)
   170       if (ref == null) None
   171       else {
   172         val y = ref.asInstanceOf[WeakReference[A]].get
   173         if (y == null) None
   174         else Some(y)
   175       }
   176     }
   177     private def store[A](x: A): A =
   178     {
   179       table.put(x, new WeakReference[Any](x))
   180       x
   181     }
   182 
   183     private def cache_string(x: String): String =
   184       if (x == "") ""
   185       else if (x == "true") "true"
   186       else if (x == "false") "false"
   187       else if (x == "0.0") "0.0"
   188       else if (Library.is_small_int(x)) Library.signed_string_of_int(Integer.parseInt(x))
   189       else
   190         lookup(x) match {
   191           case Some(y) => y
   192           case None =>
   193             val z = Library.isolate_substring(x)
   194             if (z.length > max_string) z else store(z)
   195         }
   196     private def cache_props(x: Properties.T): Properties.T =
   197       if (x.isEmpty) x
   198       else
   199         lookup(x) match {
   200           case Some(y) => y
   201           case None => store(x.map(p => (Library.isolate_substring(p._1).intern, cache_string(p._2))))
   202         }
   203     private def cache_markup(x: Markup): Markup =
   204       lookup(x) match {
   205         case Some(y) => y
   206         case None =>
   207           x match {
   208             case Markup(name, props) =>
   209               store(Markup(cache_string(name), cache_props(props)))
   210           }
   211       }
   212     private def cache_tree(x: XML.Tree): XML.Tree =
   213       lookup(x) match {
   214         case Some(y) => y
   215         case None =>
   216           x match {
   217             case XML.Elem(markup, body) =>
   218               store(XML.Elem(cache_markup(markup), cache_body(body)))
   219             case XML.Text(text) => store(XML.Text(cache_string(text)))
   220           }
   221       }
   222     private def cache_body(x: XML.Body): XML.Body =
   223       if (x.isEmpty) x
   224       else
   225         lookup(x) match {
   226           case Some(y) => y
   227           case None => x.map(cache_tree(_))
   228         }
   229 
   230     // main methods
   231     def string(x: String): String = synchronized { cache_string(x) }
   232     def props(x: Properties.T): Properties.T = synchronized { cache_props(x) }
   233     def markup(x: Markup): Markup = synchronized { cache_markup(x) }
   234     def tree(x: XML.Tree): XML.Tree = synchronized { cache_tree(x) }
   235     def body(x: XML.Body): XML.Body = synchronized { cache_body(x) }
   236     def elem(x: XML.Elem): XML.Elem = synchronized { cache_tree(x).asInstanceOf[XML.Elem] }
   237   }
   238 
   239 
   240 
   241   /** XML as data representation language **/
   242 
   243   abstract class Error(s: String) extends Exception(s)
   244   class XML_Atom(s: String) extends Error(s)
   245   class XML_Body(body: XML.Body) extends Error("")
   246 
   247   object Encode
   248   {
   249     type T[A] = A => XML.Body
   250     type V[A] = PartialFunction[A, (List[String], XML.Body)]
   251 
   252 
   253     /* atomic values */
   254 
   255     def long_atom(i: Long): String = Library.signed_string_of_long(i)
   256 
   257     def int_atom(i: Int): String = Library.signed_string_of_int(i)
   258 
   259     def bool_atom(b: Boolean): String = if (b) "1" else "0"
   260 
   261     def unit_atom(u: Unit) = ""
   262 
   263 
   264     /* structural nodes */
   265 
   266     private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)
   267 
   268     private def vector(xs: List[String]): XML.Attributes =
   269       xs.zipWithIndex.map({ case (x, i) => (int_atom(i), x) })
   270 
   271     private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
   272       XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2)
   273 
   274 
   275     /* representation of standard types */
   276 
   277     val tree: T[XML.Tree] = (t => List(t))
   278 
   279     val properties: T[Properties.T] =
   280       (props => List(XML.Elem(Markup(":", props), Nil)))
   281 
   282     val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s)))
   283 
   284     val long: T[Long] = (x => string(long_atom(x)))
   285 
   286     val int: T[Int] = (x => string(int_atom(x)))
   287 
   288     val bool: T[Boolean] = (x => string(bool_atom(x)))
   289 
   290     val unit: T[Unit] = (x => string(unit_atom(x)))
   291 
   292     def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
   293       (x => List(node(f(x._1)), node(g(x._2))))
   294 
   295     def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
   296       (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3))))
   297 
   298     def list[A](f: T[A]): T[List[A]] =
   299       (xs => xs.map((x: A) => node(f(x))))
   300 
   301     def option[A](f: T[A]): T[Option[A]] =
   302     {
   303       case None => Nil
   304       case Some(x) => List(node(f(x)))
   305     }
   306 
   307     def variant[A](fs: List[V[A]]): T[A] =
   308     {
   309       case x =>
   310         val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get
   311         List(tagged(tag, f(x)))
   312     }
   313   }
   314 
   315   object Decode
   316   {
   317     type T[A] = XML.Body => A
   318     type V[A] = (List[String], XML.Body) => A
   319 
   320 
   321     /* atomic values */
   322 
   323     def long_atom(s: String): Long =
   324       try { java.lang.Long.parseLong(s) }
   325       catch { case e: NumberFormatException => throw new XML_Atom(s) }
   326 
   327     def int_atom(s: String): Int =
   328       try { Integer.parseInt(s) }
   329       catch { case e: NumberFormatException => throw new XML_Atom(s) }
   330 
   331     def bool_atom(s: String): Boolean =
   332       if (s == "1") true
   333       else if (s == "0") false
   334       else throw new XML_Atom(s)
   335 
   336     def unit_atom(s: String): Unit =
   337       if (s == "") () else throw new XML_Atom(s)
   338 
   339 
   340     /* structural nodes */
   341 
   342     private def node(t: XML.Tree): XML.Body =
   343       t match {
   344         case XML.Elem(Markup(":", Nil), ts) => ts
   345         case _ => throw new XML_Body(List(t))
   346       }
   347 
   348     private def vector(atts: XML.Attributes): List[String] =
   349       atts.iterator.zipWithIndex.map(
   350         { case ((a, x), i) => if (int_atom(a) == i) x else throw new XML_Atom(a) }).toList
   351 
   352     private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
   353       t match {
   354         case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts))
   355         case _ => throw new XML_Body(List(t))
   356       }
   357 
   358 
   359     /* representation of standard types */
   360 
   361     val tree: T[XML.Tree] =
   362     {
   363       case List(t) => t
   364       case ts => throw new XML_Body(ts)
   365     }
   366 
   367     val properties: T[Properties.T] =
   368     {
   369       case List(XML.Elem(Markup(":", props), Nil)) => props
   370       case ts => throw new XML_Body(ts)
   371     }
   372 
   373     val string: T[String] =
   374     {
   375       case Nil => ""
   376       case List(XML.Text(s)) => s
   377       case ts => throw new XML_Body(ts)
   378     }
   379 
   380     val long: T[Long] = (x => long_atom(string(x)))
   381 
   382     val int: T[Int] = (x => int_atom(string(x)))
   383 
   384     val bool: T[Boolean] = (x => bool_atom(string(x)))
   385 
   386     val unit: T[Unit] = (x => unit_atom(string(x)))
   387 
   388     def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
   389     {
   390       case List(t1, t2) => (f(node(t1)), g(node(t2)))
   391       case ts => throw new XML_Body(ts)
   392     }
   393 
   394     def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
   395     {
   396       case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3)))
   397       case ts => throw new XML_Body(ts)
   398     }
   399 
   400     def list[A](f: T[A]): T[List[A]] =
   401       (ts => ts.map(t => f(node(t))))
   402 
   403     def option[A](f: T[A]): T[Option[A]] =
   404     {
   405       case Nil => None
   406       case List(t) => Some(f(node(t)))
   407       case ts => throw new XML_Body(ts)
   408     }
   409 
   410     def variant[A](fs: List[V[A]]): T[A] =
   411     {
   412       case List(t) =>
   413         val (tag, (xs, ts)) = tagged(t)
   414         val f =
   415           try { fs(tag) }
   416           catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) }
   417         f(xs, ts)
   418       case ts => throw new XML_Body(ts)
   419     }
   420   }
   421 }