moved XML/YXML to src/Pure/PIDE;
authorwenzelm
Sun, 04 Sep 2011 15:21:50 +0200
changeset 44698 0385292321a0
parent 44697 b99dfee76538
child 44699 5199ee17c7d7
moved XML/YXML to src/Pure/PIDE; tuned comments;
src/Pure/General/xml.ML
src/Pure/General/xml.scala
src/Pure/General/yxml.ML
src/Pure/General/yxml.scala
src/Pure/IsaMakefile
src/Pure/PIDE/xml.ML
src/Pure/PIDE/xml.scala
src/Pure/PIDE/yxml.ML
src/Pure/PIDE/yxml.scala
src/Pure/ROOT.ML
src/Pure/build-jars
--- a/src/Pure/General/xml.ML	Sun Sep 04 14:29:15 2011 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,361 +0,0 @@
-(*  Title:      Pure/General/xml.ML
-    Author:     David Aspinall, Stefan Berghofer and Markus Wenzel
-
-Untyped XML trees.
-*)
-
-signature XML_DATA_OPS =
-sig
-  type 'a A
-  type 'a T
-  type 'a V
-  val int_atom: int A
-  val bool_atom: bool A
-  val unit_atom: unit A
-  val properties: Properties.T T
-  val string: string T
-  val int: int T
-  val bool: bool T
-  val unit: unit T
-  val pair: 'a T -> 'b T -> ('a * 'b) T
-  val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
-  val list: 'a T -> 'a list T
-  val option: 'a T -> 'a option T
-  val variant: 'a V list -> 'a T
-end;
-
-signature XML =
-sig
-  type attributes = Properties.T
-  datatype tree =
-      Elem of Markup.T * tree list
-    | Text of string
-  type body = tree list
-  val add_content: tree -> Buffer.T -> Buffer.T
-  val content_of: body -> string
-  val header: string
-  val text: string -> string
-  val element: string -> attributes -> string list -> string
-  val output_markup: Markup.T -> Output.output * Output.output
-  val string_of: tree -> string
-  val pretty: int -> tree -> Pretty.T
-  val output: tree -> TextIO.outstream -> unit
-  val parse_comments: string list -> unit * string list
-  val parse_string : string -> string option
-  val parse_element: string list -> tree * string list
-  val parse_document: string list -> tree * string list
-  val parse: string -> tree
-  exception XML_ATOM of string
-  exception XML_BODY of body
-  structure Encode: XML_DATA_OPS
-  structure Decode: XML_DATA_OPS
-end;
-
-structure XML: XML =
-struct
-
-(** XML trees **)
-
-type attributes = Properties.T;
-
-datatype tree =
-    Elem of Markup.T * tree list
-  | Text of string;
-
-type body = tree list;
-
-fun add_content (Elem (_, ts)) = fold add_content ts
-  | add_content (Text s) = Buffer.add s;
-
-fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content;
-
-
-
-(** string representation **)
-
-val header = "<?xml version=\"1.0\"?>\n";
-
-
-(* escaped text *)
-
-fun decode "&lt;" = "<"
-  | decode "&gt;" = ">"
-  | decode "&amp;" = "&"
-  | decode "&apos;" = "'"
-  | decode "&quot;" = "\""
-  | decode c = c;
-
-fun encode "<" = "&lt;"
-  | encode ">" = "&gt;"
-  | encode "&" = "&amp;"
-  | encode "'" = "&apos;"
-  | encode "\"" = "&quot;"
-  | encode c = c;
-
-val text = translate_string encode;
-
-
-(* elements *)
-
-fun elem name atts =
-  space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);
-
-fun element name atts body =
-  let val b = implode body in
-    if b = "" then enclose "<" "/>" (elem name atts)
-    else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name
-  end;
-
-fun output_markup (markup as (name, atts)) =
-  if Markup.is_empty markup then Markup.no_output
-  else (enclose "<" ">" (elem name atts), enclose "</" ">" name);
-
-
-(* output *)
-
-fun buffer_of depth tree =
-  let
-    fun traverse _ (Elem ((name, atts), [])) =
-          Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"
-      | traverse d (Elem ((name, atts), ts)) =
-          Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>
-          traverse_body d ts #>
-          Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
-      | traverse _ (Text s) = Buffer.add (text s)
-    and traverse_body 0 _ = Buffer.add "..."
-      | traverse_body d ts = fold (traverse (d - 1)) ts;
-  in Buffer.empty |> traverse depth tree end;
-
-val string_of = Buffer.content o buffer_of ~1;
-val output = Buffer.output o buffer_of ~1;
-
-fun pretty depth tree =
-  Pretty.str (Buffer.content (buffer_of (Int.max (0, depth)) tree));
-
-
-
-(** XML parsing (slow) **)
-
-local
-
-fun err msg (xs, _) =
-  fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
-
-fun ignored _ = [];
-
-val blanks = Scan.many Symbol.is_blank;
-val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
-val regular = Scan.one Symbol.is_regular;
-fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);
-
-val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;
-
-val parse_cdata =
-  Scan.this_string "<![CDATA[" |--
-  (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|
-  Scan.this_string "]]>";
-
-val parse_att =
-  (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) --
-  (($$ "\"" || $$ "'") :|-- (fn s =>
-    (Scan.repeat (special || regular_except s) >> implode) --| $$ s));
-
-val parse_comment =
-  Scan.this_string "<!--" --
-  Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
-  Scan.this_string "-->" >> ignored;
-
-val parse_processing_instruction =
-  Scan.this_string "<?" --
-  Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
-  Scan.this_string "?>" >> ignored;
-
-val parse_doctype =
-  Scan.this_string "<!DOCTYPE" --
-  Scan.repeat (Scan.unless ($$ ">") regular) --
-  $$ ">" >> ignored;
-
-val parse_misc =
-  Scan.one Symbol.is_blank >> ignored ||
-  parse_processing_instruction ||
-  parse_comment;
-
-val parse_optional_text =
-  Scan.optional (parse_chars >> (single o Text)) [];
-
-fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_";
-fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = ".";
-val parse_name = Scan.one name_start_char ::: Scan.many name_char;
-
-in
-
-val parse_comments =
-  blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
-
-val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode;
-
-fun parse_content xs =
-  (parse_optional_text @@@
-    (Scan.repeat
-      ((parse_element >> single ||
-        parse_cdata >> (single o Text) ||
-        parse_processing_instruction ||
-        parse_comment)
-      @@@ parse_optional_text) >> flat)) xs
-
-and parse_element xs =
-  ($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :--
-    (fn (name, _) =>
-      !! (err (fn () => "Expected > or />"))
-       ($$ "/" -- $$ ">" >> ignored ||
-        $$ ">" |-- parse_content --|
-          !! (err (fn () => "Expected </" ^ implode name ^ ">"))
-              ($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">")))
-    >> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs;
-
-val parse_document =
-  (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)
-  |-- parse_element;
-
-fun parse s =
-  (case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element"))
-      (blanks |-- parse_document --| blanks))) (raw_explode s) of
-    (x, []) => x
-  | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
-
-end;
-
-
-
-(** XML as data representation language **)
-
-exception XML_ATOM of string;
-exception XML_BODY of tree list;
-
-
-structure Encode =
-struct
-
-type 'a A = 'a -> string;
-type 'a T = 'a -> body;
-type 'a V = 'a -> string list * body;
-
-
-(* atomic values *)
-
-fun int_atom i = signed_string_of_int i;
-
-fun bool_atom false = "0"
-  | bool_atom true = "1";
-
-fun unit_atom () = "";
-
-
-(* structural nodes *)
-
-fun node ts = Elem ((":", []), ts);
-
-fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs;
-
-fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts);
-
-
-(* representation of standard types *)
-
-fun properties props = [Elem ((":", props), [])];
-
-fun string "" = []
-  | string s = [Text s];
-
-val int = string o int_atom;
-
-val bool = string o bool_atom;
-
-val unit = string o unit_atom;
-
-fun pair f g (x, y) = [node (f x), node (g y)];
-
-fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)];
-
-fun list f xs = map (node o f) xs;
-
-fun option _ NONE = []
-  | option f (SOME x) = [node (f x)];
-
-fun variant fs x = [tagged (the (get_index (fn f => try f x) fs))];
-
-end;
-
-
-structure Decode =
-struct
-
-type 'a A = string -> 'a;
-type 'a T = body -> 'a;
-type 'a V = string list * body -> 'a;
-
-
-(* atomic values *)
-
-fun int_atom s =
-  Markup.parse_int s
-    handle Fail _ => raise XML_ATOM s;
-
-fun bool_atom "0" = false
-  | bool_atom "1" = true
-  | bool_atom s = raise XML_ATOM s;
-
-fun unit_atom "" = ()
-  | unit_atom s = raise XML_ATOM s;
-
-
-(* structural nodes *)
-
-fun node (Elem ((":", []), ts)) = ts
-  | node t = raise XML_BODY [t];
-
-fun vector atts =
-  #1 (fold_map (fn (a, x) =>
-        fn i => if int_atom a = i then (x, i + 1) else raise XML_ATOM a) atts 0);
-
-fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts))
-  | tagged t = raise XML_BODY [t];
-
-
-(* representation of standard types *)
-
-fun properties [Elem ((":", props), [])] = props
-  | properties ts = raise XML_BODY ts;
-
-fun string [] = ""
-  | string [Text s] = s
-  | string ts = raise XML_BODY ts;
-
-val int = int_atom o string;
-
-val bool = bool_atom o string;
-
-val unit = unit_atom o string;
-
-fun pair f g [t1, t2] = (f (node t1), g (node t2))
-  | pair _ _ ts = raise XML_BODY ts;
-
-fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3))
-  | triple _ _ _ ts = raise XML_BODY ts;
-
-fun list f ts = map (f o node) ts;
-
-fun option _ [] = NONE
-  | option f [t] = SOME (f (node t))
-  | option _ ts = raise XML_BODY ts;
-
-fun variant fs [t] =
-      let
-        val (tag, (xs, ts)) = tagged t;
-        val f = nth fs tag handle General.Subscript => raise XML_BODY [t];
-      in f (xs, ts) end
-  | variant _ ts = raise XML_BODY ts;
-
-end;
-
-end;
--- a/src/Pure/General/xml.scala	Sun Sep 04 14:29:15 2011 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,390 +0,0 @@
-/*  Title:      Pure/General/xml.scala
-    Author:     Makarius
-
-Untyped XML trees.
-*/
-
-package isabelle
-
-import java.lang.System
-import java.util.WeakHashMap
-import java.lang.ref.WeakReference
-import javax.xml.parsers.DocumentBuilderFactory
-
-import scala.actors.Actor._
-import scala.collection.mutable
-
-
-object XML
-{
-  /** XML trees **/
-
-  /* datatype representation */
-
-  type Attributes = Properties.T
-
-  sealed abstract class Tree { override def toString = string_of_tree(this) }
-  case class Elem(markup: Markup, body: List[Tree]) extends Tree
-  case class Text(content: String) extends Tree
-
-  def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body)
-  def elem(name: String) = Elem(Markup(name, Nil), Nil)
-
-  type Body = List[Tree]
-
-
-  /* string representation */
-
-  def string_of_body(body: Body): String =
-  {
-    val s = new StringBuilder
-
-    def text(txt: String) {
-      if (txt == null) s ++= txt
-      else {
-        for (c <- txt.iterator) c match {
-          case '<' => s ++= "&lt;"
-          case '>' => s ++= "&gt;"
-          case '&' => s ++= "&amp;"
-          case '"' => s ++= "&quot;"
-          case '\'' => s ++= "&apos;"
-          case _ => s += c
-        }
-      }
-    }
-    def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" }
-    def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) }
-    def tree(t: Tree): Unit =
-      t match {
-        case Elem(markup, Nil) =>
-          s ++= "<"; elem(markup); s ++= "/>"
-        case Elem(markup, ts) =>
-          s ++= "<"; elem(markup); s ++= ">"
-          ts.foreach(tree)
-          s ++= "</"; s ++= markup.name; s ++= ">"
-        case Text(txt) => text(txt)
-      }
-    body.foreach(tree)
-    s.toString
-  }
-
-  def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
-
-
-  /* text content */
-
-  def content_stream(tree: Tree): Stream[String] =
-    tree match {
-      case Elem(_, body) => content_stream(body)
-      case Text(content) => Stream(content)
-    }
-  def content_stream(body: Body): Stream[String] =
-    body.toStream.flatten(content_stream(_))
-
-  def content(tree: Tree): Iterator[String] = content_stream(tree).iterator
-  def content(body: Body): Iterator[String] = content_stream(body).iterator
-
-
-  /* pipe-lined cache for partial sharing */
-
-  class Cache(initial_size: Int = 131071, max_string: Int = 100)
-  {
-    private val cache_actor = actor
-    {
-      val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)
-
-      def lookup[A](x: A): Option[A] =
-      {
-        val ref = table.get(x)
-        if (ref == null) None
-        else {
-          val y = ref.asInstanceOf[WeakReference[A]].get
-          if (y == null) None
-          else Some(y)
-        }
-      }
-      def store[A](x: A): A =
-      {
-        table.put(x, new WeakReference[Any](x))
-        x
-      }
-
-      def trim_bytes(s: String): String = new String(s.toCharArray)
-
-      def cache_string(x: String): String =
-        lookup(x) match {
-          case Some(y) => y
-          case None =>
-            val z = trim_bytes(x)
-            if (z.length > max_string) z else store(z)
-        }
-      def cache_props(x: Properties.T): Properties.T =
-        if (x.isEmpty) x
-        else
-          lookup(x) match {
-            case Some(y) => y
-            case None => store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2))))
-          }
-      def cache_markup(x: Markup): Markup =
-        lookup(x) match {
-          case Some(y) => y
-          case None =>
-            x match {
-              case Markup(name, props) =>
-                store(Markup(cache_string(name), cache_props(props)))
-            }
-        }
-      def cache_tree(x: XML.Tree): XML.Tree =
-        lookup(x) match {
-          case Some(y) => y
-          case None =>
-            x match {
-              case XML.Elem(markup, body) =>
-                store(XML.Elem(cache_markup(markup), cache_body(body)))
-              case XML.Text(text) => store(XML.Text(cache_string(text)))
-            }
-        }
-      def cache_body(x: XML.Body): XML.Body =
-        if (x.isEmpty) x
-        else
-          lookup(x) match {
-            case Some(y) => y
-            case None => x.map(cache_tree(_))
-          }
-
-      // main loop
-      loop {
-        react {
-          case Cache_String(x, f) => f(cache_string(x))
-          case Cache_Markup(x, f) => f(cache_markup(x))
-          case Cache_Tree(x, f) => f(cache_tree(x))
-          case Cache_Body(x, f) => f(cache_body(x))
-          case Cache_Ignore(x, f) => f(x)
-          case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad)
-        }
-      }
-    }
-
-    private case class Cache_String(x: String, f: String => Unit)
-    private case class Cache_Markup(x: Markup, f: Markup => Unit)
-    private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit)
-    private case class Cache_Body(x: XML.Body, f: XML.Body => Unit)
-    private case class Cache_Ignore[A](x: A, f: A => Unit)
-
-    // main methods
-    def cache_string(x: String)(f: String => Unit) { cache_actor ! Cache_String(x, f) }
-    def cache_markup(x: Markup)(f: Markup => Unit) { cache_actor ! Cache_Markup(x, f) }
-    def cache_tree(x: XML.Tree)(f: XML.Tree => Unit) { cache_actor ! Cache_Tree(x, f) }
-    def cache_body(x: XML.Body)(f: XML.Body => Unit) { cache_actor ! Cache_Body(x, f) }
-    def cache_ignore[A](x: A)(f: A => Unit) { cache_actor ! Cache_Ignore(x, f) }
-  }
-
-
-
-  /** document object model (W3C DOM) **/
-
-  def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
-    node.getUserData(Markup.Data.name) match {
-      case tree: XML.Tree => Some(tree)
-      case _ => None
-    }
-
-  def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
-  {
-    def DOM(tr: Tree): org.w3c.dom.Node = tr match {
-      case Elem(Markup.Data, List(data, t)) =>
-        val node = DOM(t)
-        node.setUserData(Markup.Data.name, data, null)
-        node
-      case Elem(Markup(name, atts), ts) =>
-        if (name == Markup.Data.name)
-          error("Malformed data element: " + tr.toString)
-        val node = doc.createElement(name)
-        for ((name, value) <- atts) node.setAttribute(name, value)
-        for (t <- ts) node.appendChild(DOM(t))
-        node
-      case Text(txt) => doc.createTextNode(txt)
-    }
-    DOM(tree)
-  }
-
-
-
-  /** XML as data representation language **/
-
-  class XML_Atom(s: String) extends Exception(s)
-  class XML_Body(body: XML.Body) extends Exception
-
-  object Encode
-  {
-    type T[A] = A => XML.Body
-
-
-    /* atomic values */
-
-    def long_atom(i: Long): String = i.toString
-
-    def int_atom(i: Int): String = i.toString
-
-    def bool_atom(b: Boolean): String = if (b) "1" else "0"
-
-    def unit_atom(u: Unit) = ""
-
-
-    /* structural nodes */
-
-    private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)
-
-    private def vector(xs: List[String]): XML.Attributes =
-      xs.zipWithIndex.map(p => (int_atom(p._2), p._1))
-
-    private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
-      XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2)
-
-
-    /* representation of standard types */
-
-    val properties: T[Properties.T] =
-      (props => List(XML.Elem(Markup(":", props), Nil)))
-
-    val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s)))
-
-    val long: T[Long] = (x => string(long_atom(x)))
-
-    val int: T[Int] = (x => string(int_atom(x)))
-
-    val bool: T[Boolean] = (x => string(bool_atom(x)))
-
-    val unit: T[Unit] = (x => string(unit_atom(x)))
-
-    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
-      (x => List(node(f(x._1)), node(g(x._2))))
-
-    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
-      (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3))))
-
-    def list[A](f: T[A]): T[List[A]] =
-      (xs => xs.map((x: A) => node(f(x))))
-
-    def option[A](f: T[A]): T[Option[A]] =
-    {
-      case None => Nil
-      case Some(x) => List(node(f(x)))
-    }
-
-    def variant[A](fs: List[PartialFunction[A, (List[String], XML.Body)]]): T[A] =
-    {
-      case x =>
-        val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get
-        List(tagged(tag, f(x)))
-    }
-  }
-
-  object Decode
-  {
-    type T[A] = XML.Body => A
-    type V[A] = (List[String], XML.Body) => A
-
-
-    /* atomic values */
-
-    def long_atom(s: String): Long =
-      try { java.lang.Long.parseLong(s) }
-      catch { case e: NumberFormatException => throw new XML_Atom(s) }
-
-    def int_atom(s: String): Int =
-      try { Integer.parseInt(s) }
-      catch { case e: NumberFormatException => throw new XML_Atom(s) }
-
-    def bool_atom(s: String): Boolean =
-      if (s == "1") true
-      else if (s == "0") false
-      else throw new XML_Atom(s)
-
-    def unit_atom(s: String): Unit =
-      if (s == "") () else throw new XML_Atom(s)
-
-
-    /* structural nodes */
-
-    private def node(t: XML.Tree): XML.Body =
-      t match {
-        case XML.Elem(Markup(":", Nil), ts) => ts
-        case _ => throw new XML_Body(List(t))
-      }
-
-    private def vector(atts: XML.Attributes): List[String] =
-    {
-      val xs = new mutable.ListBuffer[String]
-      var i = 0
-      for ((a, x) <- atts) {
-        if (int_atom(a) == i) { xs += x; i = i + 1 }
-        else throw new XML_Atom(a)
-      }
-      xs.toList
-    }
-
-    private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
-      t match {
-        case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts))
-        case _ => throw new XML_Body(List(t))
-      }
-
-
-    /* representation of standard types */
-
-    val properties: T[Properties.T] =
-    {
-      case List(XML.Elem(Markup(":", props), Nil)) => props
-      case ts => throw new XML_Body(ts)
-    }
-
-    val string: T[String] =
-    {
-      case Nil => ""
-      case List(XML.Text(s)) => s
-      case ts => throw new XML_Body(ts)
-    }
-
-    val long: T[Long] = (x => long_atom(string(x)))
-
-    val int: T[Int] = (x => int_atom(string(x)))
-
-    val bool: T[Boolean] = (x => bool_atom(string(x)))
-
-    val unit: T[Unit] = (x => unit_atom(string(x)))
-
-    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
-    {
-      case List(t1, t2) => (f(node(t1)), g(node(t2)))
-      case ts => throw new XML_Body(ts)
-    }
-
-    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
-    {
-      case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3)))
-      case ts => throw new XML_Body(ts)
-    }
-
-    def list[A](f: T[A]): T[List[A]] =
-      (ts => ts.map(t => f(node(t))))
-
-    def option[A](f: T[A]): T[Option[A]] =
-    {
-      case Nil => None
-      case List(t) => Some(f(node(t)))
-      case ts => throw new XML_Body(ts)
-    }
-
-    def variant[A](fs: List[V[A]]): T[A] =
-    {
-      case List(t) =>
-        val (tag, (xs, ts)) = tagged(t)
-        val f =
-          try { fs(tag) }
-          catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) }
-        f(xs, ts)
-      case ts => throw new XML_Body(ts)
-    }
-  }
-}
--- a/src/Pure/General/yxml.ML	Sun Sep 04 14:29:15 2011 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,147 +0,0 @@
-(*  Title:      Pure/General/yxml.ML
-    Author:     Makarius
-
-Efficient text representation of XML trees using extra characters X
-and Y -- no escaping, may nest marked text verbatim.
-
-Markup <elem att="val" ...>...body...</elem> is encoded as:
-
-  X Y name Y att=val ... X
-  ...
-  body
-  ...
-  X Y X
-*)
-
-signature YXML =
-sig
-  val X: Symbol.symbol
-  val Y: Symbol.symbol
-  val embed_controls: string -> string
-  val detect: string -> bool
-  val output_markup: Markup.T -> string * string
-  val element: string -> XML.attributes -> string list -> string
-  val string_of_body: XML.body -> string
-  val string_of: XML.tree -> string
-  val parse_body: string -> XML.body
-  val parse: string -> XML.tree
-end;
-
-structure YXML: YXML =
-struct
-
-(** string representation **)
-
-(* idempotent recoding of certain low ASCII control characters *)
-
-fun pseudo_utf8 c =
-  if Symbol.is_ascii_control c
-  then chr 192 ^ chr (128 + ord c)
-  else c;
-
-fun embed_controls str =
-  if exists_string Symbol.is_ascii_control str
-  then translate_string pseudo_utf8 str
-  else str;
-
-
-(* markers *)
-
-val X = chr 5;
-val Y = chr 6;
-val XY = X ^ Y;
-val XYX = XY ^ X;
-
-val detect = exists_string (fn s => s = X orelse s = Y);
-
-
-(* output *)
-
-fun output_markup (markup as (name, atts)) =
-  if Markup.is_empty markup then Markup.no_output
-  else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX);
-
-fun element name atts body =
-  let val (pre, post) = output_markup (name, atts)
-  in pre ^ implode body ^ post end;
-
-fun string_of_body body =
-  let
-    fun attrib (a, x) =
-      Buffer.add Y #>
-      Buffer.add a #> Buffer.add "=" #> Buffer.add x;
-    fun tree (XML.Elem ((name, atts), ts)) =
-          Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #>
-          trees ts #>
-          Buffer.add XYX
-      | tree (XML.Text s) = Buffer.add s
-    and trees ts = fold tree ts;
-  in Buffer.empty |> trees body |> Buffer.content end;
-
-val string_of = string_of_body o single;
-
-
-
-(** efficient YXML parsing **)
-
-local
-
-(* splitting *)
-
-fun is_char s c = ord s = Char.ord c;
-
-val split_string =
-  Substring.full #>
-  Substring.tokens (is_char X) #>
-  map (Substring.fields (is_char Y) #> map Substring.string);
-
-
-(* structural errors *)
-
-fun err msg = raise Fail ("Malformed YXML encoding: " ^ msg);
-fun err_attribute () = err "bad attribute";
-fun err_element () = err "bad element";
-fun err_unbalanced "" = err "unbalanced element"
-  | err_unbalanced name = err ("unbalanced element " ^ quote name);
-
-
-(* stack operations *)
-
-fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending;
-
-fun push "" _ _ = err_element ()
-  | push name atts pending = ((name, atts), []) :: pending;
-
-fun pop ((("", _), _) :: _) = err_unbalanced ""
-  | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending;
-
-
-(* parsing *)
-
-fun parse_attrib s =
-  (case first_field "=" s of
-    NONE => err_attribute ()
-  | SOME ("", _) => err_attribute ()
-  | SOME att => att);
-
-fun parse_chunk ["", ""] = pop
-  | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts)
-  | parse_chunk txts = fold (add o XML.Text) txts;
-
-in
-
-fun parse_body source =
-  (case fold parse_chunk (split_string source) [(("", []), [])] of
-    [(("", _), result)] => rev result
-  | ((name, _), _) :: _ => err_unbalanced name);
-
-fun parse source =
-  (case parse_body source of
-    [result] => result
-  | [] => XML.Text ""
-  | _ => err "multiple results");
-
-end;
-
-end;
-
--- a/src/Pure/General/yxml.scala	Sun Sep 04 14:29:15 2011 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-/*  Title:      Pure/General/yxml.scala
-    Author:     Makarius
-
-Efficient text representation of XML trees.
-*/
-
-package isabelle
-
-
-import scala.collection.mutable
-
-
-object YXML
-{
-  /* chunk markers */
-
-  val X = '\5'
-  val Y = '\6'
-  val X_string = X.toString
-  val Y_string = Y.toString
-
-  def detect(s: String): Boolean = s.exists(c => c == X || c == Y)
-
-
-  /* string representation */  // FIXME byte array version with pseudo-utf-8 (!?)
-
-  def string_of_body(body: XML.Body): String =
-  {
-    val s = new StringBuilder
-    def attrib(p: (String, String)) { s += Y; s ++= p._1; s += '='; s ++= p._2 }
-    def tree(t: XML.Tree): Unit =
-      t match {
-        case XML.Elem(Markup(name, atts), ts) =>
-          s += X; s += Y; s++= name; atts.foreach(attrib); s += X
-          ts.foreach(tree)
-          s += X; s += Y; s += X
-        case XML.Text(text) => s ++= text
-      }
-    body.foreach(tree)
-    s.toString
-  }
-
-  def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
-
-
-  /* parsing */
-
-  private def err(msg: String) = error("Malformed YXML: " + msg)
-  private def err_attribute() = err("bad attribute")
-  private def err_element() = err("bad element")
-  private def err_unbalanced(name: String) =
-    if (name == "") err("unbalanced element")
-    else err("unbalanced element " + quote(name))
-
-  private def parse_attrib(source: CharSequence) = {
-    val s = source.toString
-    val i = s.indexOf('=')
-    if (i <= 0) err_attribute()
-    (s.substring(0, i), s.substring(i + 1))
-  }
-
-
-  def parse_body(source: CharSequence): XML.Body =
-  {
-    /* stack operations */
-
-    def buffer(): mutable.ListBuffer[XML.Tree] = new mutable.ListBuffer[XML.Tree]
-    var stack: List[(Markup, mutable.ListBuffer[XML.Tree])] = List((Markup.Empty, buffer()))
-
-    def add(x: XML.Tree)
-    {
-      (stack: @unchecked) match { case ((_, body) :: _) => body += x }
-    }
-
-    def push(name: String, atts: XML.Attributes)
-    {
-      if (name == "") err_element()
-      else stack = (Markup(name, atts), buffer()) :: stack
-    }
-
-    def pop()
-    {
-      (stack: @unchecked) match {
-        case ((Markup.Empty, _) :: _) => err_unbalanced("")
-        case ((markup, body) :: pending) =>
-          stack = pending
-          add(XML.Elem(markup, body.toList))
-      }
-    }
-
-
-    /* parse chunks */
-
-    for (chunk <- Library.chunks(source, X) if chunk.length != 0) {
-      if (chunk.length == 1 && chunk.charAt(0) == Y) pop()
-      else {
-        Library.chunks(chunk, Y).toList match {
-          case ch :: name :: atts if ch.length == 0 =>
-            push(name.toString, atts.map(parse_attrib))
-          case txts => for (txt <- txts) add(XML.Text(txt.toString))
-        }
-      }
-    }
-    (stack: @unchecked) match {
-      case List((Markup.Empty, body)) => body.toList
-      case (Markup(name, _), _) :: _ => err_unbalanced(name)
-    }
-  }
-
-  def parse(source: CharSequence): XML.Tree =
-    parse_body(source) match {
-      case List(result) => result
-      case Nil => XML.Text("")
-      case _ => err("multiple results")
-    }
-
-
-  /* failsafe parsing */
-
-  private def markup_malformed(source: CharSequence) =
-    XML.elem(Markup.MALFORMED, List(XML.Text(source.toString)))
-
-  def parse_body_failsafe(source: CharSequence): XML.Body =
-  {
-    try { parse_body(source) }
-    catch { case ERROR(_) => List(markup_malformed(source)) }
-  }
-
-  def parse_failsafe(source: CharSequence): XML.Tree =
-  {
-    try { parse(source) }
-    catch { case ERROR(_) => markup_malformed(source) }
-  }
-}
--- a/src/Pure/IsaMakefile	Sun Sep 04 14:29:15 2011 +0200
+++ b/src/Pure/IsaMakefile	Sun Sep 04 15:21:50 2011 +0200
@@ -105,8 +105,6 @@
   General/table.ML					\
   General/timing.ML					\
   General/url.ML					\
-  General/xml.ML					\
-  General/yxml.ML					\
   Isar/args.ML						\
   Isar/attrib.ML					\
   Isar/auto_bind.ML					\
@@ -158,6 +156,8 @@
   ML/ml_thms.ML						\
   PIDE/document.ML					\
   PIDE/isar_document.ML					\
+  PIDE/xml.ML						\
+  PIDE/yxml.ML						\
   Proof/extraction.ML					\
   Proof/proof_checker.ML				\
   Proof/proof_rewrite_rules.ML				\
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/PIDE/xml.ML	Sun Sep 04 15:21:50 2011 +0200
@@ -0,0 +1,363 @@
+(*  Title:      Pure/PIDE/xml.ML
+    Author:     David Aspinall
+    Author:     Stefan Berghofer
+    Author:     Makarius
+
+Untyped XML trees and basic data representation.
+*)
+
+signature XML_DATA_OPS =
+sig
+  type 'a A
+  type 'a T
+  type 'a V
+  val int_atom: int A
+  val bool_atom: bool A
+  val unit_atom: unit A
+  val properties: Properties.T T
+  val string: string T
+  val int: int T
+  val bool: bool T
+  val unit: unit T
+  val pair: 'a T -> 'b T -> ('a * 'b) T
+  val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
+  val list: 'a T -> 'a list T
+  val option: 'a T -> 'a option T
+  val variant: 'a V list -> 'a T
+end;
+
+signature XML =
+sig
+  type attributes = Properties.T
+  datatype tree =
+      Elem of Markup.T * tree list
+    | Text of string
+  type body = tree list
+  val add_content: tree -> Buffer.T -> Buffer.T
+  val content_of: body -> string
+  val header: string
+  val text: string -> string
+  val element: string -> attributes -> string list -> string
+  val output_markup: Markup.T -> Output.output * Output.output
+  val string_of: tree -> string
+  val pretty: int -> tree -> Pretty.T
+  val output: tree -> TextIO.outstream -> unit
+  val parse_comments: string list -> unit * string list
+  val parse_string : string -> string option
+  val parse_element: string list -> tree * string list
+  val parse_document: string list -> tree * string list
+  val parse: string -> tree
+  exception XML_ATOM of string
+  exception XML_BODY of body
+  structure Encode: XML_DATA_OPS
+  structure Decode: XML_DATA_OPS
+end;
+
+structure XML: XML =
+struct
+
+(** XML trees **)
+
+type attributes = Properties.T;
+
+datatype tree =
+    Elem of Markup.T * tree list
+  | Text of string;
+
+type body = tree list;
+
+fun add_content (Elem (_, ts)) = fold add_content ts
+  | add_content (Text s) = Buffer.add s;
+
+fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content;
+
+
+
+(** string representation **)
+
+val header = "<?xml version=\"1.0\"?>\n";
+
+
+(* escaped text *)
+
+fun decode "&lt;" = "<"
+  | decode "&gt;" = ">"
+  | decode "&amp;" = "&"
+  | decode "&apos;" = "'"
+  | decode "&quot;" = "\""
+  | decode c = c;
+
+fun encode "<" = "&lt;"
+  | encode ">" = "&gt;"
+  | encode "&" = "&amp;"
+  | encode "'" = "&apos;"
+  | encode "\"" = "&quot;"
+  | encode c = c;
+
+val text = translate_string encode;
+
+
+(* elements *)
+
+fun elem name atts =
+  space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);
+
+fun element name atts body =
+  let val b = implode body in
+    if b = "" then enclose "<" "/>" (elem name atts)
+    else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name
+  end;
+
+fun output_markup (markup as (name, atts)) =
+  if Markup.is_empty markup then Markup.no_output
+  else (enclose "<" ">" (elem name atts), enclose "</" ">" name);
+
+
+(* output *)
+
+fun buffer_of depth tree =
+  let
+    fun traverse _ (Elem ((name, atts), [])) =
+          Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"
+      | traverse d (Elem ((name, atts), ts)) =
+          Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>
+          traverse_body d ts #>
+          Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
+      | traverse _ (Text s) = Buffer.add (text s)
+    and traverse_body 0 _ = Buffer.add "..."
+      | traverse_body d ts = fold (traverse (d - 1)) ts;
+  in Buffer.empty |> traverse depth tree end;
+
+val string_of = Buffer.content o buffer_of ~1;
+val output = Buffer.output o buffer_of ~1;
+
+fun pretty depth tree =
+  Pretty.str (Buffer.content (buffer_of (Int.max (0, depth)) tree));
+
+
+
+(** XML parsing **)
+
+local
+
+fun err msg (xs, _) =
+  fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
+
+fun ignored _ = [];
+
+val blanks = Scan.many Symbol.is_blank;
+val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
+val regular = Scan.one Symbol.is_regular;
+fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);
+
+val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;
+
+val parse_cdata =
+  Scan.this_string "<![CDATA[" |--
+  (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|
+  Scan.this_string "]]>";
+
+val parse_att =
+  (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) --
+  (($$ "\"" || $$ "'") :|-- (fn s =>
+    (Scan.repeat (special || regular_except s) >> implode) --| $$ s));
+
+val parse_comment =
+  Scan.this_string "<!--" --
+  Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
+  Scan.this_string "-->" >> ignored;
+
+val parse_processing_instruction =
+  Scan.this_string "<?" --
+  Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
+  Scan.this_string "?>" >> ignored;
+
+val parse_doctype =
+  Scan.this_string "<!DOCTYPE" --
+  Scan.repeat (Scan.unless ($$ ">") regular) --
+  $$ ">" >> ignored;
+
+val parse_misc =
+  Scan.one Symbol.is_blank >> ignored ||
+  parse_processing_instruction ||
+  parse_comment;
+
+val parse_optional_text =
+  Scan.optional (parse_chars >> (single o Text)) [];
+
+fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_";
+fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = ".";
+val parse_name = Scan.one name_start_char ::: Scan.many name_char;
+
+in
+
+val parse_comments =
+  blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
+
+val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode;
+
+fun parse_content xs =
+  (parse_optional_text @@@
+    (Scan.repeat
+      ((parse_element >> single ||
+        parse_cdata >> (single o Text) ||
+        parse_processing_instruction ||
+        parse_comment)
+      @@@ parse_optional_text) >> flat)) xs
+
+and parse_element xs =
+  ($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :--
+    (fn (name, _) =>
+      !! (err (fn () => "Expected > or />"))
+       ($$ "/" -- $$ ">" >> ignored ||
+        $$ ">" |-- parse_content --|
+          !! (err (fn () => "Expected </" ^ implode name ^ ">"))
+              ($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">")))
+    >> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs;
+
+val parse_document =
+  (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)
+  |-- parse_element;
+
+fun parse s =
+  (case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element"))
+      (blanks |-- parse_document --| blanks))) (raw_explode s) of
+    (x, []) => x
+  | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
+
+end;
+
+
+
+(** XML as data representation language **)
+
+exception XML_ATOM of string;
+exception XML_BODY of tree list;
+
+
+structure Encode =
+struct
+
+type 'a A = 'a -> string;
+type 'a T = 'a -> body;
+type 'a V = 'a -> string list * body;
+
+
+(* atomic values *)
+
+fun int_atom i = signed_string_of_int i;
+
+fun bool_atom false = "0"
+  | bool_atom true = "1";
+
+fun unit_atom () = "";
+
+
+(* structural nodes *)
+
+fun node ts = Elem ((":", []), ts);
+
+fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs;
+
+fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts);
+
+
+(* representation of standard types *)
+
+fun properties props = [Elem ((":", props), [])];
+
+fun string "" = []
+  | string s = [Text s];
+
+val int = string o int_atom;
+
+val bool = string o bool_atom;
+
+val unit = string o unit_atom;
+
+fun pair f g (x, y) = [node (f x), node (g y)];
+
+fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)];
+
+fun list f xs = map (node o f) xs;
+
+fun option _ NONE = []
+  | option f (SOME x) = [node (f x)];
+
+fun variant fs x = [tagged (the (get_index (fn f => try f x) fs))];
+
+end;
+
+
+structure Decode =
+struct
+
+type 'a A = string -> 'a;
+type 'a T = body -> 'a;
+type 'a V = string list * body -> 'a;
+
+
+(* atomic values *)
+
+fun int_atom s =
+  Markup.parse_int s
+    handle Fail _ => raise XML_ATOM s;
+
+fun bool_atom "0" = false
+  | bool_atom "1" = true
+  | bool_atom s = raise XML_ATOM s;
+
+fun unit_atom "" = ()
+  | unit_atom s = raise XML_ATOM s;
+
+
+(* structural nodes *)
+
+fun node (Elem ((":", []), ts)) = ts
+  | node t = raise XML_BODY [t];
+
+fun vector atts =
+  #1 (fold_map (fn (a, x) =>
+        fn i => if int_atom a = i then (x, i + 1) else raise XML_ATOM a) atts 0);
+
+fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts))
+  | tagged t = raise XML_BODY [t];
+
+
+(* representation of standard types *)
+
+fun properties [Elem ((":", props), [])] = props
+  | properties ts = raise XML_BODY ts;
+
+fun string [] = ""
+  | string [Text s] = s
+  | string ts = raise XML_BODY ts;
+
+val int = int_atom o string;
+
+val bool = bool_atom o string;
+
+val unit = unit_atom o string;
+
+fun pair f g [t1, t2] = (f (node t1), g (node t2))
+  | pair _ _ ts = raise XML_BODY ts;
+
+fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3))
+  | triple _ _ _ ts = raise XML_BODY ts;
+
+fun list f ts = map (f o node) ts;
+
+fun option _ [] = NONE
+  | option f [t] = SOME (f (node t))
+  | option _ ts = raise XML_BODY ts;
+
+fun variant fs [t] =
+      let
+        val (tag, (xs, ts)) = tagged t;
+        val f = nth fs tag handle General.Subscript => raise XML_BODY [t];
+      in f (xs, ts) end
+  | variant _ ts = raise XML_BODY ts;
+
+end;
+
+end;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/PIDE/xml.scala	Sun Sep 04 15:21:50 2011 +0200
@@ -0,0 +1,390 @@
+/*  Title:      Pure/PIDE/xml.scala
+    Author:     Makarius
+
+Untyped XML trees and basic data representation.
+*/
+
+package isabelle
+
+import java.lang.System
+import java.util.WeakHashMap
+import java.lang.ref.WeakReference
+import javax.xml.parsers.DocumentBuilderFactory
+
+import scala.actors.Actor._
+import scala.collection.mutable
+
+
+object XML
+{
+  /** XML trees **/
+
+  /* datatype representation */
+
+  type Attributes = Properties.T
+
+  sealed abstract class Tree { override def toString = string_of_tree(this) }
+  case class Elem(markup: Markup, body: List[Tree]) extends Tree
+  case class Text(content: String) extends Tree
+
+  def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body)
+  def elem(name: String) = Elem(Markup(name, Nil), Nil)
+
+  type Body = List[Tree]
+
+
+  /* string representation */
+
+  def string_of_body(body: Body): String =
+  {
+    val s = new StringBuilder
+
+    def text(txt: String) {
+      if (txt == null) s ++= txt
+      else {
+        for (c <- txt.iterator) c match {
+          case '<' => s ++= "&lt;"
+          case '>' => s ++= "&gt;"
+          case '&' => s ++= "&amp;"
+          case '"' => s ++= "&quot;"
+          case '\'' => s ++= "&apos;"
+          case _ => s += c
+        }
+      }
+    }
+    def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" }
+    def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) }
+    def tree(t: Tree): Unit =
+      t match {
+        case Elem(markup, Nil) =>
+          s ++= "<"; elem(markup); s ++= "/>"
+        case Elem(markup, ts) =>
+          s ++= "<"; elem(markup); s ++= ">"
+          ts.foreach(tree)
+          s ++= "</"; s ++= markup.name; s ++= ">"
+        case Text(txt) => text(txt)
+      }
+    body.foreach(tree)
+    s.toString
+  }
+
+  def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
+
+
+  /* text content */
+
+  def content_stream(tree: Tree): Stream[String] =
+    tree match {
+      case Elem(_, body) => content_stream(body)
+      case Text(content) => Stream(content)
+    }
+  def content_stream(body: Body): Stream[String] =
+    body.toStream.flatten(content_stream(_))
+
+  def content(tree: Tree): Iterator[String] = content_stream(tree).iterator
+  def content(body: Body): Iterator[String] = content_stream(body).iterator
+
+
+  /* pipe-lined cache for partial sharing */
+
+  class Cache(initial_size: Int = 131071, max_string: Int = 100)
+  {
+    private val cache_actor = actor
+    {
+      val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)
+
+      def lookup[A](x: A): Option[A] =
+      {
+        val ref = table.get(x)
+        if (ref == null) None
+        else {
+          val y = ref.asInstanceOf[WeakReference[A]].get
+          if (y == null) None
+          else Some(y)
+        }
+      }
+      def store[A](x: A): A =
+      {
+        table.put(x, new WeakReference[Any](x))
+        x
+      }
+
+      def trim_bytes(s: String): String = new String(s.toCharArray)
+
+      def cache_string(x: String): String =
+        lookup(x) match {
+          case Some(y) => y
+          case None =>
+            val z = trim_bytes(x)
+            if (z.length > max_string) z else store(z)
+        }
+      def cache_props(x: Properties.T): Properties.T =
+        if (x.isEmpty) x
+        else
+          lookup(x) match {
+            case Some(y) => y
+            case None => store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2))))
+          }
+      def cache_markup(x: Markup): Markup =
+        lookup(x) match {
+          case Some(y) => y
+          case None =>
+            x match {
+              case Markup(name, props) =>
+                store(Markup(cache_string(name), cache_props(props)))
+            }
+        }
+      def cache_tree(x: XML.Tree): XML.Tree =
+        lookup(x) match {
+          case Some(y) => y
+          case None =>
+            x match {
+              case XML.Elem(markup, body) =>
+                store(XML.Elem(cache_markup(markup), cache_body(body)))
+              case XML.Text(text) => store(XML.Text(cache_string(text)))
+            }
+        }
+      def cache_body(x: XML.Body): XML.Body =
+        if (x.isEmpty) x
+        else
+          lookup(x) match {
+            case Some(y) => y
+            case None => x.map(cache_tree(_))
+          }
+
+      // main loop
+      loop {
+        react {
+          case Cache_String(x, f) => f(cache_string(x))
+          case Cache_Markup(x, f) => f(cache_markup(x))
+          case Cache_Tree(x, f) => f(cache_tree(x))
+          case Cache_Body(x, f) => f(cache_body(x))
+          case Cache_Ignore(x, f) => f(x)
+          case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad)
+        }
+      }
+    }
+
+    private case class Cache_String(x: String, f: String => Unit)
+    private case class Cache_Markup(x: Markup, f: Markup => Unit)
+    private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit)
+    private case class Cache_Body(x: XML.Body, f: XML.Body => Unit)
+    private case class Cache_Ignore[A](x: A, f: A => Unit)
+
+    // main methods
+    def cache_string(x: String)(f: String => Unit) { cache_actor ! Cache_String(x, f) }
+    def cache_markup(x: Markup)(f: Markup => Unit) { cache_actor ! Cache_Markup(x, f) }
+    def cache_tree(x: XML.Tree)(f: XML.Tree => Unit) { cache_actor ! Cache_Tree(x, f) }
+    def cache_body(x: XML.Body)(f: XML.Body => Unit) { cache_actor ! Cache_Body(x, f) }
+    def cache_ignore[A](x: A)(f: A => Unit) { cache_actor ! Cache_Ignore(x, f) }
+  }
+
+
+
+  /** document object model (W3C DOM) **/
+
+  def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
+    node.getUserData(Markup.Data.name) match {
+      case tree: XML.Tree => Some(tree)
+      case _ => None
+    }
+
+  def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
+  {
+    def DOM(tr: Tree): org.w3c.dom.Node = tr match {
+      case Elem(Markup.Data, List(data, t)) =>
+        val node = DOM(t)
+        node.setUserData(Markup.Data.name, data, null)
+        node
+      case Elem(Markup(name, atts), ts) =>
+        if (name == Markup.Data.name)
+          error("Malformed data element: " + tr.toString)
+        val node = doc.createElement(name)
+        for ((name, value) <- atts) node.setAttribute(name, value)
+        for (t <- ts) node.appendChild(DOM(t))
+        node
+      case Text(txt) => doc.createTextNode(txt)
+    }
+    DOM(tree)
+  }
+
+
+
+  /** XML as data representation language **/
+
+  class XML_Atom(s: String) extends Exception(s)
+  class XML_Body(body: XML.Body) extends Exception
+
+  object Encode
+  {
+    type T[A] = A => XML.Body
+
+
+    /* atomic values */
+
+    def long_atom(i: Long): String = i.toString
+
+    def int_atom(i: Int): String = i.toString
+
+    def bool_atom(b: Boolean): String = if (b) "1" else "0"
+
+    def unit_atom(u: Unit) = ""
+
+
+    /* structural nodes */
+
+    private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)
+
+    private def vector(xs: List[String]): XML.Attributes =
+      xs.zipWithIndex.map(p => (int_atom(p._2), p._1))
+
+    private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
+      XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2)
+
+
+    /* representation of standard types */
+
+    val properties: T[Properties.T] =
+      (props => List(XML.Elem(Markup(":", props), Nil)))
+
+    val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s)))
+
+    val long: T[Long] = (x => string(long_atom(x)))
+
+    val int: T[Int] = (x => string(int_atom(x)))
+
+    val bool: T[Boolean] = (x => string(bool_atom(x)))
+
+    val unit: T[Unit] = (x => string(unit_atom(x)))
+
+    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
+      (x => List(node(f(x._1)), node(g(x._2))))
+
+    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
+      (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3))))
+
+    def list[A](f: T[A]): T[List[A]] =
+      (xs => xs.map((x: A) => node(f(x))))
+
+    def option[A](f: T[A]): T[Option[A]] =
+    {
+      case None => Nil
+      case Some(x) => List(node(f(x)))
+    }
+
+    def variant[A](fs: List[PartialFunction[A, (List[String], XML.Body)]]): T[A] =
+    {
+      case x =>
+        val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get
+        List(tagged(tag, f(x)))
+    }
+  }
+
+  object Decode
+  {
+    type T[A] = XML.Body => A
+    type V[A] = (List[String], XML.Body) => A
+
+
+    /* atomic values */
+
+    def long_atom(s: String): Long =
+      try { java.lang.Long.parseLong(s) }
+      catch { case e: NumberFormatException => throw new XML_Atom(s) }
+
+    def int_atom(s: String): Int =
+      try { Integer.parseInt(s) }
+      catch { case e: NumberFormatException => throw new XML_Atom(s) }
+
+    def bool_atom(s: String): Boolean =
+      if (s == "1") true
+      else if (s == "0") false
+      else throw new XML_Atom(s)
+
+    def unit_atom(s: String): Unit =
+      if (s == "") () else throw new XML_Atom(s)
+
+
+    /* structural nodes */
+
+    private def node(t: XML.Tree): XML.Body =
+      t match {
+        case XML.Elem(Markup(":", Nil), ts) => ts
+        case _ => throw new XML_Body(List(t))
+      }
+
+    private def vector(atts: XML.Attributes): List[String] =
+    {
+      val xs = new mutable.ListBuffer[String]
+      var i = 0
+      for ((a, x) <- atts) {
+        if (int_atom(a) == i) { xs += x; i = i + 1 }
+        else throw new XML_Atom(a)
+      }
+      xs.toList
+    }
+
+    private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
+      t match {
+        case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts))
+        case _ => throw new XML_Body(List(t))
+      }
+
+
+    /* representation of standard types */
+
+    val properties: T[Properties.T] =
+    {
+      case List(XML.Elem(Markup(":", props), Nil)) => props
+      case ts => throw new XML_Body(ts)
+    }
+
+    val string: T[String] =
+    {
+      case Nil => ""
+      case List(XML.Text(s)) => s
+      case ts => throw new XML_Body(ts)
+    }
+
+    val long: T[Long] = (x => long_atom(string(x)))
+
+    val int: T[Int] = (x => int_atom(string(x)))
+
+    val bool: T[Boolean] = (x => bool_atom(string(x)))
+
+    val unit: T[Unit] = (x => unit_atom(string(x)))
+
+    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
+    {
+      case List(t1, t2) => (f(node(t1)), g(node(t2)))
+      case ts => throw new XML_Body(ts)
+    }
+
+    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
+    {
+      case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3)))
+      case ts => throw new XML_Body(ts)
+    }
+
+    def list[A](f: T[A]): T[List[A]] =
+      (ts => ts.map(t => f(node(t))))
+
+    def option[A](f: T[A]): T[Option[A]] =
+    {
+      case Nil => None
+      case List(t) => Some(f(node(t)))
+      case ts => throw new XML_Body(ts)
+    }
+
+    def variant[A](fs: List[V[A]]): T[A] =
+    {
+      case List(t) =>
+        val (tag, (xs, ts)) = tagged(t)
+        val f =
+          try { fs(tag) }
+          catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) }
+        f(xs, ts)
+      case ts => throw new XML_Body(ts)
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/PIDE/yxml.ML	Sun Sep 04 15:21:50 2011 +0200
@@ -0,0 +1,148 @@
+(*  Title:      Pure/PIDE/yxml.ML
+    Author:     Makarius
+
+Efficient text representation of XML trees using extra characters X
+and Y -- no escaping, may nest marked text verbatim.  Suitable for
+direct inlining into plain text.
+
+Markup <elem att="val" ...>...body...</elem> is encoded as:
+
+  X Y name Y att=val ... X
+  ...
+  body
+  ...
+  X Y X
+*)
+
+signature YXML =
+sig
+  val X: Symbol.symbol
+  val Y: Symbol.symbol
+  val embed_controls: string -> string
+  val detect: string -> bool
+  val output_markup: Markup.T -> string * string
+  val element: string -> XML.attributes -> string list -> string
+  val string_of_body: XML.body -> string
+  val string_of: XML.tree -> string
+  val parse_body: string -> XML.body
+  val parse: string -> XML.tree
+end;
+
+structure YXML: YXML =
+struct
+
+(** string representation **)
+
+(* idempotent recoding of certain low ASCII control characters *)
+
+fun pseudo_utf8 c =
+  if Symbol.is_ascii_control c
+  then chr 192 ^ chr (128 + ord c)
+  else c;
+
+fun embed_controls str =
+  if exists_string Symbol.is_ascii_control str
+  then translate_string pseudo_utf8 str
+  else str;
+
+
+(* markers *)
+
+val X = chr 5;
+val Y = chr 6;
+val XY = X ^ Y;
+val XYX = XY ^ X;
+
+val detect = exists_string (fn s => s = X orelse s = Y);
+
+
+(* output *)
+
+fun output_markup (markup as (name, atts)) =
+  if Markup.is_empty markup then Markup.no_output
+  else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX);
+
+fun element name atts body =
+  let val (pre, post) = output_markup (name, atts)
+  in pre ^ implode body ^ post end;
+
+fun string_of_body body =
+  let
+    fun attrib (a, x) =
+      Buffer.add Y #>
+      Buffer.add a #> Buffer.add "=" #> Buffer.add x;
+    fun tree (XML.Elem ((name, atts), ts)) =
+          Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #>
+          trees ts #>
+          Buffer.add XYX
+      | tree (XML.Text s) = Buffer.add s
+    and trees ts = fold tree ts;
+  in Buffer.empty |> trees body |> Buffer.content end;
+
+val string_of = string_of_body o single;
+
+
+
+(** efficient YXML parsing **)
+
+local
+
+(* splitting *)
+
+fun is_char s c = ord s = Char.ord c;
+
+val split_string =
+  Substring.full #>
+  Substring.tokens (is_char X) #>
+  map (Substring.fields (is_char Y) #> map Substring.string);
+
+
+(* structural errors *)
+
+fun err msg = raise Fail ("Malformed YXML encoding: " ^ msg);
+fun err_attribute () = err "bad attribute";
+fun err_element () = err "bad element";
+fun err_unbalanced "" = err "unbalanced element"
+  | err_unbalanced name = err ("unbalanced element " ^ quote name);
+
+
+(* stack operations *)
+
+fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending;
+
+fun push "" _ _ = err_element ()
+  | push name atts pending = ((name, atts), []) :: pending;
+
+fun pop ((("", _), _) :: _) = err_unbalanced ""
+  | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending;
+
+
+(* parsing *)
+
+fun parse_attrib s =
+  (case first_field "=" s of
+    NONE => err_attribute ()
+  | SOME ("", _) => err_attribute ()
+  | SOME att => att);
+
+fun parse_chunk ["", ""] = pop
+  | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts)
+  | parse_chunk txts = fold (add o XML.Text) txts;
+
+in
+
+fun parse_body source =
+  (case fold parse_chunk (split_string source) [(("", []), [])] of
+    [(("", _), result)] => rev result
+  | ((name, _), _) :: _ => err_unbalanced name);
+
+fun parse source =
+  (case parse_body source of
+    [result] => result
+  | [] => XML.Text ""
+  | _ => err "multiple results");
+
+end;
+
+end;
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/PIDE/yxml.scala	Sun Sep 04 15:21:50 2011 +0200
@@ -0,0 +1,135 @@
+/*  Title:      Pure/PIDE/yxml.scala
+    Author:     Makarius
+
+Efficient text representation of XML trees.  Suitable for direct
+inlining into plain text.
+*/
+
+package isabelle
+
+
+import scala.collection.mutable
+
+
+object YXML
+{
+  /* chunk markers */
+
+  val X = '\5'
+  val Y = '\6'
+  val X_string = X.toString
+  val Y_string = Y.toString
+
+  def detect(s: String): Boolean = s.exists(c => c == X || c == Y)
+
+
+  /* string representation */  // FIXME byte array version with pseudo-utf-8 (!?)
+
+  def string_of_body(body: XML.Body): String =
+  {
+    val s = new StringBuilder
+    def attrib(p: (String, String)) { s += Y; s ++= p._1; s += '='; s ++= p._2 }
+    def tree(t: XML.Tree): Unit =
+      t match {
+        case XML.Elem(Markup(name, atts), ts) =>
+          s += X; s += Y; s++= name; atts.foreach(attrib); s += X
+          ts.foreach(tree)
+          s += X; s += Y; s += X
+        case XML.Text(text) => s ++= text
+      }
+    body.foreach(tree)
+    s.toString
+  }
+
+  def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
+
+
+  /* parsing */
+
+  private def err(msg: String) = error("Malformed YXML: " + msg)
+  private def err_attribute() = err("bad attribute")
+  private def err_element() = err("bad element")
+  private def err_unbalanced(name: String) =
+    if (name == "") err("unbalanced element")
+    else err("unbalanced element " + quote(name))
+
+  private def parse_attrib(source: CharSequence) = {
+    val s = source.toString
+    val i = s.indexOf('=')
+    if (i <= 0) err_attribute()
+    (s.substring(0, i), s.substring(i + 1))
+  }
+
+
+  def parse_body(source: CharSequence): XML.Body =
+  {
+    /* stack operations */
+
+    def buffer(): mutable.ListBuffer[XML.Tree] = new mutable.ListBuffer[XML.Tree]
+    var stack: List[(Markup, mutable.ListBuffer[XML.Tree])] = List((Markup.Empty, buffer()))
+
+    def add(x: XML.Tree)
+    {
+      (stack: @unchecked) match { case ((_, body) :: _) => body += x }
+    }
+
+    def push(name: String, atts: XML.Attributes)
+    {
+      if (name == "") err_element()
+      else stack = (Markup(name, atts), buffer()) :: stack
+    }
+
+    def pop()
+    {
+      (stack: @unchecked) match {
+        case ((Markup.Empty, _) :: _) => err_unbalanced("")
+        case ((markup, body) :: pending) =>
+          stack = pending
+          add(XML.Elem(markup, body.toList))
+      }
+    }
+
+
+    /* parse chunks */
+
+    for (chunk <- Library.chunks(source, X) if chunk.length != 0) {
+      if (chunk.length == 1 && chunk.charAt(0) == Y) pop()
+      else {
+        Library.chunks(chunk, Y).toList match {
+          case ch :: name :: atts if ch.length == 0 =>
+            push(name.toString, atts.map(parse_attrib))
+          case txts => for (txt <- txts) add(XML.Text(txt.toString))
+        }
+      }
+    }
+    (stack: @unchecked) match {
+      case List((Markup.Empty, body)) => body.toList
+      case (Markup(name, _), _) :: _ => err_unbalanced(name)
+    }
+  }
+
+  def parse(source: CharSequence): XML.Tree =
+    parse_body(source) match {
+      case List(result) => result
+      case Nil => XML.Text("")
+      case _ => err("multiple results")
+    }
+
+
+  /* failsafe parsing */
+
+  private def markup_malformed(source: CharSequence) =
+    XML.elem(Markup.MALFORMED, List(XML.Text(source.toString)))
+
+  def parse_body_failsafe(source: CharSequence): XML.Body =
+  {
+    try { parse_body(source) }
+    catch { case ERROR(_) => List(markup_malformed(source)) }
+  }
+
+  def parse_failsafe(source: CharSequence): XML.Tree =
+  {
+    try { parse(source) }
+    catch { case ERROR(_) => markup_malformed(source) }
+  }
+}
--- a/src/Pure/ROOT.ML	Sun Sep 04 14:29:15 2011 +0200
+++ b/src/Pure/ROOT.ML	Sun Sep 04 15:21:50 2011 +0200
@@ -54,17 +54,18 @@
 use "General/linear_set.ML";
 use "General/buffer.ML";
 use "General/pretty.ML";
-use "General/xml.ML";
 use "General/path.ML";
 use "General/url.ML";
 use "General/file.ML";
-use "General/yxml.ML";
 use "General/long_name.ML";
 use "General/binding.ML";
 
 use "General/sha1.ML";
 if ML_System.is_polyml then use "General/sha1_polyml.ML" else ();
 
+use "PIDE/xml.ML";
+use "PIDE/yxml.ML";
+
 
 (* concurrency within the ML runtime *)
 
--- a/src/Pure/build-jars	Sun Sep 04 14:29:15 2011 +0200
+++ b/src/Pure/build-jars	Sun Sep 04 15:21:50 2011 +0200
@@ -24,8 +24,6 @@
   General/scan.scala
   General/sha1.scala
   General/symbol.scala
-  General/xml.scala
-  General/yxml.scala
   Isar/keyword.scala
   Isar/outer_syntax.scala
   Isar/parse.scala
@@ -36,6 +34,8 @@
   PIDE/isar_document.scala
   PIDE/markup_tree.scala
   PIDE/text.scala
+  PIDE/xml.scala
+  PIDE/yxml.scala
   System/cygwin.scala
   System/download.scala
   System/event_bus.scala