moved XML/YXML to src/Pure/PIDE;
authorwenzelm
Sun Sep 04 15:21:50 2011 +0200 (2011-09-04)
changeset 446980385292321a0
parent 44697 b99dfee76538
child 44699 5199ee17c7d7
moved XML/YXML to src/Pure/PIDE;
tuned comments;
src/Pure/General/xml.ML
src/Pure/General/xml.scala
src/Pure/General/yxml.ML
src/Pure/General/yxml.scala
src/Pure/IsaMakefile
src/Pure/PIDE/xml.ML
src/Pure/PIDE/xml.scala
src/Pure/PIDE/yxml.ML
src/Pure/PIDE/yxml.scala
src/Pure/ROOT.ML
src/Pure/build-jars
     1.1 --- a/src/Pure/General/xml.ML	Sun Sep 04 14:29:15 2011 +0200
     1.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.3 @@ -1,361 +0,0 @@
     1.4 -(*  Title:      Pure/General/xml.ML
     1.5 -    Author:     David Aspinall, Stefan Berghofer and Markus Wenzel
     1.6 -
     1.7 -Untyped XML trees.
     1.8 -*)
     1.9 -
    1.10 -signature XML_DATA_OPS =
    1.11 -sig
    1.12 -  type 'a A
    1.13 -  type 'a T
    1.14 -  type 'a V
    1.15 -  val int_atom: int A
    1.16 -  val bool_atom: bool A
    1.17 -  val unit_atom: unit A
    1.18 -  val properties: Properties.T T
    1.19 -  val string: string T
    1.20 -  val int: int T
    1.21 -  val bool: bool T
    1.22 -  val unit: unit T
    1.23 -  val pair: 'a T -> 'b T -> ('a * 'b) T
    1.24 -  val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
    1.25 -  val list: 'a T -> 'a list T
    1.26 -  val option: 'a T -> 'a option T
    1.27 -  val variant: 'a V list -> 'a T
    1.28 -end;
    1.29 -
    1.30 -signature XML =
    1.31 -sig
    1.32 -  type attributes = Properties.T
    1.33 -  datatype tree =
    1.34 -      Elem of Markup.T * tree list
    1.35 -    | Text of string
    1.36 -  type body = tree list
    1.37 -  val add_content: tree -> Buffer.T -> Buffer.T
    1.38 -  val content_of: body -> string
    1.39 -  val header: string
    1.40 -  val text: string -> string
    1.41 -  val element: string -> attributes -> string list -> string
    1.42 -  val output_markup: Markup.T -> Output.output * Output.output
    1.43 -  val string_of: tree -> string
    1.44 -  val pretty: int -> tree -> Pretty.T
    1.45 -  val output: tree -> TextIO.outstream -> unit
    1.46 -  val parse_comments: string list -> unit * string list
    1.47 -  val parse_string : string -> string option
    1.48 -  val parse_element: string list -> tree * string list
    1.49 -  val parse_document: string list -> tree * string list
    1.50 -  val parse: string -> tree
    1.51 -  exception XML_ATOM of string
    1.52 -  exception XML_BODY of body
    1.53 -  structure Encode: XML_DATA_OPS
    1.54 -  structure Decode: XML_DATA_OPS
    1.55 -end;
    1.56 -
    1.57 -structure XML: XML =
    1.58 -struct
    1.59 -
    1.60 -(** XML trees **)
    1.61 -
    1.62 -type attributes = Properties.T;
    1.63 -
    1.64 -datatype tree =
    1.65 -    Elem of Markup.T * tree list
    1.66 -  | Text of string;
    1.67 -
    1.68 -type body = tree list;
    1.69 -
    1.70 -fun add_content (Elem (_, ts)) = fold add_content ts
    1.71 -  | add_content (Text s) = Buffer.add s;
    1.72 -
    1.73 -fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content;
    1.74 -
    1.75 -
    1.76 -
    1.77 -(** string representation **)
    1.78 -
    1.79 -val header = "<?xml version=\"1.0\"?>\n";
    1.80 -
    1.81 -
    1.82 -(* escaped text *)
    1.83 -
    1.84 -fun decode "&lt;" = "<"
    1.85 -  | decode "&gt;" = ">"
    1.86 -  | decode "&amp;" = "&"
    1.87 -  | decode "&apos;" = "'"
    1.88 -  | decode "&quot;" = "\""
    1.89 -  | decode c = c;
    1.90 -
    1.91 -fun encode "<" = "&lt;"
    1.92 -  | encode ">" = "&gt;"
    1.93 -  | encode "&" = "&amp;"
    1.94 -  | encode "'" = "&apos;"
    1.95 -  | encode "\"" = "&quot;"
    1.96 -  | encode c = c;
    1.97 -
    1.98 -val text = translate_string encode;
    1.99 -
   1.100 -
   1.101 -(* elements *)
   1.102 -
   1.103 -fun elem name atts =
   1.104 -  space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);
   1.105 -
   1.106 -fun element name atts body =
   1.107 -  let val b = implode body in
   1.108 -    if b = "" then enclose "<" "/>" (elem name atts)
   1.109 -    else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name
   1.110 -  end;
   1.111 -
   1.112 -fun output_markup (markup as (name, atts)) =
   1.113 -  if Markup.is_empty markup then Markup.no_output
   1.114 -  else (enclose "<" ">" (elem name atts), enclose "</" ">" name);
   1.115 -
   1.116 -
   1.117 -(* output *)
   1.118 -
   1.119 -fun buffer_of depth tree =
   1.120 -  let
   1.121 -    fun traverse _ (Elem ((name, atts), [])) =
   1.122 -          Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"
   1.123 -      | traverse d (Elem ((name, atts), ts)) =
   1.124 -          Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>
   1.125 -          traverse_body d ts #>
   1.126 -          Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
   1.127 -      | traverse _ (Text s) = Buffer.add (text s)
   1.128 -    and traverse_body 0 _ = Buffer.add "..."
   1.129 -      | traverse_body d ts = fold (traverse (d - 1)) ts;
   1.130 -  in Buffer.empty |> traverse depth tree end;
   1.131 -
   1.132 -val string_of = Buffer.content o buffer_of ~1;
   1.133 -val output = Buffer.output o buffer_of ~1;
   1.134 -
   1.135 -fun pretty depth tree =
   1.136 -  Pretty.str (Buffer.content (buffer_of (Int.max (0, depth)) tree));
   1.137 -
   1.138 -
   1.139 -
   1.140 -(** XML parsing (slow) **)
   1.141 -
   1.142 -local
   1.143 -
   1.144 -fun err msg (xs, _) =
   1.145 -  fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
   1.146 -
   1.147 -fun ignored _ = [];
   1.148 -
   1.149 -val blanks = Scan.many Symbol.is_blank;
   1.150 -val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
   1.151 -val regular = Scan.one Symbol.is_regular;
   1.152 -fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);
   1.153 -
   1.154 -val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;
   1.155 -
   1.156 -val parse_cdata =
   1.157 -  Scan.this_string "<![CDATA[" |--
   1.158 -  (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|
   1.159 -  Scan.this_string "]]>";
   1.160 -
   1.161 -val parse_att =
   1.162 -  (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) --
   1.163 -  (($$ "\"" || $$ "'") :|-- (fn s =>
   1.164 -    (Scan.repeat (special || regular_except s) >> implode) --| $$ s));
   1.165 -
   1.166 -val parse_comment =
   1.167 -  Scan.this_string "<!--" --
   1.168 -  Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
   1.169 -  Scan.this_string "-->" >> ignored;
   1.170 -
   1.171 -val parse_processing_instruction =
   1.172 -  Scan.this_string "<?" --
   1.173 -  Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
   1.174 -  Scan.this_string "?>" >> ignored;
   1.175 -
   1.176 -val parse_doctype =
   1.177 -  Scan.this_string "<!DOCTYPE" --
   1.178 -  Scan.repeat (Scan.unless ($$ ">") regular) --
   1.179 -  $$ ">" >> ignored;
   1.180 -
   1.181 -val parse_misc =
   1.182 -  Scan.one Symbol.is_blank >> ignored ||
   1.183 -  parse_processing_instruction ||
   1.184 -  parse_comment;
   1.185 -
   1.186 -val parse_optional_text =
   1.187 -  Scan.optional (parse_chars >> (single o Text)) [];
   1.188 -
   1.189 -fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_";
   1.190 -fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = ".";
   1.191 -val parse_name = Scan.one name_start_char ::: Scan.many name_char;
   1.192 -
   1.193 -in
   1.194 -
   1.195 -val parse_comments =
   1.196 -  blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
   1.197 -
   1.198 -val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode;
   1.199 -
   1.200 -fun parse_content xs =
   1.201 -  (parse_optional_text @@@
   1.202 -    (Scan.repeat
   1.203 -      ((parse_element >> single ||
   1.204 -        parse_cdata >> (single o Text) ||
   1.205 -        parse_processing_instruction ||
   1.206 -        parse_comment)
   1.207 -      @@@ parse_optional_text) >> flat)) xs
   1.208 -
   1.209 -and parse_element xs =
   1.210 -  ($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :--
   1.211 -    (fn (name, _) =>
   1.212 -      !! (err (fn () => "Expected > or />"))
   1.213 -       ($$ "/" -- $$ ">" >> ignored ||
   1.214 -        $$ ">" |-- parse_content --|
   1.215 -          !! (err (fn () => "Expected </" ^ implode name ^ ">"))
   1.216 -              ($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">")))
   1.217 -    >> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs;
   1.218 -
   1.219 -val parse_document =
   1.220 -  (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)
   1.221 -  |-- parse_element;
   1.222 -
   1.223 -fun parse s =
   1.224 -  (case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element"))
   1.225 -      (blanks |-- parse_document --| blanks))) (raw_explode s) of
   1.226 -    (x, []) => x
   1.227 -  | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
   1.228 -
   1.229 -end;
   1.230 -
   1.231 -
   1.232 -
   1.233 -(** XML as data representation language **)
   1.234 -
   1.235 -exception XML_ATOM of string;
   1.236 -exception XML_BODY of tree list;
   1.237 -
   1.238 -
   1.239 -structure Encode =
   1.240 -struct
   1.241 -
   1.242 -type 'a A = 'a -> string;
   1.243 -type 'a T = 'a -> body;
   1.244 -type 'a V = 'a -> string list * body;
   1.245 -
   1.246 -
   1.247 -(* atomic values *)
   1.248 -
   1.249 -fun int_atom i = signed_string_of_int i;
   1.250 -
   1.251 -fun bool_atom false = "0"
   1.252 -  | bool_atom true = "1";
   1.253 -
   1.254 -fun unit_atom () = "";
   1.255 -
   1.256 -
   1.257 -(* structural nodes *)
   1.258 -
   1.259 -fun node ts = Elem ((":", []), ts);
   1.260 -
   1.261 -fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs;
   1.262 -
   1.263 -fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts);
   1.264 -
   1.265 -
   1.266 -(* representation of standard types *)
   1.267 -
   1.268 -fun properties props = [Elem ((":", props), [])];
   1.269 -
   1.270 -fun string "" = []
   1.271 -  | string s = [Text s];
   1.272 -
   1.273 -val int = string o int_atom;
   1.274 -
   1.275 -val bool = string o bool_atom;
   1.276 -
   1.277 -val unit = string o unit_atom;
   1.278 -
   1.279 -fun pair f g (x, y) = [node (f x), node (g y)];
   1.280 -
   1.281 -fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)];
   1.282 -
   1.283 -fun list f xs = map (node o f) xs;
   1.284 -
   1.285 -fun option _ NONE = []
   1.286 -  | option f (SOME x) = [node (f x)];
   1.287 -
   1.288 -fun variant fs x = [tagged (the (get_index (fn f => try f x) fs))];
   1.289 -
   1.290 -end;
   1.291 -
   1.292 -
   1.293 -structure Decode =
   1.294 -struct
   1.295 -
   1.296 -type 'a A = string -> 'a;
   1.297 -type 'a T = body -> 'a;
   1.298 -type 'a V = string list * body -> 'a;
   1.299 -
   1.300 -
   1.301 -(* atomic values *)
   1.302 -
   1.303 -fun int_atom s =
   1.304 -  Markup.parse_int s
   1.305 -    handle Fail _ => raise XML_ATOM s;
   1.306 -
   1.307 -fun bool_atom "0" = false
   1.308 -  | bool_atom "1" = true
   1.309 -  | bool_atom s = raise XML_ATOM s;
   1.310 -
   1.311 -fun unit_atom "" = ()
   1.312 -  | unit_atom s = raise XML_ATOM s;
   1.313 -
   1.314 -
   1.315 -(* structural nodes *)
   1.316 -
   1.317 -fun node (Elem ((":", []), ts)) = ts
   1.318 -  | node t = raise XML_BODY [t];
   1.319 -
   1.320 -fun vector atts =
   1.321 -  #1 (fold_map (fn (a, x) =>
   1.322 -        fn i => if int_atom a = i then (x, i + 1) else raise XML_ATOM a) atts 0);
   1.323 -
   1.324 -fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts))
   1.325 -  | tagged t = raise XML_BODY [t];
   1.326 -
   1.327 -
   1.328 -(* representation of standard types *)
   1.329 -
   1.330 -fun properties [Elem ((":", props), [])] = props
   1.331 -  | properties ts = raise XML_BODY ts;
   1.332 -
   1.333 -fun string [] = ""
   1.334 -  | string [Text s] = s
   1.335 -  | string ts = raise XML_BODY ts;
   1.336 -
   1.337 -val int = int_atom o string;
   1.338 -
   1.339 -val bool = bool_atom o string;
   1.340 -
   1.341 -val unit = unit_atom o string;
   1.342 -
   1.343 -fun pair f g [t1, t2] = (f (node t1), g (node t2))
   1.344 -  | pair _ _ ts = raise XML_BODY ts;
   1.345 -
   1.346 -fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3))
   1.347 -  | triple _ _ _ ts = raise XML_BODY ts;
   1.348 -
   1.349 -fun list f ts = map (f o node) ts;
   1.350 -
   1.351 -fun option _ [] = NONE
   1.352 -  | option f [t] = SOME (f (node t))
   1.353 -  | option _ ts = raise XML_BODY ts;
   1.354 -
   1.355 -fun variant fs [t] =
   1.356 -      let
   1.357 -        val (tag, (xs, ts)) = tagged t;
   1.358 -        val f = nth fs tag handle General.Subscript => raise XML_BODY [t];
   1.359 -      in f (xs, ts) end
   1.360 -  | variant _ ts = raise XML_BODY ts;
   1.361 -
   1.362 -end;
   1.363 -
   1.364 -end;
     2.1 --- a/src/Pure/General/xml.scala	Sun Sep 04 14:29:15 2011 +0200
     2.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.3 @@ -1,390 +0,0 @@
     2.4 -/*  Title:      Pure/General/xml.scala
     2.5 -    Author:     Makarius
     2.6 -
     2.7 -Untyped XML trees.
     2.8 -*/
     2.9 -
    2.10 -package isabelle
    2.11 -
    2.12 -import java.lang.System
    2.13 -import java.util.WeakHashMap
    2.14 -import java.lang.ref.WeakReference
    2.15 -import javax.xml.parsers.DocumentBuilderFactory
    2.16 -
    2.17 -import scala.actors.Actor._
    2.18 -import scala.collection.mutable
    2.19 -
    2.20 -
    2.21 -object XML
    2.22 -{
    2.23 -  /** XML trees **/
    2.24 -
    2.25 -  /* datatype representation */
    2.26 -
    2.27 -  type Attributes = Properties.T
    2.28 -
    2.29 -  sealed abstract class Tree { override def toString = string_of_tree(this) }
    2.30 -  case class Elem(markup: Markup, body: List[Tree]) extends Tree
    2.31 -  case class Text(content: String) extends Tree
    2.32 -
    2.33 -  def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body)
    2.34 -  def elem(name: String) = Elem(Markup(name, Nil), Nil)
    2.35 -
    2.36 -  type Body = List[Tree]
    2.37 -
    2.38 -
    2.39 -  /* string representation */
    2.40 -
    2.41 -  def string_of_body(body: Body): String =
    2.42 -  {
    2.43 -    val s = new StringBuilder
    2.44 -
    2.45 -    def text(txt: String) {
    2.46 -      if (txt == null) s ++= txt
    2.47 -      else {
    2.48 -        for (c <- txt.iterator) c match {
    2.49 -          case '<' => s ++= "&lt;"
    2.50 -          case '>' => s ++= "&gt;"
    2.51 -          case '&' => s ++= "&amp;"
    2.52 -          case '"' => s ++= "&quot;"
    2.53 -          case '\'' => s ++= "&apos;"
    2.54 -          case _ => s += c
    2.55 -        }
    2.56 -      }
    2.57 -    }
    2.58 -    def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" }
    2.59 -    def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) }
    2.60 -    def tree(t: Tree): Unit =
    2.61 -      t match {
    2.62 -        case Elem(markup, Nil) =>
    2.63 -          s ++= "<"; elem(markup); s ++= "/>"
    2.64 -        case Elem(markup, ts) =>
    2.65 -          s ++= "<"; elem(markup); s ++= ">"
    2.66 -          ts.foreach(tree)
    2.67 -          s ++= "</"; s ++= markup.name; s ++= ">"
    2.68 -        case Text(txt) => text(txt)
    2.69 -      }
    2.70 -    body.foreach(tree)
    2.71 -    s.toString
    2.72 -  }
    2.73 -
    2.74 -  def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
    2.75 -
    2.76 -
    2.77 -  /* text content */
    2.78 -
    2.79 -  def content_stream(tree: Tree): Stream[String] =
    2.80 -    tree match {
    2.81 -      case Elem(_, body) => content_stream(body)
    2.82 -      case Text(content) => Stream(content)
    2.83 -    }
    2.84 -  def content_stream(body: Body): Stream[String] =
    2.85 -    body.toStream.flatten(content_stream(_))
    2.86 -
    2.87 -  def content(tree: Tree): Iterator[String] = content_stream(tree).iterator
    2.88 -  def content(body: Body): Iterator[String] = content_stream(body).iterator
    2.89 -
    2.90 -
    2.91 -  /* pipe-lined cache for partial sharing */
    2.92 -
    2.93 -  class Cache(initial_size: Int = 131071, max_string: Int = 100)
    2.94 -  {
    2.95 -    private val cache_actor = actor
    2.96 -    {
    2.97 -      val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)
    2.98 -
    2.99 -      def lookup[A](x: A): Option[A] =
   2.100 -      {
   2.101 -        val ref = table.get(x)
   2.102 -        if (ref == null) None
   2.103 -        else {
   2.104 -          val y = ref.asInstanceOf[WeakReference[A]].get
   2.105 -          if (y == null) None
   2.106 -          else Some(y)
   2.107 -        }
   2.108 -      }
   2.109 -      def store[A](x: A): A =
   2.110 -      {
   2.111 -        table.put(x, new WeakReference[Any](x))
   2.112 -        x
   2.113 -      }
   2.114 -
   2.115 -      def trim_bytes(s: String): String = new String(s.toCharArray)
   2.116 -
   2.117 -      def cache_string(x: String): String =
   2.118 -        lookup(x) match {
   2.119 -          case Some(y) => y
   2.120 -          case None =>
   2.121 -            val z = trim_bytes(x)
   2.122 -            if (z.length > max_string) z else store(z)
   2.123 -        }
   2.124 -      def cache_props(x: Properties.T): Properties.T =
   2.125 -        if (x.isEmpty) x
   2.126 -        else
   2.127 -          lookup(x) match {
   2.128 -            case Some(y) => y
   2.129 -            case None => store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2))))
   2.130 -          }
   2.131 -      def cache_markup(x: Markup): Markup =
   2.132 -        lookup(x) match {
   2.133 -          case Some(y) => y
   2.134 -          case None =>
   2.135 -            x match {
   2.136 -              case Markup(name, props) =>
   2.137 -                store(Markup(cache_string(name), cache_props(props)))
   2.138 -            }
   2.139 -        }
   2.140 -      def cache_tree(x: XML.Tree): XML.Tree =
   2.141 -        lookup(x) match {
   2.142 -          case Some(y) => y
   2.143 -          case None =>
   2.144 -            x match {
   2.145 -              case XML.Elem(markup, body) =>
   2.146 -                store(XML.Elem(cache_markup(markup), cache_body(body)))
   2.147 -              case XML.Text(text) => store(XML.Text(cache_string(text)))
   2.148 -            }
   2.149 -        }
   2.150 -      def cache_body(x: XML.Body): XML.Body =
   2.151 -        if (x.isEmpty) x
   2.152 -        else
   2.153 -          lookup(x) match {
   2.154 -            case Some(y) => y
   2.155 -            case None => x.map(cache_tree(_))
   2.156 -          }
   2.157 -
   2.158 -      // main loop
   2.159 -      loop {
   2.160 -        react {
   2.161 -          case Cache_String(x, f) => f(cache_string(x))
   2.162 -          case Cache_Markup(x, f) => f(cache_markup(x))
   2.163 -          case Cache_Tree(x, f) => f(cache_tree(x))
   2.164 -          case Cache_Body(x, f) => f(cache_body(x))
   2.165 -          case Cache_Ignore(x, f) => f(x)
   2.166 -          case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad)
   2.167 -        }
   2.168 -      }
   2.169 -    }
   2.170 -
   2.171 -    private case class Cache_String(x: String, f: String => Unit)
   2.172 -    private case class Cache_Markup(x: Markup, f: Markup => Unit)
   2.173 -    private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit)
   2.174 -    private case class Cache_Body(x: XML.Body, f: XML.Body => Unit)
   2.175 -    private case class Cache_Ignore[A](x: A, f: A => Unit)
   2.176 -
   2.177 -    // main methods
   2.178 -    def cache_string(x: String)(f: String => Unit) { cache_actor ! Cache_String(x, f) }
   2.179 -    def cache_markup(x: Markup)(f: Markup => Unit) { cache_actor ! Cache_Markup(x, f) }
   2.180 -    def cache_tree(x: XML.Tree)(f: XML.Tree => Unit) { cache_actor ! Cache_Tree(x, f) }
   2.181 -    def cache_body(x: XML.Body)(f: XML.Body => Unit) { cache_actor ! Cache_Body(x, f) }
   2.182 -    def cache_ignore[A](x: A)(f: A => Unit) { cache_actor ! Cache_Ignore(x, f) }
   2.183 -  }
   2.184 -
   2.185 -
   2.186 -
   2.187 -  /** document object model (W3C DOM) **/
   2.188 -
   2.189 -  def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
   2.190 -    node.getUserData(Markup.Data.name) match {
   2.191 -      case tree: XML.Tree => Some(tree)
   2.192 -      case _ => None
   2.193 -    }
   2.194 -
   2.195 -  def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
   2.196 -  {
   2.197 -    def DOM(tr: Tree): org.w3c.dom.Node = tr match {
   2.198 -      case Elem(Markup.Data, List(data, t)) =>
   2.199 -        val node = DOM(t)
   2.200 -        node.setUserData(Markup.Data.name, data, null)
   2.201 -        node
   2.202 -      case Elem(Markup(name, atts), ts) =>
   2.203 -        if (name == Markup.Data.name)
   2.204 -          error("Malformed data element: " + tr.toString)
   2.205 -        val node = doc.createElement(name)
   2.206 -        for ((name, value) <- atts) node.setAttribute(name, value)
   2.207 -        for (t <- ts) node.appendChild(DOM(t))
   2.208 -        node
   2.209 -      case Text(txt) => doc.createTextNode(txt)
   2.210 -    }
   2.211 -    DOM(tree)
   2.212 -  }
   2.213 -
   2.214 -
   2.215 -
   2.216 -  /** XML as data representation language **/
   2.217 -
   2.218 -  class XML_Atom(s: String) extends Exception(s)
   2.219 -  class XML_Body(body: XML.Body) extends Exception
   2.220 -
   2.221 -  object Encode
   2.222 -  {
   2.223 -    type T[A] = A => XML.Body
   2.224 -
   2.225 -
   2.226 -    /* atomic values */
   2.227 -
   2.228 -    def long_atom(i: Long): String = i.toString
   2.229 -
   2.230 -    def int_atom(i: Int): String = i.toString
   2.231 -
   2.232 -    def bool_atom(b: Boolean): String = if (b) "1" else "0"
   2.233 -
   2.234 -    def unit_atom(u: Unit) = ""
   2.235 -
   2.236 -
   2.237 -    /* structural nodes */
   2.238 -
   2.239 -    private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)
   2.240 -
   2.241 -    private def vector(xs: List[String]): XML.Attributes =
   2.242 -      xs.zipWithIndex.map(p => (int_atom(p._2), p._1))
   2.243 -
   2.244 -    private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
   2.245 -      XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2)
   2.246 -
   2.247 -
   2.248 -    /* representation of standard types */
   2.249 -
   2.250 -    val properties: T[Properties.T] =
   2.251 -      (props => List(XML.Elem(Markup(":", props), Nil)))
   2.252 -
   2.253 -    val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s)))
   2.254 -
   2.255 -    val long: T[Long] = (x => string(long_atom(x)))
   2.256 -
   2.257 -    val int: T[Int] = (x => string(int_atom(x)))
   2.258 -
   2.259 -    val bool: T[Boolean] = (x => string(bool_atom(x)))
   2.260 -
   2.261 -    val unit: T[Unit] = (x => string(unit_atom(x)))
   2.262 -
   2.263 -    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
   2.264 -      (x => List(node(f(x._1)), node(g(x._2))))
   2.265 -
   2.266 -    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
   2.267 -      (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3))))
   2.268 -
   2.269 -    def list[A](f: T[A]): T[List[A]] =
   2.270 -      (xs => xs.map((x: A) => node(f(x))))
   2.271 -
   2.272 -    def option[A](f: T[A]): T[Option[A]] =
   2.273 -    {
   2.274 -      case None => Nil
   2.275 -      case Some(x) => List(node(f(x)))
   2.276 -    }
   2.277 -
   2.278 -    def variant[A](fs: List[PartialFunction[A, (List[String], XML.Body)]]): T[A] =
   2.279 -    {
   2.280 -      case x =>
   2.281 -        val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get
   2.282 -        List(tagged(tag, f(x)))
   2.283 -    }
   2.284 -  }
   2.285 -
   2.286 -  object Decode
   2.287 -  {
   2.288 -    type T[A] = XML.Body => A
   2.289 -    type V[A] = (List[String], XML.Body) => A
   2.290 -
   2.291 -
   2.292 -    /* atomic values */
   2.293 -
   2.294 -    def long_atom(s: String): Long =
   2.295 -      try { java.lang.Long.parseLong(s) }
   2.296 -      catch { case e: NumberFormatException => throw new XML_Atom(s) }
   2.297 -
   2.298 -    def int_atom(s: String): Int =
   2.299 -      try { Integer.parseInt(s) }
   2.300 -      catch { case e: NumberFormatException => throw new XML_Atom(s) }
   2.301 -
   2.302 -    def bool_atom(s: String): Boolean =
   2.303 -      if (s == "1") true
   2.304 -      else if (s == "0") false
   2.305 -      else throw new XML_Atom(s)
   2.306 -
   2.307 -    def unit_atom(s: String): Unit =
   2.308 -      if (s == "") () else throw new XML_Atom(s)
   2.309 -
   2.310 -
   2.311 -    /* structural nodes */
   2.312 -
   2.313 -    private def node(t: XML.Tree): XML.Body =
   2.314 -      t match {
   2.315 -        case XML.Elem(Markup(":", Nil), ts) => ts
   2.316 -        case _ => throw new XML_Body(List(t))
   2.317 -      }
   2.318 -
   2.319 -    private def vector(atts: XML.Attributes): List[String] =
   2.320 -    {
   2.321 -      val xs = new mutable.ListBuffer[String]
   2.322 -      var i = 0
   2.323 -      for ((a, x) <- atts) {
   2.324 -        if (int_atom(a) == i) { xs += x; i = i + 1 }
   2.325 -        else throw new XML_Atom(a)
   2.326 -      }
   2.327 -      xs.toList
   2.328 -    }
   2.329 -
   2.330 -    private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
   2.331 -      t match {
   2.332 -        case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts))
   2.333 -        case _ => throw new XML_Body(List(t))
   2.334 -      }
   2.335 -
   2.336 -
   2.337 -    /* representation of standard types */
   2.338 -
   2.339 -    val properties: T[Properties.T] =
   2.340 -    {
   2.341 -      case List(XML.Elem(Markup(":", props), Nil)) => props
   2.342 -      case ts => throw new XML_Body(ts)
   2.343 -    }
   2.344 -
   2.345 -    val string: T[String] =
   2.346 -    {
   2.347 -      case Nil => ""
   2.348 -      case List(XML.Text(s)) => s
   2.349 -      case ts => throw new XML_Body(ts)
   2.350 -    }
   2.351 -
   2.352 -    val long: T[Long] = (x => long_atom(string(x)))
   2.353 -
   2.354 -    val int: T[Int] = (x => int_atom(string(x)))
   2.355 -
   2.356 -    val bool: T[Boolean] = (x => bool_atom(string(x)))
   2.357 -
   2.358 -    val unit: T[Unit] = (x => unit_atom(string(x)))
   2.359 -
   2.360 -    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
   2.361 -    {
   2.362 -      case List(t1, t2) => (f(node(t1)), g(node(t2)))
   2.363 -      case ts => throw new XML_Body(ts)
   2.364 -    }
   2.365 -
   2.366 -    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
   2.367 -    {
   2.368 -      case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3)))
   2.369 -      case ts => throw new XML_Body(ts)
   2.370 -    }
   2.371 -
   2.372 -    def list[A](f: T[A]): T[List[A]] =
   2.373 -      (ts => ts.map(t => f(node(t))))
   2.374 -
   2.375 -    def option[A](f: T[A]): T[Option[A]] =
   2.376 -    {
   2.377 -      case Nil => None
   2.378 -      case List(t) => Some(f(node(t)))
   2.379 -      case ts => throw new XML_Body(ts)
   2.380 -    }
   2.381 -
   2.382 -    def variant[A](fs: List[V[A]]): T[A] =
   2.383 -    {
   2.384 -      case List(t) =>
   2.385 -        val (tag, (xs, ts)) = tagged(t)
   2.386 -        val f =
   2.387 -          try { fs(tag) }
   2.388 -          catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) }
   2.389 -        f(xs, ts)
   2.390 -      case ts => throw new XML_Body(ts)
   2.391 -    }
   2.392 -  }
   2.393 -}
     3.1 --- a/src/Pure/General/yxml.ML	Sun Sep 04 14:29:15 2011 +0200
     3.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.3 @@ -1,147 +0,0 @@
     3.4 -(*  Title:      Pure/General/yxml.ML
     3.5 -    Author:     Makarius
     3.6 -
     3.7 -Efficient text representation of XML trees using extra characters X
     3.8 -and Y -- no escaping, may nest marked text verbatim.
     3.9 -
    3.10 -Markup <elem att="val" ...>...body...</elem> is encoded as:
    3.11 -
    3.12 -  X Y name Y att=val ... X
    3.13 -  ...
    3.14 -  body
    3.15 -  ...
    3.16 -  X Y X
    3.17 -*)
    3.18 -
    3.19 -signature YXML =
    3.20 -sig
    3.21 -  val X: Symbol.symbol
    3.22 -  val Y: Symbol.symbol
    3.23 -  val embed_controls: string -> string
    3.24 -  val detect: string -> bool
    3.25 -  val output_markup: Markup.T -> string * string
    3.26 -  val element: string -> XML.attributes -> string list -> string
    3.27 -  val string_of_body: XML.body -> string
    3.28 -  val string_of: XML.tree -> string
    3.29 -  val parse_body: string -> XML.body
    3.30 -  val parse: string -> XML.tree
    3.31 -end;
    3.32 -
    3.33 -structure YXML: YXML =
    3.34 -struct
    3.35 -
    3.36 -(** string representation **)
    3.37 -
    3.38 -(* idempotent recoding of certain low ASCII control characters *)
    3.39 -
    3.40 -fun pseudo_utf8 c =
    3.41 -  if Symbol.is_ascii_control c
    3.42 -  then chr 192 ^ chr (128 + ord c)
    3.43 -  else c;
    3.44 -
    3.45 -fun embed_controls str =
    3.46 -  if exists_string Symbol.is_ascii_control str
    3.47 -  then translate_string pseudo_utf8 str
    3.48 -  else str;
    3.49 -
    3.50 -
    3.51 -(* markers *)
    3.52 -
    3.53 -val X = chr 5;
    3.54 -val Y = chr 6;
    3.55 -val XY = X ^ Y;
    3.56 -val XYX = XY ^ X;
    3.57 -
    3.58 -val detect = exists_string (fn s => s = X orelse s = Y);
    3.59 -
    3.60 -
    3.61 -(* output *)
    3.62 -
    3.63 -fun output_markup (markup as (name, atts)) =
    3.64 -  if Markup.is_empty markup then Markup.no_output
    3.65 -  else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX);
    3.66 -
    3.67 -fun element name atts body =
    3.68 -  let val (pre, post) = output_markup (name, atts)
    3.69 -  in pre ^ implode body ^ post end;
    3.70 -
    3.71 -fun string_of_body body =
    3.72 -  let
    3.73 -    fun attrib (a, x) =
    3.74 -      Buffer.add Y #>
    3.75 -      Buffer.add a #> Buffer.add "=" #> Buffer.add x;
    3.76 -    fun tree (XML.Elem ((name, atts), ts)) =
    3.77 -          Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #>
    3.78 -          trees ts #>
    3.79 -          Buffer.add XYX
    3.80 -      | tree (XML.Text s) = Buffer.add s
    3.81 -    and trees ts = fold tree ts;
    3.82 -  in Buffer.empty |> trees body |> Buffer.content end;
    3.83 -
    3.84 -val string_of = string_of_body o single;
    3.85 -
    3.86 -
    3.87 -
    3.88 -(** efficient YXML parsing **)
    3.89 -
    3.90 -local
    3.91 -
    3.92 -(* splitting *)
    3.93 -
    3.94 -fun is_char s c = ord s = Char.ord c;
    3.95 -
    3.96 -val split_string =
    3.97 -  Substring.full #>
    3.98 -  Substring.tokens (is_char X) #>
    3.99 -  map (Substring.fields (is_char Y) #> map Substring.string);
   3.100 -
   3.101 -
   3.102 -(* structural errors *)
   3.103 -
   3.104 -fun err msg = raise Fail ("Malformed YXML encoding: " ^ msg);
   3.105 -fun err_attribute () = err "bad attribute";
   3.106 -fun err_element () = err "bad element";
   3.107 -fun err_unbalanced "" = err "unbalanced element"
   3.108 -  | err_unbalanced name = err ("unbalanced element " ^ quote name);
   3.109 -
   3.110 -
   3.111 -(* stack operations *)
   3.112 -
   3.113 -fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending;
   3.114 -
   3.115 -fun push "" _ _ = err_element ()
   3.116 -  | push name atts pending = ((name, atts), []) :: pending;
   3.117 -
   3.118 -fun pop ((("", _), _) :: _) = err_unbalanced ""
   3.119 -  | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending;
   3.120 -
   3.121 -
   3.122 -(* parsing *)
   3.123 -
   3.124 -fun parse_attrib s =
   3.125 -  (case first_field "=" s of
   3.126 -    NONE => err_attribute ()
   3.127 -  | SOME ("", _) => err_attribute ()
   3.128 -  | SOME att => att);
   3.129 -
   3.130 -fun parse_chunk ["", ""] = pop
   3.131 -  | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts)
   3.132 -  | parse_chunk txts = fold (add o XML.Text) txts;
   3.133 -
   3.134 -in
   3.135 -
   3.136 -fun parse_body source =
   3.137 -  (case fold parse_chunk (split_string source) [(("", []), [])] of
   3.138 -    [(("", _), result)] => rev result
   3.139 -  | ((name, _), _) :: _ => err_unbalanced name);
   3.140 -
   3.141 -fun parse source =
   3.142 -  (case parse_body source of
   3.143 -    [result] => result
   3.144 -  | [] => XML.Text ""
   3.145 -  | _ => err "multiple results");
   3.146 -
   3.147 -end;
   3.148 -
   3.149 -end;
   3.150 -
     4.1 --- a/src/Pure/General/yxml.scala	Sun Sep 04 14:29:15 2011 +0200
     4.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.3 @@ -1,134 +0,0 @@
     4.4 -/*  Title:      Pure/General/yxml.scala
     4.5 -    Author:     Makarius
     4.6 -
     4.7 -Efficient text representation of XML trees.
     4.8 -*/
     4.9 -
    4.10 -package isabelle
    4.11 -
    4.12 -
    4.13 -import scala.collection.mutable
    4.14 -
    4.15 -
    4.16 -object YXML
    4.17 -{
    4.18 -  /* chunk markers */
    4.19 -
    4.20 -  val X = '\5'
    4.21 -  val Y = '\6'
    4.22 -  val X_string = X.toString
    4.23 -  val Y_string = Y.toString
    4.24 -
    4.25 -  def detect(s: String): Boolean = s.exists(c => c == X || c == Y)
    4.26 -
    4.27 -
    4.28 -  /* string representation */  // FIXME byte array version with pseudo-utf-8 (!?)
    4.29 -
    4.30 -  def string_of_body(body: XML.Body): String =
    4.31 -  {
    4.32 -    val s = new StringBuilder
    4.33 -    def attrib(p: (String, String)) { s += Y; s ++= p._1; s += '='; s ++= p._2 }
    4.34 -    def tree(t: XML.Tree): Unit =
    4.35 -      t match {
    4.36 -        case XML.Elem(Markup(name, atts), ts) =>
    4.37 -          s += X; s += Y; s++= name; atts.foreach(attrib); s += X
    4.38 -          ts.foreach(tree)
    4.39 -          s += X; s += Y; s += X
    4.40 -        case XML.Text(text) => s ++= text
    4.41 -      }
    4.42 -    body.foreach(tree)
    4.43 -    s.toString
    4.44 -  }
    4.45 -
    4.46 -  def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
    4.47 -
    4.48 -
    4.49 -  /* parsing */
    4.50 -
    4.51 -  private def err(msg: String) = error("Malformed YXML: " + msg)
    4.52 -  private def err_attribute() = err("bad attribute")
    4.53 -  private def err_element() = err("bad element")
    4.54 -  private def err_unbalanced(name: String) =
    4.55 -    if (name == "") err("unbalanced element")
    4.56 -    else err("unbalanced element " + quote(name))
    4.57 -
    4.58 -  private def parse_attrib(source: CharSequence) = {
    4.59 -    val s = source.toString
    4.60 -    val i = s.indexOf('=')
    4.61 -    if (i <= 0) err_attribute()
    4.62 -    (s.substring(0, i), s.substring(i + 1))
    4.63 -  }
    4.64 -
    4.65 -
    4.66 -  def parse_body(source: CharSequence): XML.Body =
    4.67 -  {
    4.68 -    /* stack operations */
    4.69 -
    4.70 -    def buffer(): mutable.ListBuffer[XML.Tree] = new mutable.ListBuffer[XML.Tree]
    4.71 -    var stack: List[(Markup, mutable.ListBuffer[XML.Tree])] = List((Markup.Empty, buffer()))
    4.72 -
    4.73 -    def add(x: XML.Tree)
    4.74 -    {
    4.75 -      (stack: @unchecked) match { case ((_, body) :: _) => body += x }
    4.76 -    }
    4.77 -
    4.78 -    def push(name: String, atts: XML.Attributes)
    4.79 -    {
    4.80 -      if (name == "") err_element()
    4.81 -      else stack = (Markup(name, atts), buffer()) :: stack
    4.82 -    }
    4.83 -
    4.84 -    def pop()
    4.85 -    {
    4.86 -      (stack: @unchecked) match {
    4.87 -        case ((Markup.Empty, _) :: _) => err_unbalanced("")
    4.88 -        case ((markup, body) :: pending) =>
    4.89 -          stack = pending
    4.90 -          add(XML.Elem(markup, body.toList))
    4.91 -      }
    4.92 -    }
    4.93 -
    4.94 -
    4.95 -    /* parse chunks */
    4.96 -
    4.97 -    for (chunk <- Library.chunks(source, X) if chunk.length != 0) {
    4.98 -      if (chunk.length == 1 && chunk.charAt(0) == Y) pop()
    4.99 -      else {
   4.100 -        Library.chunks(chunk, Y).toList match {
   4.101 -          case ch :: name :: atts if ch.length == 0 =>
   4.102 -            push(name.toString, atts.map(parse_attrib))
   4.103 -          case txts => for (txt <- txts) add(XML.Text(txt.toString))
   4.104 -        }
   4.105 -      }
   4.106 -    }
   4.107 -    (stack: @unchecked) match {
   4.108 -      case List((Markup.Empty, body)) => body.toList
   4.109 -      case (Markup(name, _), _) :: _ => err_unbalanced(name)
   4.110 -    }
   4.111 -  }
   4.112 -
   4.113 -  def parse(source: CharSequence): XML.Tree =
   4.114 -    parse_body(source) match {
   4.115 -      case List(result) => result
   4.116 -      case Nil => XML.Text("")
   4.117 -      case _ => err("multiple results")
   4.118 -    }
   4.119 -
   4.120 -
   4.121 -  /* failsafe parsing */
   4.122 -
   4.123 -  private def markup_malformed(source: CharSequence) =
   4.124 -    XML.elem(Markup.MALFORMED, List(XML.Text(source.toString)))
   4.125 -
   4.126 -  def parse_body_failsafe(source: CharSequence): XML.Body =
   4.127 -  {
   4.128 -    try { parse_body(source) }
   4.129 -    catch { case ERROR(_) => List(markup_malformed(source)) }
   4.130 -  }
   4.131 -
   4.132 -  def parse_failsafe(source: CharSequence): XML.Tree =
   4.133 -  {
   4.134 -    try { parse(source) }
   4.135 -    catch { case ERROR(_) => markup_malformed(source) }
   4.136 -  }
   4.137 -}
     5.1 --- a/src/Pure/IsaMakefile	Sun Sep 04 14:29:15 2011 +0200
     5.2 +++ b/src/Pure/IsaMakefile	Sun Sep 04 15:21:50 2011 +0200
     5.3 @@ -105,8 +105,6 @@
     5.4    General/table.ML					\
     5.5    General/timing.ML					\
     5.6    General/url.ML					\
     5.7 -  General/xml.ML					\
     5.8 -  General/yxml.ML					\
     5.9    Isar/args.ML						\
    5.10    Isar/attrib.ML					\
    5.11    Isar/auto_bind.ML					\
    5.12 @@ -158,6 +156,8 @@
    5.13    ML/ml_thms.ML						\
    5.14    PIDE/document.ML					\
    5.15    PIDE/isar_document.ML					\
    5.16 +  PIDE/xml.ML						\
    5.17 +  PIDE/yxml.ML						\
    5.18    Proof/extraction.ML					\
    5.19    Proof/proof_checker.ML				\
    5.20    Proof/proof_rewrite_rules.ML				\
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/src/Pure/PIDE/xml.ML	Sun Sep 04 15:21:50 2011 +0200
     6.3 @@ -0,0 +1,363 @@
     6.4 +(*  Title:      Pure/PIDE/xml.ML
     6.5 +    Author:     David Aspinall
     6.6 +    Author:     Stefan Berghofer
     6.7 +    Author:     Makarius
     6.8 +
     6.9 +Untyped XML trees and basic data representation.
    6.10 +*)
    6.11 +
    6.12 +signature XML_DATA_OPS =
    6.13 +sig
    6.14 +  type 'a A
    6.15 +  type 'a T
    6.16 +  type 'a V
    6.17 +  val int_atom: int A
    6.18 +  val bool_atom: bool A
    6.19 +  val unit_atom: unit A
    6.20 +  val properties: Properties.T T
    6.21 +  val string: string T
    6.22 +  val int: int T
    6.23 +  val bool: bool T
    6.24 +  val unit: unit T
    6.25 +  val pair: 'a T -> 'b T -> ('a * 'b) T
    6.26 +  val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
    6.27 +  val list: 'a T -> 'a list T
    6.28 +  val option: 'a T -> 'a option T
    6.29 +  val variant: 'a V list -> 'a T
    6.30 +end;
    6.31 +
    6.32 +signature XML =
    6.33 +sig
    6.34 +  type attributes = Properties.T
    6.35 +  datatype tree =
    6.36 +      Elem of Markup.T * tree list
    6.37 +    | Text of string
    6.38 +  type body = tree list
    6.39 +  val add_content: tree -> Buffer.T -> Buffer.T
    6.40 +  val content_of: body -> string
    6.41 +  val header: string
    6.42 +  val text: string -> string
    6.43 +  val element: string -> attributes -> string list -> string
    6.44 +  val output_markup: Markup.T -> Output.output * Output.output
    6.45 +  val string_of: tree -> string
    6.46 +  val pretty: int -> tree -> Pretty.T
    6.47 +  val output: tree -> TextIO.outstream -> unit
    6.48 +  val parse_comments: string list -> unit * string list
    6.49 +  val parse_string : string -> string option
    6.50 +  val parse_element: string list -> tree * string list
    6.51 +  val parse_document: string list -> tree * string list
    6.52 +  val parse: string -> tree
    6.53 +  exception XML_ATOM of string
    6.54 +  exception XML_BODY of body
    6.55 +  structure Encode: XML_DATA_OPS
    6.56 +  structure Decode: XML_DATA_OPS
    6.57 +end;
    6.58 +
    6.59 +structure XML: XML =
    6.60 +struct
    6.61 +
    6.62 +(** XML trees **)
    6.63 +
    6.64 +type attributes = Properties.T;
    6.65 +
    6.66 +datatype tree =
    6.67 +    Elem of Markup.T * tree list
    6.68 +  | Text of string;
    6.69 +
    6.70 +type body = tree list;
    6.71 +
    6.72 +fun add_content (Elem (_, ts)) = fold add_content ts
    6.73 +  | add_content (Text s) = Buffer.add s;
    6.74 +
    6.75 +fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content;
    6.76 +
    6.77 +
    6.78 +
    6.79 +(** string representation **)
    6.80 +
    6.81 +val header = "<?xml version=\"1.0\"?>\n";
    6.82 +
    6.83 +
    6.84 +(* escaped text *)
    6.85 +
    6.86 +fun decode "&lt;" = "<"
    6.87 +  | decode "&gt;" = ">"
    6.88 +  | decode "&amp;" = "&"
    6.89 +  | decode "&apos;" = "'"
    6.90 +  | decode "&quot;" = "\""
    6.91 +  | decode c = c;
    6.92 +
    6.93 +fun encode "<" = "&lt;"
    6.94 +  | encode ">" = "&gt;"
    6.95 +  | encode "&" = "&amp;"
    6.96 +  | encode "'" = "&apos;"
    6.97 +  | encode "\"" = "&quot;"
    6.98 +  | encode c = c;
    6.99 +
   6.100 +val text = translate_string encode;
   6.101 +
   6.102 +
   6.103 +(* elements *)
   6.104 +
   6.105 +fun elem name atts =
   6.106 +  space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);
   6.107 +
   6.108 +fun element name atts body =
   6.109 +  let val b = implode body in
   6.110 +    if b = "" then enclose "<" "/>" (elem name atts)
   6.111 +    else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name
   6.112 +  end;
   6.113 +
   6.114 +fun output_markup (markup as (name, atts)) =
   6.115 +  if Markup.is_empty markup then Markup.no_output
   6.116 +  else (enclose "<" ">" (elem name atts), enclose "</" ">" name);
   6.117 +
   6.118 +
   6.119 +(* output *)
   6.120 +
   6.121 +fun buffer_of depth tree =
   6.122 +  let
   6.123 +    fun traverse _ (Elem ((name, atts), [])) =
   6.124 +          Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"
   6.125 +      | traverse d (Elem ((name, atts), ts)) =
   6.126 +          Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>
   6.127 +          traverse_body d ts #>
   6.128 +          Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
   6.129 +      | traverse _ (Text s) = Buffer.add (text s)
   6.130 +    and traverse_body 0 _ = Buffer.add "..."
   6.131 +      | traverse_body d ts = fold (traverse (d - 1)) ts;
   6.132 +  in Buffer.empty |> traverse depth tree end;
   6.133 +
   6.134 +val string_of = Buffer.content o buffer_of ~1;
   6.135 +val output = Buffer.output o buffer_of ~1;
   6.136 +
   6.137 +fun pretty depth tree =
   6.138 +  Pretty.str (Buffer.content (buffer_of (Int.max (0, depth)) tree));
   6.139 +
   6.140 +
   6.141 +
   6.142 +(** XML parsing **)
   6.143 +
   6.144 +local
   6.145 +
   6.146 +fun err msg (xs, _) =
   6.147 +  fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
   6.148 +
   6.149 +fun ignored _ = [];
   6.150 +
   6.151 +val blanks = Scan.many Symbol.is_blank;
   6.152 +val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
   6.153 +val regular = Scan.one Symbol.is_regular;
   6.154 +fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);
   6.155 +
   6.156 +val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;
   6.157 +
   6.158 +val parse_cdata =
   6.159 +  Scan.this_string "<![CDATA[" |--
   6.160 +  (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|
   6.161 +  Scan.this_string "]]>";
   6.162 +
   6.163 +val parse_att =
   6.164 +  (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) --
   6.165 +  (($$ "\"" || $$ "'") :|-- (fn s =>
   6.166 +    (Scan.repeat (special || regular_except s) >> implode) --| $$ s));
   6.167 +
   6.168 +val parse_comment =
   6.169 +  Scan.this_string "<!--" --
   6.170 +  Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
   6.171 +  Scan.this_string "-->" >> ignored;
   6.172 +
   6.173 +val parse_processing_instruction =
   6.174 +  Scan.this_string "<?" --
   6.175 +  Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
   6.176 +  Scan.this_string "?>" >> ignored;
   6.177 +
   6.178 +val parse_doctype =
   6.179 +  Scan.this_string "<!DOCTYPE" --
   6.180 +  Scan.repeat (Scan.unless ($$ ">") regular) --
   6.181 +  $$ ">" >> ignored;
   6.182 +
   6.183 +val parse_misc =
   6.184 +  Scan.one Symbol.is_blank >> ignored ||
   6.185 +  parse_processing_instruction ||
   6.186 +  parse_comment;
   6.187 +
   6.188 +val parse_optional_text =
   6.189 +  Scan.optional (parse_chars >> (single o Text)) [];
   6.190 +
   6.191 +fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_";
   6.192 +fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = ".";
   6.193 +val parse_name = Scan.one name_start_char ::: Scan.many name_char;
   6.194 +
   6.195 +in
   6.196 +
   6.197 +val parse_comments =
   6.198 +  blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
   6.199 +
   6.200 +val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode;
   6.201 +
   6.202 +fun parse_content xs =
   6.203 +  (parse_optional_text @@@
   6.204 +    (Scan.repeat
   6.205 +      ((parse_element >> single ||
   6.206 +        parse_cdata >> (single o Text) ||
   6.207 +        parse_processing_instruction ||
   6.208 +        parse_comment)
   6.209 +      @@@ parse_optional_text) >> flat)) xs
   6.210 +
   6.211 +and parse_element xs =
   6.212 +  ($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :--
   6.213 +    (fn (name, _) =>
   6.214 +      !! (err (fn () => "Expected > or />"))
   6.215 +       ($$ "/" -- $$ ">" >> ignored ||
   6.216 +        $$ ">" |-- parse_content --|
   6.217 +          !! (err (fn () => "Expected </" ^ implode name ^ ">"))
   6.218 +              ($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">")))
   6.219 +    >> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs;
   6.220 +
   6.221 +val parse_document =
   6.222 +  (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)
   6.223 +  |-- parse_element;
   6.224 +
   6.225 +fun parse s =
   6.226 +  (case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element"))
   6.227 +      (blanks |-- parse_document --| blanks))) (raw_explode s) of
   6.228 +    (x, []) => x
   6.229 +  | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
   6.230 +
   6.231 +end;
   6.232 +
   6.233 +
   6.234 +
   6.235 +(** XML as data representation language **)
   6.236 +
   6.237 +exception XML_ATOM of string;
   6.238 +exception XML_BODY of tree list;
   6.239 +
   6.240 +
   6.241 +structure Encode =
   6.242 +struct
   6.243 +
   6.244 +type 'a A = 'a -> string;
   6.245 +type 'a T = 'a -> body;
   6.246 +type 'a V = 'a -> string list * body;
   6.247 +
   6.248 +
   6.249 +(* atomic values *)
   6.250 +
   6.251 +fun int_atom i = signed_string_of_int i;
   6.252 +
   6.253 +fun bool_atom false = "0"
   6.254 +  | bool_atom true = "1";
   6.255 +
   6.256 +fun unit_atom () = "";
   6.257 +
   6.258 +
   6.259 +(* structural nodes *)
   6.260 +
   6.261 +fun node ts = Elem ((":", []), ts);
   6.262 +
   6.263 +fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs;
   6.264 +
   6.265 +fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts);
   6.266 +
   6.267 +
   6.268 +(* representation of standard types *)
   6.269 +
   6.270 +fun properties props = [Elem ((":", props), [])];
   6.271 +
   6.272 +fun string "" = []
   6.273 +  | string s = [Text s];
   6.274 +
   6.275 +val int = string o int_atom;
   6.276 +
   6.277 +val bool = string o bool_atom;
   6.278 +
   6.279 +val unit = string o unit_atom;
   6.280 +
   6.281 +fun pair f g (x, y) = [node (f x), node (g y)];
   6.282 +
   6.283 +fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)];
   6.284 +
   6.285 +fun list f xs = map (node o f) xs;
   6.286 +
   6.287 +fun option _ NONE = []
   6.288 +  | option f (SOME x) = [node (f x)];
   6.289 +
   6.290 +fun variant fs x = [tagged (the (get_index (fn f => try f x) fs))];
   6.291 +
   6.292 +end;
   6.293 +
   6.294 +
   6.295 +structure Decode =
   6.296 +struct
   6.297 +
   6.298 +type 'a A = string -> 'a;
   6.299 +type 'a T = body -> 'a;
   6.300 +type 'a V = string list * body -> 'a;
   6.301 +
   6.302 +
   6.303 +(* atomic values *)
   6.304 +
   6.305 +fun int_atom s =
   6.306 +  Markup.parse_int s
   6.307 +    handle Fail _ => raise XML_ATOM s;
   6.308 +
   6.309 +fun bool_atom "0" = false
   6.310 +  | bool_atom "1" = true
   6.311 +  | bool_atom s = raise XML_ATOM s;
   6.312 +
   6.313 +fun unit_atom "" = ()
   6.314 +  | unit_atom s = raise XML_ATOM s;
   6.315 +
   6.316 +
   6.317 +(* structural nodes *)
   6.318 +
   6.319 +fun node (Elem ((":", []), ts)) = ts
   6.320 +  | node t = raise XML_BODY [t];
   6.321 +
   6.322 +fun vector atts =
   6.323 +  #1 (fold_map (fn (a, x) =>
   6.324 +        fn i => if int_atom a = i then (x, i + 1) else raise XML_ATOM a) atts 0);
   6.325 +
   6.326 +fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts))
   6.327 +  | tagged t = raise XML_BODY [t];
   6.328 +
   6.329 +
   6.330 +(* representation of standard types *)
   6.331 +
   6.332 +fun properties [Elem ((":", props), [])] = props
   6.333 +  | properties ts = raise XML_BODY ts;
   6.334 +
   6.335 +fun string [] = ""
   6.336 +  | string [Text s] = s
   6.337 +  | string ts = raise XML_BODY ts;
   6.338 +
   6.339 +val int = int_atom o string;
   6.340 +
   6.341 +val bool = bool_atom o string;
   6.342 +
   6.343 +val unit = unit_atom o string;
   6.344 +
   6.345 +fun pair f g [t1, t2] = (f (node t1), g (node t2))
   6.346 +  | pair _ _ ts = raise XML_BODY ts;
   6.347 +
   6.348 +fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3))
   6.349 +  | triple _ _ _ ts = raise XML_BODY ts;
   6.350 +
   6.351 +fun list f ts = map (f o node) ts;
   6.352 +
   6.353 +fun option _ [] = NONE
   6.354 +  | option f [t] = SOME (f (node t))
   6.355 +  | option _ ts = raise XML_BODY ts;
   6.356 +
   6.357 +fun variant fs [t] =
   6.358 +      let
   6.359 +        val (tag, (xs, ts)) = tagged t;
   6.360 +        val f = nth fs tag handle General.Subscript => raise XML_BODY [t];
   6.361 +      in f (xs, ts) end
   6.362 +  | variant _ ts = raise XML_BODY ts;
   6.363 +
   6.364 +end;
   6.365 +
   6.366 +end;
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/src/Pure/PIDE/xml.scala	Sun Sep 04 15:21:50 2011 +0200
     7.3 @@ -0,0 +1,390 @@
     7.4 +/*  Title:      Pure/PIDE/xml.scala
     7.5 +    Author:     Makarius
     7.6 +
     7.7 +Untyped XML trees and basic data representation.
     7.8 +*/
     7.9 +
    7.10 +package isabelle
    7.11 +
    7.12 +import java.lang.System
    7.13 +import java.util.WeakHashMap
    7.14 +import java.lang.ref.WeakReference
    7.15 +import javax.xml.parsers.DocumentBuilderFactory
    7.16 +
    7.17 +import scala.actors.Actor._
    7.18 +import scala.collection.mutable
    7.19 +
    7.20 +
    7.21 +object XML
    7.22 +{
    7.23 +  /** XML trees **/
    7.24 +
    7.25 +  /* datatype representation */
    7.26 +
    7.27 +  type Attributes = Properties.T
    7.28 +
    7.29 +  sealed abstract class Tree { override def toString = string_of_tree(this) }
    7.30 +  case class Elem(markup: Markup, body: List[Tree]) extends Tree
    7.31 +  case class Text(content: String) extends Tree
    7.32 +
    7.33 +  def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body)
    7.34 +  def elem(name: String) = Elem(Markup(name, Nil), Nil)
    7.35 +
    7.36 +  type Body = List[Tree]
    7.37 +
    7.38 +
    7.39 +  /* string representation */
    7.40 +
    7.41 +  def string_of_body(body: Body): String =
    7.42 +  {
    7.43 +    val s = new StringBuilder
    7.44 +
    7.45 +    def text(txt: String) {
    7.46 +      if (txt == null) s ++= txt
    7.47 +      else {
    7.48 +        for (c <- txt.iterator) c match {
    7.49 +          case '<' => s ++= "&lt;"
    7.50 +          case '>' => s ++= "&gt;"
    7.51 +          case '&' => s ++= "&amp;"
    7.52 +          case '"' => s ++= "&quot;"
    7.53 +          case '\'' => s ++= "&apos;"
    7.54 +          case _ => s += c
    7.55 +        }
    7.56 +      }
    7.57 +    }
    7.58 +    def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" }
    7.59 +    def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) }
    7.60 +    def tree(t: Tree): Unit =
    7.61 +      t match {
    7.62 +        case Elem(markup, Nil) =>
    7.63 +          s ++= "<"; elem(markup); s ++= "/>"
    7.64 +        case Elem(markup, ts) =>
    7.65 +          s ++= "<"; elem(markup); s ++= ">"
    7.66 +          ts.foreach(tree)
    7.67 +          s ++= "</"; s ++= markup.name; s ++= ">"
    7.68 +        case Text(txt) => text(txt)
    7.69 +      }
    7.70 +    body.foreach(tree)
    7.71 +    s.toString
    7.72 +  }
    7.73 +
    7.74 +  def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
    7.75 +
    7.76 +
    7.77 +  /* text content */
    7.78 +
    7.79 +  def content_stream(tree: Tree): Stream[String] =
    7.80 +    tree match {
    7.81 +      case Elem(_, body) => content_stream(body)
    7.82 +      case Text(content) => Stream(content)
    7.83 +    }
    7.84 +  def content_stream(body: Body): Stream[String] =
    7.85 +    body.toStream.flatten(content_stream(_))
    7.86 +
    7.87 +  def content(tree: Tree): Iterator[String] = content_stream(tree).iterator
    7.88 +  def content(body: Body): Iterator[String] = content_stream(body).iterator
    7.89 +
    7.90 +
    7.91 +  /* pipe-lined cache for partial sharing */
    7.92 +
    7.93 +  class Cache(initial_size: Int = 131071, max_string: Int = 100)
    7.94 +  {
    7.95 +    private val cache_actor = actor
    7.96 +    {
    7.97 +      val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)
    7.98 +
    7.99 +      def lookup[A](x: A): Option[A] =
   7.100 +      {
   7.101 +        val ref = table.get(x)
   7.102 +        if (ref == null) None
   7.103 +        else {
   7.104 +          val y = ref.asInstanceOf[WeakReference[A]].get
   7.105 +          if (y == null) None
   7.106 +          else Some(y)
   7.107 +        }
   7.108 +      }
   7.109 +      def store[A](x: A): A =
   7.110 +      {
   7.111 +        table.put(x, new WeakReference[Any](x))
   7.112 +        x
   7.113 +      }
   7.114 +
   7.115 +      def trim_bytes(s: String): String = new String(s.toCharArray)
   7.116 +
   7.117 +      def cache_string(x: String): String =
   7.118 +        lookup(x) match {
   7.119 +          case Some(y) => y
   7.120 +          case None =>
   7.121 +            val z = trim_bytes(x)
   7.122 +            if (z.length > max_string) z else store(z)
   7.123 +        }
   7.124 +      def cache_props(x: Properties.T): Properties.T =
   7.125 +        if (x.isEmpty) x
   7.126 +        else
   7.127 +          lookup(x) match {
   7.128 +            case Some(y) => y
   7.129 +            case None => store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2))))
   7.130 +          }
   7.131 +      def cache_markup(x: Markup): Markup =
   7.132 +        lookup(x) match {
   7.133 +          case Some(y) => y
   7.134 +          case None =>
   7.135 +            x match {
   7.136 +              case Markup(name, props) =>
   7.137 +                store(Markup(cache_string(name), cache_props(props)))
   7.138 +            }
   7.139 +        }
   7.140 +      def cache_tree(x: XML.Tree): XML.Tree =
   7.141 +        lookup(x) match {
   7.142 +          case Some(y) => y
   7.143 +          case None =>
   7.144 +            x match {
   7.145 +              case XML.Elem(markup, body) =>
   7.146 +                store(XML.Elem(cache_markup(markup), cache_body(body)))
   7.147 +              case XML.Text(text) => store(XML.Text(cache_string(text)))
   7.148 +            }
   7.149 +        }
   7.150 +      def cache_body(x: XML.Body): XML.Body =
   7.151 +        if (x.isEmpty) x
   7.152 +        else
   7.153 +          lookup(x) match {
   7.154 +            case Some(y) => y
   7.155 +            case None => x.map(cache_tree(_))
   7.156 +          }
   7.157 +
   7.158 +      // main loop
   7.159 +      loop {
   7.160 +        react {
   7.161 +          case Cache_String(x, f) => f(cache_string(x))
   7.162 +          case Cache_Markup(x, f) => f(cache_markup(x))
   7.163 +          case Cache_Tree(x, f) => f(cache_tree(x))
   7.164 +          case Cache_Body(x, f) => f(cache_body(x))
   7.165 +          case Cache_Ignore(x, f) => f(x)
   7.166 +          case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad)
   7.167 +        }
   7.168 +      }
   7.169 +    }
   7.170 +
   7.171 +    private case class Cache_String(x: String, f: String => Unit)
   7.172 +    private case class Cache_Markup(x: Markup, f: Markup => Unit)
   7.173 +    private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit)
   7.174 +    private case class Cache_Body(x: XML.Body, f: XML.Body => Unit)
   7.175 +    private case class Cache_Ignore[A](x: A, f: A => Unit)
   7.176 +
   7.177 +    // main methods
   7.178 +    def cache_string(x: String)(f: String => Unit) { cache_actor ! Cache_String(x, f) }
   7.179 +    def cache_markup(x: Markup)(f: Markup => Unit) { cache_actor ! Cache_Markup(x, f) }
   7.180 +    def cache_tree(x: XML.Tree)(f: XML.Tree => Unit) { cache_actor ! Cache_Tree(x, f) }
   7.181 +    def cache_body(x: XML.Body)(f: XML.Body => Unit) { cache_actor ! Cache_Body(x, f) }
   7.182 +    def cache_ignore[A](x: A)(f: A => Unit) { cache_actor ! Cache_Ignore(x, f) }
   7.183 +  }
   7.184 +
   7.185 +
   7.186 +
   7.187 +  /** document object model (W3C DOM) **/
   7.188 +
   7.189 +  def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
   7.190 +    node.getUserData(Markup.Data.name) match {
   7.191 +      case tree: XML.Tree => Some(tree)
   7.192 +      case _ => None
   7.193 +    }
   7.194 +
   7.195 +  def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
   7.196 +  {
   7.197 +    def DOM(tr: Tree): org.w3c.dom.Node = tr match {
   7.198 +      case Elem(Markup.Data, List(data, t)) =>
   7.199 +        val node = DOM(t)
   7.200 +        node.setUserData(Markup.Data.name, data, null)
   7.201 +        node
   7.202 +      case Elem(Markup(name, atts), ts) =>
   7.203 +        if (name == Markup.Data.name)
   7.204 +          error("Malformed data element: " + tr.toString)
   7.205 +        val node = doc.createElement(name)
   7.206 +        for ((name, value) <- atts) node.setAttribute(name, value)
   7.207 +        for (t <- ts) node.appendChild(DOM(t))
   7.208 +        node
   7.209 +      case Text(txt) => doc.createTextNode(txt)
   7.210 +    }
   7.211 +    DOM(tree)
   7.212 +  }
   7.213 +
   7.214 +
   7.215 +
   7.216 +  /** XML as data representation language **/
   7.217 +
   7.218 +  class XML_Atom(s: String) extends Exception(s)
   7.219 +  class XML_Body(body: XML.Body) extends Exception
   7.220 +
   7.221 +  object Encode
   7.222 +  {
   7.223 +    type T[A] = A => XML.Body
   7.224 +
   7.225 +
   7.226 +    /* atomic values */
   7.227 +
   7.228 +    def long_atom(i: Long): String = i.toString
   7.229 +
   7.230 +    def int_atom(i: Int): String = i.toString
   7.231 +
   7.232 +    def bool_atom(b: Boolean): String = if (b) "1" else "0"
   7.233 +
   7.234 +    def unit_atom(u: Unit) = ""
   7.235 +
   7.236 +
   7.237 +    /* structural nodes */
   7.238 +
   7.239 +    private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)
   7.240 +
   7.241 +    private def vector(xs: List[String]): XML.Attributes =
   7.242 +      xs.zipWithIndex.map(p => (int_atom(p._2), p._1))
   7.243 +
   7.244 +    private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
   7.245 +      XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2)
   7.246 +
   7.247 +
   7.248 +    /* representation of standard types */
   7.249 +
   7.250 +    val properties: T[Properties.T] =
   7.251 +      (props => List(XML.Elem(Markup(":", props), Nil)))
   7.252 +
   7.253 +    val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s)))
   7.254 +
   7.255 +    val long: T[Long] = (x => string(long_atom(x)))
   7.256 +
   7.257 +    val int: T[Int] = (x => string(int_atom(x)))
   7.258 +
   7.259 +    val bool: T[Boolean] = (x => string(bool_atom(x)))
   7.260 +
   7.261 +    val unit: T[Unit] = (x => string(unit_atom(x)))
   7.262 +
   7.263 +    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
   7.264 +      (x => List(node(f(x._1)), node(g(x._2))))
   7.265 +
   7.266 +    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
   7.267 +      (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3))))
   7.268 +
   7.269 +    def list[A](f: T[A]): T[List[A]] =
   7.270 +      (xs => xs.map((x: A) => node(f(x))))
   7.271 +
   7.272 +    def option[A](f: T[A]): T[Option[A]] =
   7.273 +    {
   7.274 +      case None => Nil
   7.275 +      case Some(x) => List(node(f(x)))
   7.276 +    }
   7.277 +
   7.278 +    def variant[A](fs: List[PartialFunction[A, (List[String], XML.Body)]]): T[A] =
   7.279 +    {
   7.280 +      case x =>
   7.281 +        val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get
   7.282 +        List(tagged(tag, f(x)))
   7.283 +    }
   7.284 +  }
   7.285 +
   7.286 +  object Decode
   7.287 +  {
   7.288 +    type T[A] = XML.Body => A
   7.289 +    type V[A] = (List[String], XML.Body) => A
   7.290 +
   7.291 +
   7.292 +    /* atomic values */
   7.293 +
   7.294 +    def long_atom(s: String): Long =
   7.295 +      try { java.lang.Long.parseLong(s) }
   7.296 +      catch { case e: NumberFormatException => throw new XML_Atom(s) }
   7.297 +
   7.298 +    def int_atom(s: String): Int =
   7.299 +      try { Integer.parseInt(s) }
   7.300 +      catch { case e: NumberFormatException => throw new XML_Atom(s) }
   7.301 +
   7.302 +    def bool_atom(s: String): Boolean =
   7.303 +      if (s == "1") true
   7.304 +      else if (s == "0") false
   7.305 +      else throw new XML_Atom(s)
   7.306 +
   7.307 +    def unit_atom(s: String): Unit =
   7.308 +      if (s == "") () else throw new XML_Atom(s)
   7.309 +
   7.310 +
   7.311 +    /* structural nodes */
   7.312 +
   7.313 +    private def node(t: XML.Tree): XML.Body =
   7.314 +      t match {
   7.315 +        case XML.Elem(Markup(":", Nil), ts) => ts
   7.316 +        case _ => throw new XML_Body(List(t))
   7.317 +      }
   7.318 +
   7.319 +    private def vector(atts: XML.Attributes): List[String] =
   7.320 +    {
   7.321 +      val xs = new mutable.ListBuffer[String]
   7.322 +      var i = 0
   7.323 +      for ((a, x) <- atts) {
   7.324 +        if (int_atom(a) == i) { xs += x; i = i + 1 }
   7.325 +        else throw new XML_Atom(a)
   7.326 +      }
   7.327 +      xs.toList
   7.328 +    }
   7.329 +
   7.330 +    private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
   7.331 +      t match {
   7.332 +        case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts))
   7.333 +        case _ => throw new XML_Body(List(t))
   7.334 +      }
   7.335 +
   7.336 +
   7.337 +    /* representation of standard types */
   7.338 +
   7.339 +    val properties: T[Properties.T] =
   7.340 +    {
   7.341 +      case List(XML.Elem(Markup(":", props), Nil)) => props
   7.342 +      case ts => throw new XML_Body(ts)
   7.343 +    }
   7.344 +
   7.345 +    val string: T[String] =
   7.346 +    {
   7.347 +      case Nil => ""
   7.348 +      case List(XML.Text(s)) => s
   7.349 +      case ts => throw new XML_Body(ts)
   7.350 +    }
   7.351 +
   7.352 +    val long: T[Long] = (x => long_atom(string(x)))
   7.353 +
   7.354 +    val int: T[Int] = (x => int_atom(string(x)))
   7.355 +
   7.356 +    val bool: T[Boolean] = (x => bool_atom(string(x)))
   7.357 +
   7.358 +    val unit: T[Unit] = (x => unit_atom(string(x)))
   7.359 +
   7.360 +    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
   7.361 +    {
   7.362 +      case List(t1, t2) => (f(node(t1)), g(node(t2)))
   7.363 +      case ts => throw new XML_Body(ts)
   7.364 +    }
   7.365 +
   7.366 +    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
   7.367 +    {
   7.368 +      case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3)))
   7.369 +      case ts => throw new XML_Body(ts)
   7.370 +    }
   7.371 +
   7.372 +    def list[A](f: T[A]): T[List[A]] =
   7.373 +      (ts => ts.map(t => f(node(t))))
   7.374 +
   7.375 +    def option[A](f: T[A]): T[Option[A]] =
   7.376 +    {
   7.377 +      case Nil => None
   7.378 +      case List(t) => Some(f(node(t)))
   7.379 +      case ts => throw new XML_Body(ts)
   7.380 +    }
   7.381 +
   7.382 +    def variant[A](fs: List[V[A]]): T[A] =
   7.383 +    {
   7.384 +      case List(t) =>
   7.385 +        val (tag, (xs, ts)) = tagged(t)
   7.386 +        val f =
   7.387 +          try { fs(tag) }
   7.388 +          catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) }
   7.389 +        f(xs, ts)
   7.390 +      case ts => throw new XML_Body(ts)
   7.391 +    }
   7.392 +  }
   7.393 +}
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/src/Pure/PIDE/yxml.ML	Sun Sep 04 15:21:50 2011 +0200
     8.3 @@ -0,0 +1,148 @@
     8.4 +(*  Title:      Pure/PIDE/yxml.ML
     8.5 +    Author:     Makarius
     8.6 +
     8.7 +Efficient text representation of XML trees using extra characters X
     8.8 +and Y -- no escaping, may nest marked text verbatim.  Suitable for
     8.9 +direct inlining into plain text.
    8.10 +
    8.11 +Markup <elem att="val" ...>...body...</elem> is encoded as:
    8.12 +
    8.13 +  X Y name Y att=val ... X
    8.14 +  ...
    8.15 +  body
    8.16 +  ...
    8.17 +  X Y X
    8.18 +*)
    8.19 +
    8.20 +signature YXML =
    8.21 +sig
    8.22 +  val X: Symbol.symbol
    8.23 +  val Y: Symbol.symbol
    8.24 +  val embed_controls: string -> string
    8.25 +  val detect: string -> bool
    8.26 +  val output_markup: Markup.T -> string * string
    8.27 +  val element: string -> XML.attributes -> string list -> string
    8.28 +  val string_of_body: XML.body -> string
    8.29 +  val string_of: XML.tree -> string
    8.30 +  val parse_body: string -> XML.body
    8.31 +  val parse: string -> XML.tree
    8.32 +end;
    8.33 +
    8.34 +structure YXML: YXML =
    8.35 +struct
    8.36 +
    8.37 +(** string representation **)
    8.38 +
    8.39 +(* idempotent recoding of certain low ASCII control characters *)
    8.40 +
    8.41 +fun pseudo_utf8 c =
    8.42 +  if Symbol.is_ascii_control c
    8.43 +  then chr 192 ^ chr (128 + ord c)
    8.44 +  else c;
    8.45 +
    8.46 +fun embed_controls str =
    8.47 +  if exists_string Symbol.is_ascii_control str
    8.48 +  then translate_string pseudo_utf8 str
    8.49 +  else str;
    8.50 +
    8.51 +
    8.52 +(* markers *)
    8.53 +
    8.54 +val X = chr 5;
    8.55 +val Y = chr 6;
    8.56 +val XY = X ^ Y;
    8.57 +val XYX = XY ^ X;
    8.58 +
    8.59 +val detect = exists_string (fn s => s = X orelse s = Y);
    8.60 +
    8.61 +
    8.62 +(* output *)
    8.63 +
    8.64 +fun output_markup (markup as (name, atts)) =
    8.65 +  if Markup.is_empty markup then Markup.no_output
    8.66 +  else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX);
    8.67 +
    8.68 +fun element name atts body =
    8.69 +  let val (pre, post) = output_markup (name, atts)
    8.70 +  in pre ^ implode body ^ post end;
    8.71 +
    8.72 +fun string_of_body body =
    8.73 +  let
    8.74 +    fun attrib (a, x) =
    8.75 +      Buffer.add Y #>
    8.76 +      Buffer.add a #> Buffer.add "=" #> Buffer.add x;
    8.77 +    fun tree (XML.Elem ((name, atts), ts)) =
    8.78 +          Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #>
    8.79 +          trees ts #>
    8.80 +          Buffer.add XYX
    8.81 +      | tree (XML.Text s) = Buffer.add s
    8.82 +    and trees ts = fold tree ts;
    8.83 +  in Buffer.empty |> trees body |> Buffer.content end;
    8.84 +
    8.85 +val string_of = string_of_body o single;
    8.86 +
    8.87 +
    8.88 +
    8.89 +(** efficient YXML parsing **)
    8.90 +
    8.91 +local
    8.92 +
    8.93 +(* splitting *)
    8.94 +
    8.95 +fun is_char s c = ord s = Char.ord c;
    8.96 +
    8.97 +val split_string =
    8.98 +  Substring.full #>
    8.99 +  Substring.tokens (is_char X) #>
   8.100 +  map (Substring.fields (is_char Y) #> map Substring.string);
   8.101 +
   8.102 +
   8.103 +(* structural errors *)
   8.104 +
   8.105 +fun err msg = raise Fail ("Malformed YXML encoding: " ^ msg);
   8.106 +fun err_attribute () = err "bad attribute";
   8.107 +fun err_element () = err "bad element";
   8.108 +fun err_unbalanced "" = err "unbalanced element"
   8.109 +  | err_unbalanced name = err ("unbalanced element " ^ quote name);
   8.110 +
   8.111 +
   8.112 +(* stack operations *)
   8.113 +
   8.114 +fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending;
   8.115 +
   8.116 +fun push "" _ _ = err_element ()
   8.117 +  | push name atts pending = ((name, atts), []) :: pending;
   8.118 +
   8.119 +fun pop ((("", _), _) :: _) = err_unbalanced ""
   8.120 +  | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending;
   8.121 +
   8.122 +
   8.123 +(* parsing *)
   8.124 +
   8.125 +fun parse_attrib s =
   8.126 +  (case first_field "=" s of
   8.127 +    NONE => err_attribute ()
   8.128 +  | SOME ("", _) => err_attribute ()
   8.129 +  | SOME att => att);
   8.130 +
   8.131 +fun parse_chunk ["", ""] = pop
   8.132 +  | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts)
   8.133 +  | parse_chunk txts = fold (add o XML.Text) txts;
   8.134 +
   8.135 +in
   8.136 +
   8.137 +fun parse_body source =
   8.138 +  (case fold parse_chunk (split_string source) [(("", []), [])] of
   8.139 +    [(("", _), result)] => rev result
   8.140 +  | ((name, _), _) :: _ => err_unbalanced name);
   8.141 +
   8.142 +fun parse source =
   8.143 +  (case parse_body source of
   8.144 +    [result] => result
   8.145 +  | [] => XML.Text ""
   8.146 +  | _ => err "multiple results");
   8.147 +
   8.148 +end;
   8.149 +
   8.150 +end;
   8.151 +
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/src/Pure/PIDE/yxml.scala	Sun Sep 04 15:21:50 2011 +0200
     9.3 @@ -0,0 +1,135 @@
     9.4 +/*  Title:      Pure/PIDE/yxml.scala
     9.5 +    Author:     Makarius
     9.6 +
     9.7 +Efficient text representation of XML trees.  Suitable for direct
     9.8 +inlining into plain text.
     9.9 +*/
    9.10 +
    9.11 +package isabelle
    9.12 +
    9.13 +
    9.14 +import scala.collection.mutable
    9.15 +
    9.16 +
    9.17 +object YXML
    9.18 +{
    9.19 +  /* chunk markers */
    9.20 +
    9.21 +  val X = '\5'
    9.22 +  val Y = '\6'
    9.23 +  val X_string = X.toString
    9.24 +  val Y_string = Y.toString
    9.25 +
    9.26 +  def detect(s: String): Boolean = s.exists(c => c == X || c == Y)
    9.27 +
    9.28 +
    9.29 +  /* string representation */  // FIXME byte array version with pseudo-utf-8 (!?)
    9.30 +
    9.31 +  def string_of_body(body: XML.Body): String =
    9.32 +  {
    9.33 +    val s = new StringBuilder
    9.34 +    def attrib(p: (String, String)) { s += Y; s ++= p._1; s += '='; s ++= p._2 }
    9.35 +    def tree(t: XML.Tree): Unit =
    9.36 +      t match {
    9.37 +        case XML.Elem(Markup(name, atts), ts) =>
    9.38 +          s += X; s += Y; s++= name; atts.foreach(attrib); s += X
    9.39 +          ts.foreach(tree)
    9.40 +          s += X; s += Y; s += X
    9.41 +        case XML.Text(text) => s ++= text
    9.42 +      }
    9.43 +    body.foreach(tree)
    9.44 +    s.toString
    9.45 +  }
    9.46 +
    9.47 +  def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
    9.48 +
    9.49 +
    9.50 +  /* parsing */
    9.51 +
    9.52 +  private def err(msg: String) = error("Malformed YXML: " + msg)
    9.53 +  private def err_attribute() = err("bad attribute")
    9.54 +  private def err_element() = err("bad element")
    9.55 +  private def err_unbalanced(name: String) =
    9.56 +    if (name == "") err("unbalanced element")
    9.57 +    else err("unbalanced element " + quote(name))
    9.58 +
    9.59 +  private def parse_attrib(source: CharSequence) = {
    9.60 +    val s = source.toString
    9.61 +    val i = s.indexOf('=')
    9.62 +    if (i <= 0) err_attribute()
    9.63 +    (s.substring(0, i), s.substring(i + 1))
    9.64 +  }
    9.65 +
    9.66 +
    9.67 +  def parse_body(source: CharSequence): XML.Body =
    9.68 +  {
    9.69 +    /* stack operations */
    9.70 +
    9.71 +    def buffer(): mutable.ListBuffer[XML.Tree] = new mutable.ListBuffer[XML.Tree]
    9.72 +    var stack: List[(Markup, mutable.ListBuffer[XML.Tree])] = List((Markup.Empty, buffer()))
    9.73 +
    9.74 +    def add(x: XML.Tree)
    9.75 +    {
    9.76 +      (stack: @unchecked) match { case ((_, body) :: _) => body += x }
    9.77 +    }
    9.78 +
    9.79 +    def push(name: String, atts: XML.Attributes)
    9.80 +    {
    9.81 +      if (name == "") err_element()
    9.82 +      else stack = (Markup(name, atts), buffer()) :: stack
    9.83 +    }
    9.84 +
    9.85 +    def pop()
    9.86 +    {
    9.87 +      (stack: @unchecked) match {
    9.88 +        case ((Markup.Empty, _) :: _) => err_unbalanced("")
    9.89 +        case ((markup, body) :: pending) =>
    9.90 +          stack = pending
    9.91 +          add(XML.Elem(markup, body.toList))
    9.92 +      }
    9.93 +    }
    9.94 +
    9.95 +
    9.96 +    /* parse chunks */
    9.97 +
    9.98 +    for (chunk <- Library.chunks(source, X) if chunk.length != 0) {
    9.99 +      if (chunk.length == 1 && chunk.charAt(0) == Y) pop()
   9.100 +      else {
   9.101 +        Library.chunks(chunk, Y).toList match {
   9.102 +          case ch :: name :: atts if ch.length == 0 =>
   9.103 +            push(name.toString, atts.map(parse_attrib))
   9.104 +          case txts => for (txt <- txts) add(XML.Text(txt.toString))
   9.105 +        }
   9.106 +      }
   9.107 +    }
   9.108 +    (stack: @unchecked) match {
   9.109 +      case List((Markup.Empty, body)) => body.toList
   9.110 +      case (Markup(name, _), _) :: _ => err_unbalanced(name)
   9.111 +    }
   9.112 +  }
   9.113 +
   9.114 +  def parse(source: CharSequence): XML.Tree =
   9.115 +    parse_body(source) match {
   9.116 +      case List(result) => result
   9.117 +      case Nil => XML.Text("")
   9.118 +      case _ => err("multiple results")
   9.119 +    }
   9.120 +
   9.121 +
   9.122 +  /* failsafe parsing */
   9.123 +
   9.124 +  private def markup_malformed(source: CharSequence) =
   9.125 +    XML.elem(Markup.MALFORMED, List(XML.Text(source.toString)))
   9.126 +
   9.127 +  def parse_body_failsafe(source: CharSequence): XML.Body =
   9.128 +  {
   9.129 +    try { parse_body(source) }
   9.130 +    catch { case ERROR(_) => List(markup_malformed(source)) }
   9.131 +  }
   9.132 +
   9.133 +  def parse_failsafe(source: CharSequence): XML.Tree =
   9.134 +  {
   9.135 +    try { parse(source) }
   9.136 +    catch { case ERROR(_) => markup_malformed(source) }
   9.137 +  }
   9.138 +}
    10.1 --- a/src/Pure/ROOT.ML	Sun Sep 04 14:29:15 2011 +0200
    10.2 +++ b/src/Pure/ROOT.ML	Sun Sep 04 15:21:50 2011 +0200
    10.3 @@ -54,17 +54,18 @@
    10.4  use "General/linear_set.ML";
    10.5  use "General/buffer.ML";
    10.6  use "General/pretty.ML";
    10.7 -use "General/xml.ML";
    10.8  use "General/path.ML";
    10.9  use "General/url.ML";
   10.10  use "General/file.ML";
   10.11 -use "General/yxml.ML";
   10.12  use "General/long_name.ML";
   10.13  use "General/binding.ML";
   10.14  
   10.15  use "General/sha1.ML";
   10.16  if ML_System.is_polyml then use "General/sha1_polyml.ML" else ();
   10.17  
   10.18 +use "PIDE/xml.ML";
   10.19 +use "PIDE/yxml.ML";
   10.20 +
   10.21  
   10.22  (* concurrency within the ML runtime *)
   10.23  
    11.1 --- a/src/Pure/build-jars	Sun Sep 04 14:29:15 2011 +0200
    11.2 +++ b/src/Pure/build-jars	Sun Sep 04 15:21:50 2011 +0200
    11.3 @@ -24,8 +24,6 @@
    11.4    General/scan.scala
    11.5    General/sha1.scala
    11.6    General/symbol.scala
    11.7 -  General/xml.scala
    11.8 -  General/yxml.scala
    11.9    Isar/keyword.scala
   11.10    Isar/outer_syntax.scala
   11.11    Isar/parse.scala
   11.12 @@ -36,6 +34,8 @@
   11.13    PIDE/isar_document.scala
   11.14    PIDE/markup_tree.scala
   11.15    PIDE/text.scala
   11.16 +  PIDE/xml.scala
   11.17 +  PIDE/yxml.scala
   11.18    System/cygwin.scala
   11.19    System/download.scala
   11.20    System/event_bus.scala