--- a/src/Pure/General/xml.ML Sun Sep 04 14:29:15 2011 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,361 +0,0 @@
-(* Title: Pure/General/xml.ML
- Author: David Aspinall, Stefan Berghofer and Markus Wenzel
-
-Untyped XML trees.
-*)
-
-signature XML_DATA_OPS =
-sig
- type 'a A
- type 'a T
- type 'a V
- val int_atom: int A
- val bool_atom: bool A
- val unit_atom: unit A
- val properties: Properties.T T
- val string: string T
- val int: int T
- val bool: bool T
- val unit: unit T
- val pair: 'a T -> 'b T -> ('a * 'b) T
- val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
- val list: 'a T -> 'a list T
- val option: 'a T -> 'a option T
- val variant: 'a V list -> 'a T
-end;
-
-signature XML =
-sig
- type attributes = Properties.T
- datatype tree =
- Elem of Markup.T * tree list
- | Text of string
- type body = tree list
- val add_content: tree -> Buffer.T -> Buffer.T
- val content_of: body -> string
- val header: string
- val text: string -> string
- val element: string -> attributes -> string list -> string
- val output_markup: Markup.T -> Output.output * Output.output
- val string_of: tree -> string
- val pretty: int -> tree -> Pretty.T
- val output: tree -> TextIO.outstream -> unit
- val parse_comments: string list -> unit * string list
- val parse_string : string -> string option
- val parse_element: string list -> tree * string list
- val parse_document: string list -> tree * string list
- val parse: string -> tree
- exception XML_ATOM of string
- exception XML_BODY of body
- structure Encode: XML_DATA_OPS
- structure Decode: XML_DATA_OPS
-end;
-
-structure XML: XML =
-struct
-
-(** XML trees **)
-
-type attributes = Properties.T;
-
-datatype tree =
- Elem of Markup.T * tree list
- | Text of string;
-
-type body = tree list;
-
-fun add_content (Elem (_, ts)) = fold add_content ts
- | add_content (Text s) = Buffer.add s;
-
-fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content;
-
-
-
-(** string representation **)
-
-val header = "<?xml version=\"1.0\"?>\n";
-
-
-(* escaped text *)
-
-fun decode "<" = "<"
- | decode ">" = ">"
- | decode "&" = "&"
- | decode "'" = "'"
- | decode """ = "\""
- | decode c = c;
-
-fun encode "<" = "<"
- | encode ">" = ">"
- | encode "&" = "&"
- | encode "'" = "'"
- | encode "\"" = """
- | encode c = c;
-
-val text = translate_string encode;
-
-
-(* elements *)
-
-fun elem name atts =
- space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);
-
-fun element name atts body =
- let val b = implode body in
- if b = "" then enclose "<" "/>" (elem name atts)
- else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name
- end;
-
-fun output_markup (markup as (name, atts)) =
- if Markup.is_empty markup then Markup.no_output
- else (enclose "<" ">" (elem name atts), enclose "</" ">" name);
-
-
-(* output *)
-
-fun buffer_of depth tree =
- let
- fun traverse _ (Elem ((name, atts), [])) =
- Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"
- | traverse d (Elem ((name, atts), ts)) =
- Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>
- traverse_body d ts #>
- Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
- | traverse _ (Text s) = Buffer.add (text s)
- and traverse_body 0 _ = Buffer.add "..."
- | traverse_body d ts = fold (traverse (d - 1)) ts;
- in Buffer.empty |> traverse depth tree end;
-
-val string_of = Buffer.content o buffer_of ~1;
-val output = Buffer.output o buffer_of ~1;
-
-fun pretty depth tree =
- Pretty.str (Buffer.content (buffer_of (Int.max (0, depth)) tree));
-
-
-
-(** XML parsing (slow) **)
-
-local
-
-fun err msg (xs, _) =
- fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
-
-fun ignored _ = [];
-
-val blanks = Scan.many Symbol.is_blank;
-val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
-val regular = Scan.one Symbol.is_regular;
-fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);
-
-val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;
-
-val parse_cdata =
- Scan.this_string "<![CDATA[" |--
- (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|
- Scan.this_string "]]>";
-
-val parse_att =
- (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) --
- (($$ "\"" || $$ "'") :|-- (fn s =>
- (Scan.repeat (special || regular_except s) >> implode) --| $$ s));
-
-val parse_comment =
- Scan.this_string "<!--" --
- Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
- Scan.this_string "-->" >> ignored;
-
-val parse_processing_instruction =
- Scan.this_string "<?" --
- Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
- Scan.this_string "?>" >> ignored;
-
-val parse_doctype =
- Scan.this_string "<!DOCTYPE" --
- Scan.repeat (Scan.unless ($$ ">") regular) --
- $$ ">" >> ignored;
-
-val parse_misc =
- Scan.one Symbol.is_blank >> ignored ||
- parse_processing_instruction ||
- parse_comment;
-
-val parse_optional_text =
- Scan.optional (parse_chars >> (single o Text)) [];
-
-fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_";
-fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = ".";
-val parse_name = Scan.one name_start_char ::: Scan.many name_char;
-
-in
-
-val parse_comments =
- blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
-
-val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode;
-
-fun parse_content xs =
- (parse_optional_text @@@
- (Scan.repeat
- ((parse_element >> single ||
- parse_cdata >> (single o Text) ||
- parse_processing_instruction ||
- parse_comment)
- @@@ parse_optional_text) >> flat)) xs
-
-and parse_element xs =
- ($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :--
- (fn (name, _) =>
- !! (err (fn () => "Expected > or />"))
- ($$ "/" -- $$ ">" >> ignored ||
- $$ ">" |-- parse_content --|
- !! (err (fn () => "Expected </" ^ implode name ^ ">"))
- ($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">")))
- >> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs;
-
-val parse_document =
- (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)
- |-- parse_element;
-
-fun parse s =
- (case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element"))
- (blanks |-- parse_document --| blanks))) (raw_explode s) of
- (x, []) => x
- | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
-
-end;
-
-
-
-(** XML as data representation language **)
-
-exception XML_ATOM of string;
-exception XML_BODY of tree list;
-
-
-structure Encode =
-struct
-
-type 'a A = 'a -> string;
-type 'a T = 'a -> body;
-type 'a V = 'a -> string list * body;
-
-
-(* atomic values *)
-
-fun int_atom i = signed_string_of_int i;
-
-fun bool_atom false = "0"
- | bool_atom true = "1";
-
-fun unit_atom () = "";
-
-
-(* structural nodes *)
-
-fun node ts = Elem ((":", []), ts);
-
-fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs;
-
-fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts);
-
-
-(* representation of standard types *)
-
-fun properties props = [Elem ((":", props), [])];
-
-fun string "" = []
- | string s = [Text s];
-
-val int = string o int_atom;
-
-val bool = string o bool_atom;
-
-val unit = string o unit_atom;
-
-fun pair f g (x, y) = [node (f x), node (g y)];
-
-fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)];
-
-fun list f xs = map (node o f) xs;
-
-fun option _ NONE = []
- | option f (SOME x) = [node (f x)];
-
-fun variant fs x = [tagged (the (get_index (fn f => try f x) fs))];
-
-end;
-
-
-structure Decode =
-struct
-
-type 'a A = string -> 'a;
-type 'a T = body -> 'a;
-type 'a V = string list * body -> 'a;
-
-
-(* atomic values *)
-
-fun int_atom s =
- Markup.parse_int s
- handle Fail _ => raise XML_ATOM s;
-
-fun bool_atom "0" = false
- | bool_atom "1" = true
- | bool_atom s = raise XML_ATOM s;
-
-fun unit_atom "" = ()
- | unit_atom s = raise XML_ATOM s;
-
-
-(* structural nodes *)
-
-fun node (Elem ((":", []), ts)) = ts
- | node t = raise XML_BODY [t];
-
-fun vector atts =
- #1 (fold_map (fn (a, x) =>
- fn i => if int_atom a = i then (x, i + 1) else raise XML_ATOM a) atts 0);
-
-fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts))
- | tagged t = raise XML_BODY [t];
-
-
-(* representation of standard types *)
-
-fun properties [Elem ((":", props), [])] = props
- | properties ts = raise XML_BODY ts;
-
-fun string [] = ""
- | string [Text s] = s
- | string ts = raise XML_BODY ts;
-
-val int = int_atom o string;
-
-val bool = bool_atom o string;
-
-val unit = unit_atom o string;
-
-fun pair f g [t1, t2] = (f (node t1), g (node t2))
- | pair _ _ ts = raise XML_BODY ts;
-
-fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3))
- | triple _ _ _ ts = raise XML_BODY ts;
-
-fun list f ts = map (f o node) ts;
-
-fun option _ [] = NONE
- | option f [t] = SOME (f (node t))
- | option _ ts = raise XML_BODY ts;
-
-fun variant fs [t] =
- let
- val (tag, (xs, ts)) = tagged t;
- val f = nth fs tag handle General.Subscript => raise XML_BODY [t];
- in f (xs, ts) end
- | variant _ ts = raise XML_BODY ts;
-
-end;
-
-end;
--- a/src/Pure/General/xml.scala Sun Sep 04 14:29:15 2011 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,390 +0,0 @@
-/* Title: Pure/General/xml.scala
- Author: Makarius
-
-Untyped XML trees.
-*/
-
-package isabelle
-
-import java.lang.System
-import java.util.WeakHashMap
-import java.lang.ref.WeakReference
-import javax.xml.parsers.DocumentBuilderFactory
-
-import scala.actors.Actor._
-import scala.collection.mutable
-
-
-object XML
-{
- /** XML trees **/
-
- /* datatype representation */
-
- type Attributes = Properties.T
-
- sealed abstract class Tree { override def toString = string_of_tree(this) }
- case class Elem(markup: Markup, body: List[Tree]) extends Tree
- case class Text(content: String) extends Tree
-
- def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body)
- def elem(name: String) = Elem(Markup(name, Nil), Nil)
-
- type Body = List[Tree]
-
-
- /* string representation */
-
- def string_of_body(body: Body): String =
- {
- val s = new StringBuilder
-
- def text(txt: String) {
- if (txt == null) s ++= txt
- else {
- for (c <- txt.iterator) c match {
- case '<' => s ++= "<"
- case '>' => s ++= ">"
- case '&' => s ++= "&"
- case '"' => s ++= """
- case '\'' => s ++= "'"
- case _ => s += c
- }
- }
- }
- def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" }
- def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) }
- def tree(t: Tree): Unit =
- t match {
- case Elem(markup, Nil) =>
- s ++= "<"; elem(markup); s ++= "/>"
- case Elem(markup, ts) =>
- s ++= "<"; elem(markup); s ++= ">"
- ts.foreach(tree)
- s ++= "</"; s ++= markup.name; s ++= ">"
- case Text(txt) => text(txt)
- }
- body.foreach(tree)
- s.toString
- }
-
- def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
-
-
- /* text content */
-
- def content_stream(tree: Tree): Stream[String] =
- tree match {
- case Elem(_, body) => content_stream(body)
- case Text(content) => Stream(content)
- }
- def content_stream(body: Body): Stream[String] =
- body.toStream.flatten(content_stream(_))
-
- def content(tree: Tree): Iterator[String] = content_stream(tree).iterator
- def content(body: Body): Iterator[String] = content_stream(body).iterator
-
-
- /* pipe-lined cache for partial sharing */
-
- class Cache(initial_size: Int = 131071, max_string: Int = 100)
- {
- private val cache_actor = actor
- {
- val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)
-
- def lookup[A](x: A): Option[A] =
- {
- val ref = table.get(x)
- if (ref == null) None
- else {
- val y = ref.asInstanceOf[WeakReference[A]].get
- if (y == null) None
- else Some(y)
- }
- }
- def store[A](x: A): A =
- {
- table.put(x, new WeakReference[Any](x))
- x
- }
-
- def trim_bytes(s: String): String = new String(s.toCharArray)
-
- def cache_string(x: String): String =
- lookup(x) match {
- case Some(y) => y
- case None =>
- val z = trim_bytes(x)
- if (z.length > max_string) z else store(z)
- }
- def cache_props(x: Properties.T): Properties.T =
- if (x.isEmpty) x
- else
- lookup(x) match {
- case Some(y) => y
- case None => store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2))))
- }
- def cache_markup(x: Markup): Markup =
- lookup(x) match {
- case Some(y) => y
- case None =>
- x match {
- case Markup(name, props) =>
- store(Markup(cache_string(name), cache_props(props)))
- }
- }
- def cache_tree(x: XML.Tree): XML.Tree =
- lookup(x) match {
- case Some(y) => y
- case None =>
- x match {
- case XML.Elem(markup, body) =>
- store(XML.Elem(cache_markup(markup), cache_body(body)))
- case XML.Text(text) => store(XML.Text(cache_string(text)))
- }
- }
- def cache_body(x: XML.Body): XML.Body =
- if (x.isEmpty) x
- else
- lookup(x) match {
- case Some(y) => y
- case None => x.map(cache_tree(_))
- }
-
- // main loop
- loop {
- react {
- case Cache_String(x, f) => f(cache_string(x))
- case Cache_Markup(x, f) => f(cache_markup(x))
- case Cache_Tree(x, f) => f(cache_tree(x))
- case Cache_Body(x, f) => f(cache_body(x))
- case Cache_Ignore(x, f) => f(x)
- case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad)
- }
- }
- }
-
- private case class Cache_String(x: String, f: String => Unit)
- private case class Cache_Markup(x: Markup, f: Markup => Unit)
- private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit)
- private case class Cache_Body(x: XML.Body, f: XML.Body => Unit)
- private case class Cache_Ignore[A](x: A, f: A => Unit)
-
- // main methods
- def cache_string(x: String)(f: String => Unit) { cache_actor ! Cache_String(x, f) }
- def cache_markup(x: Markup)(f: Markup => Unit) { cache_actor ! Cache_Markup(x, f) }
- def cache_tree(x: XML.Tree)(f: XML.Tree => Unit) { cache_actor ! Cache_Tree(x, f) }
- def cache_body(x: XML.Body)(f: XML.Body => Unit) { cache_actor ! Cache_Body(x, f) }
- def cache_ignore[A](x: A)(f: A => Unit) { cache_actor ! Cache_Ignore(x, f) }
- }
-
-
-
- /** document object model (W3C DOM) **/
-
- def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
- node.getUserData(Markup.Data.name) match {
- case tree: XML.Tree => Some(tree)
- case _ => None
- }
-
- def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
- {
- def DOM(tr: Tree): org.w3c.dom.Node = tr match {
- case Elem(Markup.Data, List(data, t)) =>
- val node = DOM(t)
- node.setUserData(Markup.Data.name, data, null)
- node
- case Elem(Markup(name, atts), ts) =>
- if (name == Markup.Data.name)
- error("Malformed data element: " + tr.toString)
- val node = doc.createElement(name)
- for ((name, value) <- atts) node.setAttribute(name, value)
- for (t <- ts) node.appendChild(DOM(t))
- node
- case Text(txt) => doc.createTextNode(txt)
- }
- DOM(tree)
- }
-
-
-
- /** XML as data representation language **/
-
- class XML_Atom(s: String) extends Exception(s)
- class XML_Body(body: XML.Body) extends Exception
-
- object Encode
- {
- type T[A] = A => XML.Body
-
-
- /* atomic values */
-
- def long_atom(i: Long): String = i.toString
-
- def int_atom(i: Int): String = i.toString
-
- def bool_atom(b: Boolean): String = if (b) "1" else "0"
-
- def unit_atom(u: Unit) = ""
-
-
- /* structural nodes */
-
- private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)
-
- private def vector(xs: List[String]): XML.Attributes =
- xs.zipWithIndex.map(p => (int_atom(p._2), p._1))
-
- private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
- XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2)
-
-
- /* representation of standard types */
-
- val properties: T[Properties.T] =
- (props => List(XML.Elem(Markup(":", props), Nil)))
-
- val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s)))
-
- val long: T[Long] = (x => string(long_atom(x)))
-
- val int: T[Int] = (x => string(int_atom(x)))
-
- val bool: T[Boolean] = (x => string(bool_atom(x)))
-
- val unit: T[Unit] = (x => string(unit_atom(x)))
-
- def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
- (x => List(node(f(x._1)), node(g(x._2))))
-
- def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
- (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3))))
-
- def list[A](f: T[A]): T[List[A]] =
- (xs => xs.map((x: A) => node(f(x))))
-
- def option[A](f: T[A]): T[Option[A]] =
- {
- case None => Nil
- case Some(x) => List(node(f(x)))
- }
-
- def variant[A](fs: List[PartialFunction[A, (List[String], XML.Body)]]): T[A] =
- {
- case x =>
- val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get
- List(tagged(tag, f(x)))
- }
- }
-
- object Decode
- {
- type T[A] = XML.Body => A
- type V[A] = (List[String], XML.Body) => A
-
-
- /* atomic values */
-
- def long_atom(s: String): Long =
- try { java.lang.Long.parseLong(s) }
- catch { case e: NumberFormatException => throw new XML_Atom(s) }
-
- def int_atom(s: String): Int =
- try { Integer.parseInt(s) }
- catch { case e: NumberFormatException => throw new XML_Atom(s) }
-
- def bool_atom(s: String): Boolean =
- if (s == "1") true
- else if (s == "0") false
- else throw new XML_Atom(s)
-
- def unit_atom(s: String): Unit =
- if (s == "") () else throw new XML_Atom(s)
-
-
- /* structural nodes */
-
- private def node(t: XML.Tree): XML.Body =
- t match {
- case XML.Elem(Markup(":", Nil), ts) => ts
- case _ => throw new XML_Body(List(t))
- }
-
- private def vector(atts: XML.Attributes): List[String] =
- {
- val xs = new mutable.ListBuffer[String]
- var i = 0
- for ((a, x) <- atts) {
- if (int_atom(a) == i) { xs += x; i = i + 1 }
- else throw new XML_Atom(a)
- }
- xs.toList
- }
-
- private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
- t match {
- case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts))
- case _ => throw new XML_Body(List(t))
- }
-
-
- /* representation of standard types */
-
- val properties: T[Properties.T] =
- {
- case List(XML.Elem(Markup(":", props), Nil)) => props
- case ts => throw new XML_Body(ts)
- }
-
- val string: T[String] =
- {
- case Nil => ""
- case List(XML.Text(s)) => s
- case ts => throw new XML_Body(ts)
- }
-
- val long: T[Long] = (x => long_atom(string(x)))
-
- val int: T[Int] = (x => int_atom(string(x)))
-
- val bool: T[Boolean] = (x => bool_atom(string(x)))
-
- val unit: T[Unit] = (x => unit_atom(string(x)))
-
- def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
- {
- case List(t1, t2) => (f(node(t1)), g(node(t2)))
- case ts => throw new XML_Body(ts)
- }
-
- def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
- {
- case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3)))
- case ts => throw new XML_Body(ts)
- }
-
- def list[A](f: T[A]): T[List[A]] =
- (ts => ts.map(t => f(node(t))))
-
- def option[A](f: T[A]): T[Option[A]] =
- {
- case Nil => None
- case List(t) => Some(f(node(t)))
- case ts => throw new XML_Body(ts)
- }
-
- def variant[A](fs: List[V[A]]): T[A] =
- {
- case List(t) =>
- val (tag, (xs, ts)) = tagged(t)
- val f =
- try { fs(tag) }
- catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) }
- f(xs, ts)
- case ts => throw new XML_Body(ts)
- }
- }
-}
--- a/src/Pure/General/yxml.ML Sun Sep 04 14:29:15 2011 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,147 +0,0 @@
-(* Title: Pure/General/yxml.ML
- Author: Makarius
-
-Efficient text representation of XML trees using extra characters X
-and Y -- no escaping, may nest marked text verbatim.
-
-Markup <elem att="val" ...>...body...</elem> is encoded as:
-
- X Y name Y att=val ... X
- ...
- body
- ...
- X Y X
-*)
-
-signature YXML =
-sig
- val X: Symbol.symbol
- val Y: Symbol.symbol
- val embed_controls: string -> string
- val detect: string -> bool
- val output_markup: Markup.T -> string * string
- val element: string -> XML.attributes -> string list -> string
- val string_of_body: XML.body -> string
- val string_of: XML.tree -> string
- val parse_body: string -> XML.body
- val parse: string -> XML.tree
-end;
-
-structure YXML: YXML =
-struct
-
-(** string representation **)
-
-(* idempotent recoding of certain low ASCII control characters *)
-
-fun pseudo_utf8 c =
- if Symbol.is_ascii_control c
- then chr 192 ^ chr (128 + ord c)
- else c;
-
-fun embed_controls str =
- if exists_string Symbol.is_ascii_control str
- then translate_string pseudo_utf8 str
- else str;
-
-
-(* markers *)
-
-val X = chr 5;
-val Y = chr 6;
-val XY = X ^ Y;
-val XYX = XY ^ X;
-
-val detect = exists_string (fn s => s = X orelse s = Y);
-
-
-(* output *)
-
-fun output_markup (markup as (name, atts)) =
- if Markup.is_empty markup then Markup.no_output
- else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX);
-
-fun element name atts body =
- let val (pre, post) = output_markup (name, atts)
- in pre ^ implode body ^ post end;
-
-fun string_of_body body =
- let
- fun attrib (a, x) =
- Buffer.add Y #>
- Buffer.add a #> Buffer.add "=" #> Buffer.add x;
- fun tree (XML.Elem ((name, atts), ts)) =
- Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #>
- trees ts #>
- Buffer.add XYX
- | tree (XML.Text s) = Buffer.add s
- and trees ts = fold tree ts;
- in Buffer.empty |> trees body |> Buffer.content end;
-
-val string_of = string_of_body o single;
-
-
-
-(** efficient YXML parsing **)
-
-local
-
-(* splitting *)
-
-fun is_char s c = ord s = Char.ord c;
-
-val split_string =
- Substring.full #>
- Substring.tokens (is_char X) #>
- map (Substring.fields (is_char Y) #> map Substring.string);
-
-
-(* structural errors *)
-
-fun err msg = raise Fail ("Malformed YXML encoding: " ^ msg);
-fun err_attribute () = err "bad attribute";
-fun err_element () = err "bad element";
-fun err_unbalanced "" = err "unbalanced element"
- | err_unbalanced name = err ("unbalanced element " ^ quote name);
-
-
-(* stack operations *)
-
-fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending;
-
-fun push "" _ _ = err_element ()
- | push name atts pending = ((name, atts), []) :: pending;
-
-fun pop ((("", _), _) :: _) = err_unbalanced ""
- | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending;
-
-
-(* parsing *)
-
-fun parse_attrib s =
- (case first_field "=" s of
- NONE => err_attribute ()
- | SOME ("", _) => err_attribute ()
- | SOME att => att);
-
-fun parse_chunk ["", ""] = pop
- | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts)
- | parse_chunk txts = fold (add o XML.Text) txts;
-
-in
-
-fun parse_body source =
- (case fold parse_chunk (split_string source) [(("", []), [])] of
- [(("", _), result)] => rev result
- | ((name, _), _) :: _ => err_unbalanced name);
-
-fun parse source =
- (case parse_body source of
- [result] => result
- | [] => XML.Text ""
- | _ => err "multiple results");
-
-end;
-
-end;
-
--- a/src/Pure/General/yxml.scala Sun Sep 04 14:29:15 2011 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,134 +0,0 @@
-/* Title: Pure/General/yxml.scala
- Author: Makarius
-
-Efficient text representation of XML trees.
-*/
-
-package isabelle
-
-
-import scala.collection.mutable
-
-
-object YXML
-{
- /* chunk markers */
-
- val X = '\5'
- val Y = '\6'
- val X_string = X.toString
- val Y_string = Y.toString
-
- def detect(s: String): Boolean = s.exists(c => c == X || c == Y)
-
-
- /* string representation */ // FIXME byte array version with pseudo-utf-8 (!?)
-
- def string_of_body(body: XML.Body): String =
- {
- val s = new StringBuilder
- def attrib(p: (String, String)) { s += Y; s ++= p._1; s += '='; s ++= p._2 }
- def tree(t: XML.Tree): Unit =
- t match {
- case XML.Elem(Markup(name, atts), ts) =>
- s += X; s += Y; s++= name; atts.foreach(attrib); s += X
- ts.foreach(tree)
- s += X; s += Y; s += X
- case XML.Text(text) => s ++= text
- }
- body.foreach(tree)
- s.toString
- }
-
- def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
-
-
- /* parsing */
-
- private def err(msg: String) = error("Malformed YXML: " + msg)
- private def err_attribute() = err("bad attribute")
- private def err_element() = err("bad element")
- private def err_unbalanced(name: String) =
- if (name == "") err("unbalanced element")
- else err("unbalanced element " + quote(name))
-
- private def parse_attrib(source: CharSequence) = {
- val s = source.toString
- val i = s.indexOf('=')
- if (i <= 0) err_attribute()
- (s.substring(0, i), s.substring(i + 1))
- }
-
-
- def parse_body(source: CharSequence): XML.Body =
- {
- /* stack operations */
-
- def buffer(): mutable.ListBuffer[XML.Tree] = new mutable.ListBuffer[XML.Tree]
- var stack: List[(Markup, mutable.ListBuffer[XML.Tree])] = List((Markup.Empty, buffer()))
-
- def add(x: XML.Tree)
- {
- (stack: @unchecked) match { case ((_, body) :: _) => body += x }
- }
-
- def push(name: String, atts: XML.Attributes)
- {
- if (name == "") err_element()
- else stack = (Markup(name, atts), buffer()) :: stack
- }
-
- def pop()
- {
- (stack: @unchecked) match {
- case ((Markup.Empty, _) :: _) => err_unbalanced("")
- case ((markup, body) :: pending) =>
- stack = pending
- add(XML.Elem(markup, body.toList))
- }
- }
-
-
- /* parse chunks */
-
- for (chunk <- Library.chunks(source, X) if chunk.length != 0) {
- if (chunk.length == 1 && chunk.charAt(0) == Y) pop()
- else {
- Library.chunks(chunk, Y).toList match {
- case ch :: name :: atts if ch.length == 0 =>
- push(name.toString, atts.map(parse_attrib))
- case txts => for (txt <- txts) add(XML.Text(txt.toString))
- }
- }
- }
- (stack: @unchecked) match {
- case List((Markup.Empty, body)) => body.toList
- case (Markup(name, _), _) :: _ => err_unbalanced(name)
- }
- }
-
- def parse(source: CharSequence): XML.Tree =
- parse_body(source) match {
- case List(result) => result
- case Nil => XML.Text("")
- case _ => err("multiple results")
- }
-
-
- /* failsafe parsing */
-
- private def markup_malformed(source: CharSequence) =
- XML.elem(Markup.MALFORMED, List(XML.Text(source.toString)))
-
- def parse_body_failsafe(source: CharSequence): XML.Body =
- {
- try { parse_body(source) }
- catch { case ERROR(_) => List(markup_malformed(source)) }
- }
-
- def parse_failsafe(source: CharSequence): XML.Tree =
- {
- try { parse(source) }
- catch { case ERROR(_) => markup_malformed(source) }
- }
-}
--- a/src/Pure/IsaMakefile Sun Sep 04 14:29:15 2011 +0200
+++ b/src/Pure/IsaMakefile Sun Sep 04 15:21:50 2011 +0200
@@ -105,8 +105,6 @@
General/table.ML \
General/timing.ML \
General/url.ML \
- General/xml.ML \
- General/yxml.ML \
Isar/args.ML \
Isar/attrib.ML \
Isar/auto_bind.ML \
@@ -158,6 +156,8 @@
ML/ml_thms.ML \
PIDE/document.ML \
PIDE/isar_document.ML \
+ PIDE/xml.ML \
+ PIDE/yxml.ML \
Proof/extraction.ML \
Proof/proof_checker.ML \
Proof/proof_rewrite_rules.ML \
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/PIDE/xml.ML Sun Sep 04 15:21:50 2011 +0200
@@ -0,0 +1,363 @@
+(* Title: Pure/PIDE/xml.ML
+ Author: David Aspinall
+ Author: Stefan Berghofer
+ Author: Makarius
+
+Untyped XML trees and basic data representation.
+*)
+
+signature XML_DATA_OPS =
+sig
+ type 'a A
+ type 'a T
+ type 'a V
+ val int_atom: int A
+ val bool_atom: bool A
+ val unit_atom: unit A
+ val properties: Properties.T T
+ val string: string T
+ val int: int T
+ val bool: bool T
+ val unit: unit T
+ val pair: 'a T -> 'b T -> ('a * 'b) T
+ val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
+ val list: 'a T -> 'a list T
+ val option: 'a T -> 'a option T
+ val variant: 'a V list -> 'a T
+end;
+
+signature XML =
+sig
+ type attributes = Properties.T
+ datatype tree =
+ Elem of Markup.T * tree list
+ | Text of string
+ type body = tree list
+ val add_content: tree -> Buffer.T -> Buffer.T
+ val content_of: body -> string
+ val header: string
+ val text: string -> string
+ val element: string -> attributes -> string list -> string
+ val output_markup: Markup.T -> Output.output * Output.output
+ val string_of: tree -> string
+ val pretty: int -> tree -> Pretty.T
+ val output: tree -> TextIO.outstream -> unit
+ val parse_comments: string list -> unit * string list
+ val parse_string : string -> string option
+ val parse_element: string list -> tree * string list
+ val parse_document: string list -> tree * string list
+ val parse: string -> tree
+ exception XML_ATOM of string
+ exception XML_BODY of body
+ structure Encode: XML_DATA_OPS
+ structure Decode: XML_DATA_OPS
+end;
+
+structure XML: XML =
+struct
+
+(** XML trees **)
+
+type attributes = Properties.T;
+
+datatype tree =
+ Elem of Markup.T * tree list
+ | Text of string;
+
+type body = tree list;
+
+fun add_content (Elem (_, ts)) = fold add_content ts
+ | add_content (Text s) = Buffer.add s;
+
+fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content;
+
+
+
+(** string representation **)
+
+val header = "<?xml version=\"1.0\"?>\n";
+
+
+(* escaped text *)
+
+fun decode "<" = "<"
+ | decode ">" = ">"
+ | decode "&" = "&"
+ | decode "'" = "'"
+ | decode """ = "\""
+ | decode c = c;
+
+fun encode "<" = "<"
+ | encode ">" = ">"
+ | encode "&" = "&"
+ | encode "'" = "'"
+ | encode "\"" = """
+ | encode c = c;
+
+val text = translate_string encode;
+
+
+(* elements *)
+
+fun elem name atts =
+ space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);
+
+fun element name atts body =
+ let val b = implode body in
+ if b = "" then enclose "<" "/>" (elem name atts)
+ else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name
+ end;
+
+fun output_markup (markup as (name, atts)) =
+ if Markup.is_empty markup then Markup.no_output
+ else (enclose "<" ">" (elem name atts), enclose "</" ">" name);
+
+
+(* output *)
+
+fun buffer_of depth tree =
+ let
+ fun traverse _ (Elem ((name, atts), [])) =
+ Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"
+ | traverse d (Elem ((name, atts), ts)) =
+ Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>
+ traverse_body d ts #>
+ Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
+ | traverse _ (Text s) = Buffer.add (text s)
+ and traverse_body 0 _ = Buffer.add "..."
+ | traverse_body d ts = fold (traverse (d - 1)) ts;
+ in Buffer.empty |> traverse depth tree end;
+
+val string_of = Buffer.content o buffer_of ~1;
+val output = Buffer.output o buffer_of ~1;
+
+fun pretty depth tree =
+ Pretty.str (Buffer.content (buffer_of (Int.max (0, depth)) tree));
+
+
+
+(** XML parsing **)
+
+local
+
+fun err msg (xs, _) =
+ fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
+
+fun ignored _ = [];
+
+val blanks = Scan.many Symbol.is_blank;
+val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
+val regular = Scan.one Symbol.is_regular;
+fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);
+
+val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;
+
+val parse_cdata =
+ Scan.this_string "<![CDATA[" |--
+ (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|
+ Scan.this_string "]]>";
+
+val parse_att =
+ (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) --
+ (($$ "\"" || $$ "'") :|-- (fn s =>
+ (Scan.repeat (special || regular_except s) >> implode) --| $$ s));
+
+val parse_comment =
+ Scan.this_string "<!--" --
+ Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
+ Scan.this_string "-->" >> ignored;
+
+val parse_processing_instruction =
+ Scan.this_string "<?" --
+ Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
+ Scan.this_string "?>" >> ignored;
+
+val parse_doctype =
+ Scan.this_string "<!DOCTYPE" --
+ Scan.repeat (Scan.unless ($$ ">") regular) --
+ $$ ">" >> ignored;
+
+val parse_misc =
+ Scan.one Symbol.is_blank >> ignored ||
+ parse_processing_instruction ||
+ parse_comment;
+
+val parse_optional_text =
+ Scan.optional (parse_chars >> (single o Text)) [];
+
+fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_";
+fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = ".";
+val parse_name = Scan.one name_start_char ::: Scan.many name_char;
+
+in
+
+val parse_comments =
+ blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
+
+val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode;
+
+fun parse_content xs =
+ (parse_optional_text @@@
+ (Scan.repeat
+ ((parse_element >> single ||
+ parse_cdata >> (single o Text) ||
+ parse_processing_instruction ||
+ parse_comment)
+ @@@ parse_optional_text) >> flat)) xs
+
+and parse_element xs =
+ ($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :--
+ (fn (name, _) =>
+ !! (err (fn () => "Expected > or />"))
+ ($$ "/" -- $$ ">" >> ignored ||
+ $$ ">" |-- parse_content --|
+ !! (err (fn () => "Expected </" ^ implode name ^ ">"))
+ ($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">")))
+ >> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs;
+
+val parse_document =
+ (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)
+ |-- parse_element;
+
+fun parse s =
+ (case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element"))
+ (blanks |-- parse_document --| blanks))) (raw_explode s) of
+ (x, []) => x
+ | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
+
+end;
+
+
+
+(** XML as data representation language **)
+
+exception XML_ATOM of string;
+exception XML_BODY of tree list;
+
+
+structure Encode =
+struct
+
+type 'a A = 'a -> string;
+type 'a T = 'a -> body;
+type 'a V = 'a -> string list * body;
+
+
+(* atomic values *)
+
+fun int_atom i = signed_string_of_int i;
+
+fun bool_atom false = "0"
+ | bool_atom true = "1";
+
+fun unit_atom () = "";
+
+
+(* structural nodes *)
+
+fun node ts = Elem ((":", []), ts);
+
+fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs;
+
+fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts);
+
+
+(* representation of standard types *)
+
+fun properties props = [Elem ((":", props), [])];
+
+fun string "" = []
+ | string s = [Text s];
+
+val int = string o int_atom;
+
+val bool = string o bool_atom;
+
+val unit = string o unit_atom;
+
+fun pair f g (x, y) = [node (f x), node (g y)];
+
+fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)];
+
+fun list f xs = map (node o f) xs;
+
+fun option _ NONE = []
+ | option f (SOME x) = [node (f x)];
+
+fun variant fs x = [tagged (the (get_index (fn f => try f x) fs))];
+
+end;
+
+
+structure Decode =
+struct
+
+type 'a A = string -> 'a;
+type 'a T = body -> 'a;
+type 'a V = string list * body -> 'a;
+
+
+(* atomic values *)
+
+fun int_atom s =
+ Markup.parse_int s
+ handle Fail _ => raise XML_ATOM s;
+
+fun bool_atom "0" = false
+ | bool_atom "1" = true
+ | bool_atom s = raise XML_ATOM s;
+
+fun unit_atom "" = ()
+ | unit_atom s = raise XML_ATOM s;
+
+
+(* structural nodes *)
+
+fun node (Elem ((":", []), ts)) = ts
+ | node t = raise XML_BODY [t];
+
+fun vector atts =
+ #1 (fold_map (fn (a, x) =>
+ fn i => if int_atom a = i then (x, i + 1) else raise XML_ATOM a) atts 0);
+
+fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts))
+ | tagged t = raise XML_BODY [t];
+
+
+(* representation of standard types *)
+
+fun properties [Elem ((":", props), [])] = props
+ | properties ts = raise XML_BODY ts;
+
+fun string [] = ""
+ | string [Text s] = s
+ | string ts = raise XML_BODY ts;
+
+val int = int_atom o string;
+
+val bool = bool_atom o string;
+
+val unit = unit_atom o string;
+
+fun pair f g [t1, t2] = (f (node t1), g (node t2))
+ | pair _ _ ts = raise XML_BODY ts;
+
+fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3))
+ | triple _ _ _ ts = raise XML_BODY ts;
+
+fun list f ts = map (f o node) ts;
+
+fun option _ [] = NONE
+ | option f [t] = SOME (f (node t))
+ | option _ ts = raise XML_BODY ts;
+
+fun variant fs [t] =
+ let
+ val (tag, (xs, ts)) = tagged t;
+ val f = nth fs tag handle General.Subscript => raise XML_BODY [t];
+ in f (xs, ts) end
+ | variant _ ts = raise XML_BODY ts;
+
+end;
+
+end;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/PIDE/xml.scala Sun Sep 04 15:21:50 2011 +0200
@@ -0,0 +1,390 @@
+/* Title: Pure/PIDE/xml.scala
+ Author: Makarius
+
+Untyped XML trees and basic data representation.
+*/
+
+package isabelle
+
+import java.lang.System
+import java.util.WeakHashMap
+import java.lang.ref.WeakReference
+import javax.xml.parsers.DocumentBuilderFactory
+
+import scala.actors.Actor._
+import scala.collection.mutable
+
+
+object XML
+{
+ /** XML trees **/
+
+ /* datatype representation */
+
+ type Attributes = Properties.T
+
+ sealed abstract class Tree { override def toString = string_of_tree(this) }
+ case class Elem(markup: Markup, body: List[Tree]) extends Tree
+ case class Text(content: String) extends Tree
+
+ def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body)
+ def elem(name: String) = Elem(Markup(name, Nil), Nil)
+
+ type Body = List[Tree]
+
+
+ /* string representation */
+
+ def string_of_body(body: Body): String =
+ {
+ val s = new StringBuilder
+
+ def text(txt: String) {
+ if (txt == null) s ++= txt
+ else {
+ for (c <- txt.iterator) c match {
+ case '<' => s ++= "<"
+ case '>' => s ++= ">"
+ case '&' => s ++= "&"
+ case '"' => s ++= """
+ case '\'' => s ++= "'"
+ case _ => s += c
+ }
+ }
+ }
+ def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" }
+ def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) }
+ def tree(t: Tree): Unit =
+ t match {
+ case Elem(markup, Nil) =>
+ s ++= "<"; elem(markup); s ++= "/>"
+ case Elem(markup, ts) =>
+ s ++= "<"; elem(markup); s ++= ">"
+ ts.foreach(tree)
+ s ++= "</"; s ++= markup.name; s ++= ">"
+ case Text(txt) => text(txt)
+ }
+ body.foreach(tree)
+ s.toString
+ }
+
+ def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
+
+
+ /* text content */
+
+ def content_stream(tree: Tree): Stream[String] =
+ tree match {
+ case Elem(_, body) => content_stream(body)
+ case Text(content) => Stream(content)
+ }
+ def content_stream(body: Body): Stream[String] =
+ body.toStream.flatten(content_stream(_))
+
+ def content(tree: Tree): Iterator[String] = content_stream(tree).iterator
+ def content(body: Body): Iterator[String] = content_stream(body).iterator
+
+
+ /* pipe-lined cache for partial sharing */
+
+ class Cache(initial_size: Int = 131071, max_string: Int = 100)
+ {
+ private val cache_actor = actor
+ {
+ val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)
+
+ def lookup[A](x: A): Option[A] =
+ {
+ val ref = table.get(x)
+ if (ref == null) None
+ else {
+ val y = ref.asInstanceOf[WeakReference[A]].get
+ if (y == null) None
+ else Some(y)
+ }
+ }
+ def store[A](x: A): A =
+ {
+ table.put(x, new WeakReference[Any](x))
+ x
+ }
+
+ def trim_bytes(s: String): String = new String(s.toCharArray)
+
+ def cache_string(x: String): String =
+ lookup(x) match {
+ case Some(y) => y
+ case None =>
+ val z = trim_bytes(x)
+ if (z.length > max_string) z else store(z)
+ }
+ def cache_props(x: Properties.T): Properties.T =
+ if (x.isEmpty) x
+ else
+ lookup(x) match {
+ case Some(y) => y
+ case None => store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2))))
+ }
+ def cache_markup(x: Markup): Markup =
+ lookup(x) match {
+ case Some(y) => y
+ case None =>
+ x match {
+ case Markup(name, props) =>
+ store(Markup(cache_string(name), cache_props(props)))
+ }
+ }
+ def cache_tree(x: XML.Tree): XML.Tree =
+ lookup(x) match {
+ case Some(y) => y
+ case None =>
+ x match {
+ case XML.Elem(markup, body) =>
+ store(XML.Elem(cache_markup(markup), cache_body(body)))
+ case XML.Text(text) => store(XML.Text(cache_string(text)))
+ }
+ }
+ def cache_body(x: XML.Body): XML.Body =
+ if (x.isEmpty) x
+ else
+ lookup(x) match {
+ case Some(y) => y
+ case None => x.map(cache_tree(_))
+ }
+
+ // main loop
+ loop {
+ react {
+ case Cache_String(x, f) => f(cache_string(x))
+ case Cache_Markup(x, f) => f(cache_markup(x))
+ case Cache_Tree(x, f) => f(cache_tree(x))
+ case Cache_Body(x, f) => f(cache_body(x))
+ case Cache_Ignore(x, f) => f(x)
+ case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad)
+ }
+ }
+ }
+
+ private case class Cache_String(x: String, f: String => Unit)
+ private case class Cache_Markup(x: Markup, f: Markup => Unit)
+ private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit)
+ private case class Cache_Body(x: XML.Body, f: XML.Body => Unit)
+ private case class Cache_Ignore[A](x: A, f: A => Unit)
+
+ // main methods
+ def cache_string(x: String)(f: String => Unit) { cache_actor ! Cache_String(x, f) }
+ def cache_markup(x: Markup)(f: Markup => Unit) { cache_actor ! Cache_Markup(x, f) }
+ def cache_tree(x: XML.Tree)(f: XML.Tree => Unit) { cache_actor ! Cache_Tree(x, f) }
+ def cache_body(x: XML.Body)(f: XML.Body => Unit) { cache_actor ! Cache_Body(x, f) }
+ def cache_ignore[A](x: A)(f: A => Unit) { cache_actor ! Cache_Ignore(x, f) }
+ }
+
+
+
+ /** document object model (W3C DOM) **/
+
+ def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
+ node.getUserData(Markup.Data.name) match {
+ case tree: XML.Tree => Some(tree)
+ case _ => None
+ }
+
+ def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
+ {
+ def DOM(tr: Tree): org.w3c.dom.Node = tr match {
+ case Elem(Markup.Data, List(data, t)) =>
+ val node = DOM(t)
+ node.setUserData(Markup.Data.name, data, null)
+ node
+ case Elem(Markup(name, atts), ts) =>
+ if (name == Markup.Data.name)
+ error("Malformed data element: " + tr.toString)
+ val node = doc.createElement(name)
+ for ((name, value) <- atts) node.setAttribute(name, value)
+ for (t <- ts) node.appendChild(DOM(t))
+ node
+ case Text(txt) => doc.createTextNode(txt)
+ }
+ DOM(tree)
+ }
+
+
+
+ /** XML as data representation language **/
+
+ class XML_Atom(s: String) extends Exception(s)
+ class XML_Body(body: XML.Body) extends Exception
+
+ object Encode
+ {
+ type T[A] = A => XML.Body
+
+
+ /* atomic values */
+
+ def long_atom(i: Long): String = i.toString
+
+ def int_atom(i: Int): String = i.toString
+
+ def bool_atom(b: Boolean): String = if (b) "1" else "0"
+
+ def unit_atom(u: Unit) = ""
+
+
+ /* structural nodes */
+
+ private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)
+
+ private def vector(xs: List[String]): XML.Attributes =
+ xs.zipWithIndex.map(p => (int_atom(p._2), p._1))
+
+ private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
+ XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2)
+
+
+ /* representation of standard types */
+
+ val properties: T[Properties.T] =
+ (props => List(XML.Elem(Markup(":", props), Nil)))
+
+ val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s)))
+
+ val long: T[Long] = (x => string(long_atom(x)))
+
+ val int: T[Int] = (x => string(int_atom(x)))
+
+ val bool: T[Boolean] = (x => string(bool_atom(x)))
+
+ val unit: T[Unit] = (x => string(unit_atom(x)))
+
+ def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
+ (x => List(node(f(x._1)), node(g(x._2))))
+
+ def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
+ (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3))))
+
+ def list[A](f: T[A]): T[List[A]] =
+ (xs => xs.map((x: A) => node(f(x))))
+
+ def option[A](f: T[A]): T[Option[A]] =
+ {
+ case None => Nil
+ case Some(x) => List(node(f(x)))
+ }
+
+ def variant[A](fs: List[PartialFunction[A, (List[String], XML.Body)]]): T[A] =
+ {
+ case x =>
+ val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get
+ List(tagged(tag, f(x)))
+ }
+ }
+
+ object Decode
+ {
+ type T[A] = XML.Body => A
+ type V[A] = (List[String], XML.Body) => A
+
+
+ /* atomic values */
+
+ def long_atom(s: String): Long =
+ try { java.lang.Long.parseLong(s) }
+ catch { case e: NumberFormatException => throw new XML_Atom(s) }
+
+ def int_atom(s: String): Int =
+ try { Integer.parseInt(s) }
+ catch { case e: NumberFormatException => throw new XML_Atom(s) }
+
+ def bool_atom(s: String): Boolean =
+ if (s == "1") true
+ else if (s == "0") false
+ else throw new XML_Atom(s)
+
+ def unit_atom(s: String): Unit =
+ if (s == "") () else throw new XML_Atom(s)
+
+
+ /* structural nodes */
+
+ private def node(t: XML.Tree): XML.Body =
+ t match {
+ case XML.Elem(Markup(":", Nil), ts) => ts
+ case _ => throw new XML_Body(List(t))
+ }
+
+ private def vector(atts: XML.Attributes): List[String] =
+ {
+ val xs = new mutable.ListBuffer[String]
+ var i = 0
+ for ((a, x) <- atts) {
+ if (int_atom(a) == i) { xs += x; i = i + 1 }
+ else throw new XML_Atom(a)
+ }
+ xs.toList
+ }
+
+ private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
+ t match {
+ case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts))
+ case _ => throw new XML_Body(List(t))
+ }
+
+
+ /* representation of standard types */
+
+ val properties: T[Properties.T] =
+ {
+ case List(XML.Elem(Markup(":", props), Nil)) => props
+ case ts => throw new XML_Body(ts)
+ }
+
+ val string: T[String] =
+ {
+ case Nil => ""
+ case List(XML.Text(s)) => s
+ case ts => throw new XML_Body(ts)
+ }
+
+ val long: T[Long] = (x => long_atom(string(x)))
+
+ val int: T[Int] = (x => int_atom(string(x)))
+
+ val bool: T[Boolean] = (x => bool_atom(string(x)))
+
+ val unit: T[Unit] = (x => unit_atom(string(x)))
+
+ def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
+ {
+ case List(t1, t2) => (f(node(t1)), g(node(t2)))
+ case ts => throw new XML_Body(ts)
+ }
+
+ def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
+ {
+ case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3)))
+ case ts => throw new XML_Body(ts)
+ }
+
+ def list[A](f: T[A]): T[List[A]] =
+ (ts => ts.map(t => f(node(t))))
+
+ def option[A](f: T[A]): T[Option[A]] =
+ {
+ case Nil => None
+ case List(t) => Some(f(node(t)))
+ case ts => throw new XML_Body(ts)
+ }
+
+ def variant[A](fs: List[V[A]]): T[A] =
+ {
+ case List(t) =>
+ val (tag, (xs, ts)) = tagged(t)
+ val f =
+ try { fs(tag) }
+ catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) }
+ f(xs, ts)
+ case ts => throw new XML_Body(ts)
+ }
+ }
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/PIDE/yxml.ML Sun Sep 04 15:21:50 2011 +0200
@@ -0,0 +1,148 @@
+(* Title: Pure/PIDE/yxml.ML
+ Author: Makarius
+
+Efficient text representation of XML trees using extra characters X
+and Y -- no escaping, may nest marked text verbatim. Suitable for
+direct inlining into plain text.
+
+Markup <elem att="val" ...>...body...</elem> is encoded as:
+
+ X Y name Y att=val ... X
+ ...
+ body
+ ...
+ X Y X
+*)
+
+signature YXML =
+sig
+ val X: Symbol.symbol
+ val Y: Symbol.symbol
+ val embed_controls: string -> string
+ val detect: string -> bool
+ val output_markup: Markup.T -> string * string
+ val element: string -> XML.attributes -> string list -> string
+ val string_of_body: XML.body -> string
+ val string_of: XML.tree -> string
+ val parse_body: string -> XML.body
+ val parse: string -> XML.tree
+end;
+
+structure YXML: YXML =
+struct
+
+(** string representation **)
+
+(* idempotent recoding of certain low ASCII control characters *)
+
+fun pseudo_utf8 c =
+ if Symbol.is_ascii_control c
+ then chr 192 ^ chr (128 + ord c)
+ else c;
+
+fun embed_controls str =
+ if exists_string Symbol.is_ascii_control str
+ then translate_string pseudo_utf8 str
+ else str;
+
+
+(* markers *)
+
+val X = chr 5;
+val Y = chr 6;
+val XY = X ^ Y;
+val XYX = XY ^ X;
+
+val detect = exists_string (fn s => s = X orelse s = Y);
+
+
+(* output *)
+
+fun output_markup (markup as (name, atts)) =
+ if Markup.is_empty markup then Markup.no_output
+ else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX);
+
+fun element name atts body =
+ let val (pre, post) = output_markup (name, atts)
+ in pre ^ implode body ^ post end;
+
+fun string_of_body body =
+ let
+ fun attrib (a, x) =
+ Buffer.add Y #>
+ Buffer.add a #> Buffer.add "=" #> Buffer.add x;
+ fun tree (XML.Elem ((name, atts), ts)) =
+ Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #>
+ trees ts #>
+ Buffer.add XYX
+ | tree (XML.Text s) = Buffer.add s
+ and trees ts = fold tree ts;
+ in Buffer.empty |> trees body |> Buffer.content end;
+
+val string_of = string_of_body o single;
+
+
+
+(** efficient YXML parsing **)
+
+local
+
+(* splitting *)
+
+fun is_char s c = ord s = Char.ord c;
+
+val split_string =
+ Substring.full #>
+ Substring.tokens (is_char X) #>
+ map (Substring.fields (is_char Y) #> map Substring.string);
+
+
+(* structural errors *)
+
+fun err msg = raise Fail ("Malformed YXML encoding: " ^ msg);
+fun err_attribute () = err "bad attribute";
+fun err_element () = err "bad element";
+fun err_unbalanced "" = err "unbalanced element"
+ | err_unbalanced name = err ("unbalanced element " ^ quote name);
+
+
+(* stack operations *)
+
+fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending;
+
+fun push "" _ _ = err_element ()
+ | push name atts pending = ((name, atts), []) :: pending;
+
+fun pop ((("", _), _) :: _) = err_unbalanced ""
+ | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending;
+
+
+(* parsing *)
+
+fun parse_attrib s =
+ (case first_field "=" s of
+ NONE => err_attribute ()
+ | SOME ("", _) => err_attribute ()
+ | SOME att => att);
+
+fun parse_chunk ["", ""] = pop
+ | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts)
+ | parse_chunk txts = fold (add o XML.Text) txts;
+
+in
+
+fun parse_body source =
+ (case fold parse_chunk (split_string source) [(("", []), [])] of
+ [(("", _), result)] => rev result
+ | ((name, _), _) :: _ => err_unbalanced name);
+
+fun parse source =
+ (case parse_body source of
+ [result] => result
+ | [] => XML.Text ""
+ | _ => err "multiple results");
+
+end;
+
+end;
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/PIDE/yxml.scala Sun Sep 04 15:21:50 2011 +0200
@@ -0,0 +1,135 @@
+/* Title: Pure/PIDE/yxml.scala
+ Author: Makarius
+
+Efficient text representation of XML trees. Suitable for direct
+inlining into plain text.
+*/
+
+package isabelle
+
+
+import scala.collection.mutable
+
+
+object YXML
+{
+ /* chunk markers */
+
+ val X = '\5'
+ val Y = '\6'
+ val X_string = X.toString
+ val Y_string = Y.toString
+
+ def detect(s: String): Boolean = s.exists(c => c == X || c == Y)
+
+
+ /* string representation */ // FIXME byte array version with pseudo-utf-8 (!?)
+
+ def string_of_body(body: XML.Body): String =
+ {
+ val s = new StringBuilder
+ def attrib(p: (String, String)) { s += Y; s ++= p._1; s += '='; s ++= p._2 }
+ def tree(t: XML.Tree): Unit =
+ t match {
+ case XML.Elem(Markup(name, atts), ts) =>
+ s += X; s += Y; s++= name; atts.foreach(attrib); s += X
+ ts.foreach(tree)
+ s += X; s += Y; s += X
+ case XML.Text(text) => s ++= text
+ }
+ body.foreach(tree)
+ s.toString
+ }
+
+ def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree))
+
+
+ /* parsing */
+
+ private def err(msg: String) = error("Malformed YXML: " + msg)
+ private def err_attribute() = err("bad attribute")
+ private def err_element() = err("bad element")
+ private def err_unbalanced(name: String) =
+ if (name == "") err("unbalanced element")
+ else err("unbalanced element " + quote(name))
+
+ private def parse_attrib(source: CharSequence) = {
+ val s = source.toString
+ val i = s.indexOf('=')
+ if (i <= 0) err_attribute()
+ (s.substring(0, i), s.substring(i + 1))
+ }
+
+
+ def parse_body(source: CharSequence): XML.Body =
+ {
+ /* stack operations */
+
+ def buffer(): mutable.ListBuffer[XML.Tree] = new mutable.ListBuffer[XML.Tree]
+ var stack: List[(Markup, mutable.ListBuffer[XML.Tree])] = List((Markup.Empty, buffer()))
+
+ def add(x: XML.Tree)
+ {
+ (stack: @unchecked) match { case ((_, body) :: _) => body += x }
+ }
+
+ def push(name: String, atts: XML.Attributes)
+ {
+ if (name == "") err_element()
+ else stack = (Markup(name, atts), buffer()) :: stack
+ }
+
+ def pop()
+ {
+ (stack: @unchecked) match {
+ case ((Markup.Empty, _) :: _) => err_unbalanced("")
+ case ((markup, body) :: pending) =>
+ stack = pending
+ add(XML.Elem(markup, body.toList))
+ }
+ }
+
+
+ /* parse chunks */
+
+ for (chunk <- Library.chunks(source, X) if chunk.length != 0) {
+ if (chunk.length == 1 && chunk.charAt(0) == Y) pop()
+ else {
+ Library.chunks(chunk, Y).toList match {
+ case ch :: name :: atts if ch.length == 0 =>
+ push(name.toString, atts.map(parse_attrib))
+ case txts => for (txt <- txts) add(XML.Text(txt.toString))
+ }
+ }
+ }
+ (stack: @unchecked) match {
+ case List((Markup.Empty, body)) => body.toList
+ case (Markup(name, _), _) :: _ => err_unbalanced(name)
+ }
+ }
+
+ def parse(source: CharSequence): XML.Tree =
+ parse_body(source) match {
+ case List(result) => result
+ case Nil => XML.Text("")
+ case _ => err("multiple results")
+ }
+
+
+ /* failsafe parsing */
+
+ private def markup_malformed(source: CharSequence) =
+ XML.elem(Markup.MALFORMED, List(XML.Text(source.toString)))
+
+ def parse_body_failsafe(source: CharSequence): XML.Body =
+ {
+ try { parse_body(source) }
+ catch { case ERROR(_) => List(markup_malformed(source)) }
+ }
+
+ def parse_failsafe(source: CharSequence): XML.Tree =
+ {
+ try { parse(source) }
+ catch { case ERROR(_) => markup_malformed(source) }
+ }
+}
--- a/src/Pure/ROOT.ML Sun Sep 04 14:29:15 2011 +0200
+++ b/src/Pure/ROOT.ML Sun Sep 04 15:21:50 2011 +0200
@@ -54,17 +54,18 @@
use "General/linear_set.ML";
use "General/buffer.ML";
use "General/pretty.ML";
-use "General/xml.ML";
use "General/path.ML";
use "General/url.ML";
use "General/file.ML";
-use "General/yxml.ML";
use "General/long_name.ML";
use "General/binding.ML";
use "General/sha1.ML";
if ML_System.is_polyml then use "General/sha1_polyml.ML" else ();
+use "PIDE/xml.ML";
+use "PIDE/yxml.ML";
+
(* concurrency within the ML runtime *)
--- a/src/Pure/build-jars Sun Sep 04 14:29:15 2011 +0200
+++ b/src/Pure/build-jars Sun Sep 04 15:21:50 2011 +0200
@@ -24,8 +24,6 @@
General/scan.scala
General/sha1.scala
General/symbol.scala
- General/xml.scala
- General/yxml.scala
Isar/keyword.scala
Isar/outer_syntax.scala
Isar/parse.scala
@@ -36,6 +34,8 @@
PIDE/isar_document.scala
PIDE/markup_tree.scala
PIDE/text.scala
+ PIDE/xml.scala
+ PIDE/yxml.scala
System/cygwin.scala
System/download.scala
System/event_bus.scala