# HG changeset patch # User wenzelm # Date 1315142510 -7200 # Node ID 0385292321a0804a588e4e688a2b8433e812108d # Parent b99dfee76538969fad4018a656ddebb9ce9321b3 moved XML/YXML to src/Pure/PIDE; tuned comments; diff -r b99dfee76538 -r 0385292321a0 src/Pure/General/xml.ML --- a/src/Pure/General/xml.ML Sun Sep 04 14:29:15 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,361 +0,0 @@ -(* Title: Pure/General/xml.ML - Author: David Aspinall, Stefan Berghofer and Markus Wenzel - -Untyped XML trees. -*) - -signature XML_DATA_OPS = -sig - type 'a A - type 'a T - type 'a V - val int_atom: int A - val bool_atom: bool A - val unit_atom: unit A - val properties: Properties.T T - val string: string T - val int: int T - val bool: bool T - val unit: unit T - val pair: 'a T -> 'b T -> ('a * 'b) T - val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T - val list: 'a T -> 'a list T - val option: 'a T -> 'a option T - val variant: 'a V list -> 'a T -end; - -signature XML = -sig - type attributes = Properties.T - datatype tree = - Elem of Markup.T * tree list - | Text of string - type body = tree list - val add_content: tree -> Buffer.T -> Buffer.T - val content_of: body -> string - val header: string - val text: string -> string - val element: string -> attributes -> string list -> string - val output_markup: Markup.T -> Output.output * Output.output - val string_of: tree -> string - val pretty: int -> tree -> Pretty.T - val output: tree -> TextIO.outstream -> unit - val parse_comments: string list -> unit * string list - val parse_string : string -> string option - val parse_element: string list -> tree * string list - val parse_document: string list -> tree * string list - val parse: string -> tree - exception XML_ATOM of string - exception XML_BODY of body - structure Encode: XML_DATA_OPS - structure Decode: XML_DATA_OPS -end; - -structure XML: XML = -struct - -(** XML trees **) - -type attributes = Properties.T; - -datatype tree = - Elem of Markup.T * tree list - | Text of string; - -type body = tree list; - -fun add_content (Elem (_, ts)) = fold add_content ts - | add_content (Text s) = Buffer.add s; - -fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content; - - - -(** string representation **) - -val header = "\n"; - - -(* escaped text *) - -fun decode "<" = "<" - | decode ">" = ">" - | decode "&" = "&" - | decode "'" = "'" - | decode """ = "\"" - | decode c = c; - -fun encode "<" = "<" - | encode ">" = ">" - | encode "&" = "&" - | encode "'" = "'" - | encode "\"" = """ - | encode c = c; - -val text = translate_string encode; - - -(* elements *) - -fun elem name atts = - space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts); - -fun element name atts body = - let val b = implode body in - if b = "" then enclose "<" "/>" (elem name atts) - else enclose "<" ">" (elem name atts) ^ b ^ enclose "" name - end; - -fun output_markup (markup as (name, atts)) = - if Markup.is_empty markup then Markup.no_output - else (enclose "<" ">" (elem name atts), enclose "" name); - - -(* output *) - -fun buffer_of depth tree = - let - fun traverse _ (Elem ((name, atts), [])) = - Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>" - | traverse d (Elem ((name, atts), ts)) = - Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #> - traverse_body d ts #> - Buffer.add " Buffer.add name #> Buffer.add ">" - | traverse _ (Text s) = Buffer.add (text s) - and traverse_body 0 _ = Buffer.add "..." - | traverse_body d ts = fold (traverse (d - 1)) ts; - in Buffer.empty |> traverse depth tree end; - -val string_of = Buffer.content o buffer_of ~1; -val output = Buffer.output o buffer_of ~1; - -fun pretty depth tree = - Pretty.str (Buffer.content (buffer_of (Int.max (0, depth)) tree)); - - - -(** XML parsing (slow) **) - -local - -fun err msg (xs, _) = - fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs); - -fun ignored _ = []; - -val blanks = Scan.many Symbol.is_blank; -val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode; -val regular = Scan.one Symbol.is_regular; -fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x); - -val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode; - -val parse_cdata = - Scan.this_string "") regular) >> implode) --| - Scan.this_string "]]>"; - -val parse_att = - (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) -- - (($$ "\"" || $$ "'") :|-- (fn s => - (Scan.repeat (special || regular_except s) >> implode) --| $$ s)); - -val parse_comment = - Scan.this_string "") regular) -- - Scan.this_string "-->" >> ignored; - -val parse_processing_instruction = - Scan.this_string "") regular) -- - Scan.this_string "?>" >> ignored; - -val parse_doctype = - Scan.this_string "") regular) -- - $$ ">" >> ignored; - -val parse_misc = - Scan.one Symbol.is_blank >> ignored || - parse_processing_instruction || - parse_comment; - -val parse_optional_text = - Scan.optional (parse_chars >> (single o Text)) []; - -fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_"; -fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = "."; -val parse_name = Scan.one name_start_char ::: Scan.many name_char; - -in - -val parse_comments = - blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K (); - -val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode; - -fun parse_content xs = - (parse_optional_text @@@ - (Scan.repeat - ((parse_element >> single || - parse_cdata >> (single o Text) || - parse_processing_instruction || - parse_comment) - @@@ parse_optional_text) >> flat)) xs - -and parse_element xs = - ($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :-- - (fn (name, _) => - !! (err (fn () => "Expected > or />")) - ($$ "/" -- $$ ">" >> ignored || - $$ ">" |-- parse_content --| - !! (err (fn () => "Expected ")) - ($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">"))) - >> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs; - -val parse_document = - (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc) - |-- parse_element; - -fun parse s = - (case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element")) - (blanks |-- parse_document --| blanks))) (raw_explode s) of - (x, []) => x - | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys)); - -end; - - - -(** XML as data representation language **) - -exception XML_ATOM of string; -exception XML_BODY of tree list; - - -structure Encode = -struct - -type 'a A = 'a -> string; -type 'a T = 'a -> body; -type 'a V = 'a -> string list * body; - - -(* atomic values *) - -fun int_atom i = signed_string_of_int i; - -fun bool_atom false = "0" - | bool_atom true = "1"; - -fun unit_atom () = ""; - - -(* structural nodes *) - -fun node ts = Elem ((":", []), ts); - -fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs; - -fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts); - - -(* representation of standard types *) - -fun properties props = [Elem ((":", props), [])]; - -fun string "" = [] - | string s = [Text s]; - -val int = string o int_atom; - -val bool = string o bool_atom; - -val unit = string o unit_atom; - -fun pair f g (x, y) = [node (f x), node (g y)]; - -fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)]; - -fun list f xs = map (node o f) xs; - -fun option _ NONE = [] - | option f (SOME x) = [node (f x)]; - -fun variant fs x = [tagged (the (get_index (fn f => try f x) fs))]; - -end; - - -structure Decode = -struct - -type 'a A = string -> 'a; -type 'a T = body -> 'a; -type 'a V = string list * body -> 'a; - - -(* atomic values *) - -fun int_atom s = - Markup.parse_int s - handle Fail _ => raise XML_ATOM s; - -fun bool_atom "0" = false - | bool_atom "1" = true - | bool_atom s = raise XML_ATOM s; - -fun unit_atom "" = () - | unit_atom s = raise XML_ATOM s; - - -(* structural nodes *) - -fun node (Elem ((":", []), ts)) = ts - | node t = raise XML_BODY [t]; - -fun vector atts = - #1 (fold_map (fn (a, x) => - fn i => if int_atom a = i then (x, i + 1) else raise XML_ATOM a) atts 0); - -fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts)) - | tagged t = raise XML_BODY [t]; - - -(* representation of standard types *) - -fun properties [Elem ((":", props), [])] = props - | properties ts = raise XML_BODY ts; - -fun string [] = "" - | string [Text s] = s - | string ts = raise XML_BODY ts; - -val int = int_atom o string; - -val bool = bool_atom o string; - -val unit = unit_atom o string; - -fun pair f g [t1, t2] = (f (node t1), g (node t2)) - | pair _ _ ts = raise XML_BODY ts; - -fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3)) - | triple _ _ _ ts = raise XML_BODY ts; - -fun list f ts = map (f o node) ts; - -fun option _ [] = NONE - | option f [t] = SOME (f (node t)) - | option _ ts = raise XML_BODY ts; - -fun variant fs [t] = - let - val (tag, (xs, ts)) = tagged t; - val f = nth fs tag handle General.Subscript => raise XML_BODY [t]; - in f (xs, ts) end - | variant _ ts = raise XML_BODY ts; - -end; - -end; diff -r b99dfee76538 -r 0385292321a0 src/Pure/General/xml.scala --- a/src/Pure/General/xml.scala Sun Sep 04 14:29:15 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,390 +0,0 @@ -/* Title: Pure/General/xml.scala - Author: Makarius - -Untyped XML trees. -*/ - -package isabelle - -import java.lang.System -import java.util.WeakHashMap -import java.lang.ref.WeakReference -import javax.xml.parsers.DocumentBuilderFactory - -import scala.actors.Actor._ -import scala.collection.mutable - - -object XML -{ - /** XML trees **/ - - /* datatype representation */ - - type Attributes = Properties.T - - sealed abstract class Tree { override def toString = string_of_tree(this) } - case class Elem(markup: Markup, body: List[Tree]) extends Tree - case class Text(content: String) extends Tree - - def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body) - def elem(name: String) = Elem(Markup(name, Nil), Nil) - - type Body = List[Tree] - - - /* string representation */ - - def string_of_body(body: Body): String = - { - val s = new StringBuilder - - def text(txt: String) { - if (txt == null) s ++= txt - else { - for (c <- txt.iterator) c match { - case '<' => s ++= "<" - case '>' => s ++= ">" - case '&' => s ++= "&" - case '"' => s ++= """ - case '\'' => s ++= "'" - case _ => s += c - } - } - } - def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" } - def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) } - def tree(t: Tree): Unit = - t match { - case Elem(markup, Nil) => - s ++= "<"; elem(markup); s ++= "/>" - case Elem(markup, ts) => - s ++= "<"; elem(markup); s ++= ">" - ts.foreach(tree) - s ++= "" - case Text(txt) => text(txt) - } - body.foreach(tree) - s.toString - } - - def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree)) - - - /* text content */ - - def content_stream(tree: Tree): Stream[String] = - tree match { - case Elem(_, body) => content_stream(body) - case Text(content) => Stream(content) - } - def content_stream(body: Body): Stream[String] = - body.toStream.flatten(content_stream(_)) - - def content(tree: Tree): Iterator[String] = content_stream(tree).iterator - def content(body: Body): Iterator[String] = content_stream(body).iterator - - - /* pipe-lined cache for partial sharing */ - - class Cache(initial_size: Int = 131071, max_string: Int = 100) - { - private val cache_actor = actor - { - val table = new WeakHashMap[Any, WeakReference[Any]](initial_size) - - def lookup[A](x: A): Option[A] = - { - val ref = table.get(x) - if (ref == null) None - else { - val y = ref.asInstanceOf[WeakReference[A]].get - if (y == null) None - else Some(y) - } - } - def store[A](x: A): A = - { - table.put(x, new WeakReference[Any](x)) - x - } - - def trim_bytes(s: String): String = new String(s.toCharArray) - - def cache_string(x: String): String = - lookup(x) match { - case Some(y) => y - case None => - val z = trim_bytes(x) - if (z.length > max_string) z else store(z) - } - def cache_props(x: Properties.T): Properties.T = - if (x.isEmpty) x - else - lookup(x) match { - case Some(y) => y - case None => store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2)))) - } - def cache_markup(x: Markup): Markup = - lookup(x) match { - case Some(y) => y - case None => - x match { - case Markup(name, props) => - store(Markup(cache_string(name), cache_props(props))) - } - } - def cache_tree(x: XML.Tree): XML.Tree = - lookup(x) match { - case Some(y) => y - case None => - x match { - case XML.Elem(markup, body) => - store(XML.Elem(cache_markup(markup), cache_body(body))) - case XML.Text(text) => store(XML.Text(cache_string(text))) - } - } - def cache_body(x: XML.Body): XML.Body = - if (x.isEmpty) x - else - lookup(x) match { - case Some(y) => y - case None => x.map(cache_tree(_)) - } - - // main loop - loop { - react { - case Cache_String(x, f) => f(cache_string(x)) - case Cache_Markup(x, f) => f(cache_markup(x)) - case Cache_Tree(x, f) => f(cache_tree(x)) - case Cache_Body(x, f) => f(cache_body(x)) - case Cache_Ignore(x, f) => f(x) - case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad) - } - } - } - - private case class Cache_String(x: String, f: String => Unit) - private case class Cache_Markup(x: Markup, f: Markup => Unit) - private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit) - private case class Cache_Body(x: XML.Body, f: XML.Body => Unit) - private case class Cache_Ignore[A](x: A, f: A => Unit) - - // main methods - def cache_string(x: String)(f: String => Unit) { cache_actor ! Cache_String(x, f) } - def cache_markup(x: Markup)(f: Markup => Unit) { cache_actor ! Cache_Markup(x, f) } - def cache_tree(x: XML.Tree)(f: XML.Tree => Unit) { cache_actor ! Cache_Tree(x, f) } - def cache_body(x: XML.Body)(f: XML.Body => Unit) { cache_actor ! Cache_Body(x, f) } - def cache_ignore[A](x: A)(f: A => Unit) { cache_actor ! Cache_Ignore(x, f) } - } - - - - /** document object model (W3C DOM) **/ - - def get_data(node: org.w3c.dom.Node): Option[XML.Tree] = - node.getUserData(Markup.Data.name) match { - case tree: XML.Tree => Some(tree) - case _ => None - } - - def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node = - { - def DOM(tr: Tree): org.w3c.dom.Node = tr match { - case Elem(Markup.Data, List(data, t)) => - val node = DOM(t) - node.setUserData(Markup.Data.name, data, null) - node - case Elem(Markup(name, atts), ts) => - if (name == Markup.Data.name) - error("Malformed data element: " + tr.toString) - val node = doc.createElement(name) - for ((name, value) <- atts) node.setAttribute(name, value) - for (t <- ts) node.appendChild(DOM(t)) - node - case Text(txt) => doc.createTextNode(txt) - } - DOM(tree) - } - - - - /** XML as data representation language **/ - - class XML_Atom(s: String) extends Exception(s) - class XML_Body(body: XML.Body) extends Exception - - object Encode - { - type T[A] = A => XML.Body - - - /* atomic values */ - - def long_atom(i: Long): String = i.toString - - def int_atom(i: Int): String = i.toString - - def bool_atom(b: Boolean): String = if (b) "1" else "0" - - def unit_atom(u: Unit) = "" - - - /* structural nodes */ - - private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts) - - private def vector(xs: List[String]): XML.Attributes = - xs.zipWithIndex.map(p => (int_atom(p._2), p._1)) - - private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree = - XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2) - - - /* representation of standard types */ - - val properties: T[Properties.T] = - (props => List(XML.Elem(Markup(":", props), Nil))) - - val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s))) - - val long: T[Long] = (x => string(long_atom(x))) - - val int: T[Int] = (x => string(int_atom(x))) - - val bool: T[Boolean] = (x => string(bool_atom(x))) - - val unit: T[Unit] = (x => string(unit_atom(x))) - - def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = - (x => List(node(f(x._1)), node(g(x._2)))) - - def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = - (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3)))) - - def list[A](f: T[A]): T[List[A]] = - (xs => xs.map((x: A) => node(f(x)))) - - def option[A](f: T[A]): T[Option[A]] = - { - case None => Nil - case Some(x) => List(node(f(x))) - } - - def variant[A](fs: List[PartialFunction[A, (List[String], XML.Body)]]): T[A] = - { - case x => - val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get - List(tagged(tag, f(x))) - } - } - - object Decode - { - type T[A] = XML.Body => A - type V[A] = (List[String], XML.Body) => A - - - /* atomic values */ - - def long_atom(s: String): Long = - try { java.lang.Long.parseLong(s) } - catch { case e: NumberFormatException => throw new XML_Atom(s) } - - def int_atom(s: String): Int = - try { Integer.parseInt(s) } - catch { case e: NumberFormatException => throw new XML_Atom(s) } - - def bool_atom(s: String): Boolean = - if (s == "1") true - else if (s == "0") false - else throw new XML_Atom(s) - - def unit_atom(s: String): Unit = - if (s == "") () else throw new XML_Atom(s) - - - /* structural nodes */ - - private def node(t: XML.Tree): XML.Body = - t match { - case XML.Elem(Markup(":", Nil), ts) => ts - case _ => throw new XML_Body(List(t)) - } - - private def vector(atts: XML.Attributes): List[String] = - { - val xs = new mutable.ListBuffer[String] - var i = 0 - for ((a, x) <- atts) { - if (int_atom(a) == i) { xs += x; i = i + 1 } - else throw new XML_Atom(a) - } - xs.toList - } - - private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) = - t match { - case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts)) - case _ => throw new XML_Body(List(t)) - } - - - /* representation of standard types */ - - val properties: T[Properties.T] = - { - case List(XML.Elem(Markup(":", props), Nil)) => props - case ts => throw new XML_Body(ts) - } - - val string: T[String] = - { - case Nil => "" - case List(XML.Text(s)) => s - case ts => throw new XML_Body(ts) - } - - val long: T[Long] = (x => long_atom(string(x))) - - val int: T[Int] = (x => int_atom(string(x))) - - val bool: T[Boolean] = (x => bool_atom(string(x))) - - val unit: T[Unit] = (x => unit_atom(string(x))) - - def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = - { - case List(t1, t2) => (f(node(t1)), g(node(t2))) - case ts => throw new XML_Body(ts) - } - - def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = - { - case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3))) - case ts => throw new XML_Body(ts) - } - - def list[A](f: T[A]): T[List[A]] = - (ts => ts.map(t => f(node(t)))) - - def option[A](f: T[A]): T[Option[A]] = - { - case Nil => None - case List(t) => Some(f(node(t))) - case ts => throw new XML_Body(ts) - } - - def variant[A](fs: List[V[A]]): T[A] = - { - case List(t) => - val (tag, (xs, ts)) = tagged(t) - val f = - try { fs(tag) } - catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) } - f(xs, ts) - case ts => throw new XML_Body(ts) - } - } -} diff -r b99dfee76538 -r 0385292321a0 src/Pure/General/yxml.ML --- a/src/Pure/General/yxml.ML Sun Sep 04 14:29:15 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,147 +0,0 @@ -(* Title: Pure/General/yxml.ML - Author: Makarius - -Efficient text representation of XML trees using extra characters X -and Y -- no escaping, may nest marked text verbatim. - -Markup ...body... is encoded as: - - X Y name Y att=val ... X - ... - body - ... - X Y X -*) - -signature YXML = -sig - val X: Symbol.symbol - val Y: Symbol.symbol - val embed_controls: string -> string - val detect: string -> bool - val output_markup: Markup.T -> string * string - val element: string -> XML.attributes -> string list -> string - val string_of_body: XML.body -> string - val string_of: XML.tree -> string - val parse_body: string -> XML.body - val parse: string -> XML.tree -end; - -structure YXML: YXML = -struct - -(** string representation **) - -(* idempotent recoding of certain low ASCII control characters *) - -fun pseudo_utf8 c = - if Symbol.is_ascii_control c - then chr 192 ^ chr (128 + ord c) - else c; - -fun embed_controls str = - if exists_string Symbol.is_ascii_control str - then translate_string pseudo_utf8 str - else str; - - -(* markers *) - -val X = chr 5; -val Y = chr 6; -val XY = X ^ Y; -val XYX = XY ^ X; - -val detect = exists_string (fn s => s = X orelse s = Y); - - -(* output *) - -fun output_markup (markup as (name, atts)) = - if Markup.is_empty markup then Markup.no_output - else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX); - -fun element name atts body = - let val (pre, post) = output_markup (name, atts) - in pre ^ implode body ^ post end; - -fun string_of_body body = - let - fun attrib (a, x) = - Buffer.add Y #> - Buffer.add a #> Buffer.add "=" #> Buffer.add x; - fun tree (XML.Elem ((name, atts), ts)) = - Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #> - trees ts #> - Buffer.add XYX - | tree (XML.Text s) = Buffer.add s - and trees ts = fold tree ts; - in Buffer.empty |> trees body |> Buffer.content end; - -val string_of = string_of_body o single; - - - -(** efficient YXML parsing **) - -local - -(* splitting *) - -fun is_char s c = ord s = Char.ord c; - -val split_string = - Substring.full #> - Substring.tokens (is_char X) #> - map (Substring.fields (is_char Y) #> map Substring.string); - - -(* structural errors *) - -fun err msg = raise Fail ("Malformed YXML encoding: " ^ msg); -fun err_attribute () = err "bad attribute"; -fun err_element () = err "bad element"; -fun err_unbalanced "" = err "unbalanced element" - | err_unbalanced name = err ("unbalanced element " ^ quote name); - - -(* stack operations *) - -fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending; - -fun push "" _ _ = err_element () - | push name atts pending = ((name, atts), []) :: pending; - -fun pop ((("", _), _) :: _) = err_unbalanced "" - | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending; - - -(* parsing *) - -fun parse_attrib s = - (case first_field "=" s of - NONE => err_attribute () - | SOME ("", _) => err_attribute () - | SOME att => att); - -fun parse_chunk ["", ""] = pop - | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts) - | parse_chunk txts = fold (add o XML.Text) txts; - -in - -fun parse_body source = - (case fold parse_chunk (split_string source) [(("", []), [])] of - [(("", _), result)] => rev result - | ((name, _), _) :: _ => err_unbalanced name); - -fun parse source = - (case parse_body source of - [result] => result - | [] => XML.Text "" - | _ => err "multiple results"); - -end; - -end; - diff -r b99dfee76538 -r 0385292321a0 src/Pure/General/yxml.scala --- a/src/Pure/General/yxml.scala Sun Sep 04 14:29:15 2011 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,134 +0,0 @@ -/* Title: Pure/General/yxml.scala - Author: Makarius - -Efficient text representation of XML trees. -*/ - -package isabelle - - -import scala.collection.mutable - - -object YXML -{ - /* chunk markers */ - - val X = '\5' - val Y = '\6' - val X_string = X.toString - val Y_string = Y.toString - - def detect(s: String): Boolean = s.exists(c => c == X || c == Y) - - - /* string representation */ // FIXME byte array version with pseudo-utf-8 (!?) - - def string_of_body(body: XML.Body): String = - { - val s = new StringBuilder - def attrib(p: (String, String)) { s += Y; s ++= p._1; s += '='; s ++= p._2 } - def tree(t: XML.Tree): Unit = - t match { - case XML.Elem(Markup(name, atts), ts) => - s += X; s += Y; s++= name; atts.foreach(attrib); s += X - ts.foreach(tree) - s += X; s += Y; s += X - case XML.Text(text) => s ++= text - } - body.foreach(tree) - s.toString - } - - def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree)) - - - /* parsing */ - - private def err(msg: String) = error("Malformed YXML: " + msg) - private def err_attribute() = err("bad attribute") - private def err_element() = err("bad element") - private def err_unbalanced(name: String) = - if (name == "") err("unbalanced element") - else err("unbalanced element " + quote(name)) - - private def parse_attrib(source: CharSequence) = { - val s = source.toString - val i = s.indexOf('=') - if (i <= 0) err_attribute() - (s.substring(0, i), s.substring(i + 1)) - } - - - def parse_body(source: CharSequence): XML.Body = - { - /* stack operations */ - - def buffer(): mutable.ListBuffer[XML.Tree] = new mutable.ListBuffer[XML.Tree] - var stack: List[(Markup, mutable.ListBuffer[XML.Tree])] = List((Markup.Empty, buffer())) - - def add(x: XML.Tree) - { - (stack: @unchecked) match { case ((_, body) :: _) => body += x } - } - - def push(name: String, atts: XML.Attributes) - { - if (name == "") err_element() - else stack = (Markup(name, atts), buffer()) :: stack - } - - def pop() - { - (stack: @unchecked) match { - case ((Markup.Empty, _) :: _) => err_unbalanced("") - case ((markup, body) :: pending) => - stack = pending - add(XML.Elem(markup, body.toList)) - } - } - - - /* parse chunks */ - - for (chunk <- Library.chunks(source, X) if chunk.length != 0) { - if (chunk.length == 1 && chunk.charAt(0) == Y) pop() - else { - Library.chunks(chunk, Y).toList match { - case ch :: name :: atts if ch.length == 0 => - push(name.toString, atts.map(parse_attrib)) - case txts => for (txt <- txts) add(XML.Text(txt.toString)) - } - } - } - (stack: @unchecked) match { - case List((Markup.Empty, body)) => body.toList - case (Markup(name, _), _) :: _ => err_unbalanced(name) - } - } - - def parse(source: CharSequence): XML.Tree = - parse_body(source) match { - case List(result) => result - case Nil => XML.Text("") - case _ => err("multiple results") - } - - - /* failsafe parsing */ - - private def markup_malformed(source: CharSequence) = - XML.elem(Markup.MALFORMED, List(XML.Text(source.toString))) - - def parse_body_failsafe(source: CharSequence): XML.Body = - { - try { parse_body(source) } - catch { case ERROR(_) => List(markup_malformed(source)) } - } - - def parse_failsafe(source: CharSequence): XML.Tree = - { - try { parse(source) } - catch { case ERROR(_) => markup_malformed(source) } - } -} diff -r b99dfee76538 -r 0385292321a0 src/Pure/IsaMakefile --- a/src/Pure/IsaMakefile Sun Sep 04 14:29:15 2011 +0200 +++ b/src/Pure/IsaMakefile Sun Sep 04 15:21:50 2011 +0200 @@ -105,8 +105,6 @@ General/table.ML \ General/timing.ML \ General/url.ML \ - General/xml.ML \ - General/yxml.ML \ Isar/args.ML \ Isar/attrib.ML \ Isar/auto_bind.ML \ @@ -158,6 +156,8 @@ ML/ml_thms.ML \ PIDE/document.ML \ PIDE/isar_document.ML \ + PIDE/xml.ML \ + PIDE/yxml.ML \ Proof/extraction.ML \ Proof/proof_checker.ML \ Proof/proof_rewrite_rules.ML \ diff -r b99dfee76538 -r 0385292321a0 src/Pure/PIDE/xml.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Pure/PIDE/xml.ML Sun Sep 04 15:21:50 2011 +0200 @@ -0,0 +1,363 @@ +(* Title: Pure/PIDE/xml.ML + Author: David Aspinall + Author: Stefan Berghofer + Author: Makarius + +Untyped XML trees and basic data representation. +*) + +signature XML_DATA_OPS = +sig + type 'a A + type 'a T + type 'a V + val int_atom: int A + val bool_atom: bool A + val unit_atom: unit A + val properties: Properties.T T + val string: string T + val int: int T + val bool: bool T + val unit: unit T + val pair: 'a T -> 'b T -> ('a * 'b) T + val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T + val list: 'a T -> 'a list T + val option: 'a T -> 'a option T + val variant: 'a V list -> 'a T +end; + +signature XML = +sig + type attributes = Properties.T + datatype tree = + Elem of Markup.T * tree list + | Text of string + type body = tree list + val add_content: tree -> Buffer.T -> Buffer.T + val content_of: body -> string + val header: string + val text: string -> string + val element: string -> attributes -> string list -> string + val output_markup: Markup.T -> Output.output * Output.output + val string_of: tree -> string + val pretty: int -> tree -> Pretty.T + val output: tree -> TextIO.outstream -> unit + val parse_comments: string list -> unit * string list + val parse_string : string -> string option + val parse_element: string list -> tree * string list + val parse_document: string list -> tree * string list + val parse: string -> tree + exception XML_ATOM of string + exception XML_BODY of body + structure Encode: XML_DATA_OPS + structure Decode: XML_DATA_OPS +end; + +structure XML: XML = +struct + +(** XML trees **) + +type attributes = Properties.T; + +datatype tree = + Elem of Markup.T * tree list + | Text of string; + +type body = tree list; + +fun add_content (Elem (_, ts)) = fold add_content ts + | add_content (Text s) = Buffer.add s; + +fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content; + + + +(** string representation **) + +val header = "\n"; + + +(* escaped text *) + +fun decode "<" = "<" + | decode ">" = ">" + | decode "&" = "&" + | decode "'" = "'" + | decode """ = "\"" + | decode c = c; + +fun encode "<" = "<" + | encode ">" = ">" + | encode "&" = "&" + | encode "'" = "'" + | encode "\"" = """ + | encode c = c; + +val text = translate_string encode; + + +(* elements *) + +fun elem name atts = + space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts); + +fun element name atts body = + let val b = implode body in + if b = "" then enclose "<" "/>" (elem name atts) + else enclose "<" ">" (elem name atts) ^ b ^ enclose "" name + end; + +fun output_markup (markup as (name, atts)) = + if Markup.is_empty markup then Markup.no_output + else (enclose "<" ">" (elem name atts), enclose "" name); + + +(* output *) + +fun buffer_of depth tree = + let + fun traverse _ (Elem ((name, atts), [])) = + Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>" + | traverse d (Elem ((name, atts), ts)) = + Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #> + traverse_body d ts #> + Buffer.add " Buffer.add name #> Buffer.add ">" + | traverse _ (Text s) = Buffer.add (text s) + and traverse_body 0 _ = Buffer.add "..." + | traverse_body d ts = fold (traverse (d - 1)) ts; + in Buffer.empty |> traverse depth tree end; + +val string_of = Buffer.content o buffer_of ~1; +val output = Buffer.output o buffer_of ~1; + +fun pretty depth tree = + Pretty.str (Buffer.content (buffer_of (Int.max (0, depth)) tree)); + + + +(** XML parsing **) + +local + +fun err msg (xs, _) = + fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs); + +fun ignored _ = []; + +val blanks = Scan.many Symbol.is_blank; +val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode; +val regular = Scan.one Symbol.is_regular; +fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x); + +val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode; + +val parse_cdata = + Scan.this_string "") regular) >> implode) --| + Scan.this_string "]]>"; + +val parse_att = + (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) -- + (($$ "\"" || $$ "'") :|-- (fn s => + (Scan.repeat (special || regular_except s) >> implode) --| $$ s)); + +val parse_comment = + Scan.this_string "") regular) -- + Scan.this_string "-->" >> ignored; + +val parse_processing_instruction = + Scan.this_string "") regular) -- + Scan.this_string "?>" >> ignored; + +val parse_doctype = + Scan.this_string "") regular) -- + $$ ">" >> ignored; + +val parse_misc = + Scan.one Symbol.is_blank >> ignored || + parse_processing_instruction || + parse_comment; + +val parse_optional_text = + Scan.optional (parse_chars >> (single o Text)) []; + +fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_"; +fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = "."; +val parse_name = Scan.one name_start_char ::: Scan.many name_char; + +in + +val parse_comments = + blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K (); + +val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode; + +fun parse_content xs = + (parse_optional_text @@@ + (Scan.repeat + ((parse_element >> single || + parse_cdata >> (single o Text) || + parse_processing_instruction || + parse_comment) + @@@ parse_optional_text) >> flat)) xs + +and parse_element xs = + ($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :-- + (fn (name, _) => + !! (err (fn () => "Expected > or />")) + ($$ "/" -- $$ ">" >> ignored || + $$ ">" |-- parse_content --| + !! (err (fn () => "Expected ")) + ($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">"))) + >> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs; + +val parse_document = + (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc) + |-- parse_element; + +fun parse s = + (case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element")) + (blanks |-- parse_document --| blanks))) (raw_explode s) of + (x, []) => x + | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys)); + +end; + + + +(** XML as data representation language **) + +exception XML_ATOM of string; +exception XML_BODY of tree list; + + +structure Encode = +struct + +type 'a A = 'a -> string; +type 'a T = 'a -> body; +type 'a V = 'a -> string list * body; + + +(* atomic values *) + +fun int_atom i = signed_string_of_int i; + +fun bool_atom false = "0" + | bool_atom true = "1"; + +fun unit_atom () = ""; + + +(* structural nodes *) + +fun node ts = Elem ((":", []), ts); + +fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs; + +fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts); + + +(* representation of standard types *) + +fun properties props = [Elem ((":", props), [])]; + +fun string "" = [] + | string s = [Text s]; + +val int = string o int_atom; + +val bool = string o bool_atom; + +val unit = string o unit_atom; + +fun pair f g (x, y) = [node (f x), node (g y)]; + +fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)]; + +fun list f xs = map (node o f) xs; + +fun option _ NONE = [] + | option f (SOME x) = [node (f x)]; + +fun variant fs x = [tagged (the (get_index (fn f => try f x) fs))]; + +end; + + +structure Decode = +struct + +type 'a A = string -> 'a; +type 'a T = body -> 'a; +type 'a V = string list * body -> 'a; + + +(* atomic values *) + +fun int_atom s = + Markup.parse_int s + handle Fail _ => raise XML_ATOM s; + +fun bool_atom "0" = false + | bool_atom "1" = true + | bool_atom s = raise XML_ATOM s; + +fun unit_atom "" = () + | unit_atom s = raise XML_ATOM s; + + +(* structural nodes *) + +fun node (Elem ((":", []), ts)) = ts + | node t = raise XML_BODY [t]; + +fun vector atts = + #1 (fold_map (fn (a, x) => + fn i => if int_atom a = i then (x, i + 1) else raise XML_ATOM a) atts 0); + +fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts)) + | tagged t = raise XML_BODY [t]; + + +(* representation of standard types *) + +fun properties [Elem ((":", props), [])] = props + | properties ts = raise XML_BODY ts; + +fun string [] = "" + | string [Text s] = s + | string ts = raise XML_BODY ts; + +val int = int_atom o string; + +val bool = bool_atom o string; + +val unit = unit_atom o string; + +fun pair f g [t1, t2] = (f (node t1), g (node t2)) + | pair _ _ ts = raise XML_BODY ts; + +fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3)) + | triple _ _ _ ts = raise XML_BODY ts; + +fun list f ts = map (f o node) ts; + +fun option _ [] = NONE + | option f [t] = SOME (f (node t)) + | option _ ts = raise XML_BODY ts; + +fun variant fs [t] = + let + val (tag, (xs, ts)) = tagged t; + val f = nth fs tag handle General.Subscript => raise XML_BODY [t]; + in f (xs, ts) end + | variant _ ts = raise XML_BODY ts; + +end; + +end; diff -r b99dfee76538 -r 0385292321a0 src/Pure/PIDE/xml.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Pure/PIDE/xml.scala Sun Sep 04 15:21:50 2011 +0200 @@ -0,0 +1,390 @@ +/* Title: Pure/PIDE/xml.scala + Author: Makarius + +Untyped XML trees and basic data representation. +*/ + +package isabelle + +import java.lang.System +import java.util.WeakHashMap +import java.lang.ref.WeakReference +import javax.xml.parsers.DocumentBuilderFactory + +import scala.actors.Actor._ +import scala.collection.mutable + + +object XML +{ + /** XML trees **/ + + /* datatype representation */ + + type Attributes = Properties.T + + sealed abstract class Tree { override def toString = string_of_tree(this) } + case class Elem(markup: Markup, body: List[Tree]) extends Tree + case class Text(content: String) extends Tree + + def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body) + def elem(name: String) = Elem(Markup(name, Nil), Nil) + + type Body = List[Tree] + + + /* string representation */ + + def string_of_body(body: Body): String = + { + val s = new StringBuilder + + def text(txt: String) { + if (txt == null) s ++= txt + else { + for (c <- txt.iterator) c match { + case '<' => s ++= "<" + case '>' => s ++= ">" + case '&' => s ++= "&" + case '"' => s ++= """ + case '\'' => s ++= "'" + case _ => s += c + } + } + } + def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" } + def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) } + def tree(t: Tree): Unit = + t match { + case Elem(markup, Nil) => + s ++= "<"; elem(markup); s ++= "/>" + case Elem(markup, ts) => + s ++= "<"; elem(markup); s ++= ">" + ts.foreach(tree) + s ++= "" + case Text(txt) => text(txt) + } + body.foreach(tree) + s.toString + } + + def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree)) + + + /* text content */ + + def content_stream(tree: Tree): Stream[String] = + tree match { + case Elem(_, body) => content_stream(body) + case Text(content) => Stream(content) + } + def content_stream(body: Body): Stream[String] = + body.toStream.flatten(content_stream(_)) + + def content(tree: Tree): Iterator[String] = content_stream(tree).iterator + def content(body: Body): Iterator[String] = content_stream(body).iterator + + + /* pipe-lined cache for partial sharing */ + + class Cache(initial_size: Int = 131071, max_string: Int = 100) + { + private val cache_actor = actor + { + val table = new WeakHashMap[Any, WeakReference[Any]](initial_size) + + def lookup[A](x: A): Option[A] = + { + val ref = table.get(x) + if (ref == null) None + else { + val y = ref.asInstanceOf[WeakReference[A]].get + if (y == null) None + else Some(y) + } + } + def store[A](x: A): A = + { + table.put(x, new WeakReference[Any](x)) + x + } + + def trim_bytes(s: String): String = new String(s.toCharArray) + + def cache_string(x: String): String = + lookup(x) match { + case Some(y) => y + case None => + val z = trim_bytes(x) + if (z.length > max_string) z else store(z) + } + def cache_props(x: Properties.T): Properties.T = + if (x.isEmpty) x + else + lookup(x) match { + case Some(y) => y + case None => store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2)))) + } + def cache_markup(x: Markup): Markup = + lookup(x) match { + case Some(y) => y + case None => + x match { + case Markup(name, props) => + store(Markup(cache_string(name), cache_props(props))) + } + } + def cache_tree(x: XML.Tree): XML.Tree = + lookup(x) match { + case Some(y) => y + case None => + x match { + case XML.Elem(markup, body) => + store(XML.Elem(cache_markup(markup), cache_body(body))) + case XML.Text(text) => store(XML.Text(cache_string(text))) + } + } + def cache_body(x: XML.Body): XML.Body = + if (x.isEmpty) x + else + lookup(x) match { + case Some(y) => y + case None => x.map(cache_tree(_)) + } + + // main loop + loop { + react { + case Cache_String(x, f) => f(cache_string(x)) + case Cache_Markup(x, f) => f(cache_markup(x)) + case Cache_Tree(x, f) => f(cache_tree(x)) + case Cache_Body(x, f) => f(cache_body(x)) + case Cache_Ignore(x, f) => f(x) + case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad) + } + } + } + + private case class Cache_String(x: String, f: String => Unit) + private case class Cache_Markup(x: Markup, f: Markup => Unit) + private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit) + private case class Cache_Body(x: XML.Body, f: XML.Body => Unit) + private case class Cache_Ignore[A](x: A, f: A => Unit) + + // main methods + def cache_string(x: String)(f: String => Unit) { cache_actor ! Cache_String(x, f) } + def cache_markup(x: Markup)(f: Markup => Unit) { cache_actor ! Cache_Markup(x, f) } + def cache_tree(x: XML.Tree)(f: XML.Tree => Unit) { cache_actor ! Cache_Tree(x, f) } + def cache_body(x: XML.Body)(f: XML.Body => Unit) { cache_actor ! Cache_Body(x, f) } + def cache_ignore[A](x: A)(f: A => Unit) { cache_actor ! Cache_Ignore(x, f) } + } + + + + /** document object model (W3C DOM) **/ + + def get_data(node: org.w3c.dom.Node): Option[XML.Tree] = + node.getUserData(Markup.Data.name) match { + case tree: XML.Tree => Some(tree) + case _ => None + } + + def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node = + { + def DOM(tr: Tree): org.w3c.dom.Node = tr match { + case Elem(Markup.Data, List(data, t)) => + val node = DOM(t) + node.setUserData(Markup.Data.name, data, null) + node + case Elem(Markup(name, atts), ts) => + if (name == Markup.Data.name) + error("Malformed data element: " + tr.toString) + val node = doc.createElement(name) + for ((name, value) <- atts) node.setAttribute(name, value) + for (t <- ts) node.appendChild(DOM(t)) + node + case Text(txt) => doc.createTextNode(txt) + } + DOM(tree) + } + + + + /** XML as data representation language **/ + + class XML_Atom(s: String) extends Exception(s) + class XML_Body(body: XML.Body) extends Exception + + object Encode + { + type T[A] = A => XML.Body + + + /* atomic values */ + + def long_atom(i: Long): String = i.toString + + def int_atom(i: Int): String = i.toString + + def bool_atom(b: Boolean): String = if (b) "1" else "0" + + def unit_atom(u: Unit) = "" + + + /* structural nodes */ + + private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts) + + private def vector(xs: List[String]): XML.Attributes = + xs.zipWithIndex.map(p => (int_atom(p._2), p._1)) + + private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree = + XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2) + + + /* representation of standard types */ + + val properties: T[Properties.T] = + (props => List(XML.Elem(Markup(":", props), Nil))) + + val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s))) + + val long: T[Long] = (x => string(long_atom(x))) + + val int: T[Int] = (x => string(int_atom(x))) + + val bool: T[Boolean] = (x => string(bool_atom(x))) + + val unit: T[Unit] = (x => string(unit_atom(x))) + + def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = + (x => List(node(f(x._1)), node(g(x._2)))) + + def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = + (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3)))) + + def list[A](f: T[A]): T[List[A]] = + (xs => xs.map((x: A) => node(f(x)))) + + def option[A](f: T[A]): T[Option[A]] = + { + case None => Nil + case Some(x) => List(node(f(x))) + } + + def variant[A](fs: List[PartialFunction[A, (List[String], XML.Body)]]): T[A] = + { + case x => + val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get + List(tagged(tag, f(x))) + } + } + + object Decode + { + type T[A] = XML.Body => A + type V[A] = (List[String], XML.Body) => A + + + /* atomic values */ + + def long_atom(s: String): Long = + try { java.lang.Long.parseLong(s) } + catch { case e: NumberFormatException => throw new XML_Atom(s) } + + def int_atom(s: String): Int = + try { Integer.parseInt(s) } + catch { case e: NumberFormatException => throw new XML_Atom(s) } + + def bool_atom(s: String): Boolean = + if (s == "1") true + else if (s == "0") false + else throw new XML_Atom(s) + + def unit_atom(s: String): Unit = + if (s == "") () else throw new XML_Atom(s) + + + /* structural nodes */ + + private def node(t: XML.Tree): XML.Body = + t match { + case XML.Elem(Markup(":", Nil), ts) => ts + case _ => throw new XML_Body(List(t)) + } + + private def vector(atts: XML.Attributes): List[String] = + { + val xs = new mutable.ListBuffer[String] + var i = 0 + for ((a, x) <- atts) { + if (int_atom(a) == i) { xs += x; i = i + 1 } + else throw new XML_Atom(a) + } + xs.toList + } + + private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) = + t match { + case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts)) + case _ => throw new XML_Body(List(t)) + } + + + /* representation of standard types */ + + val properties: T[Properties.T] = + { + case List(XML.Elem(Markup(":", props), Nil)) => props + case ts => throw new XML_Body(ts) + } + + val string: T[String] = + { + case Nil => "" + case List(XML.Text(s)) => s + case ts => throw new XML_Body(ts) + } + + val long: T[Long] = (x => long_atom(string(x))) + + val int: T[Int] = (x => int_atom(string(x))) + + val bool: T[Boolean] = (x => bool_atom(string(x))) + + val unit: T[Unit] = (x => unit_atom(string(x))) + + def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = + { + case List(t1, t2) => (f(node(t1)), g(node(t2))) + case ts => throw new XML_Body(ts) + } + + def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = + { + case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3))) + case ts => throw new XML_Body(ts) + } + + def list[A](f: T[A]): T[List[A]] = + (ts => ts.map(t => f(node(t)))) + + def option[A](f: T[A]): T[Option[A]] = + { + case Nil => None + case List(t) => Some(f(node(t))) + case ts => throw new XML_Body(ts) + } + + def variant[A](fs: List[V[A]]): T[A] = + { + case List(t) => + val (tag, (xs, ts)) = tagged(t) + val f = + try { fs(tag) } + catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) } + f(xs, ts) + case ts => throw new XML_Body(ts) + } + } +} diff -r b99dfee76538 -r 0385292321a0 src/Pure/PIDE/yxml.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Pure/PIDE/yxml.ML Sun Sep 04 15:21:50 2011 +0200 @@ -0,0 +1,148 @@ +(* Title: Pure/PIDE/yxml.ML + Author: Makarius + +Efficient text representation of XML trees using extra characters X +and Y -- no escaping, may nest marked text verbatim. Suitable for +direct inlining into plain text. + +Markup ...body... is encoded as: + + X Y name Y att=val ... X + ... + body + ... + X Y X +*) + +signature YXML = +sig + val X: Symbol.symbol + val Y: Symbol.symbol + val embed_controls: string -> string + val detect: string -> bool + val output_markup: Markup.T -> string * string + val element: string -> XML.attributes -> string list -> string + val string_of_body: XML.body -> string + val string_of: XML.tree -> string + val parse_body: string -> XML.body + val parse: string -> XML.tree +end; + +structure YXML: YXML = +struct + +(** string representation **) + +(* idempotent recoding of certain low ASCII control characters *) + +fun pseudo_utf8 c = + if Symbol.is_ascii_control c + then chr 192 ^ chr (128 + ord c) + else c; + +fun embed_controls str = + if exists_string Symbol.is_ascii_control str + then translate_string pseudo_utf8 str + else str; + + +(* markers *) + +val X = chr 5; +val Y = chr 6; +val XY = X ^ Y; +val XYX = XY ^ X; + +val detect = exists_string (fn s => s = X orelse s = Y); + + +(* output *) + +fun output_markup (markup as (name, atts)) = + if Markup.is_empty markup then Markup.no_output + else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX); + +fun element name atts body = + let val (pre, post) = output_markup (name, atts) + in pre ^ implode body ^ post end; + +fun string_of_body body = + let + fun attrib (a, x) = + Buffer.add Y #> + Buffer.add a #> Buffer.add "=" #> Buffer.add x; + fun tree (XML.Elem ((name, atts), ts)) = + Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #> + trees ts #> + Buffer.add XYX + | tree (XML.Text s) = Buffer.add s + and trees ts = fold tree ts; + in Buffer.empty |> trees body |> Buffer.content end; + +val string_of = string_of_body o single; + + + +(** efficient YXML parsing **) + +local + +(* splitting *) + +fun is_char s c = ord s = Char.ord c; + +val split_string = + Substring.full #> + Substring.tokens (is_char X) #> + map (Substring.fields (is_char Y) #> map Substring.string); + + +(* structural errors *) + +fun err msg = raise Fail ("Malformed YXML encoding: " ^ msg); +fun err_attribute () = err "bad attribute"; +fun err_element () = err "bad element"; +fun err_unbalanced "" = err "unbalanced element" + | err_unbalanced name = err ("unbalanced element " ^ quote name); + + +(* stack operations *) + +fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending; + +fun push "" _ _ = err_element () + | push name atts pending = ((name, atts), []) :: pending; + +fun pop ((("", _), _) :: _) = err_unbalanced "" + | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending; + + +(* parsing *) + +fun parse_attrib s = + (case first_field "=" s of + NONE => err_attribute () + | SOME ("", _) => err_attribute () + | SOME att => att); + +fun parse_chunk ["", ""] = pop + | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts) + | parse_chunk txts = fold (add o XML.Text) txts; + +in + +fun parse_body source = + (case fold parse_chunk (split_string source) [(("", []), [])] of + [(("", _), result)] => rev result + | ((name, _), _) :: _ => err_unbalanced name); + +fun parse source = + (case parse_body source of + [result] => result + | [] => XML.Text "" + | _ => err "multiple results"); + +end; + +end; + diff -r b99dfee76538 -r 0385292321a0 src/Pure/PIDE/yxml.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Pure/PIDE/yxml.scala Sun Sep 04 15:21:50 2011 +0200 @@ -0,0 +1,135 @@ +/* Title: Pure/PIDE/yxml.scala + Author: Makarius + +Efficient text representation of XML trees. Suitable for direct +inlining into plain text. +*/ + +package isabelle + + +import scala.collection.mutable + + +object YXML +{ + /* chunk markers */ + + val X = '\5' + val Y = '\6' + val X_string = X.toString + val Y_string = Y.toString + + def detect(s: String): Boolean = s.exists(c => c == X || c == Y) + + + /* string representation */ // FIXME byte array version with pseudo-utf-8 (!?) + + def string_of_body(body: XML.Body): String = + { + val s = new StringBuilder + def attrib(p: (String, String)) { s += Y; s ++= p._1; s += '='; s ++= p._2 } + def tree(t: XML.Tree): Unit = + t match { + case XML.Elem(Markup(name, atts), ts) => + s += X; s += Y; s++= name; atts.foreach(attrib); s += X + ts.foreach(tree) + s += X; s += Y; s += X + case XML.Text(text) => s ++= text + } + body.foreach(tree) + s.toString + } + + def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree)) + + + /* parsing */ + + private def err(msg: String) = error("Malformed YXML: " + msg) + private def err_attribute() = err("bad attribute") + private def err_element() = err("bad element") + private def err_unbalanced(name: String) = + if (name == "") err("unbalanced element") + else err("unbalanced element " + quote(name)) + + private def parse_attrib(source: CharSequence) = { + val s = source.toString + val i = s.indexOf('=') + if (i <= 0) err_attribute() + (s.substring(0, i), s.substring(i + 1)) + } + + + def parse_body(source: CharSequence): XML.Body = + { + /* stack operations */ + + def buffer(): mutable.ListBuffer[XML.Tree] = new mutable.ListBuffer[XML.Tree] + var stack: List[(Markup, mutable.ListBuffer[XML.Tree])] = List((Markup.Empty, buffer())) + + def add(x: XML.Tree) + { + (stack: @unchecked) match { case ((_, body) :: _) => body += x } + } + + def push(name: String, atts: XML.Attributes) + { + if (name == "") err_element() + else stack = (Markup(name, atts), buffer()) :: stack + } + + def pop() + { + (stack: @unchecked) match { + case ((Markup.Empty, _) :: _) => err_unbalanced("") + case ((markup, body) :: pending) => + stack = pending + add(XML.Elem(markup, body.toList)) + } + } + + + /* parse chunks */ + + for (chunk <- Library.chunks(source, X) if chunk.length != 0) { + if (chunk.length == 1 && chunk.charAt(0) == Y) pop() + else { + Library.chunks(chunk, Y).toList match { + case ch :: name :: atts if ch.length == 0 => + push(name.toString, atts.map(parse_attrib)) + case txts => for (txt <- txts) add(XML.Text(txt.toString)) + } + } + } + (stack: @unchecked) match { + case List((Markup.Empty, body)) => body.toList + case (Markup(name, _), _) :: _ => err_unbalanced(name) + } + } + + def parse(source: CharSequence): XML.Tree = + parse_body(source) match { + case List(result) => result + case Nil => XML.Text("") + case _ => err("multiple results") + } + + + /* failsafe parsing */ + + private def markup_malformed(source: CharSequence) = + XML.elem(Markup.MALFORMED, List(XML.Text(source.toString))) + + def parse_body_failsafe(source: CharSequence): XML.Body = + { + try { parse_body(source) } + catch { case ERROR(_) => List(markup_malformed(source)) } + } + + def parse_failsafe(source: CharSequence): XML.Tree = + { + try { parse(source) } + catch { case ERROR(_) => markup_malformed(source) } + } +} diff -r b99dfee76538 -r 0385292321a0 src/Pure/ROOT.ML --- a/src/Pure/ROOT.ML Sun Sep 04 14:29:15 2011 +0200 +++ b/src/Pure/ROOT.ML Sun Sep 04 15:21:50 2011 +0200 @@ -54,17 +54,18 @@ use "General/linear_set.ML"; use "General/buffer.ML"; use "General/pretty.ML"; -use "General/xml.ML"; use "General/path.ML"; use "General/url.ML"; use "General/file.ML"; -use "General/yxml.ML"; use "General/long_name.ML"; use "General/binding.ML"; use "General/sha1.ML"; if ML_System.is_polyml then use "General/sha1_polyml.ML" else (); +use "PIDE/xml.ML"; +use "PIDE/yxml.ML"; + (* concurrency within the ML runtime *) diff -r b99dfee76538 -r 0385292321a0 src/Pure/build-jars --- a/src/Pure/build-jars Sun Sep 04 14:29:15 2011 +0200 +++ b/src/Pure/build-jars Sun Sep 04 15:21:50 2011 +0200 @@ -24,8 +24,6 @@ General/scan.scala General/sha1.scala General/symbol.scala - General/xml.scala - General/yxml.scala Isar/keyword.scala Isar/outer_syntax.scala Isar/parse.scala @@ -36,6 +34,8 @@ PIDE/isar_document.scala PIDE/markup_tree.scala PIDE/text.scala + PIDE/xml.scala + PIDE/yxml.scala System/cygwin.scala System/download.scala System/event_bus.scala