--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/PIDE/xml.ML Sun Sep 04 15:21:50 2011 +0200
@@ -0,0 +1,363 @@
+(* Title: Pure/PIDE/xml.ML
+ Author: David Aspinall
+ Author: Stefan Berghofer
+ Author: Makarius
+
+Untyped XML trees and basic data representation.
+*)
+
+signature XML_DATA_OPS =
+sig
+ type 'a A
+ type 'a T
+ type 'a V
+ val int_atom: int A
+ val bool_atom: bool A
+ val unit_atom: unit A
+ val properties: Properties.T T
+ val string: string T
+ val int: int T
+ val bool: bool T
+ val unit: unit T
+ val pair: 'a T -> 'b T -> ('a * 'b) T
+ val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
+ val list: 'a T -> 'a list T
+ val option: 'a T -> 'a option T
+ val variant: 'a V list -> 'a T
+end;
+
+signature XML =
+sig
+ type attributes = Properties.T
+ datatype tree =
+ Elem of Markup.T * tree list
+ | Text of string
+ type body = tree list
+ val add_content: tree -> Buffer.T -> Buffer.T
+ val content_of: body -> string
+ val header: string
+ val text: string -> string
+ val element: string -> attributes -> string list -> string
+ val output_markup: Markup.T -> Output.output * Output.output
+ val string_of: tree -> string
+ val pretty: int -> tree -> Pretty.T
+ val output: tree -> TextIO.outstream -> unit
+ val parse_comments: string list -> unit * string list
+ val parse_string : string -> string option
+ val parse_element: string list -> tree * string list
+ val parse_document: string list -> tree * string list
+ val parse: string -> tree
+ exception XML_ATOM of string
+ exception XML_BODY of body
+ structure Encode: XML_DATA_OPS
+ structure Decode: XML_DATA_OPS
+end;
+
+structure XML: XML =
+struct
+
+(** XML trees **)
+
+type attributes = Properties.T;
+
+datatype tree =
+ Elem of Markup.T * tree list
+ | Text of string;
+
+type body = tree list;
+
+fun add_content (Elem (_, ts)) = fold add_content ts
+ | add_content (Text s) = Buffer.add s;
+
+fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content;
+
+
+
+(** string representation **)
+
+val header = "<?xml version=\"1.0\"?>\n";
+
+
+(* escaped text *)
+
+fun decode "<" = "<"
+ | decode ">" = ">"
+ | decode "&" = "&"
+ | decode "'" = "'"
+ | decode """ = "\""
+ | decode c = c;
+
+fun encode "<" = "<"
+ | encode ">" = ">"
+ | encode "&" = "&"
+ | encode "'" = "'"
+ | encode "\"" = """
+ | encode c = c;
+
+val text = translate_string encode;
+
+
+(* elements *)
+
+fun elem name atts =
+ space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);
+
+fun element name atts body =
+ let val b = implode body in
+ if b = "" then enclose "<" "/>" (elem name atts)
+ else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name
+ end;
+
+fun output_markup (markup as (name, atts)) =
+ if Markup.is_empty markup then Markup.no_output
+ else (enclose "<" ">" (elem name atts), enclose "</" ">" name);
+
+
+(* output *)
+
+fun buffer_of depth tree =
+ let
+ fun traverse _ (Elem ((name, atts), [])) =
+ Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"
+ | traverse d (Elem ((name, atts), ts)) =
+ Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>
+ traverse_body d ts #>
+ Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
+ | traverse _ (Text s) = Buffer.add (text s)
+ and traverse_body 0 _ = Buffer.add "..."
+ | traverse_body d ts = fold (traverse (d - 1)) ts;
+ in Buffer.empty |> traverse depth tree end;
+
+val string_of = Buffer.content o buffer_of ~1;
+val output = Buffer.output o buffer_of ~1;
+
+fun pretty depth tree =
+ Pretty.str (Buffer.content (buffer_of (Int.max (0, depth)) tree));
+
+
+
+(** XML parsing **)
+
+local
+
+fun err msg (xs, _) =
+ fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
+
+fun ignored _ = [];
+
+val blanks = Scan.many Symbol.is_blank;
+val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
+val regular = Scan.one Symbol.is_regular;
+fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);
+
+val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;
+
+val parse_cdata =
+ Scan.this_string "<![CDATA[" |--
+ (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|
+ Scan.this_string "]]>";
+
+val parse_att =
+ (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) --
+ (($$ "\"" || $$ "'") :|-- (fn s =>
+ (Scan.repeat (special || regular_except s) >> implode) --| $$ s));
+
+val parse_comment =
+ Scan.this_string "<!--" --
+ Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
+ Scan.this_string "-->" >> ignored;
+
+val parse_processing_instruction =
+ Scan.this_string "<?" --
+ Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
+ Scan.this_string "?>" >> ignored;
+
+val parse_doctype =
+ Scan.this_string "<!DOCTYPE" --
+ Scan.repeat (Scan.unless ($$ ">") regular) --
+ $$ ">" >> ignored;
+
+val parse_misc =
+ Scan.one Symbol.is_blank >> ignored ||
+ parse_processing_instruction ||
+ parse_comment;
+
+val parse_optional_text =
+ Scan.optional (parse_chars >> (single o Text)) [];
+
+fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_";
+fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = ".";
+val parse_name = Scan.one name_start_char ::: Scan.many name_char;
+
+in
+
+val parse_comments =
+ blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
+
+val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode;
+
+fun parse_content xs =
+ (parse_optional_text @@@
+ (Scan.repeat
+ ((parse_element >> single ||
+ parse_cdata >> (single o Text) ||
+ parse_processing_instruction ||
+ parse_comment)
+ @@@ parse_optional_text) >> flat)) xs
+
+and parse_element xs =
+ ($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :--
+ (fn (name, _) =>
+ !! (err (fn () => "Expected > or />"))
+ ($$ "/" -- $$ ">" >> ignored ||
+ $$ ">" |-- parse_content --|
+ !! (err (fn () => "Expected </" ^ implode name ^ ">"))
+ ($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">")))
+ >> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs;
+
+val parse_document =
+ (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)
+ |-- parse_element;
+
+fun parse s =
+ (case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element"))
+ (blanks |-- parse_document --| blanks))) (raw_explode s) of
+ (x, []) => x
+ | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
+
+end;
+
+
+
+(** XML as data representation language **)
+
+exception XML_ATOM of string;
+exception XML_BODY of tree list;
+
+
+structure Encode =
+struct
+
+type 'a A = 'a -> string;
+type 'a T = 'a -> body;
+type 'a V = 'a -> string list * body;
+
+
+(* atomic values *)
+
+fun int_atom i = signed_string_of_int i;
+
+fun bool_atom false = "0"
+ | bool_atom true = "1";
+
+fun unit_atom () = "";
+
+
+(* structural nodes *)
+
+fun node ts = Elem ((":", []), ts);
+
+fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs;
+
+fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts);
+
+
+(* representation of standard types *)
+
+fun properties props = [Elem ((":", props), [])];
+
+fun string "" = []
+ | string s = [Text s];
+
+val int = string o int_atom;
+
+val bool = string o bool_atom;
+
+val unit = string o unit_atom;
+
+fun pair f g (x, y) = [node (f x), node (g y)];
+
+fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)];
+
+fun list f xs = map (node o f) xs;
+
+fun option _ NONE = []
+ | option f (SOME x) = [node (f x)];
+
+fun variant fs x = [tagged (the (get_index (fn f => try f x) fs))];
+
+end;
+
+
+structure Decode =
+struct
+
+type 'a A = string -> 'a;
+type 'a T = body -> 'a;
+type 'a V = string list * body -> 'a;
+
+
+(* atomic values *)
+
+fun int_atom s =
+ Markup.parse_int s
+ handle Fail _ => raise XML_ATOM s;
+
+fun bool_atom "0" = false
+ | bool_atom "1" = true
+ | bool_atom s = raise XML_ATOM s;
+
+fun unit_atom "" = ()
+ | unit_atom s = raise XML_ATOM s;
+
+
+(* structural nodes *)
+
+fun node (Elem ((":", []), ts)) = ts
+ | node t = raise XML_BODY [t];
+
+fun vector atts =
+ #1 (fold_map (fn (a, x) =>
+ fn i => if int_atom a = i then (x, i + 1) else raise XML_ATOM a) atts 0);
+
+fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts))
+ | tagged t = raise XML_BODY [t];
+
+
+(* representation of standard types *)
+
+fun properties [Elem ((":", props), [])] = props
+ | properties ts = raise XML_BODY ts;
+
+fun string [] = ""
+ | string [Text s] = s
+ | string ts = raise XML_BODY ts;
+
+val int = int_atom o string;
+
+val bool = bool_atom o string;
+
+val unit = unit_atom o string;
+
+fun pair f g [t1, t2] = (f (node t1), g (node t2))
+ | pair _ _ ts = raise XML_BODY ts;
+
+fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3))
+ | triple _ _ _ ts = raise XML_BODY ts;
+
+fun list f ts = map (f o node) ts;
+
+fun option _ [] = NONE
+ | option f [t] = SOME (f (node t))
+ | option _ ts = raise XML_BODY ts;
+
+fun variant fs [t] =
+ let
+ val (tag, (xs, ts)) = tagged t;
+ val f = nth fs tag handle General.Subscript => raise XML_BODY [t];
+ in f (xs, ts) end
+ | variant _ ts = raise XML_BODY ts;
+
+end;
+
+end;