src/Pure/General/xml.ML
author wenzelm
Tue Jul 12 10:44:30 2011 +0200 (2011-07-12)
changeset 43767 e0219ef7f84c
parent 40627 becf5d5187cc
child 43768 d52ab827d62b
permissions -rw-r--r--
tuned XML modules;
     1 (*  Title:      Pure/General/xml.ML
     2     Author:     David Aspinall, Stefan Berghofer and Markus Wenzel
     3 
     4 Simple XML tree values.
     5 *)
     6 
     7 signature XML_DATA_OPS =
     8 sig
     9   type 'a T
    10   val properties: Properties.T T
    11   val string: string T
    12   val int: int T
    13   val bool: bool T
    14   val unit: unit T
    15   val pair: 'a T -> 'b T -> ('a * 'b) T
    16   val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
    17   val list: 'a T -> 'a list T
    18   val option: 'a T -> 'a option T
    19   val variant: 'a T list -> 'a T
    20 end;
    21 
    22 signature XML =
    23 sig
    24   type attributes = Properties.T
    25   datatype tree =
    26       Elem of Markup.T * tree list
    27     | Text of string
    28   type body = tree list
    29   val add_content: tree -> Buffer.T -> Buffer.T
    30   val content_of: body -> string
    31   val header: string
    32   val text: string -> string
    33   val element: string -> attributes -> string list -> string
    34   val output_markup: Markup.T -> Output.output * Output.output
    35   val string_of: tree -> string
    36   val output: tree -> TextIO.outstream -> unit
    37   val parse_comments: string list -> unit * string list
    38   val parse_string : string -> string option
    39   val parse_element: string list -> tree * string list
    40   val parse_document: string list -> tree * string list
    41   val parse: string -> tree
    42   exception XML_ATOM of string
    43   exception XML_BODY of body
    44   structure Encode: XML_DATA_OPS where type 'a T = 'a -> body
    45   structure Decode: XML_DATA_OPS where type 'a T = body -> 'a
    46 end;
    47 
    48 structure XML: XML =
    49 struct
    50 
    51 (** XML trees **)
    52 
    53 type attributes = Properties.T;
    54 
    55 datatype tree =
    56     Elem of Markup.T * tree list
    57   | Text of string;
    58 
    59 type body = tree list;
    60 
    61 fun add_content (Elem (_, ts)) = fold add_content ts
    62   | add_content (Text s) = Buffer.add s;
    63 
    64 fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content;
    65 
    66 
    67 
    68 (** string representation **)
    69 
    70 val header = "<?xml version=\"1.0\"?>\n";
    71 
    72 
    73 (* escaped text *)
    74 
    75 fun decode "&lt;" = "<"
    76   | decode "&gt;" = ">"
    77   | decode "&amp;" = "&"
    78   | decode "&apos;" = "'"
    79   | decode "&quot;" = "\""
    80   | decode c = c;
    81 
    82 fun encode "<" = "&lt;"
    83   | encode ">" = "&gt;"
    84   | encode "&" = "&amp;"
    85   | encode "'" = "&apos;"
    86   | encode "\"" = "&quot;"
    87   | encode c = c;
    88 
    89 val text = translate_string encode;
    90 
    91 
    92 (* elements *)
    93 
    94 fun elem name atts =
    95   space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);
    96 
    97 fun element name atts body =
    98   let val b = implode body in
    99     if b = "" then enclose "<" "/>" (elem name atts)
   100     else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name
   101   end;
   102 
   103 fun output_markup (markup as (name, atts)) =
   104   if Markup.is_empty markup then Markup.no_output
   105   else (enclose "<" ">" (elem name atts), enclose "</" ">" name);
   106 
   107 
   108 (* output *)
   109 
   110 fun buffer_of tree =
   111   let
   112     fun traverse (Elem ((name, atts), [])) =
   113           Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"
   114       | traverse (Elem ((name, atts), ts)) =
   115           Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>
   116           fold traverse ts #>
   117           Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
   118       | traverse (Text s) = Buffer.add (text s);
   119   in Buffer.empty |> traverse tree end;
   120 
   121 val string_of = Buffer.content o buffer_of;
   122 val output = Buffer.output o buffer_of;
   123 
   124 
   125 
   126 (** XML parsing (slow) **)
   127 
   128 local
   129 
   130 fun err s (xs, _) =
   131   "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
   132 
   133 fun ignored _ = [];
   134 
   135 val blanks = Scan.many Symbol.is_blank;
   136 val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
   137 val regular = Scan.one Symbol.is_regular;
   138 fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);
   139 
   140 val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;
   141 
   142 val parse_cdata =
   143   Scan.this_string "<![CDATA[" |--
   144   (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|
   145   Scan.this_string "]]>";
   146 
   147 val parse_att =
   148   (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) --
   149   (($$ "\"" || $$ "'") :|-- (fn s =>
   150     (Scan.repeat (special || regular_except s) >> implode) --| $$ s));
   151 
   152 val parse_comment =
   153   Scan.this_string "<!--" --
   154   Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
   155   Scan.this_string "-->" >> ignored;
   156 
   157 val parse_processing_instruction =
   158   Scan.this_string "<?" --
   159   Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
   160   Scan.this_string "?>" >> ignored;
   161 
   162 val parse_doctype =
   163   Scan.this_string "<!DOCTYPE" --
   164   Scan.repeat (Scan.unless ($$ ">") regular) --
   165   $$ ">" >> ignored;
   166 
   167 val parse_misc =
   168   Scan.one Symbol.is_blank >> ignored ||
   169   parse_processing_instruction ||
   170   parse_comment;
   171 
   172 val parse_optional_text =
   173   Scan.optional (parse_chars >> (single o Text)) [];
   174 
   175 in
   176 
   177 val parse_comments =
   178   blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
   179 
   180 val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode;
   181 
   182 fun parse_content xs =
   183   (parse_optional_text @@@
   184     (Scan.repeat
   185       ((parse_element >> single ||
   186         parse_cdata >> (single o Text) ||
   187         parse_processing_instruction ||
   188         parse_comment)
   189       @@@ parse_optional_text) >> flat)) xs
   190 
   191 and parse_element xs =
   192   ($$ "<" |-- Symbol.scan_id --
   193     Scan.repeat (blanks |-- parse_att) --| blanks :-- (fn (s, _) =>
   194       !! (err "Expected > or />")
   195         (Scan.this_string "/>" >> ignored
   196          || $$ ">" |-- parse_content --|
   197             !! (err ("Expected </" ^ s ^ ">"))
   198               (Scan.this_string ("</" ^ s) --| blanks --| $$ ">"))) >> Elem) xs;
   199 
   200 val parse_document =
   201   (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)
   202   |-- parse_element;
   203 
   204 fun parse s =
   205   (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element")
   206       (blanks |-- parse_document --| blanks))) (raw_explode s) of
   207     (x, []) => x
   208   | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
   209 
   210 end;
   211 
   212 
   213 
   214 (** XML as data representation language **)
   215 
   216 exception XML_ATOM of string;
   217 exception XML_BODY of tree list;
   218 
   219 
   220 structure Encode =
   221 struct
   222 
   223 type 'a T = 'a -> body;
   224 
   225 
   226 (* basic values *)
   227 
   228 fun int_atom i = signed_string_of_int i;
   229 
   230 fun bool_atom false = "0"
   231   | bool_atom true = "1";
   232 
   233 fun unit_atom () = "";
   234 
   235 
   236 (* structural nodes *)
   237 
   238 fun node ts = Elem ((":", []), ts);
   239 
   240 fun tagged (tag, ts) = Elem ((int_atom tag, []), ts);
   241 
   242 
   243 (* representation of standard types *)
   244 
   245 fun properties props = [Elem ((":", props), [])];
   246 
   247 fun string "" = []
   248   | string s = [Text s];
   249 
   250 val int = string o int_atom;
   251 
   252 val bool = string o bool_atom;
   253 
   254 val unit = string o unit_atom;
   255 
   256 fun pair f g (x, y) = [node (f x), node (g y)];
   257 
   258 fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)];
   259 
   260 fun list f xs = map (node o f) xs;
   261 
   262 fun option _ NONE = []
   263   | option f (SOME x) = [node (f x)];
   264 
   265 fun variant fs x = [tagged (the (get_index (fn f => try f x) fs))];
   266 
   267 end;
   268 
   269 
   270 structure Decode =
   271 struct
   272 
   273 type 'a T = body -> 'a;
   274 
   275 
   276 (* basic values *)
   277 
   278 fun int_atom s =
   279   (case Int.fromString s of
   280     SOME i => i
   281   | NONE => raise XML_ATOM s);
   282 
   283 fun bool_atom "0" = false
   284   | bool_atom "1" = true
   285   | bool_atom s = raise XML_ATOM s;
   286 
   287 fun unit_atom "" = ()
   288   | unit_atom s = raise XML_ATOM s;
   289 
   290 
   291 (* structural nodes *)
   292 
   293 fun node (Elem ((":", []), ts)) = ts
   294   | node t = raise XML_BODY [t];
   295 
   296 fun tagged (Elem ((s, []), ts)) = (int_atom s, ts)
   297   | tagged t = raise XML_BODY [t];
   298 
   299 
   300 (* representation of standard types *)
   301 
   302 fun properties [Elem ((":", props), [])] = props
   303   | properties ts = raise XML_BODY ts;
   304 
   305 fun string [] = ""
   306   | string [Text s] = s
   307   | string ts = raise XML_BODY ts;
   308 
   309 val int = int_atom o string;
   310 
   311 val bool = bool_atom o string;
   312 
   313 val unit = unit_atom o string;
   314 
   315 fun pair f g [t1, t2] = (f (node t1), g (node t2))
   316   | pair _ _ ts = raise XML_BODY ts;
   317 
   318 fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3))
   319   | triple _ _ _ ts = raise XML_BODY ts;
   320 
   321 fun list f ts = map (f o node) ts;
   322 
   323 fun option _ [] = NONE
   324   | option f [t] = SOME (f (node t))
   325   | option _ ts = raise XML_BODY ts;
   326 
   327 fun variant fs [t] = uncurry (nth fs) (tagged t)
   328   | variant _ ts = raise XML_BODY ts;
   329 
   330 end;
   331 
   332 end;