src/Pure/General/xml.ML
author wenzelm
Tue Aug 10 22:26:23 2010 +0200 (2010-08-10)
changeset 38266 492d377ecfe2
parent 38228 ada3ab6b9085
child 38474 e498dc2eb576
permissions -rw-r--r--
type XML.body as basic data representation language;
tuned;
     1 (*  Title:      Pure/General/xml.ML
     2     Author:     David Aspinall, Stefan Berghofer and Markus Wenzel
     3 
     4 Simple XML tree values.
     5 *)
     6 
     7 signature XML =
     8 sig
     9   type attributes = Properties.T
    10   datatype tree =
    11       Elem of Markup.T * tree list
    12     | Text of string
    13   type body = tree list
    14   val add_content: tree -> Buffer.T -> Buffer.T
    15   val header: string
    16   val text: string -> string
    17   val element: string -> attributes -> string list -> string
    18   val output_markup: Markup.T -> output * output
    19   val string_of: tree -> string
    20   val output: tree -> TextIO.outstream -> unit
    21   val parse_comments: string list -> unit * string list
    22   val parse_string : string -> string option
    23   val parse_element: string list -> tree * string list
    24   val parse_document: string list -> tree * string list
    25   val parse: string -> tree
    26 end;
    27 
    28 structure XML: XML =
    29 struct
    30 
    31 (** XML trees **)
    32 
    33 type attributes = Properties.T;
    34 
    35 datatype tree =
    36     Elem of Markup.T * tree list
    37   | Text of string;
    38 
    39 type body = tree list;
    40 
    41 fun add_content (Elem (_, ts)) = fold add_content ts
    42   | add_content (Text s) = Buffer.add s;
    43 
    44 
    45 
    46 (** string representation **)
    47 
    48 val header = "<?xml version=\"1.0\"?>\n";
    49 
    50 
    51 (* escaped text *)
    52 
    53 fun decode "&lt;" = "<"
    54   | decode "&gt;" = ">"
    55   | decode "&amp;" = "&"
    56   | decode "&apos;" = "'"
    57   | decode "&quot;" = "\""
    58   | decode c = c;
    59 
    60 fun encode "<" = "&lt;"
    61   | encode ">" = "&gt;"
    62   | encode "&" = "&amp;"
    63   | encode "'" = "&apos;"
    64   | encode "\"" = "&quot;"
    65   | encode c = c;
    66 
    67 val text = translate_string encode;
    68 
    69 
    70 (* elements *)
    71 
    72 fun elem name atts =
    73   space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);
    74 
    75 fun element name atts body =
    76   let val b = implode body in
    77     if b = "" then enclose "<" "/>" (elem name atts)
    78     else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name
    79   end;
    80 
    81 fun output_markup (markup as (name, atts)) =
    82   if Markup.is_none markup then Markup.no_output
    83   else (enclose "<" ">" (elem name atts), enclose "</" ">" name);
    84 
    85 
    86 (* output *)
    87 
    88 fun buffer_of tree =
    89   let
    90     fun traverse (Elem ((name, atts), [])) =
    91           Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"
    92       | traverse (Elem ((name, atts), ts)) =
    93           Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>
    94           fold traverse ts #>
    95           Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
    96       | traverse (Text s) = Buffer.add (text s);
    97   in Buffer.empty |> traverse tree end;
    98 
    99 val string_of = Buffer.content o buffer_of;
   100 val output = Buffer.output o buffer_of;
   101 
   102 
   103 
   104 (** XML parsing (slow) **)
   105 
   106 local
   107 
   108 fun err s (xs, _) =
   109   "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
   110 
   111 fun ignored _ = [];
   112 
   113 val blanks = Scan.many Symbol.is_blank;
   114 val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
   115 val regular = Scan.one Symbol.is_regular;
   116 fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);
   117 
   118 val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;
   119 
   120 val parse_cdata =
   121   Scan.this_string "<![CDATA[" |--
   122   (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|
   123   Scan.this_string "]]>";
   124 
   125 val parse_att =
   126   (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) --
   127   (($$ "\"" || $$ "'") :|-- (fn s =>
   128     (Scan.repeat (special || regular_except s) >> implode) --| $$ s));
   129 
   130 val parse_comment =
   131   Scan.this_string "<!--" --
   132   Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
   133   Scan.this_string "-->" >> ignored;
   134 
   135 val parse_processing_instruction =
   136   Scan.this_string "<?" --
   137   Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
   138   Scan.this_string "?>" >> ignored;
   139 
   140 val parse_doctype =
   141   Scan.this_string "<!DOCTYPE" --
   142   Scan.repeat (Scan.unless ($$ ">") regular) --
   143   $$ ">" >> ignored;
   144 
   145 val parse_misc =
   146   Scan.one Symbol.is_blank >> ignored ||
   147   parse_processing_instruction ||
   148   parse_comment;
   149 
   150 val parse_optional_text =
   151   Scan.optional (parse_chars >> (single o Text)) [];
   152 
   153 in
   154 
   155 val parse_comments =
   156   blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
   157 
   158 val parse_string = Scan.read Symbol.stopper parse_chars o explode;
   159 
   160 fun parse_content xs =
   161   (parse_optional_text @@@
   162     (Scan.repeat
   163       ((parse_element >> single ||
   164         parse_cdata >> (single o Text) ||
   165         parse_processing_instruction ||
   166         parse_comment)
   167       @@@ parse_optional_text) >> flat)) xs
   168 
   169 and parse_element xs =
   170   ($$ "<" |-- Symbol.scan_id --
   171     Scan.repeat (blanks |-- parse_att) --| blanks :-- (fn (s, _) =>
   172       !! (err "Expected > or />")
   173         (Scan.this_string "/>" >> ignored
   174          || $$ ">" |-- parse_content --|
   175             !! (err ("Expected </" ^ s ^ ">"))
   176               (Scan.this_string ("</" ^ s) --| blanks --| $$ ">"))) >> Elem) xs;
   177 
   178 val parse_document =
   179   (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)
   180   |-- parse_element;
   181 
   182 fun parse s =
   183   (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element")
   184       (blanks |-- parse_document --| blanks))) (explode s) of
   185     (x, []) => x
   186   | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
   187 
   188 end;
   189 
   190 end;