src/Pure/Tools/xml.ML
author wenzelm
Sat Jul 28 20:40:27 2007 +0200 (2007-07-28)
changeset 24022 ab76c73b3b58
parent 23784 75e6b9dd5336
permissions -rw-r--r--
tuned;
     1 (*  Title:      Pure/Tools/xml.ML
     2     ID:         $Id$
     3     Author:     David Aspinall, Stefan Berghofer and Markus Wenzel
     4 
     5 Basic support for XML.
     6 *)
     7 
     8 signature XML =
     9 sig
    10   (* string functions *)
    11   val header: string
    12   val text: string -> string
    13   val text_charref: string -> string
    14   val cdata: string -> string
    15   type attributes = (string * string) list
    16   val attribute: string * string -> string
    17   val element: string -> attributes -> string list -> string
    18   (* tree functions *)
    19   datatype tree =
    20       Elem of string * attributes * tree list
    21     | Text of string
    22     | Output of output
    23   type content = tree list
    24   type element = string * attributes * content
    25   val string_of_tree: tree -> string
    26   val buffer_of_tree: tree -> Buffer.T
    27   val parse_string : string -> string option
    28   val parse_content: string list -> tree list * string list
    29   val parse_elem: string list -> tree * string list
    30   val parse_document: string list -> (string option * tree) * string list
    31   val tree_of_string: string -> tree
    32   val scan_comment_whspc : string list -> unit * string list
    33 end;
    34 
    35 structure XML: XML =
    36 struct
    37 
    38 (** string based representation (small scale) **)
    39 
    40 val header = "<?xml version=\"1.0\"?>\n";
    41 
    42 
    43 (* text and character data *)
    44 
    45 fun decode "&lt;" = "<"
    46   | decode "&gt;" = ">"
    47   | decode "&amp;" = "&"
    48   | decode "&apos;" = "'"
    49   | decode "&quot;" = "\""
    50   | decode c = c;
    51 
    52 fun encode "<" = "&lt;"
    53   | encode ">" = "&gt;"
    54   | encode "&" = "&amp;"
    55   | encode "'" = "&apos;"
    56   | encode "\"" = "&quot;"
    57   | encode c = c;
    58 
    59 fun encode_charref c = "&#" ^ Int.toString (ord c) ^ ";"
    60 
    61 val text = Library.translate_string encode;
    62 
    63 val text_charref = translate_string encode_charref;
    64 
    65 val cdata = enclose "<![CDATA[" "]]>\n";
    66 
    67 
    68 (* elements *)
    69 
    70 fun attribute (a, x) = a ^ " = \"" ^ text x ^ "\"";
    71 
    72 fun element name atts cs =
    73   let val elem = space_implode " " (name :: map attribute atts) in
    74     if null cs then enclose "<" "/>" elem
    75     else enclose "<" ">" elem ^ implode cs ^ enclose "</" ">" name
    76   end;
    77 
    78 
    79 
    80 (** explicit XML trees **)
    81 
    82 type attributes = (string * string) list;
    83 
    84 datatype tree =
    85     Elem of string * attributes * tree list
    86   | Text of string
    87   | Output of output;
    88 
    89 type content = tree list;
    90 
    91 type element = string * attributes * content;
    92 
    93 fun buffer_of_tree tree =
    94   let
    95     fun string_of (Elem (name, atts, ts)) buf =
    96         let val buf' =
    97           buf |> Buffer.add "<"
    98           |> fold Buffer.add (separate " " (name :: map attribute atts))
    99         in
   100           if null ts then
   101             buf' |> Buffer.add "/>"
   102           else
   103             buf' |> Buffer.add ">"
   104             |> fold string_of ts
   105             |> Buffer.add "</" |> Buffer.add name |> Buffer.add ">"
   106         end
   107       | string_of (Text s) buf = Buffer.add (text s) buf
   108       | string_of (Output s) buf = Buffer.add s buf;
   109   in string_of tree Buffer.empty end;
   110 
   111 val string_of_tree = Buffer.content o buffer_of_tree;
   112 
   113 
   114 
   115 (** XML parsing **)
   116 
   117 fun err s (xs, _) =
   118   "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
   119 
   120 val scan_whspc = Scan.many Symbol.is_blank;
   121 
   122 val scan_special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
   123 
   124 val parse_chars = Scan.repeat1 (Scan.unless ((* scan_whspc -- *)$$ "<")
   125   (scan_special || Scan.one Symbol.is_regular)) >> implode;
   126 
   127 val parse_string = Scan.read Symbol.stopper parse_chars o explode;
   128 
   129 val parse_cdata = Scan.this_string "<![CDATA[" |--
   130   (Scan.repeat (Scan.unless (Scan.this_string "]]>") (Scan.one Symbol.is_regular)) >>
   131     implode) --| Scan.this_string "]]>";
   132 
   133 val parse_att =
   134   Symbol.scan_id --| scan_whspc --| $$ "=" --| scan_whspc --
   135   (($$ "\"" || $$ "'") :|-- (fn s => (Scan.repeat (Scan.unless ($$ s)
   136     (scan_special || Scan.one Symbol.is_regular)) >> implode) --| $$ s));
   137 
   138 val parse_comment = Scan.this_string "<!--" --
   139   Scan.repeat (Scan.unless (Scan.this_string "-->") (Scan.one Symbol.is_regular)) --
   140   Scan.this_string "-->";
   141 
   142 val scan_comment_whspc =
   143   (scan_whspc >> K()) --| (Scan.repeat (parse_comment |-- (scan_whspc >> K())));
   144 
   145 val parse_pi = Scan.this_string "<?" |--
   146   Scan.repeat (Scan.unless (Scan.this_string "?>") (Scan.one Symbol.is_regular)) --|
   147   Scan.this_string "?>";
   148 
   149 fun parse_content xs =
   150   ((Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) [] --
   151     (Scan.repeat ((* scan_whspc |-- *)
   152        (   parse_elem >> single
   153         || parse_cdata >> (single o Text)
   154         || parse_pi >> K []
   155         || parse_comment >> K []) --
   156        Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) []
   157          >> op @) >> flat) >> op @)(* --| scan_whspc*)) xs
   158 
   159 and parse_elem xs =
   160   ($$ "<" |-- Symbol.scan_id --
   161     Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) =>
   162       !! (err "Expected > or />")
   163         (Scan.this_string "/>" >> K []
   164          || $$ ">" |-- parse_content --|
   165             !! (err ("Expected </" ^ s ^ ">"))
   166               (Scan.this_string ("</" ^ s) --| scan_whspc --| $$ ">"))) >>
   167     (fn ((s, atts), ts) => Elem (s, atts, ts))) xs;
   168 
   169 val parse_document =
   170   Scan.option (Scan.this_string "<!DOCTYPE" -- scan_whspc |--
   171     (Scan.repeat (Scan.unless ($$ ">")
   172       (Scan.one Symbol.is_regular)) >> implode) --| $$ ">" --| scan_whspc) --
   173   parse_elem;
   174 
   175 fun tree_of_string s =
   176   (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element")
   177       (scan_whspc |-- parse_elem --| scan_whspc))) (Symbol.explode s) of
   178     (x, []) => x
   179   | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
   180 
   181 end;