src/Pure/Tools/xml.ML
changeset 24264 d6935e7dac8b
parent 24263 aff00d8b2e32
child 24265 4d5917cc60c4
equal deleted inserted replaced
24263:aff00d8b2e32 24264:d6935e7dac8b
     1 (*  Title:      Pure/Tools/xml.ML
       
     2     ID:         $Id$
       
     3     Author:     David Aspinall, Stefan Berghofer and Markus Wenzel
       
     4 
       
     5 Basic support for XML.
       
     6 *)
       
     7 
       
     8 signature XML =
       
     9 sig
       
    10   (* string functions *)
       
    11   val header: string
       
    12   val text: string -> string
       
    13   val text_charref: string -> string
       
    14   val cdata: string -> string
       
    15   type attributes = (string * string) list
       
    16   val attribute: string * string -> string
       
    17   val element: string -> attributes -> string list -> string
       
    18   (* tree functions *)
       
    19   datatype tree =
       
    20       Elem of string * attributes * tree list
       
    21     | Text of string
       
    22     | Output of output
       
    23   type content = tree list
       
    24   type element = string * attributes * content
       
    25   val string_of_tree: tree -> string
       
    26   val buffer_of_tree: tree -> Buffer.T
       
    27   val parse_string : string -> string option
       
    28   val parse_content: string list -> tree list * string list
       
    29   val parse_elem: string list -> tree * string list
       
    30   val parse_document: string list -> (string option * tree) * string list
       
    31   val tree_of_string: string -> tree
       
    32   val scan_comment_whspc : string list -> unit * string list
       
    33 end;
       
    34 
       
    35 structure XML: XML =
       
    36 struct
       
    37 
       
    38 (** string based representation (small scale) **)
       
    39 
       
    40 val header = "<?xml version=\"1.0\"?>\n";
       
    41 
       
    42 
       
    43 (* text and character data *)
       
    44 
       
    45 fun decode "&lt;" = "<"
       
    46   | decode "&gt;" = ">"
       
    47   | decode "&amp;" = "&"
       
    48   | decode "&apos;" = "'"
       
    49   | decode "&quot;" = "\""
       
    50   | decode c = c;
       
    51 
       
    52 fun encode "<" = "&lt;"
       
    53   | encode ">" = "&gt;"
       
    54   | encode "&" = "&amp;"
       
    55   | encode "'" = "&apos;"
       
    56   | encode "\"" = "&quot;"
       
    57   | encode c = c;
       
    58 
       
    59 fun encode_charref c = "&#" ^ Int.toString (ord c) ^ ";"
       
    60 
       
    61 val text = Library.translate_string encode;
       
    62 
       
    63 val text_charref = translate_string encode_charref;
       
    64 
       
    65 val cdata = enclose "<![CDATA[" "]]>\n";
       
    66 
       
    67 
       
    68 (* elements *)
       
    69 
       
    70 fun attribute (a, x) = a ^ " = \"" ^ text x ^ "\"";
       
    71 
       
    72 fun element name atts cs =
       
    73   let val elem = space_implode " " (name :: map attribute atts) in
       
    74     if null cs then enclose "<" "/>" elem
       
    75     else enclose "<" ">" elem ^ implode cs ^ enclose "</" ">" name
       
    76   end;
       
    77 
       
    78 
       
    79 
       
    80 (** explicit XML trees **)
       
    81 
       
    82 type attributes = (string * string) list;
       
    83 
       
    84 datatype tree =
       
    85     Elem of string * attributes * tree list
       
    86   | Text of string
       
    87   | Output of output;
       
    88 
       
    89 type content = tree list;
       
    90 
       
    91 type element = string * attributes * content;
       
    92 
       
    93 fun buffer_of_tree tree =
       
    94   let
       
    95     fun string_of (Elem (name, atts, ts)) buf =
       
    96         let val buf' =
       
    97           buf |> Buffer.add "<"
       
    98           |> fold Buffer.add (separate " " (name :: map attribute atts))
       
    99         in
       
   100           if null ts then
       
   101             buf' |> Buffer.add "/>"
       
   102           else
       
   103             buf' |> Buffer.add ">"
       
   104             |> fold string_of ts
       
   105             |> Buffer.add "</" |> Buffer.add name |> Buffer.add ">"
       
   106         end
       
   107       | string_of (Text s) buf = Buffer.add (text s) buf
       
   108       | string_of (Output s) buf = Buffer.add s buf;
       
   109   in string_of tree Buffer.empty end;
       
   110 
       
   111 val string_of_tree = Buffer.content o buffer_of_tree;
       
   112 
       
   113 
       
   114 
       
   115 (** XML parsing **)
       
   116 
       
   117 fun err s (xs, _) =
       
   118   "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
       
   119 
       
   120 val scan_whspc = Scan.many Symbol.is_blank;
       
   121 
       
   122 val scan_special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
       
   123 
       
   124 val parse_chars = Scan.repeat1 (Scan.unless ((* scan_whspc -- *)$$ "<")
       
   125   (scan_special || Scan.one Symbol.is_regular)) >> implode;
       
   126 
       
   127 val parse_string = Scan.read Symbol.stopper parse_chars o explode;
       
   128 
       
   129 val parse_cdata = Scan.this_string "<![CDATA[" |--
       
   130   (Scan.repeat (Scan.unless (Scan.this_string "]]>") (Scan.one Symbol.is_regular)) >>
       
   131     implode) --| Scan.this_string "]]>";
       
   132 
       
   133 val parse_att =
       
   134   Symbol.scan_id --| scan_whspc --| $$ "=" --| scan_whspc --
       
   135   (($$ "\"" || $$ "'") :|-- (fn s => (Scan.repeat (Scan.unless ($$ s)
       
   136     (scan_special || Scan.one Symbol.is_regular)) >> implode) --| $$ s));
       
   137 
       
   138 val parse_comment = Scan.this_string "<!--" --
       
   139   Scan.repeat (Scan.unless (Scan.this_string "-->") (Scan.one Symbol.is_regular)) --
       
   140   Scan.this_string "-->";
       
   141 
       
   142 val scan_comment_whspc =
       
   143   (scan_whspc >> K()) --| (Scan.repeat (parse_comment |-- (scan_whspc >> K())));
       
   144 
       
   145 val parse_pi = Scan.this_string "<?" |--
       
   146   Scan.repeat (Scan.unless (Scan.this_string "?>") (Scan.one Symbol.is_regular)) --|
       
   147   Scan.this_string "?>";
       
   148 
       
   149 fun parse_content xs =
       
   150   ((Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) [] --
       
   151     (Scan.repeat ((* scan_whspc |-- *)
       
   152        (   parse_elem >> single
       
   153         || parse_cdata >> (single o Text)
       
   154         || parse_pi >> K []
       
   155         || parse_comment >> K []) --
       
   156        Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) []
       
   157          >> op @) >> flat) >> op @)(* --| scan_whspc*)) xs
       
   158 
       
   159 and parse_elem xs =
       
   160   ($$ "<" |-- Symbol.scan_id --
       
   161     Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) =>
       
   162       !! (err "Expected > or />")
       
   163         (Scan.this_string "/>" >> K []
       
   164          || $$ ">" |-- parse_content --|
       
   165             !! (err ("Expected </" ^ s ^ ">"))
       
   166               (Scan.this_string ("</" ^ s) --| scan_whspc --| $$ ">"))) >>
       
   167     (fn ((s, atts), ts) => Elem (s, atts, ts))) xs;
       
   168 
       
   169 val parse_document =
       
   170   Scan.option (Scan.this_string "<!DOCTYPE" -- scan_whspc |--
       
   171     (Scan.repeat (Scan.unless ($$ ">")
       
   172       (Scan.one Symbol.is_regular)) >> implode) --| $$ ">" --| scan_whspc) --
       
   173   parse_elem;
       
   174 
       
   175 fun tree_of_string s =
       
   176   (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element")
       
   177       (scan_whspc |-- parse_elem --| scan_whspc))) (Symbol.explode s) of
       
   178     (x, []) => x
       
   179   | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
       
   180 
       
   181 end;