src/Pure/General/xml.ML
author wenzelm
Thu Apr 03 18:42:36 2008 +0200 (2008-04-03)
changeset 26538 d65504ffb47d
parent 26525 14a56f013469
child 26539 a0754be538ab
permissions -rw-r--r--
replaced ETX/EOT by ENQ/ACK, which are less likely to be interpreted by tty etc.;
     1 (*  Title:      Pure/General/xml.ML
     2     ID:         $Id$
     3     Author:     David Aspinall, Stefan Berghofer and Markus Wenzel
     4 
     5 Basic support for XML.
     6 *)
     7 
     8 signature XML =
     9 sig
    10   (*string functions*)
    11   val detect: string -> bool
    12   val header: string
    13   val text: string -> string
    14   type attributes = (string * string) list
    15   val attribute: string * string -> string
    16   val element: string -> attributes -> string list -> string
    17   (*tree functions*)
    18   datatype tree =
    19       Elem of string * attributes * tree list
    20     | Text of string
    21     | Output of output
    22   type content = tree list
    23   type element = string * attributes * content
    24   val string_of_tree: tree -> string
    25   val plain_content: tree -> string
    26   val parse_string : string -> string option
    27   val parse_content: string list -> tree list * string list
    28   val parse_elem: string list -> tree * string list
    29   val parse_document: string list -> (string option * tree) * string list
    30   val tree_of_string: string -> tree
    31   val scan_comment_whspc: string list -> unit * string list
    32 end;
    33 
    34 structure XML: XML =
    35 struct
    36 
    37 
    38 (** string representation **)
    39 
    40 val detect = String.isPrefix "<?xml";
    41 val header = "<?xml version=\"1.0\"?>\n";
    42 
    43 
    44 (* text and character data *)
    45 
    46 fun decode "&lt;" = "<"
    47   | decode "&gt;" = ">"
    48   | decode "&amp;" = "&"
    49   | decode "&apos;" = "'"
    50   | decode "&quot;" = "\""
    51   | decode c = c;
    52 
    53 fun encode "<" = "&lt;"
    54   | encode ">" = "&gt;"
    55   | encode "&" = "&amp;"
    56   | encode "'" = "&apos;"
    57   | encode "\"" = "&quot;"
    58   | encode c = c;
    59 
    60 val text = translate_string encode;
    61 
    62 
    63 (* elements *)
    64 
    65 fun attribute (a, x) = a ^ " = \"" ^ text x ^ "\"";
    66 
    67 fun element name atts body =
    68   let
    69     val elem = space_implode " " (name :: map attribute atts);
    70     val b = implode body;
    71   in
    72     if b = "" then enclose "<" "/>" elem
    73     else enclose "<" ">" elem ^ b ^ enclose "</" ">" name
    74   end;
    75 
    76 
    77 
    78 (** explicit XML trees **)
    79 
    80 type attributes = (string * string) list;
    81 
    82 datatype tree =
    83     Elem of string * attributes * tree list
    84   | Text of string
    85   | Output of output;
    86 
    87 type content = tree list;
    88 type element = string * attributes * content;
    89 
    90 fun string_of_tree t =
    91   let
    92     fun name_atts name atts = fold Buffer.add (separate " " (name :: map attribute atts));
    93     fun tree (Elem (name, atts, [])) =
    94           Buffer.add "<" #> name_atts name atts #> Buffer.add "/>"
    95       | tree (Elem (name, atts, ts)) =
    96           Buffer.add "<" #> name_atts name atts #> Buffer.add ">" #>
    97           fold tree ts #>
    98           Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
    99       | tree (Text s) = Buffer.add (text s)
   100       | tree (Output s) = Buffer.add s;
   101   in Buffer.empty |> tree t |> Buffer.content end;
   102 
   103 fun plain_content tree =
   104   let
   105     fun content (Elem (_, _, ts)) = fold content ts
   106       | content (Text s) = Buffer.add s
   107       | content (Output _) = I;    (* FIXME !? *)
   108   in Buffer.empty |> content tree |> Buffer.content end;
   109 
   110 
   111 
   112 (** XML parsing **)
   113 
   114 fun err s (xs, _) =
   115   "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
   116 
   117 val scan_whspc = Scan.many Symbol.is_blank;
   118 
   119 val scan_special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
   120 
   121 val parse_chars = Scan.repeat1 (Scan.unless ((* scan_whspc -- *)$$ "<")
   122   (scan_special || Scan.one Symbol.is_regular)) >> implode;
   123 
   124 val parse_string = Scan.read Symbol.stopper parse_chars o explode;
   125 
   126 val parse_cdata = Scan.this_string "<![CDATA[" |--
   127   (Scan.repeat (Scan.unless (Scan.this_string "]]>") (Scan.one Symbol.is_regular)) >>
   128     implode) --| Scan.this_string "]]>";
   129 
   130 val parse_att =
   131   Symbol.scan_id --| scan_whspc --| $$ "=" --| scan_whspc --
   132   (($$ "\"" || $$ "'") :|-- (fn s => (Scan.repeat (Scan.unless ($$ s)
   133     (scan_special || Scan.one Symbol.is_regular)) >> implode) --| $$ s));
   134 
   135 val parse_comment = Scan.this_string "<!--" --
   136   Scan.repeat (Scan.unless (Scan.this_string "-->") (Scan.one Symbol.is_regular)) --
   137   Scan.this_string "-->";
   138 
   139 val scan_comment_whspc =
   140   (scan_whspc >> K()) --| (Scan.repeat (parse_comment |-- (scan_whspc >> K())));
   141 
   142 val parse_pi = Scan.this_string "<?" |--
   143   Scan.repeat (Scan.unless (Scan.this_string "?>") (Scan.one Symbol.is_regular)) --|
   144   Scan.this_string "?>";
   145 
   146 fun parse_content xs =
   147   ((Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) [] --
   148     (Scan.repeat ((* scan_whspc |-- *)
   149        (   parse_elem >> single
   150         || parse_cdata >> (single o Text)
   151         || parse_pi >> K []
   152         || parse_comment >> K []) --
   153        Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) []
   154          >> op @) >> flat) >> op @)(* --| scan_whspc*)) xs
   155 
   156 and parse_elem xs =
   157   ($$ "<" |-- Symbol.scan_id --
   158     Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) =>
   159       !! (err "Expected > or />")
   160         (Scan.this_string "/>" >> K []
   161          || $$ ">" |-- parse_content --|
   162             !! (err ("Expected </" ^ s ^ ">"))
   163               (Scan.this_string ("</" ^ s) --| scan_whspc --| $$ ">"))) >>
   164     (fn ((s, atts), ts) => Elem (s, atts, ts))) xs;
   165 
   166 val parse_document =
   167   Scan.option (Scan.this_string "<!DOCTYPE" -- scan_whspc |--
   168     (Scan.repeat (Scan.unless ($$ ">")
   169       (Scan.one Symbol.is_regular)) >> implode) --| $$ ">" --| scan_whspc) --
   170   parse_elem;
   171 
   172 fun tree_of_string s =
   173   (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element")
   174       (scan_whspc |-- parse_elem --| scan_whspc))) (Symbol.explode s) of
   175     (x, []) => x
   176   | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
   177 
   178 end;