src/Pure/General/xml.ML
changeset 14728 df34201f1a15
parent 14714 38ff9c8a7de0
child 14844 a15c65e66ee9
equal deleted inserted replaced
14727:ab06e87e5c83 14728:df34201f1a15
     1 (*  Title:      Pure/General/xml.ML
     1 (*  Title:      Pure/General/xml.ML
     2     ID:         $Id$
     2     ID:         $Id$
     3     Author:     Markus Wenzel, LMU Muenchen
     3     Author:     David Aspinall, Stefan Berghofer and Markus Wenzel
     4                 Stefan Berghofer, TU Muenchen
       
     5     License:    GPL (GNU GENERAL PUBLIC LICENSE)
     4     License:    GPL (GNU GENERAL PUBLIC LICENSE)
     6 
     5 
     7 Basic support for XML input and output.
     6 Basic support for XML input and output.
     8 
       
     9 FIXME da: missing input raises FAIL (scan.ML), should give error message.
       
    10 *)
     7 *)
    11 
     8 
    12 signature XML =
     9 signature XML =
    13 sig
    10 sig
    14   datatype tree =
    11   datatype tree =
    49 val cdata_open  = "<![CDATA["
    46 val cdata_open  = "<![CDATA["
    50 val cdata_close = "]]>"
    47 val cdata_close = "]]>"
    51 
    48 
    52 fun cdata s = cdata_open ^ s ^ cdata_close;
    49 fun cdata s = cdata_open ^ s ^ cdata_close;
    53 
    50 
       
    51 
    54 (* elements *)
    52 (* elements *)
    55 
    53 
    56 datatype tree =
    54 datatype tree =
    57     Elem of string * (string * string) list * tree list
    55     Elem of string * (string * string) list * tree list
    58   | Text of string;
    56   | Text of string;
    59 
    57 
    60 fun attribute (a, x) = a ^ " = \"" ^ (text x) ^ "\"";
    58 fun attribute (a, x) = a ^ " = \"" ^ text x ^ "\"";
    61 
    59 
    62 fun element name atts cs =
    60 fun element name atts cs =
    63   let val elem = space_implode " " (name :: map attribute atts) in
    61   let val elem = space_implode " " (name :: map attribute atts) in
    64     if null cs then enclose "<" "/>" elem
    62     if null cs then enclose "<" "/>" elem
    65     else enclose "<" ">" elem ^ implode cs ^ enclose "</" ">" name
    63     else enclose "<" ">" elem ^ implode cs ^ enclose "</" ">" name
    66   end;
    64   end;
    67 
    65 
    68 fun string_of_tree (Elem (name, atts, ts)) =
    66 fun string_of_tree (Elem (name, atts, ts)) =
    69       element name atts (map string_of_tree ts)
    67       element name atts (map string_of_tree ts)
    70   | string_of_tree (Text s) = s
    68   | string_of_tree (Text s) = s;
    71   
    69 
    72 val header = "<?xml version=\"1.0\"?>\n";
    70 val header = "<?xml version=\"1.0\"?>\n";
    73 
    71 
    74 
    72 
    75 (* parser *)
    73 (* parser *)
    76 
    74 
    77 fun err s (xs, _) = "XML parsing error: " ^ s ^ "\nfound:\n" ^
    75 fun err s (xs, _) =
    78   implode (take (100, xs));
    76   "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
    79 
    77 
    80 val scan_whspc = Scan.repeat ($$ " " || $$ "\n" || $$ "\t");
    78 val scan_whspc = Scan.any Symbol.is_blank;
    81 
    79 
    82 val literal = Scan.literal o Scan.make_lexicon o single o explode;
    80 fun scan_string s = Scan.list (Symbol.explode s) >> K s;
    83 
    81 
    84 val scan_special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
    82 val scan_special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
    85 
    83 
    86 val parse_chars = Scan.repeat1 (Scan.unless (scan_whspc -- $$ "<")
    84 val parse_chars = Scan.repeat1 (Scan.unless (scan_whspc -- $$ "<")
    87   (scan_special || Scan.one Symbol.not_eof)) >> implode;
    85   (scan_special || Scan.one Symbol.not_eof)) >> implode;
    88 
    86 
    89 val parse_cdata = literal "<![CDATA[" |--
    87 val parse_cdata = scan_string "<![CDATA[" |--
    90   (Scan.repeat (Scan.unless (literal "]]>") (Scan.one Symbol.not_eof)) >>
    88   (Scan.repeat (Scan.unless (scan_string "]]>") (Scan.one Symbol.not_eof)) >>
    91     implode) --| literal "]]>";
    89     implode) --| scan_string "]]>";
    92 
    90 
    93 val parse_att =
    91 val parse_att =
    94   Symbol.scan_id --| scan_whspc --| $$ "=" --| scan_whspc --| $$ "\"" --
    92   Symbol.scan_id --| scan_whspc --| $$ "=" --| scan_whspc --| $$ "\"" --
    95   (Scan.repeat (Scan.unless ($$ "\"")
    93   (Scan.repeat (Scan.unless ($$ "\"")
    96     (scan_special || Scan.one Symbol.not_eof)) >> implode) --| $$ "\"";
    94     (scan_special || Scan.one Symbol.not_eof)) >> implode) --| $$ "\"";
    97 
    95 
    98 val parse_comment = literal "<!--" --
    96 val parse_comment = scan_string "<!--" --
    99   Scan.repeat (Scan.unless (literal "-->") (Scan.one Symbol.not_eof)) --
    97   Scan.repeat (Scan.unless (scan_string "-->") (Scan.one Symbol.not_eof)) --
   100   literal "-->";
    98   scan_string "-->";
   101 
    99 
   102 val parse_pi = literal "<?" |--
   100 val parse_pi = scan_string "<?" |--
   103   Scan.repeat (Scan.unless (literal "?>") (Scan.one Symbol.not_eof)) --|
   101   Scan.repeat (Scan.unless (scan_string "?>") (Scan.one Symbol.not_eof)) --|
   104   literal "?>";
   102   scan_string "?>";
   105 
   103 
   106 fun parse_content xs =
   104 fun parse_content xs =
   107   ((Scan.optional (scan_whspc |-- parse_chars >> (single o Text)) [] --
   105   ((Scan.optional (scan_whspc |-- parse_chars >> (single o Text)) [] --
   108     (Scan.repeat (scan_whspc |--
   106     (Scan.repeat (scan_whspc |--
   109        (   parse_elem >> single
   107        (   parse_elem >> single
   115 
   113 
   116 and parse_elem xs =
   114 and parse_elem xs =
   117   ($$ "<" |-- Symbol.scan_id --
   115   ($$ "<" |-- Symbol.scan_id --
   118     Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) =>
   116     Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) =>
   119       !! (err "Expected > or />")
   117       !! (err "Expected > or />")
   120         (   literal "/>" >> K []
   118         (scan_string "/>" >> K []
   121          || $$ ">" |-- parse_content --|
   119          || $$ ">" |-- parse_content --|
   122             !! (err ("Expected </" ^ s ^ ">"))
   120             !! (err ("Expected </" ^ s ^ ">"))
   123               (literal ("</" ^ s) --| scan_whspc --| $$ ">"))) >>
   121               (scan_string ("</" ^ s) --| scan_whspc --| $$ ">"))) >>
   124     (fn ((s, atts), ts) => Elem (s, atts, ts))) xs;
   122     (fn ((s, atts), ts) => Elem (s, atts, ts))) xs;
   125 
   123 
   126 val parse_document =
   124 val parse_document =
   127   Scan.option (literal "<!DOCTYPE" -- scan_whspc |--
   125   Scan.option (scan_string "<!DOCTYPE" -- scan_whspc |--
   128     (Scan.repeat (Scan.unless ($$ ">")
   126     (Scan.repeat (Scan.unless ($$ ">")
   129       (Scan.one Symbol.not_eof)) >> implode) --| $$ ">" --| scan_whspc) --
   127       (Scan.one Symbol.not_eof)) >> implode) --| $$ ">" --| scan_whspc) --
   130   parse_elem;
   128   parse_elem;
   131 
   129 
   132 fun tree_of_string s =
   130 fun tree_of_string s =
   133   (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element")
   131   (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element")
   134       (scan_whspc |-- parse_elem --| scan_whspc))) (Symbol.explode s) of
   132       (scan_whspc |-- parse_elem --| scan_whspc))) (Symbol.explode s) of
   135      (x, []) => x
   133     (x, []) => x
   136    | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^
   134   | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
   137        implode (take (100, ys))));
       
   138 
   135 
   139 end;
   136 end;