| author | wenzelm | 
| Wed, 31 Dec 2008 19:56:38 +0100 | |
| changeset 29280 | c5531bf7c6b2 | 
| parent 28033 | f03b5856f286 | 
| child 29325 | a205acc94356 | 
| permissions | -rw-r--r-- | 
| 24584 | 1 | (* Title: Pure/General/xml.ML | 
| 24264 | 2 | ID: $Id$ | 
| 3 | Author: David Aspinall, Stefan Berghofer and Markus Wenzel | |
| 4 | ||
| 5 | Basic support for XML. | |
| 6 | *) | |
| 7 | ||
| 8 | signature XML = | |
| 9 | sig | |
| 28017 | 10 | type attributes = Properties.T | 
| 24264 | 11 | datatype tree = | 
| 12 | Elem of string * attributes * tree list | |
| 13 | | Text of string | |
| 26546 | 14 | val add_content: tree -> Buffer.T -> Buffer.T | 
| 15 | val detect: string -> bool | |
| 16 | val header: string | |
| 17 | val text: string -> string | |
| 18 | val element: string -> attributes -> string list -> string | |
| 19 | val output_markup: Markup.T -> output * output | |
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 20 | val string_of: tree -> string | 
| 26546 | 21 | val output: tree -> TextIO.outstream -> unit | 
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 22 | val parse_comments: string list -> unit * string list | 
| 24264 | 23 | val parse_string : string -> string option | 
| 26546 | 24 | val parse_element: string list -> tree * string list | 
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 25 | val parse_document: string list -> tree * string list | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 26 | val parse: string -> tree | 
| 24264 | 27 | end; | 
| 28 | ||
| 29 | structure XML: XML = | |
| 30 | struct | |
| 31 | ||
| 26546 | 32 | (** XML trees **) | 
| 33 | ||
| 28017 | 34 | type attributes = Properties.T; | 
| 26546 | 35 | |
| 36 | datatype tree = | |
| 37 | Elem of string * attributes * tree list | |
| 28033 | 38 | | Text of string; | 
| 26546 | 39 | |
| 40 | fun add_content (Elem (_, _, ts)) = fold add_content ts | |
| 28033 | 41 | | add_content (Text s) = Buffer.add s; | 
| 26546 | 42 | |
| 43 | ||
| 24264 | 44 | |
| 26525 | 45 | (** string representation **) | 
| 46 | ||
| 47 | val detect = String.isPrefix "<?xml"; | |
| 24264 | 48 | val header = "<?xml version=\"1.0\"?>\n"; | 
| 49 | ||
| 50 | ||
| 26546 | 51 | (* escaped text *) | 
| 24264 | 52 | |
| 53 | fun decode "<" = "<" | |
| 54 | | decode ">" = ">" | |
| 55 | | decode "&" = "&" | |
| 56 | | decode "'" = "'" | |
| 57 | | decode """ = "\"" | |
| 58 | | decode c = c; | |
| 59 | ||
| 60 | fun encode "<" = "<" | |
| 61 | | encode ">" = ">" | |
| 62 | | encode "&" = "&" | |
| 63 | | encode "'" = "'" | |
| 64 | | encode "\"" = """ | |
| 65 | | encode c = c; | |
| 66 | ||
| 25838 | 67 | val text = translate_string encode; | 
| 24264 | 68 | |
| 69 | ||
| 70 | (* elements *) | |
| 71 | ||
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 72 | fun elem name atts = | 
| 26551 | 73 | space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts); | 
| 24264 | 74 | |
| 26525 | 75 | fun element name atts body = | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 76 | let val b = implode body in | 
| 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 77 | if b = "" then enclose "<" "/>" (elem name atts) | 
| 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 78 | else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name | 
| 24264 | 79 | end; | 
| 80 | ||
| 27884 | 81 | fun output_markup (markup as (name, atts)) = | 
| 82 |   if Markup.is_none markup then ("", "")
 | |
| 83 | else (enclose "<" ">" (elem name atts), enclose "</" ">" name); | |
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 84 | |
| 24264 | 85 | |
| 26546 | 86 | (* output *) | 
| 24264 | 87 | |
| 26546 | 88 | fun buffer_of tree = | 
| 24264 | 89 | let | 
| 26546 | 90 | fun traverse (Elem (name, atts, [])) = | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 91 | Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>" | 
| 26546 | 92 | | traverse (Elem (name, atts, ts)) = | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 93 | Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #> | 
| 26546 | 94 | fold traverse ts #> | 
| 26525 | 95 | Buffer.add "</" #> Buffer.add name #> Buffer.add ">" | 
| 28033 | 96 | | traverse (Text s) = Buffer.add (text s); | 
| 26546 | 97 | in Buffer.empty |> traverse tree end; | 
| 24264 | 98 | |
| 26546 | 99 | val string_of = Buffer.content o buffer_of; | 
| 100 | val output = Buffer.output o buffer_of; | |
| 25838 | 101 | |
| 24264 | 102 | |
| 103 | ||
| 26546 | 104 | (** XML parsing (slow) **) | 
| 105 | ||
| 106 | local | |
| 24264 | 107 | |
| 108 | fun err s (xs, _) = | |
| 109 | "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs); | |
| 110 | ||
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 111 | fun ignored _ = []; | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 112 | |
| 26551 | 113 | val blanks = Scan.many Symbol.is_blank; | 
| 114 | val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode; | |
| 115 | val regular = Scan.one Symbol.is_regular; | |
| 116 | fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x); | |
| 24264 | 117 | |
| 26551 | 118 | val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode; | 
| 24264 | 119 | |
| 26551 | 120 | val parse_cdata = | 
| 121 | Scan.this_string "<![CDATA[" |-- | |
| 122 | (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --| | |
| 123 | Scan.this_string "]]>"; | |
| 24264 | 124 | |
| 125 | val parse_att = | |
| 26551 | 126 | (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) -- | 
| 127 | (($$ "\"" || $$ "'") :|-- (fn s => | |
| 128 | (Scan.repeat (special || regular_except s) >> implode) --| $$ s)); | |
| 24264 | 129 | |
| 26551 | 130 | val parse_comment = | 
| 131 | Scan.this_string "<!--" -- | |
| 132 | Scan.repeat (Scan.unless (Scan.this_string "-->") regular) -- | |
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 133 | Scan.this_string "-->" >> ignored; | 
| 24264 | 134 | |
| 26551 | 135 | val parse_processing_instruction = | 
| 136 | Scan.this_string "<?" -- | |
| 137 | Scan.repeat (Scan.unless (Scan.this_string "?>") regular) -- | |
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 138 | Scan.this_string "?>" >> ignored; | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 139 | |
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 140 | val parse_doctype = | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 141 | Scan.this_string "<!DOCTYPE" -- | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 142 | Scan.repeat (Scan.unless ($$ ">") regular) -- | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 143 | $$ ">" >> ignored; | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 144 | |
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 145 | val parse_misc = | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 146 | Scan.one Symbol.is_blank >> ignored || | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 147 | parse_processing_instruction || | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 148 | parse_comment; | 
| 26551 | 149 | |
| 150 | val parse_optional_text = | |
| 151 | Scan.optional (parse_chars >> (single o Text)) []; | |
| 24264 | 152 | |
| 26546 | 153 | in | 
| 154 | ||
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 155 | val parse_comments = | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 156 | blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K (); | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 157 | |
| 26546 | 158 | val parse_string = Scan.read Symbol.stopper parse_chars o explode; | 
| 159 | ||
| 24264 | 160 | fun parse_content xs = | 
| 26551 | 161 | (parse_optional_text @@@ | 
| 162 | (Scan.repeat | |
| 163 | ((parse_element >> single || | |
| 164 | parse_cdata >> (single o Text) || | |
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 165 | parse_processing_instruction || | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 166 | parse_comment) | 
| 26551 | 167 | @@@ parse_optional_text) >> flat)) xs | 
| 24264 | 168 | |
| 26546 | 169 | and parse_element xs = | 
| 24264 | 170 | ($$ "<" |-- Symbol.scan_id -- | 
| 26551 | 171 | Scan.repeat (blanks |-- parse_att) --| blanks :-- (fn (s, _) => | 
| 24264 | 172 | !! (err "Expected > or />") | 
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 173 | (Scan.this_string "/>" >> ignored | 
| 24264 | 174 | || $$ ">" |-- parse_content --| | 
| 175 |             !! (err ("Expected </" ^ s ^ ">"))
 | |
| 26551 | 176 |               (Scan.this_string ("</" ^ s) --| blanks --| $$ ">"))) >>
 | 
| 24264 | 177 | (fn ((s, atts), ts) => Elem (s, atts, ts))) xs; | 
| 178 | ||
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 179 | val parse_document = | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 180 | (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc) | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 181 | |-- parse_element; | 
| 24264 | 182 | |
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 183 | fun parse s = | 
| 24264 | 184 | (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element") | 
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 185 | (blanks |-- parse_document --| blanks))) (explode s) of | 
| 24264 | 186 | (x, []) => x | 
| 187 |   | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
 | |
| 188 | ||
| 189 | end; | |
| 26546 | 190 | |
| 191 | end; |