| author | bulwahn | 
| Wed, 20 Jan 2010 18:08:08 +0100 | |
| changeset 34953 | a053ad2a7a72 | 
| parent 31469 | 40f815edbde4 | 
| child 38228 | ada3ab6b9085 | 
| permissions | -rw-r--r-- | 
| 24584 | 1 | (* Title: Pure/General/xml.ML | 
| 24264 | 2 | Author: David Aspinall, Stefan Berghofer and Markus Wenzel | 
| 3 | ||
| 4 | Basic support for XML. | |
| 5 | *) | |
| 6 | ||
| 7 | signature XML = | |
| 8 | sig | |
| 28017 | 9 | type attributes = Properties.T | 
| 24264 | 10 | datatype tree = | 
| 11 | Elem of string * attributes * tree list | |
| 12 | | Text of string | |
| 26546 | 13 | val add_content: tree -> Buffer.T -> Buffer.T | 
| 14 | val header: string | |
| 15 | val text: string -> string | |
| 16 | val element: string -> attributes -> string list -> string | |
| 17 | val output_markup: Markup.T -> output * output | |
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 18 | val string_of: tree -> string | 
| 26546 | 19 | val output: tree -> TextIO.outstream -> unit | 
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 20 | val parse_comments: string list -> unit * string list | 
| 24264 | 21 | val parse_string : string -> string option | 
| 26546 | 22 | val parse_element: string list -> tree * string list | 
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 23 | val parse_document: string list -> tree * string list | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 24 | val parse: string -> tree | 
| 24264 | 25 | end; | 
| 26 | ||
| 27 | structure XML: XML = | |
| 28 | struct | |
| 29 | ||
| 26546 | 30 | (** XML trees **) | 
| 31 | ||
| 28017 | 32 | type attributes = Properties.T; | 
| 26546 | 33 | |
| 34 | datatype tree = | |
| 35 | Elem of string * attributes * tree list | |
| 28033 | 36 | | Text of string; | 
| 26546 | 37 | |
| 38 | fun add_content (Elem (_, _, ts)) = fold add_content ts | |
| 28033 | 39 | | add_content (Text s) = Buffer.add s; | 
| 26546 | 40 | |
| 41 | ||
| 24264 | 42 | |
| 26525 | 43 | (** string representation **) | 
| 44 | ||
| 24264 | 45 | val header = "<?xml version=\"1.0\"?>\n"; | 
| 46 | ||
| 47 | ||
| 26546 | 48 | (* escaped text *) | 
| 24264 | 49 | |
| 50 | fun decode "<" = "<" | |
| 51 | | decode ">" = ">" | |
| 52 | | decode "&" = "&" | |
| 53 | | decode "'" = "'" | |
| 54 | | decode """ = "\"" | |
| 55 | | decode c = c; | |
| 56 | ||
| 57 | fun encode "<" = "<" | |
| 58 | | encode ">" = ">" | |
| 59 | | encode "&" = "&" | |
| 60 | | encode "'" = "'" | |
| 61 | | encode "\"" = """ | |
| 62 | | encode c = c; | |
| 63 | ||
| 25838 | 64 | val text = translate_string encode; | 
| 24264 | 65 | |
| 66 | ||
| 67 | (* elements *) | |
| 68 | ||
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 69 | fun elem name atts = | 
| 26551 | 70 | space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts); | 
| 24264 | 71 | |
| 26525 | 72 | fun element name atts body = | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 73 | let val b = implode body in | 
| 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 74 | if b = "" then enclose "<" "/>" (elem name atts) | 
| 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 75 | else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name | 
| 24264 | 76 | end; | 
| 77 | ||
| 27884 | 78 | fun output_markup (markup as (name, atts)) = | 
| 29325 | 79 | if Markup.is_none markup then Markup.no_output | 
| 27884 | 80 | else (enclose "<" ">" (elem name atts), enclose "</" ">" name); | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 81 | |
| 24264 | 82 | |
| 26546 | 83 | (* output *) | 
| 24264 | 84 | |
| 26546 | 85 | fun buffer_of tree = | 
| 24264 | 86 | let | 
| 26546 | 87 | fun traverse (Elem (name, atts, [])) = | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 88 | Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>" | 
| 26546 | 89 | | traverse (Elem (name, atts, ts)) = | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 90 | Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #> | 
| 26546 | 91 | fold traverse ts #> | 
| 26525 | 92 | Buffer.add "</" #> Buffer.add name #> Buffer.add ">" | 
| 28033 | 93 | | traverse (Text s) = Buffer.add (text s); | 
| 26546 | 94 | in Buffer.empty |> traverse tree end; | 
| 24264 | 95 | |
| 26546 | 96 | val string_of = Buffer.content o buffer_of; | 
| 97 | val output = Buffer.output o buffer_of; | |
| 25838 | 98 | |
| 24264 | 99 | |
| 100 | ||
| 26546 | 101 | (** XML parsing (slow) **) | 
| 102 | ||
| 103 | local | |
| 24264 | 104 | |
| 105 | fun err s (xs, _) = | |
| 106 | "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs); | |
| 107 | ||
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 108 | fun ignored _ = []; | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 109 | |
| 26551 | 110 | val blanks = Scan.many Symbol.is_blank; | 
| 111 | val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode; | |
| 112 | val regular = Scan.one Symbol.is_regular; | |
| 113 | fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x); | |
| 24264 | 114 | |
| 26551 | 115 | val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode; | 
| 24264 | 116 | |
| 26551 | 117 | val parse_cdata = | 
| 118 | Scan.this_string "<![CDATA[" |-- | |
| 119 | (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --| | |
| 120 | Scan.this_string "]]>"; | |
| 24264 | 121 | |
| 122 | val parse_att = | |
| 26551 | 123 | (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) -- | 
| 124 | (($$ "\"" || $$ "'") :|-- (fn s => | |
| 125 | (Scan.repeat (special || regular_except s) >> implode) --| $$ s)); | |
| 24264 | 126 | |
| 26551 | 127 | val parse_comment = | 
| 128 | Scan.this_string "<!--" -- | |
| 129 | Scan.repeat (Scan.unless (Scan.this_string "-->") regular) -- | |
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 130 | Scan.this_string "-->" >> ignored; | 
| 24264 | 131 | |
| 26551 | 132 | val parse_processing_instruction = | 
| 133 | Scan.this_string "<?" -- | |
| 134 | Scan.repeat (Scan.unless (Scan.this_string "?>") regular) -- | |
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 135 | Scan.this_string "?>" >> ignored; | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 136 | |
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 137 | val parse_doctype = | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 138 | Scan.this_string "<!DOCTYPE" -- | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 139 | Scan.repeat (Scan.unless ($$ ">") regular) -- | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 140 | $$ ">" >> ignored; | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 141 | |
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 142 | val parse_misc = | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 143 | Scan.one Symbol.is_blank >> ignored || | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 144 | parse_processing_instruction || | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 145 | parse_comment; | 
| 26551 | 146 | |
| 147 | val parse_optional_text = | |
| 148 | Scan.optional (parse_chars >> (single o Text)) []; | |
| 24264 | 149 | |
| 26546 | 150 | in | 
| 151 | ||
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 152 | val parse_comments = | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 153 | blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K (); | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 154 | |
| 26546 | 155 | val parse_string = Scan.read Symbol.stopper parse_chars o explode; | 
| 156 | ||
| 24264 | 157 | fun parse_content xs = | 
| 26551 | 158 | (parse_optional_text @@@ | 
| 159 | (Scan.repeat | |
| 160 | ((parse_element >> single || | |
| 161 | parse_cdata >> (single o Text) || | |
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 162 | parse_processing_instruction || | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 163 | parse_comment) | 
| 26551 | 164 | @@@ parse_optional_text) >> flat)) xs | 
| 24264 | 165 | |
| 26546 | 166 | and parse_element xs = | 
| 24264 | 167 | ($$ "<" |-- Symbol.scan_id -- | 
| 26551 | 168 | Scan.repeat (blanks |-- parse_att) --| blanks :-- (fn (s, _) => | 
| 24264 | 169 | !! (err "Expected > or />") | 
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 170 | (Scan.this_string "/>" >> ignored | 
| 24264 | 171 | || $$ ">" |-- parse_content --| | 
| 172 |             !! (err ("Expected </" ^ s ^ ">"))
 | |
| 26551 | 173 |               (Scan.this_string ("</" ^ s) --| blanks --| $$ ">"))) >>
 | 
| 24264 | 174 | (fn ((s, atts), ts) => Elem (s, atts, ts))) xs; | 
| 175 | ||
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 176 | val parse_document = | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 177 | (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc) | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 178 | |-- parse_element; | 
| 24264 | 179 | |
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 180 | fun parse s = | 
| 24264 | 181 | (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element") | 
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 182 | (blanks |-- parse_document --| blanks))) (explode s) of | 
| 24264 | 183 | (x, []) => x | 
| 184 |   | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
 | |
| 185 | ||
| 186 | end; | |
| 26546 | 187 | |
| 188 | end; |