| author | kleing | 
| Mon, 08 Aug 2011 16:47:55 +0200 | |
| changeset 44070 | cebb7abb54b1 | 
| parent 43949 | 94033767ef9b | 
| permissions | -rw-r--r-- | 
| 24584 | 1 | (* Title: Pure/General/xml.ML | 
| 24264 | 2 | Author: David Aspinall, Stefan Berghofer and Markus Wenzel | 
| 3 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 4 | Untyped XML trees. | 
| 24264 | 5 | *) | 
| 6 | ||
| 43767 | 7 | signature XML_DATA_OPS = | 
| 8 | sig | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 9 | type 'a A | 
| 43767 | 10 | type 'a T | 
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 11 | type 'a V | 
| 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 12 | val int_atom: int A | 
| 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 13 | val bool_atom: bool A | 
| 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 14 | val unit_atom: unit A | 
| 43767 | 15 | val properties: Properties.T T | 
| 16 | val string: string T | |
| 17 | val int: int T | |
| 18 | val bool: bool T | |
| 19 | val unit: unit T | |
| 20 |   val pair: 'a T -> 'b T -> ('a * 'b) T
 | |
| 21 |   val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
 | |
| 22 | val list: 'a T -> 'a list T | |
| 23 | val option: 'a T -> 'a option T | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 24 | val variant: 'a V list -> 'a T | 
| 43767 | 25 | end; | 
| 26 | ||
| 24264 | 27 | signature XML = | 
| 28 | sig | |
| 28017 | 29 | type attributes = Properties.T | 
| 24264 | 30 | datatype tree = | 
| 38228 
ada3ab6b9085
simplified type XML.tree: embed Markup.T directly, avoid slightly odd triple;
 wenzelm parents: 
31469diff
changeset | 31 | Elem of Markup.T * tree list | 
| 24264 | 32 | | Text of string | 
| 38266 
492d377ecfe2
type XML.body as basic data representation language;
 wenzelm parents: 
38228diff
changeset | 33 | type body = tree list | 
| 26546 | 34 | val add_content: tree -> Buffer.T -> Buffer.T | 
| 39555 
ccb223a4d49c
added XML.content_of convenience -- cover XML.body, which is the general situation;
 wenzelm parents: 
38474diff
changeset | 35 | val content_of: body -> string | 
| 26546 | 36 | val header: string | 
| 37 | val text: string -> string | |
| 38 | val element: string -> attributes -> string list -> string | |
| 40131 
7cbebd636e79
explicitly qualify type Output.output, which is a slightly odd internal feature;
 wenzelm parents: 
39555diff
changeset | 39 | val output_markup: Markup.T -> Output.output * Output.output | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 40 | val string_of: tree -> string | 
| 43791 | 41 | val pretty: int -> tree -> Pretty.T | 
| 26546 | 42 | val output: tree -> TextIO.outstream -> unit | 
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 43 | val parse_comments: string list -> unit * string list | 
| 24264 | 44 | val parse_string : string -> string option | 
| 26546 | 45 | val parse_element: string list -> tree * string list | 
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 46 | val parse_document: string list -> tree * string list | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 47 | val parse: string -> tree | 
| 43767 | 48 | exception XML_ATOM of string | 
| 49 | exception XML_BODY of body | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 50 | structure Encode: XML_DATA_OPS | 
| 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 51 | structure Decode: XML_DATA_OPS | 
| 24264 | 52 | end; | 
| 53 | ||
| 54 | structure XML: XML = | |
| 55 | struct | |
| 56 | ||
| 26546 | 57 | (** XML trees **) | 
| 58 | ||
| 28017 | 59 | type attributes = Properties.T; | 
| 26546 | 60 | |
| 61 | datatype tree = | |
| 38228 
ada3ab6b9085
simplified type XML.tree: embed Markup.T directly, avoid slightly odd triple;
 wenzelm parents: 
31469diff
changeset | 62 | Elem of Markup.T * tree list | 
| 28033 | 63 | | Text of string; | 
| 26546 | 64 | |
| 38266 
492d377ecfe2
type XML.body as basic data representation language;
 wenzelm parents: 
38228diff
changeset | 65 | type body = tree list; | 
| 
492d377ecfe2
type XML.body as basic data representation language;
 wenzelm parents: 
38228diff
changeset | 66 | |
| 38228 
ada3ab6b9085
simplified type XML.tree: embed Markup.T directly, avoid slightly odd triple;
 wenzelm parents: 
31469diff
changeset | 67 | fun add_content (Elem (_, ts)) = fold add_content ts | 
| 28033 | 68 | | add_content (Text s) = Buffer.add s; | 
| 26546 | 69 | |
| 39555 
ccb223a4d49c
added XML.content_of convenience -- cover XML.body, which is the general situation;
 wenzelm parents: 
38474diff
changeset | 70 | fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content; | 
| 
ccb223a4d49c
added XML.content_of convenience -- cover XML.body, which is the general situation;
 wenzelm parents: 
38474diff
changeset | 71 | |
| 26546 | 72 | |
| 24264 | 73 | |
| 26525 | 74 | (** string representation **) | 
| 75 | ||
| 24264 | 76 | val header = "<?xml version=\"1.0\"?>\n"; | 
| 77 | ||
| 78 | ||
| 26546 | 79 | (* escaped text *) | 
| 24264 | 80 | |
| 81 | fun decode "<" = "<" | |
| 82 | | decode ">" = ">" | |
| 83 | | decode "&" = "&" | |
| 84 | | decode "'" = "'" | |
| 85 | | decode """ = "\"" | |
| 86 | | decode c = c; | |
| 87 | ||
| 88 | fun encode "<" = "<" | |
| 89 | | encode ">" = ">" | |
| 90 | | encode "&" = "&" | |
| 91 | | encode "'" = "'" | |
| 92 | | encode "\"" = """ | |
| 93 | | encode c = c; | |
| 94 | ||
| 25838 | 95 | val text = translate_string encode; | 
| 24264 | 96 | |
| 97 | ||
| 98 | (* elements *) | |
| 99 | ||
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 100 | fun elem name atts = | 
| 26551 | 101 | space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts); | 
| 24264 | 102 | |
| 26525 | 103 | fun element name atts body = | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 104 | let val b = implode body in | 
| 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 105 | if b = "" then enclose "<" "/>" (elem name atts) | 
| 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 106 | else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name | 
| 24264 | 107 | end; | 
| 108 | ||
| 27884 | 109 | fun output_markup (markup as (name, atts)) = | 
| 38474 
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
 wenzelm parents: 
38266diff
changeset | 110 | if Markup.is_empty markup then Markup.no_output | 
| 27884 | 111 | else (enclose "<" ">" (elem name atts), enclose "</" ">" name); | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 112 | |
| 24264 | 113 | |
| 26546 | 114 | (* output *) | 
| 24264 | 115 | |
| 43791 | 116 | fun buffer_of depth tree = | 
| 24264 | 117 | let | 
| 43791 | 118 | fun traverse _ (Elem ((name, atts), [])) = | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 119 | Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>" | 
| 43791 | 120 | | traverse d (Elem ((name, atts), ts)) = | 
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 121 | Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #> | 
| 43791 | 122 | traverse_body d ts #> | 
| 26525 | 123 | Buffer.add "</" #> Buffer.add name #> Buffer.add ">" | 
| 43791 | 124 | | traverse _ (Text s) = Buffer.add (text s) | 
| 125 | and traverse_body 0 _ = Buffer.add "..." | |
| 126 | | traverse_body d ts = fold (traverse (d - 1)) ts; | |
| 127 | in Buffer.empty |> traverse depth tree end; | |
| 24264 | 128 | |
| 43791 | 129 | val string_of = Buffer.content o buffer_of ~1; | 
| 130 | val output = Buffer.output o buffer_of ~1; | |
| 131 | ||
| 132 | fun pretty depth tree = | |
| 133 | Pretty.str (Buffer.content (buffer_of (Int.max (0, depth)) tree)); | |
| 25838 | 134 | |
| 24264 | 135 | |
| 136 | ||
| 26546 | 137 | (** XML parsing (slow) **) | 
| 138 | ||
| 139 | local | |
| 24264 | 140 | |
| 43947 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43844diff
changeset | 141 | fun err msg (xs, _) = | 
| 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43844diff
changeset | 142 | fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs); | 
| 24264 | 143 | |
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 144 | fun ignored _ = []; | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 145 | |
| 26551 | 146 | val blanks = Scan.many Symbol.is_blank; | 
| 147 | val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode; | |
| 148 | val regular = Scan.one Symbol.is_regular; | |
| 149 | fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x); | |
| 24264 | 150 | |
| 26551 | 151 | val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode; | 
| 24264 | 152 | |
| 26551 | 153 | val parse_cdata = | 
| 154 | Scan.this_string "<![CDATA[" |-- | |
| 155 | (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --| | |
| 156 | Scan.this_string "]]>"; | |
| 24264 | 157 | |
| 158 | val parse_att = | |
| 26551 | 159 | (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) -- | 
| 160 | (($$ "\"" || $$ "'") :|-- (fn s => | |
| 161 | (Scan.repeat (special || regular_except s) >> implode) --| $$ s)); | |
| 24264 | 162 | |
| 26551 | 163 | val parse_comment = | 
| 164 | Scan.this_string "<!--" -- | |
| 165 | Scan.repeat (Scan.unless (Scan.this_string "-->") regular) -- | |
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 166 | Scan.this_string "-->" >> ignored; | 
| 24264 | 167 | |
| 26551 | 168 | val parse_processing_instruction = | 
| 169 | Scan.this_string "<?" -- | |
| 170 | Scan.repeat (Scan.unless (Scan.this_string "?>") regular) -- | |
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 171 | Scan.this_string "?>" >> ignored; | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 172 | |
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 173 | val parse_doctype = | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 174 | Scan.this_string "<!DOCTYPE" -- | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 175 | Scan.repeat (Scan.unless ($$ ">") regular) -- | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 176 | $$ ">" >> ignored; | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 177 | |
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 178 | val parse_misc = | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 179 | Scan.one Symbol.is_blank >> ignored || | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 180 | parse_processing_instruction || | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 181 | parse_comment; | 
| 26551 | 182 | |
| 183 | val parse_optional_text = | |
| 184 | Scan.optional (parse_chars >> (single o Text)) []; | |
| 24264 | 185 | |
| 43949 
94033767ef9b
more precise parse_name according to XML standard;
 wenzelm parents: 
43947diff
changeset | 186 | fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_"; | 
| 
94033767ef9b
more precise parse_name according to XML standard;
 wenzelm parents: 
43947diff
changeset | 187 | fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = "."; | 
| 
94033767ef9b
more precise parse_name according to XML standard;
 wenzelm parents: 
43947diff
changeset | 188 | val parse_name = Scan.one name_start_char ::: Scan.many name_char; | 
| 
94033767ef9b
more precise parse_name according to XML standard;
 wenzelm parents: 
43947diff
changeset | 189 | |
| 26546 | 190 | in | 
| 191 | ||
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 192 | val parse_comments = | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 193 | blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K (); | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 194 | |
| 40627 
becf5d5187cc
renamed raw "explode" function to "raw_explode" to emphasize its meaning;
 wenzelm parents: 
40131diff
changeset | 195 | val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode; | 
| 26546 | 196 | |
| 24264 | 197 | fun parse_content xs = | 
| 26551 | 198 | (parse_optional_text @@@ | 
| 199 | (Scan.repeat | |
| 200 | ((parse_element >> single || | |
| 201 | parse_cdata >> (single o Text) || | |
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 202 | parse_processing_instruction || | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 203 | parse_comment) | 
| 26551 | 204 | @@@ parse_optional_text) >> flat)) xs | 
| 24264 | 205 | |
| 26546 | 206 | and parse_element xs = | 
| 43949 
94033767ef9b
more precise parse_name according to XML standard;
 wenzelm parents: 
43947diff
changeset | 207 | ($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :-- | 
| 
94033767ef9b
more precise parse_name according to XML standard;
 wenzelm parents: 
43947diff
changeset | 208 | (fn (name, _) => | 
| 43947 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43844diff
changeset | 209 | !! (err (fn () => "Expected > or />")) | 
| 43949 
94033767ef9b
more precise parse_name according to XML standard;
 wenzelm parents: 
43947diff
changeset | 210 | ($$ "/" -- $$ ">" >> ignored || | 
| 
94033767ef9b
more precise parse_name according to XML standard;
 wenzelm parents: 
43947diff
changeset | 211 | $$ ">" |-- parse_content --| | 
| 
94033767ef9b
more precise parse_name according to XML standard;
 wenzelm parents: 
43947diff
changeset | 212 | !! (err (fn () => "Expected </" ^ implode name ^ ">")) | 
| 
94033767ef9b
more precise parse_name according to XML standard;
 wenzelm parents: 
43947diff
changeset | 213 | ($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">"))) | 
| 
94033767ef9b
more precise parse_name according to XML standard;
 wenzelm parents: 
43947diff
changeset | 214 | >> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs; | 
| 24264 | 215 | |
| 26984 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 216 | val parse_document = | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 217 | (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc) | 
| 
d0e098e206f3
added parse_document (optional unchecked header material);
 wenzelm parents: 
26554diff
changeset | 218 | |-- parse_element; | 
| 24264 | 219 | |
| 26539 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 wenzelm parents: 
26525diff
changeset | 220 | fun parse s = | 
| 43947 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43844diff
changeset | 221 | (case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element")) | 
| 40627 
becf5d5187cc
renamed raw "explode" function to "raw_explode" to emphasize its meaning;
 wenzelm parents: 
40131diff
changeset | 222 | (blanks |-- parse_document --| blanks))) (raw_explode s) of | 
| 24264 | 223 | (x, []) => x | 
| 224 |   | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
 | |
| 225 | ||
| 226 | end; | |
| 26546 | 227 | |
| 43767 | 228 | |
| 229 | ||
| 230 | (** XML as data representation language **) | |
| 231 | ||
| 232 | exception XML_ATOM of string; | |
| 233 | exception XML_BODY of tree list; | |
| 234 | ||
| 235 | ||
| 236 | structure Encode = | |
| 237 | struct | |
| 238 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 239 | type 'a A = 'a -> string; | 
| 43767 | 240 | type 'a T = 'a -> body; | 
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 241 | type 'a V = 'a -> string list * body; | 
| 43767 | 242 | |
| 243 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 244 | (* atomic values *) | 
| 43767 | 245 | |
| 246 | fun int_atom i = signed_string_of_int i; | |
| 247 | ||
| 248 | fun bool_atom false = "0" | |
| 249 | | bool_atom true = "1"; | |
| 250 | ||
| 251 | fun unit_atom () = ""; | |
| 252 | ||
| 253 | ||
| 254 | (* structural nodes *) | |
| 255 | ||
| 256 | fun node ts = Elem ((":", []), ts);
 | |
| 257 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 258 | fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs; | 
| 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 259 | |
| 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 260 | fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts); | 
| 43767 | 261 | |
| 262 | ||
| 263 | (* representation of standard types *) | |
| 264 | ||
| 265 | fun properties props = [Elem ((":", props), [])];
 | |
| 266 | ||
| 267 | fun string "" = [] | |
| 268 | | string s = [Text s]; | |
| 269 | ||
| 270 | val int = string o int_atom; | |
| 271 | ||
| 272 | val bool = string o bool_atom; | |
| 273 | ||
| 274 | val unit = string o unit_atom; | |
| 275 | ||
| 276 | fun pair f g (x, y) = [node (f x), node (g y)]; | |
| 277 | ||
| 278 | fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)]; | |
| 279 | ||
| 280 | fun list f xs = map (node o f) xs; | |
| 281 | ||
| 282 | fun option _ NONE = [] | |
| 283 | | option f (SOME x) = [node (f x)]; | |
| 284 | ||
| 285 | fun variant fs x = [tagged (the (get_index (fn f => try f x) fs))]; | |
| 286 | ||
| 26546 | 287 | end; | 
| 43767 | 288 | |
| 289 | ||
| 290 | structure Decode = | |
| 291 | struct | |
| 292 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 293 | type 'a A = string -> 'a; | 
| 43767 | 294 | type 'a T = body -> 'a; | 
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 295 | type 'a V = string list * body -> 'a; | 
| 43767 | 296 | |
| 297 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 298 | (* atomic values *) | 
| 43767 | 299 | |
| 300 | fun int_atom s = | |
| 43797 
fad7758421bf
more precise integer Markup.properties/XML.attributes: disallow ML-style ~ minus;
 wenzelm parents: 
43791diff
changeset | 301 | Markup.parse_int s | 
| 
fad7758421bf
more precise integer Markup.properties/XML.attributes: disallow ML-style ~ minus;
 wenzelm parents: 
43791diff
changeset | 302 | handle Fail _ => raise XML_ATOM s; | 
| 43767 | 303 | |
| 304 | fun bool_atom "0" = false | |
| 305 | | bool_atom "1" = true | |
| 306 | | bool_atom s = raise XML_ATOM s; | |
| 307 | ||
| 308 | fun unit_atom "" = () | |
| 309 | | unit_atom s = raise XML_ATOM s; | |
| 310 | ||
| 311 | ||
| 312 | (* structural nodes *) | |
| 313 | ||
| 314 | fun node (Elem ((":", []), ts)) = ts
 | |
| 315 | | node t = raise XML_BODY [t]; | |
| 316 | ||
| 43783 | 317 | fun vector atts = | 
| 43844 | 318 | #1 (fold_map (fn (a, x) => | 
| 319 | fn i => if int_atom a = i then (x, i + 1) else raise XML_ATOM a) atts 0); | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 320 | |
| 43844 | 321 | fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts)) | 
| 43767 | 322 | | tagged t = raise XML_BODY [t]; | 
| 323 | ||
| 324 | ||
| 325 | (* representation of standard types *) | |
| 326 | ||
| 327 | fun properties [Elem ((":", props), [])] = props
 | |
| 328 | | properties ts = raise XML_BODY ts; | |
| 329 | ||
| 330 | fun string [] = "" | |
| 331 | | string [Text s] = s | |
| 332 | | string ts = raise XML_BODY ts; | |
| 333 | ||
| 334 | val int = int_atom o string; | |
| 335 | ||
| 336 | val bool = bool_atom o string; | |
| 337 | ||
| 338 | val unit = unit_atom o string; | |
| 339 | ||
| 340 | fun pair f g [t1, t2] = (f (node t1), g (node t2)) | |
| 341 | | pair _ _ ts = raise XML_BODY ts; | |
| 342 | ||
| 343 | fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3)) | |
| 344 | | triple _ _ _ ts = raise XML_BODY ts; | |
| 345 | ||
| 346 | fun list f ts = map (f o node) ts; | |
| 347 | ||
| 348 | fun option _ [] = NONE | |
| 349 | | option f [t] = SOME (f (node t)) | |
| 350 | | option _ ts = raise XML_BODY ts; | |
| 351 | ||
| 43768 | 352 | fun variant fs [t] = | 
| 353 | let | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 354 | val (tag, (xs, ts)) = tagged t; | 
| 43768 | 355 | val f = nth fs tag handle General.Subscript => raise XML_BODY [t]; | 
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 356 | in f (xs, ts) end | 
| 43767 | 357 | | variant _ ts = raise XML_BODY ts; | 
| 358 | ||
| 359 | end; | |
| 360 | ||
| 361 | end; |