| author | wenzelm | 
| Wed, 31 Aug 2011 14:39:41 +0200 | |
| changeset 44606 | b625650aa2db | 
| parent 43949 | 94033767ef9b | 
| permissions | -rw-r--r-- | 
| 24584 | 1  | 
(* Title: Pure/General/xml.ML  | 
| 24264 | 2  | 
Author: David Aspinall, Stefan Berghofer and Markus Wenzel  | 
3  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
4  | 
Untyped XML trees.  | 
| 24264 | 5  | 
*)  | 
6  | 
||
| 43767 | 7  | 
signature XML_DATA_OPS =  | 
8  | 
sig  | 
|
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
9  | 
type 'a A  | 
| 43767 | 10  | 
type 'a T  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
11  | 
type 'a V  | 
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
12  | 
val int_atom: int A  | 
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
13  | 
val bool_atom: bool A  | 
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
14  | 
val unit_atom: unit A  | 
| 43767 | 15  | 
val properties: Properties.T T  | 
16  | 
val string: string T  | 
|
17  | 
val int: int T  | 
|
18  | 
val bool: bool T  | 
|
19  | 
val unit: unit T  | 
|
20  | 
  val pair: 'a T -> 'b T -> ('a * 'b) T
 | 
|
21  | 
  val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
 | 
|
22  | 
val list: 'a T -> 'a list T  | 
|
23  | 
val option: 'a T -> 'a option T  | 
|
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
24  | 
val variant: 'a V list -> 'a T  | 
| 43767 | 25  | 
end;  | 
26  | 
||
| 24264 | 27  | 
signature XML =  | 
28  | 
sig  | 
|
| 28017 | 29  | 
type attributes = Properties.T  | 
| 24264 | 30  | 
datatype tree =  | 
| 
38228
 
ada3ab6b9085
simplified type XML.tree: embed Markup.T directly, avoid slightly odd triple;
 
wenzelm 
parents: 
31469 
diff
changeset
 | 
31  | 
Elem of Markup.T * tree list  | 
| 24264 | 32  | 
| Text of string  | 
| 
38266
 
492d377ecfe2
type XML.body as basic data representation language;
 
wenzelm 
parents: 
38228 
diff
changeset
 | 
33  | 
type body = tree list  | 
| 26546 | 34  | 
val add_content: tree -> Buffer.T -> Buffer.T  | 
| 
39555
 
ccb223a4d49c
added XML.content_of convenience -- cover XML.body, which is the general situation;
 
wenzelm 
parents: 
38474 
diff
changeset
 | 
35  | 
val content_of: body -> string  | 
| 26546 | 36  | 
val header: string  | 
37  | 
val text: string -> string  | 
|
38  | 
val element: string -> attributes -> string list -> string  | 
|
| 
40131
 
7cbebd636e79
explicitly qualify type Output.output, which is a slightly odd internal feature;
 
wenzelm 
parents: 
39555 
diff
changeset
 | 
39  | 
val output_markup: Markup.T -> Output.output * Output.output  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
40  | 
val string_of: tree -> string  | 
| 43791 | 41  | 
val pretty: int -> tree -> Pretty.T  | 
| 26546 | 42  | 
val output: tree -> TextIO.outstream -> unit  | 
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
43  | 
val parse_comments: string list -> unit * string list  | 
| 24264 | 44  | 
val parse_string : string -> string option  | 
| 26546 | 45  | 
val parse_element: string list -> tree * string list  | 
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
46  | 
val parse_document: string list -> tree * string list  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
47  | 
val parse: string -> tree  | 
| 43767 | 48  | 
exception XML_ATOM of string  | 
49  | 
exception XML_BODY of body  | 
|
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
50  | 
structure Encode: XML_DATA_OPS  | 
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
51  | 
structure Decode: XML_DATA_OPS  | 
| 24264 | 52  | 
end;  | 
53  | 
||
54  | 
structure XML: XML =  | 
|
55  | 
struct  | 
|
56  | 
||
| 26546 | 57  | 
(** XML trees **)  | 
58  | 
||
| 28017 | 59  | 
type attributes = Properties.T;  | 
| 26546 | 60  | 
|
61  | 
datatype tree =  | 
|
| 
38228
 
ada3ab6b9085
simplified type XML.tree: embed Markup.T directly, avoid slightly odd triple;
 
wenzelm 
parents: 
31469 
diff
changeset
 | 
62  | 
Elem of Markup.T * tree list  | 
| 28033 | 63  | 
| Text of string;  | 
| 26546 | 64  | 
|
| 
38266
 
492d377ecfe2
type XML.body as basic data representation language;
 
wenzelm 
parents: 
38228 
diff
changeset
 | 
65  | 
type body = tree list;  | 
| 
 
492d377ecfe2
type XML.body as basic data representation language;
 
wenzelm 
parents: 
38228 
diff
changeset
 | 
66  | 
|
| 
38228
 
ada3ab6b9085
simplified type XML.tree: embed Markup.T directly, avoid slightly odd triple;
 
wenzelm 
parents: 
31469 
diff
changeset
 | 
67  | 
fun add_content (Elem (_, ts)) = fold add_content ts  | 
| 28033 | 68  | 
| add_content (Text s) = Buffer.add s;  | 
| 26546 | 69  | 
|
| 
39555
 
ccb223a4d49c
added XML.content_of convenience -- cover XML.body, which is the general situation;
 
wenzelm 
parents: 
38474 
diff
changeset
 | 
70  | 
fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content;  | 
| 
 
ccb223a4d49c
added XML.content_of convenience -- cover XML.body, which is the general situation;
 
wenzelm 
parents: 
38474 
diff
changeset
 | 
71  | 
|
| 26546 | 72  | 
|
| 24264 | 73  | 
|
| 26525 | 74  | 
(** string representation **)  | 
75  | 
||
| 24264 | 76  | 
val header = "<?xml version=\"1.0\"?>\n";  | 
77  | 
||
78  | 
||
| 26546 | 79  | 
(* escaped text *)  | 
| 24264 | 80  | 
|
81  | 
fun decode "<" = "<"  | 
|
82  | 
| decode ">" = ">"  | 
|
83  | 
| decode "&" = "&"  | 
|
84  | 
| decode "'" = "'"  | 
|
85  | 
| decode """ = "\""  | 
|
86  | 
| decode c = c;  | 
|
87  | 
||
88  | 
fun encode "<" = "<"  | 
|
89  | 
| encode ">" = ">"  | 
|
90  | 
| encode "&" = "&"  | 
|
91  | 
| encode "'" = "'"  | 
|
92  | 
| encode "\"" = """  | 
|
93  | 
| encode c = c;  | 
|
94  | 
||
| 25838 | 95  | 
val text = translate_string encode;  | 
| 24264 | 96  | 
|
97  | 
||
98  | 
(* elements *)  | 
|
99  | 
||
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
100  | 
fun elem name atts =  | 
| 26551 | 101  | 
space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);  | 
| 24264 | 102  | 
|
| 26525 | 103  | 
fun element name atts body =  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
104  | 
let val b = implode body in  | 
| 
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
105  | 
if b = "" then enclose "<" "/>" (elem name atts)  | 
| 
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
106  | 
else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name  | 
| 24264 | 107  | 
end;  | 
108  | 
||
| 27884 | 109  | 
fun output_markup (markup as (name, atts)) =  | 
| 
38474
 
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
 
wenzelm 
parents: 
38266 
diff
changeset
 | 
110  | 
if Markup.is_empty markup then Markup.no_output  | 
| 27884 | 111  | 
else (enclose "<" ">" (elem name atts), enclose "</" ">" name);  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
112  | 
|
| 24264 | 113  | 
|
| 26546 | 114  | 
(* output *)  | 
| 24264 | 115  | 
|
| 43791 | 116  | 
fun buffer_of depth tree =  | 
| 24264 | 117  | 
let  | 
| 43791 | 118  | 
fun traverse _ (Elem ((name, atts), [])) =  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
119  | 
Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"  | 
| 43791 | 120  | 
| traverse d (Elem ((name, atts), ts)) =  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
121  | 
Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>  | 
| 43791 | 122  | 
traverse_body d ts #>  | 
| 26525 | 123  | 
Buffer.add "</" #> Buffer.add name #> Buffer.add ">"  | 
| 43791 | 124  | 
| traverse _ (Text s) = Buffer.add (text s)  | 
125  | 
and traverse_body 0 _ = Buffer.add "..."  | 
|
126  | 
| traverse_body d ts = fold (traverse (d - 1)) ts;  | 
|
127  | 
in Buffer.empty |> traverse depth tree end;  | 
|
| 24264 | 128  | 
|
| 43791 | 129  | 
val string_of = Buffer.content o buffer_of ~1;  | 
130  | 
val output = Buffer.output o buffer_of ~1;  | 
|
131  | 
||
132  | 
fun pretty depth tree =  | 
|
133  | 
Pretty.str (Buffer.content (buffer_of (Int.max (0, depth)) tree));  | 
|
| 25838 | 134  | 
|
| 24264 | 135  | 
|
136  | 
||
| 26546 | 137  | 
(** XML parsing (slow) **)  | 
138  | 
||
139  | 
local  | 
|
| 24264 | 140  | 
|
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43844 
diff
changeset
 | 
141  | 
fun err msg (xs, _) =  | 
| 
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43844 
diff
changeset
 | 
142  | 
fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);  | 
| 24264 | 143  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
144  | 
fun ignored _ = [];  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
145  | 
|
| 26551 | 146  | 
val blanks = Scan.many Symbol.is_blank;  | 
147  | 
val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;  | 
|
148  | 
val regular = Scan.one Symbol.is_regular;  | 
|
149  | 
fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);  | 
|
| 24264 | 150  | 
|
| 26551 | 151  | 
val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;  | 
| 24264 | 152  | 
|
| 26551 | 153  | 
val parse_cdata =  | 
154  | 
Scan.this_string "<![CDATA[" |--  | 
|
155  | 
(Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|  | 
|
156  | 
Scan.this_string "]]>";  | 
|
| 24264 | 157  | 
|
158  | 
val parse_att =  | 
|
| 26551 | 159  | 
(Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) --  | 
160  | 
(($$ "\"" || $$ "'") :|-- (fn s =>  | 
|
161  | 
(Scan.repeat (special || regular_except s) >> implode) --| $$ s));  | 
|
| 24264 | 162  | 
|
| 26551 | 163  | 
val parse_comment =  | 
164  | 
Scan.this_string "<!--" --  | 
|
165  | 
Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
166  | 
Scan.this_string "-->" >> ignored;  | 
| 24264 | 167  | 
|
| 26551 | 168  | 
val parse_processing_instruction =  | 
169  | 
Scan.this_string "<?" --  | 
|
170  | 
Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
171  | 
Scan.this_string "?>" >> ignored;  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
172  | 
|
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
173  | 
val parse_doctype =  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
174  | 
Scan.this_string "<!DOCTYPE" --  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
175  | 
Scan.repeat (Scan.unless ($$ ">") regular) --  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
176  | 
$$ ">" >> ignored;  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
177  | 
|
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
178  | 
val parse_misc =  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
179  | 
Scan.one Symbol.is_blank >> ignored ||  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
180  | 
parse_processing_instruction ||  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
181  | 
parse_comment;  | 
| 26551 | 182  | 
|
183  | 
val parse_optional_text =  | 
|
184  | 
Scan.optional (parse_chars >> (single o Text)) [];  | 
|
| 24264 | 185  | 
|
| 
43949
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
186  | 
fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_";  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
187  | 
fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = ".";  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
188  | 
val parse_name = Scan.one name_start_char ::: Scan.many name_char;  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
189  | 
|
| 26546 | 190  | 
in  | 
191  | 
||
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
192  | 
val parse_comments =  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
193  | 
blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
194  | 
|
| 
40627
 
becf5d5187cc
renamed raw "explode" function to "raw_explode" to emphasize its meaning;
 
wenzelm 
parents: 
40131 
diff
changeset
 | 
195  | 
val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode;  | 
| 26546 | 196  | 
|
| 24264 | 197  | 
fun parse_content xs =  | 
| 26551 | 198  | 
(parse_optional_text @@@  | 
199  | 
(Scan.repeat  | 
|
200  | 
((parse_element >> single ||  | 
|
201  | 
parse_cdata >> (single o Text) ||  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
202  | 
parse_processing_instruction ||  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
203  | 
parse_comment)  | 
| 26551 | 204  | 
@@@ parse_optional_text) >> flat)) xs  | 
| 24264 | 205  | 
|
| 26546 | 206  | 
and parse_element xs =  | 
| 
43949
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
207  | 
($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :--  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
208  | 
(fn (name, _) =>  | 
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43844 
diff
changeset
 | 
209  | 
!! (err (fn () => "Expected > or />"))  | 
| 
43949
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
210  | 
($$ "/" -- $$ ">" >> ignored ||  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
211  | 
$$ ">" |-- parse_content --|  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
212  | 
!! (err (fn () => "Expected </" ^ implode name ^ ">"))  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
213  | 
($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">")))  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
214  | 
>> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs;  | 
| 24264 | 215  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
216  | 
val parse_document =  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
217  | 
(Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
218  | 
|-- parse_element;  | 
| 24264 | 219  | 
|
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
220  | 
fun parse s =  | 
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43844 
diff
changeset
 | 
221  | 
(case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element"))  | 
| 
40627
 
becf5d5187cc
renamed raw "explode" function to "raw_explode" to emphasize its meaning;
 
wenzelm 
parents: 
40131 
diff
changeset
 | 
222  | 
(blanks |-- parse_document --| blanks))) (raw_explode s) of  | 
| 24264 | 223  | 
(x, []) => x  | 
224  | 
  | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
 | 
|
225  | 
||
226  | 
end;  | 
|
| 26546 | 227  | 
|
| 43767 | 228  | 
|
229  | 
||
230  | 
(** XML as data representation language **)  | 
|
231  | 
||
232  | 
exception XML_ATOM of string;  | 
|
233  | 
exception XML_BODY of tree list;  | 
|
234  | 
||
235  | 
||
236  | 
structure Encode =  | 
|
237  | 
struct  | 
|
238  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
239  | 
type 'a A = 'a -> string;  | 
| 43767 | 240  | 
type 'a T = 'a -> body;  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
241  | 
type 'a V = 'a -> string list * body;  | 
| 43767 | 242  | 
|
243  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
244  | 
(* atomic values *)  | 
| 43767 | 245  | 
|
246  | 
fun int_atom i = signed_string_of_int i;  | 
|
247  | 
||
248  | 
fun bool_atom false = "0"  | 
|
249  | 
| bool_atom true = "1";  | 
|
250  | 
||
251  | 
fun unit_atom () = "";  | 
|
252  | 
||
253  | 
||
254  | 
(* structural nodes *)  | 
|
255  | 
||
256  | 
fun node ts = Elem ((":", []), ts);
 | 
|
257  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
258  | 
fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs;  | 
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
259  | 
|
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
260  | 
fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts);  | 
| 43767 | 261  | 
|
262  | 
||
263  | 
(* representation of standard types *)  | 
|
264  | 
||
265  | 
fun properties props = [Elem ((":", props), [])];
 | 
|
266  | 
||
267  | 
fun string "" = []  | 
|
268  | 
| string s = [Text s];  | 
|
269  | 
||
270  | 
val int = string o int_atom;  | 
|
271  | 
||
272  | 
val bool = string o bool_atom;  | 
|
273  | 
||
274  | 
val unit = string o unit_atom;  | 
|
275  | 
||
276  | 
fun pair f g (x, y) = [node (f x), node (g y)];  | 
|
277  | 
||
278  | 
fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)];  | 
|
279  | 
||
280  | 
fun list f xs = map (node o f) xs;  | 
|
281  | 
||
282  | 
fun option _ NONE = []  | 
|
283  | 
| option f (SOME x) = [node (f x)];  | 
|
284  | 
||
285  | 
fun variant fs x = [tagged (the (get_index (fn f => try f x) fs))];  | 
|
286  | 
||
| 26546 | 287  | 
end;  | 
| 43767 | 288  | 
|
289  | 
||
290  | 
structure Decode =  | 
|
291  | 
struct  | 
|
292  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
293  | 
type 'a A = string -> 'a;  | 
| 43767 | 294  | 
type 'a T = body -> 'a;  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
295  | 
type 'a V = string list * body -> 'a;  | 
| 43767 | 296  | 
|
297  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
298  | 
(* atomic values *)  | 
| 43767 | 299  | 
|
300  | 
fun int_atom s =  | 
|
| 
43797
 
fad7758421bf
more precise integer Markup.properties/XML.attributes: disallow ML-style ~ minus;
 
wenzelm 
parents: 
43791 
diff
changeset
 | 
301  | 
Markup.parse_int s  | 
| 
 
fad7758421bf
more precise integer Markup.properties/XML.attributes: disallow ML-style ~ minus;
 
wenzelm 
parents: 
43791 
diff
changeset
 | 
302  | 
handle Fail _ => raise XML_ATOM s;  | 
| 43767 | 303  | 
|
304  | 
fun bool_atom "0" = false  | 
|
305  | 
| bool_atom "1" = true  | 
|
306  | 
| bool_atom s = raise XML_ATOM s;  | 
|
307  | 
||
308  | 
fun unit_atom "" = ()  | 
|
309  | 
| unit_atom s = raise XML_ATOM s;  | 
|
310  | 
||
311  | 
||
312  | 
(* structural nodes *)  | 
|
313  | 
||
314  | 
fun node (Elem ((":", []), ts)) = ts
 | 
|
315  | 
| node t = raise XML_BODY [t];  | 
|
316  | 
||
| 43783 | 317  | 
fun vector atts =  | 
| 43844 | 318  | 
#1 (fold_map (fn (a, x) =>  | 
319  | 
fn i => if int_atom a = i then (x, i + 1) else raise XML_ATOM a) atts 0);  | 
|
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
320  | 
|
| 43844 | 321  | 
fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts))  | 
| 43767 | 322  | 
| tagged t = raise XML_BODY [t];  | 
323  | 
||
324  | 
||
325  | 
(* representation of standard types *)  | 
|
326  | 
||
327  | 
fun properties [Elem ((":", props), [])] = props
 | 
|
328  | 
| properties ts = raise XML_BODY ts;  | 
|
329  | 
||
330  | 
fun string [] = ""  | 
|
331  | 
| string [Text s] = s  | 
|
332  | 
| string ts = raise XML_BODY ts;  | 
|
333  | 
||
334  | 
val int = int_atom o string;  | 
|
335  | 
||
336  | 
val bool = bool_atom o string;  | 
|
337  | 
||
338  | 
val unit = unit_atom o string;  | 
|
339  | 
||
340  | 
fun pair f g [t1, t2] = (f (node t1), g (node t2))  | 
|
341  | 
| pair _ _ ts = raise XML_BODY ts;  | 
|
342  | 
||
343  | 
fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3))  | 
|
344  | 
| triple _ _ _ ts = raise XML_BODY ts;  | 
|
345  | 
||
346  | 
fun list f ts = map (f o node) ts;  | 
|
347  | 
||
348  | 
fun option _ [] = NONE  | 
|
349  | 
| option f [t] = SOME (f (node t))  | 
|
350  | 
| option _ ts = raise XML_BODY ts;  | 
|
351  | 
||
| 43768 | 352  | 
fun variant fs [t] =  | 
353  | 
let  | 
|
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
354  | 
val (tag, (xs, ts)) = tagged t;  | 
| 43768 | 355  | 
val f = nth fs tag handle General.Subscript => raise XML_BODY [t];  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
356  | 
in f (xs, ts) end  | 
| 43767 | 357  | 
| variant _ ts = raise XML_BODY ts;  | 
358  | 
||
359  | 
end;  | 
|
360  | 
||
361  | 
end;  |