| author | blanchet | 
| Mon, 08 Sep 2014 15:11:37 +0200 | |
| changeset 58227 | d91f7a80f412 | 
| parent 56059 | 2390391584c2 | 
| child 58854 | b979c781c2db | 
| permissions | -rw-r--r-- | 
| 44698 | 1  | 
(* Title: Pure/PIDE/xml.ML  | 
2  | 
Author: David Aspinall  | 
|
3  | 
Author: Stefan Berghofer  | 
|
4  | 
Author: Makarius  | 
|
| 24264 | 5  | 
|
| 46840 | 6  | 
Untyped XML trees and representation of ML values.  | 
| 24264 | 7  | 
*)  | 
8  | 
||
| 43767 | 9  | 
signature XML_DATA_OPS =  | 
10  | 
sig  | 
|
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
11  | 
type 'a A  | 
| 43767 | 12  | 
type 'a T  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
13  | 
type 'a V  | 
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
14  | 
val int_atom: int A  | 
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
15  | 
val bool_atom: bool A  | 
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
16  | 
val unit_atom: unit A  | 
| 43767 | 17  | 
val properties: Properties.T T  | 
18  | 
val string: string T  | 
|
19  | 
val int: int T  | 
|
20  | 
val bool: bool T  | 
|
21  | 
val unit: unit T  | 
|
22  | 
  val pair: 'a T -> 'b T -> ('a * 'b) T
 | 
|
23  | 
  val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
 | 
|
24  | 
val list: 'a T -> 'a list T  | 
|
25  | 
val option: 'a T -> 'a option T  | 
|
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
26  | 
val variant: 'a V list -> 'a T  | 
| 43767 | 27  | 
end;  | 
28  | 
||
| 24264 | 29  | 
signature XML =  | 
30  | 
sig  | 
|
| 
46837
 
5bdd68f380b3
clarified XML signature (again) -- coincide with basic Markup without explicit dependency;
 
wenzelm 
parents: 
45155 
diff
changeset
 | 
31  | 
type attributes = (string * string) list  | 
| 24264 | 32  | 
datatype tree =  | 
| 
46837
 
5bdd68f380b3
clarified XML signature (again) -- coincide with basic Markup without explicit dependency;
 
wenzelm 
parents: 
45155 
diff
changeset
 | 
33  | 
Elem of (string * attributes) * tree list  | 
| 24264 | 34  | 
| Text of string  | 
| 
38266
 
492d377ecfe2
type XML.body as basic data representation language;
 
wenzelm 
parents: 
38228 
diff
changeset
 | 
35  | 
type body = tree list  | 
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
36  | 
val wrap_elem: ((string * attributes) * tree list) * tree list -> tree  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
37  | 
val unwrap_elem: tree -> (((string * attributes) * tree list) * tree list) option  | 
| 26546 | 38  | 
val add_content: tree -> Buffer.T -> Buffer.T  | 
| 
39555
 
ccb223a4d49c
added XML.content_of convenience -- cover XML.body, which is the general situation;
 
wenzelm 
parents: 
38474 
diff
changeset
 | 
39  | 
val content_of: body -> string  | 
| 
56059
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
40  | 
val trim_blanks: body -> body  | 
| 26546 | 41  | 
val header: string  | 
42  | 
val text: string -> string  | 
|
43  | 
val element: string -> attributes -> string list -> string  | 
|
| 
40131
 
7cbebd636e79
explicitly qualify type Output.output, which is a slightly odd internal feature;
 
wenzelm 
parents: 
39555 
diff
changeset
 | 
44  | 
val output_markup: Markup.T -> Output.output * Output.output  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
45  | 
val string_of: tree -> string  | 
| 43791 | 46  | 
val pretty: int -> tree -> Pretty.T  | 
| 26546 | 47  | 
val output: tree -> TextIO.outstream -> unit  | 
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
48  | 
val parse_comments: string list -> unit * string list  | 
| 24264 | 49  | 
val parse_string : string -> string option  | 
| 26546 | 50  | 
val parse_element: string list -> tree * string list  | 
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
51  | 
val parse_document: string list -> tree * string list  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
52  | 
val parse: string -> tree  | 
| 43767 | 53  | 
exception XML_ATOM of string  | 
54  | 
exception XML_BODY of body  | 
|
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
55  | 
structure Encode: XML_DATA_OPS  | 
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
56  | 
structure Decode: XML_DATA_OPS  | 
| 24264 | 57  | 
end;  | 
58  | 
||
59  | 
structure XML: XML =  | 
|
60  | 
struct  | 
|
61  | 
||
| 26546 | 62  | 
(** XML trees **)  | 
63  | 
||
| 
46837
 
5bdd68f380b3
clarified XML signature (again) -- coincide with basic Markup without explicit dependency;
 
wenzelm 
parents: 
45155 
diff
changeset
 | 
64  | 
type attributes = (string * string) list;  | 
| 26546 | 65  | 
|
66  | 
datatype tree =  | 
|
| 
46837
 
5bdd68f380b3
clarified XML signature (again) -- coincide with basic Markup without explicit dependency;
 
wenzelm 
parents: 
45155 
diff
changeset
 | 
67  | 
Elem of (string * attributes) * tree list  | 
| 28033 | 68  | 
| Text of string;  | 
| 26546 | 69  | 
|
| 
38266
 
492d377ecfe2
type XML.body as basic data representation language;
 
wenzelm 
parents: 
38228 
diff
changeset
 | 
70  | 
type body = tree list;  | 
| 
 
492d377ecfe2
type XML.body as basic data representation language;
 
wenzelm 
parents: 
38228 
diff
changeset
 | 
71  | 
|
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
72  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
73  | 
(* wrapped elements *)  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
74  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
75  | 
val xml_elemN = "xml_elem";  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
76  | 
val xml_nameN = "xml_name";  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
77  | 
val xml_bodyN = "xml_body";  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
78  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
79  | 
fun wrap_elem (((a, atts), body1), body2) =  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
80  | 
Elem ((xml_elemN, (xml_nameN, a) :: atts), Elem ((xml_bodyN, []), body1) :: body2);  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
81  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
82  | 
fun unwrap_elem (Elem ((name, (n, a) :: atts), Elem ((name', atts'), body1) :: body2)) =  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
83  | 
if name = xml_elemN andalso n = xml_nameN andalso name' = xml_bodyN andalso null atts'  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
84  | 
then SOME (((a, atts), body1), body2) else NONE  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
85  | 
| unwrap_elem _ = NONE;  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
86  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
87  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
88  | 
(* text context *)  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
89  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
90  | 
fun add_content tree =  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
91  | 
(case unwrap_elem tree of  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
92  | 
SOME (_, ts) => fold add_content ts  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
93  | 
| NONE =>  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
94  | 
(case tree of  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
95  | 
Elem (_, ts) => fold add_content ts  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
96  | 
| Text s => Buffer.add s));  | 
| 26546 | 97  | 
|
| 
39555
 
ccb223a4d49c
added XML.content_of convenience -- cover XML.body, which is the general situation;
 
wenzelm 
parents: 
38474 
diff
changeset
 | 
98  | 
fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content;  | 
| 
 
ccb223a4d49c
added XML.content_of convenience -- cover XML.body, which is the general situation;
 
wenzelm 
parents: 
38474 
diff
changeset
 | 
99  | 
|
| 26546 | 100  | 
|
| 
56059
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
101  | 
(* trim blanks *)  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
102  | 
|
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
103  | 
fun trim_blanks trees =  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
104  | 
trees |> maps  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
105  | 
(fn Elem (markup, body) => [Elem (markup, trim_blanks body)]  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
106  | 
| Text s =>  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
107  | 
let  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
108  | 
val s' = s  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
109  | 
|> raw_explode  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
110  | 
|> take_prefix Symbol.is_blank |> #2  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
111  | 
|> take_suffix Symbol.is_blank |> #1  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
112  | 
|> implode;  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
113  | 
in if s' = "" then [] else [Text s'] end);  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
114  | 
|
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
115  | 
|
| 24264 | 116  | 
|
| 26525 | 117  | 
(** string representation **)  | 
118  | 
||
| 24264 | 119  | 
val header = "<?xml version=\"1.0\"?>\n";  | 
120  | 
||
121  | 
||
| 26546 | 122  | 
(* escaped text *)  | 
| 24264 | 123  | 
|
124  | 
fun decode "<" = "<"  | 
|
125  | 
| decode ">" = ">"  | 
|
126  | 
| decode "&" = "&"  | 
|
127  | 
| decode "'" = "'"  | 
|
128  | 
| decode """ = "\""  | 
|
129  | 
| decode c = c;  | 
|
130  | 
||
131  | 
fun encode "<" = "<"  | 
|
132  | 
| encode ">" = ">"  | 
|
133  | 
| encode "&" = "&"  | 
|
134  | 
| encode "'" = "'"  | 
|
135  | 
| encode "\"" = """  | 
|
136  | 
| encode c = c;  | 
|
137  | 
||
| 25838 | 138  | 
val text = translate_string encode;  | 
| 24264 | 139  | 
|
140  | 
||
141  | 
(* elements *)  | 
|
142  | 
||
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
143  | 
fun elem name atts =  | 
| 26551 | 144  | 
space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);  | 
| 24264 | 145  | 
|
| 26525 | 146  | 
fun element name atts body =  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
147  | 
let val b = implode body in  | 
| 
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
148  | 
if b = "" then enclose "<" "/>" (elem name atts)  | 
| 
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
149  | 
else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name  | 
| 24264 | 150  | 
end;  | 
151  | 
||
| 27884 | 152  | 
fun output_markup (markup as (name, atts)) =  | 
| 
38474
 
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
 
wenzelm 
parents: 
38266 
diff
changeset
 | 
153  | 
if Markup.is_empty markup then Markup.no_output  | 
| 27884 | 154  | 
else (enclose "<" ">" (elem name atts), enclose "</" ">" name);  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
155  | 
|
| 24264 | 156  | 
|
| 26546 | 157  | 
(* output *)  | 
| 24264 | 158  | 
|
| 43791 | 159  | 
fun buffer_of depth tree =  | 
| 24264 | 160  | 
let  | 
| 43791 | 161  | 
fun traverse _ (Elem ((name, atts), [])) =  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
162  | 
Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"  | 
| 43791 | 163  | 
| traverse d (Elem ((name, atts), ts)) =  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
164  | 
Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>  | 
| 43791 | 165  | 
traverse_body d ts #>  | 
| 26525 | 166  | 
Buffer.add "</" #> Buffer.add name #> Buffer.add ">"  | 
| 43791 | 167  | 
| traverse _ (Text s) = Buffer.add (text s)  | 
168  | 
and traverse_body 0 _ = Buffer.add "..."  | 
|
169  | 
| traverse_body d ts = fold (traverse (d - 1)) ts;  | 
|
170  | 
in Buffer.empty |> traverse depth tree end;  | 
|
| 24264 | 171  | 
|
| 43791 | 172  | 
val string_of = Buffer.content o buffer_of ~1;  | 
173  | 
val output = Buffer.output o buffer_of ~1;  | 
|
174  | 
||
175  | 
fun pretty depth tree =  | 
|
176  | 
Pretty.str (Buffer.content (buffer_of (Int.max (0, depth)) tree));  | 
|
| 25838 | 177  | 
|
| 24264 | 178  | 
|
179  | 
||
| 44698 | 180  | 
(** XML parsing **)  | 
| 26546 | 181  | 
|
182  | 
local  | 
|
| 24264 | 183  | 
|
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43844 
diff
changeset
 | 
184  | 
fun err msg (xs, _) =  | 
| 
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43844 
diff
changeset
 | 
185  | 
fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);  | 
| 24264 | 186  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
187  | 
fun ignored _ = [];  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
188  | 
|
| 
45155
 
3216d65d8f34
slightly more standard-conformant XML parsing (see also 94033767ef9b);
 
wenzelm 
parents: 
44809 
diff
changeset
 | 
189  | 
fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_";  | 
| 
 
3216d65d8f34
slightly more standard-conformant XML parsing (see also 94033767ef9b);
 
wenzelm 
parents: 
44809 
diff
changeset
 | 
190  | 
fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = ".";  | 
| 
 
3216d65d8f34
slightly more standard-conformant XML parsing (see also 94033767ef9b);
 
wenzelm 
parents: 
44809 
diff
changeset
 | 
191  | 
val parse_name = Scan.one name_start_char ::: Scan.many name_char;  | 
| 
 
3216d65d8f34
slightly more standard-conformant XML parsing (see also 94033767ef9b);
 
wenzelm 
parents: 
44809 
diff
changeset
 | 
192  | 
|
| 26551 | 193  | 
val blanks = Scan.many Symbol.is_blank;  | 
| 
45155
 
3216d65d8f34
slightly more standard-conformant XML parsing (see also 94033767ef9b);
 
wenzelm 
parents: 
44809 
diff
changeset
 | 
194  | 
val special = $$ "&" ^^ (parse_name >> implode) ^^ $$ ";" >> decode;  | 
| 26551 | 195  | 
val regular = Scan.one Symbol.is_regular;  | 
196  | 
fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);  | 
|
| 24264 | 197  | 
|
| 26551 | 198  | 
val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;  | 
| 24264 | 199  | 
|
| 26551 | 200  | 
val parse_cdata =  | 
201  | 
Scan.this_string "<![CDATA[" |--  | 
|
202  | 
(Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|  | 
|
203  | 
Scan.this_string "]]>";  | 
|
| 24264 | 204  | 
|
205  | 
val parse_att =  | 
|
| 
45155
 
3216d65d8f34
slightly more standard-conformant XML parsing (see also 94033767ef9b);
 
wenzelm 
parents: 
44809 
diff
changeset
 | 
206  | 
((parse_name >> implode) --| (blanks -- $$ "=" -- blanks)) --  | 
| 26551 | 207  | 
(($$ "\"" || $$ "'") :|-- (fn s =>  | 
208  | 
(Scan.repeat (special || regular_except s) >> implode) --| $$ s));  | 
|
| 24264 | 209  | 
|
| 26551 | 210  | 
val parse_comment =  | 
211  | 
Scan.this_string "<!--" --  | 
|
212  | 
Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
213  | 
Scan.this_string "-->" >> ignored;  | 
| 24264 | 214  | 
|
| 26551 | 215  | 
val parse_processing_instruction =  | 
216  | 
Scan.this_string "<?" --  | 
|
217  | 
Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
218  | 
Scan.this_string "?>" >> ignored;  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
219  | 
|
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
220  | 
val parse_doctype =  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
221  | 
Scan.this_string "<!DOCTYPE" --  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
222  | 
Scan.repeat (Scan.unless ($$ ">") regular) --  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
223  | 
$$ ">" >> ignored;  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
224  | 
|
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
225  | 
val parse_misc =  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
226  | 
Scan.one Symbol.is_blank >> ignored ||  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
227  | 
parse_processing_instruction ||  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
228  | 
parse_comment;  | 
| 26551 | 229  | 
|
230  | 
val parse_optional_text =  | 
|
231  | 
Scan.optional (parse_chars >> (single o Text)) [];  | 
|
| 24264 | 232  | 
|
| 26546 | 233  | 
in  | 
234  | 
||
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
235  | 
val parse_comments =  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
236  | 
blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
237  | 
|
| 
40627
 
becf5d5187cc
renamed raw "explode" function to "raw_explode" to emphasize its meaning;
 
wenzelm 
parents: 
40131 
diff
changeset
 | 
238  | 
val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode;  | 
| 26546 | 239  | 
|
| 24264 | 240  | 
fun parse_content xs =  | 
| 26551 | 241  | 
(parse_optional_text @@@  | 
242  | 
(Scan.repeat  | 
|
243  | 
((parse_element >> single ||  | 
|
244  | 
parse_cdata >> (single o Text) ||  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
245  | 
parse_processing_instruction ||  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
246  | 
parse_comment)  | 
| 26551 | 247  | 
@@@ parse_optional_text) >> flat)) xs  | 
| 24264 | 248  | 
|
| 26546 | 249  | 
and parse_element xs =  | 
| 
43949
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
250  | 
($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :--  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
251  | 
(fn (name, _) =>  | 
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43844 
diff
changeset
 | 
252  | 
!! (err (fn () => "Expected > or />"))  | 
| 
43949
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
253  | 
($$ "/" -- $$ ">" >> ignored ||  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
254  | 
$$ ">" |-- parse_content --|  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
255  | 
!! (err (fn () => "Expected </" ^ implode name ^ ">"))  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
256  | 
($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">")))  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
257  | 
>> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs;  | 
| 24264 | 258  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
259  | 
val parse_document =  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
260  | 
(Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
261  | 
|-- parse_element;  | 
| 24264 | 262  | 
|
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
263  | 
fun parse s =  | 
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43844 
diff
changeset
 | 
264  | 
(case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element"))  | 
| 
40627
 
becf5d5187cc
renamed raw "explode" function to "raw_explode" to emphasize its meaning;
 
wenzelm 
parents: 
40131 
diff
changeset
 | 
265  | 
(blanks |-- parse_document --| blanks))) (raw_explode s) of  | 
| 24264 | 266  | 
(x, []) => x  | 
| 48769 | 267  | 
  | (_, ys) => error ("XML parsing error: unprocessed input\n" ^ Symbol.beginning 100 ys));
 | 
| 24264 | 268  | 
|
269  | 
end;  | 
|
| 26546 | 270  | 
|
| 43767 | 271  | 
|
272  | 
||
273  | 
(** XML as data representation language **)  | 
|
274  | 
||
275  | 
exception XML_ATOM of string;  | 
|
276  | 
exception XML_BODY of tree list;  | 
|
277  | 
||
278  | 
||
279  | 
structure Encode =  | 
|
280  | 
struct  | 
|
281  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
282  | 
type 'a A = 'a -> string;  | 
| 43767 | 283  | 
type 'a T = 'a -> body;  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
284  | 
type 'a V = 'a -> string list * body;  | 
| 43767 | 285  | 
|
286  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
287  | 
(* atomic values *)  | 
| 43767 | 288  | 
|
289  | 
fun int_atom i = signed_string_of_int i;  | 
|
290  | 
||
291  | 
fun bool_atom false = "0"  | 
|
292  | 
| bool_atom true = "1";  | 
|
293  | 
||
294  | 
fun unit_atom () = "";  | 
|
295  | 
||
296  | 
||
297  | 
(* structural nodes *)  | 
|
298  | 
||
299  | 
fun node ts = Elem ((":", []), ts);
 | 
|
300  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
301  | 
fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs;  | 
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
302  | 
|
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
303  | 
fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts);  | 
| 43767 | 304  | 
|
305  | 
||
306  | 
(* representation of standard types *)  | 
|
307  | 
||
308  | 
fun properties props = [Elem ((":", props), [])];
 | 
|
309  | 
||
310  | 
fun string "" = []  | 
|
311  | 
| string s = [Text s];  | 
|
312  | 
||
313  | 
val int = string o int_atom;  | 
|
314  | 
||
315  | 
val bool = string o bool_atom;  | 
|
316  | 
||
317  | 
val unit = string o unit_atom;  | 
|
318  | 
||
319  | 
fun pair f g (x, y) = [node (f x), node (g y)];  | 
|
320  | 
||
321  | 
fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)];  | 
|
322  | 
||
323  | 
fun list f xs = map (node o f) xs;  | 
|
324  | 
||
325  | 
fun option _ NONE = []  | 
|
326  | 
| option f (SOME x) = [node (f x)];  | 
|
327  | 
||
| 
47199
 
15ede9f1da3f
more specific notion of partiality (cf. Scala version);
 
wenzelm 
parents: 
46840 
diff
changeset
 | 
328  | 
fun variant fs x =  | 
| 
 
15ede9f1da3f
more specific notion of partiality (cf. Scala version);
 
wenzelm 
parents: 
46840 
diff
changeset
 | 
329  | 
[tagged (the (get_index (fn f => SOME (f x) handle General.Match => NONE) fs))];  | 
| 43767 | 330  | 
|
| 26546 | 331  | 
end;  | 
| 43767 | 332  | 
|
333  | 
||
334  | 
structure Decode =  | 
|
335  | 
struct  | 
|
336  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
337  | 
type 'a A = string -> 'a;  | 
| 43767 | 338  | 
type 'a T = body -> 'a;  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
339  | 
type 'a V = string list * body -> 'a;  | 
| 43767 | 340  | 
|
341  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
342  | 
(* atomic values *)  | 
| 43767 | 343  | 
|
344  | 
fun int_atom s =  | 
|
| 
43797
 
fad7758421bf
more precise integer Markup.properties/XML.attributes: disallow ML-style ~ minus;
 
wenzelm 
parents: 
43791 
diff
changeset
 | 
345  | 
Markup.parse_int s  | 
| 
 
fad7758421bf
more precise integer Markup.properties/XML.attributes: disallow ML-style ~ minus;
 
wenzelm 
parents: 
43791 
diff
changeset
 | 
346  | 
handle Fail _ => raise XML_ATOM s;  | 
| 43767 | 347  | 
|
348  | 
fun bool_atom "0" = false  | 
|
349  | 
| bool_atom "1" = true  | 
|
350  | 
| bool_atom s = raise XML_ATOM s;  | 
|
351  | 
||
352  | 
fun unit_atom "" = ()  | 
|
353  | 
| unit_atom s = raise XML_ATOM s;  | 
|
354  | 
||
355  | 
||
356  | 
(* structural nodes *)  | 
|
357  | 
||
358  | 
fun node (Elem ((":", []), ts)) = ts
 | 
|
359  | 
| node t = raise XML_BODY [t];  | 
|
360  | 
||
| 43783 | 361  | 
fun vector atts =  | 
| 
46839
 
f7232c078fa5
simplified -- plain map_index is sufficient (pointed out by Enrico Tassi);
 
wenzelm 
parents: 
46837 
diff
changeset
 | 
362  | 
map_index (fn (i, (a, x)) => if int_atom a = i then x else raise XML_ATOM a) atts;  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
363  | 
|
| 43844 | 364  | 
fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts))  | 
| 43767 | 365  | 
| tagged t = raise XML_BODY [t];  | 
366  | 
||
367  | 
||
368  | 
(* representation of standard types *)  | 
|
369  | 
||
370  | 
fun properties [Elem ((":", props), [])] = props
 | 
|
371  | 
| properties ts = raise XML_BODY ts;  | 
|
372  | 
||
373  | 
fun string [] = ""  | 
|
374  | 
| string [Text s] = s  | 
|
375  | 
| string ts = raise XML_BODY ts;  | 
|
376  | 
||
377  | 
val int = int_atom o string;  | 
|
378  | 
||
379  | 
val bool = bool_atom o string;  | 
|
380  | 
||
381  | 
val unit = unit_atom o string;  | 
|
382  | 
||
383  | 
fun pair f g [t1, t2] = (f (node t1), g (node t2))  | 
|
384  | 
| pair _ _ ts = raise XML_BODY ts;  | 
|
385  | 
||
386  | 
fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3))  | 
|
387  | 
| triple _ _ _ ts = raise XML_BODY ts;  | 
|
388  | 
||
389  | 
fun list f ts = map (f o node) ts;  | 
|
390  | 
||
391  | 
fun option _ [] = NONE  | 
|
392  | 
| option f [t] = SOME (f (node t))  | 
|
393  | 
| option _ ts = raise XML_BODY ts;  | 
|
394  | 
||
| 43768 | 395  | 
fun variant fs [t] =  | 
396  | 
let  | 
|
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
397  | 
val (tag, (xs, ts)) = tagged t;  | 
| 43768 | 398  | 
val f = nth fs tag handle General.Subscript => raise XML_BODY [t];  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
399  | 
in f (xs, ts) end  | 
| 43767 | 400  | 
| variant _ ts = raise XML_BODY ts;  | 
401  | 
||
402  | 
end;  | 
|
403  | 
||
404  | 
end;  |