| author | wenzelm | 
| Sat, 04 Sep 2021 20:01:43 +0200 | |
| changeset 74231 | b3c65c984210 | 
| parent 73570 | e21aef453cd4 | 
| child 74785 | 4671d29feb00 | 
| permissions | -rw-r--r-- | 
| 44698 | 1  | 
(* Title: Pure/PIDE/xml.ML  | 
2  | 
Author: David Aspinall  | 
|
3  | 
Author: Stefan Berghofer  | 
|
4  | 
Author: Makarius  | 
|
| 24264 | 5  | 
|
| 46840 | 6  | 
Untyped XML trees and representation of ML values.  | 
| 24264 | 7  | 
*)  | 
8  | 
||
| 43767 | 9  | 
signature XML_DATA_OPS =  | 
10  | 
sig  | 
|
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
11  | 
type 'a A  | 
| 43767 | 12  | 
type 'a T  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
13  | 
type 'a V  | 
| 70828 | 14  | 
type 'a P  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
15  | 
val int_atom: int A  | 
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
16  | 
val bool_atom: bool A  | 
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
17  | 
val unit_atom: unit A  | 
| 43767 | 18  | 
val properties: Properties.T T  | 
19  | 
val string: string T  | 
|
20  | 
val int: int T  | 
|
21  | 
val bool: bool T  | 
|
22  | 
val unit: unit T  | 
|
23  | 
  val pair: 'a T -> 'b T -> ('a * 'b) T
 | 
|
24  | 
  val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
 | 
|
25  | 
val list: 'a T -> 'a list T  | 
|
26  | 
val option: 'a T -> 'a option T  | 
|
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
27  | 
val variant: 'a V list -> 'a T  | 
| 43767 | 28  | 
end;  | 
29  | 
||
| 24264 | 30  | 
signature XML =  | 
31  | 
sig  | 
|
| 
46837
 
5bdd68f380b3
clarified XML signature (again) -- coincide with basic Markup without explicit dependency;
 
wenzelm 
parents: 
45155 
diff
changeset
 | 
32  | 
type attributes = (string * string) list  | 
| 24264 | 33  | 
datatype tree =  | 
| 
46837
 
5bdd68f380b3
clarified XML signature (again) -- coincide with basic Markup without explicit dependency;
 
wenzelm 
parents: 
45155 
diff
changeset
 | 
34  | 
Elem of (string * attributes) * tree list  | 
| 24264 | 35  | 
| Text of string  | 
| 
38266
 
492d377ecfe2
type XML.body as basic data representation language;
 
wenzelm 
parents: 
38228 
diff
changeset
 | 
36  | 
type body = tree list  | 
| 
70991
 
f9f7c34b7dd4
more scalable protocol_message: use XML.body directly (Output.output hook is not required);
 
wenzelm 
parents: 
70828 
diff
changeset
 | 
37  | 
val blob: string list -> body  | 
| 70994 | 38  | 
val is_empty: tree -> bool  | 
39  | 
val is_empty_body: body -> bool  | 
|
| 69234 | 40  | 
val xml_elemN: string  | 
41  | 
val xml_nameN: string  | 
|
42  | 
val xml_bodyN: string  | 
|
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
43  | 
val wrap_elem: ((string * attributes) * tree list) * tree list -> tree  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
44  | 
val unwrap_elem: tree -> (((string * attributes) * tree list) * tree list) option  | 
| 26546 | 45  | 
val add_content: tree -> Buffer.T -> Buffer.T  | 
| 
39555
 
ccb223a4d49c
added XML.content_of convenience -- cover XML.body, which is the general situation;
 
wenzelm 
parents: 
38474 
diff
changeset
 | 
46  | 
val content_of: body -> string  | 
| 
56059
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
47  | 
val trim_blanks: body -> body  | 
| 26546 | 48  | 
val header: string  | 
49  | 
val text: string -> string  | 
|
50  | 
val element: string -> attributes -> string list -> string  | 
|
| 
69345
 
6bd63c94cf62
tuned signature (see also src/Tools/Haskell/Markup.hs);
 
wenzelm 
parents: 
69234 
diff
changeset
 | 
51  | 
val output_markup: Markup.T -> Markup.output  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
52  | 
val string_of: tree -> string  | 
| 43791 | 53  | 
val pretty: int -> tree -> Pretty.T  | 
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
54  | 
val parse_comments: string list -> unit * string list  | 
| 24264 | 55  | 
val parse_string : string -> string option  | 
| 26546 | 56  | 
val parse_element: string list -> tree * string list  | 
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
57  | 
val parse_document: string list -> tree * string list  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
58  | 
val parse: string -> tree  | 
| 43767 | 59  | 
exception XML_ATOM of string  | 
60  | 
exception XML_BODY of body  | 
|
| 65333 | 61  | 
structure Encode:  | 
62  | 
sig  | 
|
63  | 
include XML_DATA_OPS  | 
|
64  | 
val tree: tree T  | 
|
65  | 
end  | 
|
66  | 
structure Decode:  | 
|
67  | 
sig  | 
|
68  | 
include XML_DATA_OPS  | 
|
69  | 
val tree: tree T  | 
|
70  | 
end  | 
|
| 24264 | 71  | 
end;  | 
72  | 
||
73  | 
structure XML: XML =  | 
|
74  | 
struct  | 
|
75  | 
||
| 26546 | 76  | 
(** XML trees **)  | 
77  | 
||
| 
70991
 
f9f7c34b7dd4
more scalable protocol_message: use XML.body directly (Output.output hook is not required);
 
wenzelm 
parents: 
70828 
diff
changeset
 | 
78  | 
open Output_Primitives.XML;  | 
| 26546 | 79  | 
|
| 
70991
 
f9f7c34b7dd4
more scalable protocol_message: use XML.body directly (Output.output hook is not required);
 
wenzelm 
parents: 
70828 
diff
changeset
 | 
80  | 
val blob = map Text;  | 
| 
38266
 
492d377ecfe2
type XML.body as basic data representation language;
 
wenzelm 
parents: 
38228 
diff
changeset
 | 
81  | 
|
| 70994 | 82  | 
fun is_empty (Text "") = true  | 
83  | 
| is_empty _ = false;  | 
|
84  | 
||
85  | 
val is_empty_body = forall is_empty;  | 
|
86  | 
||
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
87  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
88  | 
(* wrapped elements *)  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
89  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
90  | 
val xml_elemN = "xml_elem";  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
91  | 
val xml_nameN = "xml_name";  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
92  | 
val xml_bodyN = "xml_body";  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
93  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
94  | 
fun wrap_elem (((a, atts), body1), body2) =  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
95  | 
Elem ((xml_elemN, (xml_nameN, a) :: atts), Elem ((xml_bodyN, []), body1) :: body2);  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
96  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
97  | 
fun unwrap_elem (Elem ((name, (n, a) :: atts), Elem ((name', atts'), body1) :: body2)) =  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
98  | 
if name = xml_elemN andalso n = xml_nameN andalso name' = xml_bodyN andalso null atts'  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
99  | 
then SOME (((a, atts), body1), body2) else NONE  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
100  | 
| unwrap_elem _ = NONE;  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
101  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
102  | 
|
| 69224 | 103  | 
(* text content *)  | 
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
104  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
105  | 
fun add_content tree =  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
106  | 
(case unwrap_elem tree of  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
107  | 
SOME (_, ts) => fold add_content ts  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
108  | 
| NONE =>  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
109  | 
(case tree of  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
110  | 
Elem (_, ts) => fold add_content ts  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49599 
diff
changeset
 | 
111  | 
| Text s => Buffer.add s));  | 
| 26546 | 112  | 
|
| 74231 | 113  | 
val content_of = Buffer.build_content o fold add_content;  | 
| 
39555
 
ccb223a4d49c
added XML.content_of convenience -- cover XML.body, which is the general situation;
 
wenzelm 
parents: 
38474 
diff
changeset
 | 
114  | 
|
| 26546 | 115  | 
|
| 
56059
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
116  | 
(* trim blanks *)  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
117  | 
|
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
118  | 
fun trim_blanks trees =  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
119  | 
trees |> maps  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
120  | 
(fn Elem (markup, body) => [Elem (markup, trim_blanks body)]  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
121  | 
| Text s =>  | 
| 61707 | 122  | 
let val s' = s |> raw_explode |> trim Symbol.is_blank |> implode;  | 
| 
56059
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
123  | 
in if s' = "" then [] else [Text s'] end);  | 
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
124  | 
|
| 
 
2390391584c2
some document antiquotations for Isabelle/jEdit elements;
 
wenzelm 
parents: 
49650 
diff
changeset
 | 
125  | 
|
| 24264 | 126  | 
|
| 26525 | 127  | 
(** string representation **)  | 
128  | 
||
| 69806 | 129  | 
val header = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";  | 
| 24264 | 130  | 
|
131  | 
||
| 26546 | 132  | 
(* escaped text *)  | 
| 24264 | 133  | 
|
134  | 
fun decode "<" = "<"  | 
|
135  | 
| decode ">" = ">"  | 
|
136  | 
| decode "&" = "&"  | 
|
137  | 
| decode "'" = "'"  | 
|
138  | 
| decode """ = "\""  | 
|
139  | 
| decode c = c;  | 
|
140  | 
||
141  | 
fun encode "<" = "<"  | 
|
142  | 
| encode ">" = ">"  | 
|
143  | 
| encode "&" = "&"  | 
|
144  | 
| encode "'" = "'"  | 
|
145  | 
| encode "\"" = """  | 
|
146  | 
| encode c = c;  | 
|
147  | 
||
| 25838 | 148  | 
val text = translate_string encode;  | 
| 24264 | 149  | 
|
150  | 
||
151  | 
(* elements *)  | 
|
152  | 
||
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
153  | 
fun elem name atts =  | 
| 26551 | 154  | 
space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);  | 
| 24264 | 155  | 
|
| 26525 | 156  | 
fun element name atts body =  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
157  | 
let val b = implode body in  | 
| 
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
158  | 
if b = "" then enclose "<" "/>" (elem name atts)  | 
| 
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
159  | 
else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name  | 
| 24264 | 160  | 
end;  | 
161  | 
||
| 27884 | 162  | 
fun output_markup (markup as (name, atts)) =  | 
| 
38474
 
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
 
wenzelm 
parents: 
38266 
diff
changeset
 | 
163  | 
if Markup.is_empty markup then Markup.no_output  | 
| 27884 | 164  | 
else (enclose "<" ">" (elem name atts), enclose "</" ">" name);  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
165  | 
|
| 24264 | 166  | 
|
| 74231 | 167  | 
(* output content *)  | 
| 24264 | 168  | 
|
| 74231 | 169  | 
fun content_depth depth =  | 
| 24264 | 170  | 
let  | 
| 43791 | 171  | 
fun traverse _ (Elem ((name, atts), [])) =  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
172  | 
Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"  | 
| 43791 | 173  | 
| traverse d (Elem ((name, atts), ts)) =  | 
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
174  | 
Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>  | 
| 43791 | 175  | 
traverse_body d ts #>  | 
| 26525 | 176  | 
Buffer.add "</" #> Buffer.add name #> Buffer.add ">"  | 
| 43791 | 177  | 
| traverse _ (Text s) = Buffer.add (text s)  | 
178  | 
and traverse_body 0 _ = Buffer.add "..."  | 
|
179  | 
| traverse_body d ts = fold (traverse (d - 1)) ts;  | 
|
| 74231 | 180  | 
in Buffer.build_content o traverse depth end;  | 
| 24264 | 181  | 
|
| 74231 | 182  | 
val string_of = content_depth ~1;  | 
| 43791 | 183  | 
|
| 74231 | 184  | 
fun pretty depth tree = Pretty.str (content_depth (Int.max (0, depth)) tree);  | 
| 25838 | 185  | 
|
| 
62819
 
d3ff367a16a0
careful export of type-dependent functions, without losing their special status;
 
wenzelm 
parents: 
62663 
diff
changeset
 | 
186  | 
val _ = ML_system_pp (fn depth => fn _ => Pretty.to_polyml o pretty (FixedInt.toInt depth));  | 
| 62663 | 187  | 
|
| 24264 | 188  | 
|
189  | 
||
| 44698 | 190  | 
(** XML parsing **)  | 
| 26546 | 191  | 
|
192  | 
local  | 
|
| 24264 | 193  | 
|
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43844 
diff
changeset
 | 
194  | 
fun err msg (xs, _) =  | 
| 
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43844 
diff
changeset
 | 
195  | 
fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);  | 
| 24264 | 196  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
197  | 
fun ignored _ = [];  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
198  | 
|
| 
45155
 
3216d65d8f34
slightly more standard-conformant XML parsing (see also 94033767ef9b);
 
wenzelm 
parents: 
44809 
diff
changeset
 | 
199  | 
fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_";  | 
| 
 
3216d65d8f34
slightly more standard-conformant XML parsing (see also 94033767ef9b);
 
wenzelm 
parents: 
44809 
diff
changeset
 | 
200  | 
fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = ".";  | 
| 
 
3216d65d8f34
slightly more standard-conformant XML parsing (see also 94033767ef9b);
 
wenzelm 
parents: 
44809 
diff
changeset
 | 
201  | 
val parse_name = Scan.one name_start_char ::: Scan.many name_char;  | 
| 
 
3216d65d8f34
slightly more standard-conformant XML parsing (see also 94033767ef9b);
 
wenzelm 
parents: 
44809 
diff
changeset
 | 
202  | 
|
| 26551 | 203  | 
val blanks = Scan.many Symbol.is_blank;  | 
| 
45155
 
3216d65d8f34
slightly more standard-conformant XML parsing (see also 94033767ef9b);
 
wenzelm 
parents: 
44809 
diff
changeset
 | 
204  | 
val special = $$ "&" ^^ (parse_name >> implode) ^^ $$ ";" >> decode;  | 
| 58854 | 205  | 
val regular = Scan.one Symbol.not_eof;  | 
206  | 
fun regular_except x = Scan.one (fn c => Symbol.not_eof c andalso c <> x);  | 
|
| 24264 | 207  | 
|
| 26551 | 208  | 
val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;  | 
| 24264 | 209  | 
|
| 26551 | 210  | 
val parse_cdata =  | 
211  | 
Scan.this_string "<![CDATA[" |--  | 
|
212  | 
(Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|  | 
|
213  | 
Scan.this_string "]]>";  | 
|
| 24264 | 214  | 
|
215  | 
val parse_att =  | 
|
| 
45155
 
3216d65d8f34
slightly more standard-conformant XML parsing (see also 94033767ef9b);
 
wenzelm 
parents: 
44809 
diff
changeset
 | 
216  | 
((parse_name >> implode) --| (blanks -- $$ "=" -- blanks)) --  | 
| 26551 | 217  | 
(($$ "\"" || $$ "'") :|-- (fn s =>  | 
218  | 
(Scan.repeat (special || regular_except s) >> implode) --| $$ s));  | 
|
| 24264 | 219  | 
|
| 26551 | 220  | 
val parse_comment =  | 
221  | 
Scan.this_string "<!--" --  | 
|
222  | 
Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
223  | 
Scan.this_string "-->" >> ignored;  | 
| 24264 | 224  | 
|
| 26551 | 225  | 
val parse_processing_instruction =  | 
226  | 
Scan.this_string "<?" --  | 
|
227  | 
Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
228  | 
Scan.this_string "?>" >> ignored;  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
229  | 
|
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
230  | 
val parse_doctype =  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
231  | 
Scan.this_string "<!DOCTYPE" --  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
232  | 
Scan.repeat (Scan.unless ($$ ">") regular) --  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
233  | 
$$ ">" >> ignored;  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
234  | 
|
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
235  | 
val parse_misc =  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
236  | 
Scan.one Symbol.is_blank >> ignored ||  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
237  | 
parse_processing_instruction ||  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
238  | 
parse_comment;  | 
| 26551 | 239  | 
|
240  | 
val parse_optional_text =  | 
|
241  | 
Scan.optional (parse_chars >> (single o Text)) [];  | 
|
| 24264 | 242  | 
|
| 26546 | 243  | 
in  | 
244  | 
||
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
245  | 
val parse_comments =  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
246  | 
blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
247  | 
|
| 
40627
 
becf5d5187cc
renamed raw "explode" function to "raw_explode" to emphasize its meaning;
 
wenzelm 
parents: 
40131 
diff
changeset
 | 
248  | 
val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode;  | 
| 26546 | 249  | 
|
| 24264 | 250  | 
fun parse_content xs =  | 
| 26551 | 251  | 
(parse_optional_text @@@  | 
| 61476 | 252  | 
Scan.repeats  | 
| 26551 | 253  | 
((parse_element >> single ||  | 
254  | 
parse_cdata >> (single o Text) ||  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
255  | 
parse_processing_instruction ||  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
256  | 
parse_comment)  | 
| 61476 | 257  | 
@@@ parse_optional_text)) xs  | 
| 24264 | 258  | 
|
| 26546 | 259  | 
and parse_element xs =  | 
| 
43949
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
260  | 
($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :--  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
261  | 
(fn (name, _) =>  | 
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43844 
diff
changeset
 | 
262  | 
!! (err (fn () => "Expected > or />"))  | 
| 
43949
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
263  | 
($$ "/" -- $$ ">" >> ignored ||  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
264  | 
$$ ">" |-- parse_content --|  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
265  | 
!! (err (fn () => "Expected </" ^ implode name ^ ">"))  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
266  | 
($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">")))  | 
| 
 
94033767ef9b
more precise parse_name according to XML standard;
 
wenzelm 
parents: 
43947 
diff
changeset
 | 
267  | 
>> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs;  | 
| 24264 | 268  | 
|
| 
26984
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
269  | 
val parse_document =  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
270  | 
(Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)  | 
| 
 
d0e098e206f3
added parse_document (optional unchecked header material);
 
wenzelm 
parents: 
26554 
diff
changeset
 | 
271  | 
|-- parse_element;  | 
| 24264 | 272  | 
|
| 
26539
 
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
 
wenzelm 
parents: 
26525 
diff
changeset
 | 
273  | 
fun parse s =  | 
| 
43947
 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 
wenzelm 
parents: 
43844 
diff
changeset
 | 
274  | 
(case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element"))  | 
| 
40627
 
becf5d5187cc
renamed raw "explode" function to "raw_explode" to emphasize its meaning;
 
wenzelm 
parents: 
40131 
diff
changeset
 | 
275  | 
(blanks |-- parse_document --| blanks))) (raw_explode s) of  | 
| 24264 | 276  | 
(x, []) => x  | 
| 48769 | 277  | 
  | (_, ys) => error ("XML parsing error: unprocessed input\n" ^ Symbol.beginning 100 ys));
 | 
| 24264 | 278  | 
|
279  | 
end;  | 
|
| 26546 | 280  | 
|
| 43767 | 281  | 
|
282  | 
||
283  | 
(** XML as data representation language **)  | 
|
284  | 
||
285  | 
exception XML_ATOM of string;  | 
|
286  | 
exception XML_BODY of tree list;  | 
|
287  | 
||
288  | 
||
289  | 
structure Encode =  | 
|
290  | 
struct  | 
|
291  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
292  | 
type 'a A = 'a -> string;  | 
| 43767 | 293  | 
type 'a T = 'a -> body;  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
294  | 
type 'a V = 'a -> string list * body;  | 
| 70828 | 295  | 
type 'a P = 'a -> string list;  | 
| 43767 | 296  | 
|
297  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
298  | 
(* atomic values *)  | 
| 43767 | 299  | 
|
| 69574 | 300  | 
fun int_atom i = Value.print_int i;  | 
| 43767 | 301  | 
|
302  | 
fun bool_atom false = "0"  | 
|
303  | 
| bool_atom true = "1";  | 
|
304  | 
||
305  | 
fun unit_atom () = "";  | 
|
306  | 
||
307  | 
||
308  | 
(* structural nodes *)  | 
|
309  | 
||
310  | 
fun node ts = Elem ((":", []), ts);
 | 
|
311  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
312  | 
fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs;  | 
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
313  | 
|
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
314  | 
fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts);  | 
| 43767 | 315  | 
|
316  | 
||
317  | 
(* representation of standard types *)  | 
|
318  | 
||
| 65333 | 319  | 
fun tree (t: tree) = [t];  | 
320  | 
||
| 43767 | 321  | 
fun properties props = [Elem ((":", props), [])];
 | 
322  | 
||
323  | 
fun string "" = []  | 
|
324  | 
| string s = [Text s];  | 
|
325  | 
||
326  | 
val int = string o int_atom;  | 
|
327  | 
||
328  | 
val bool = string o bool_atom;  | 
|
329  | 
||
330  | 
val unit = string o unit_atom;  | 
|
331  | 
||
332  | 
fun pair f g (x, y) = [node (f x), node (g y)];  | 
|
333  | 
||
334  | 
fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)];  | 
|
335  | 
||
336  | 
fun list f xs = map (node o f) xs;  | 
|
337  | 
||
338  | 
fun option _ NONE = []  | 
|
339  | 
| option f (SOME x) = [node (f x)];  | 
|
340  | 
||
| 
47199
 
15ede9f1da3f
more specific notion of partiality (cf. Scala version);
 
wenzelm 
parents: 
46840 
diff
changeset
 | 
341  | 
fun variant fs x =  | 
| 
 
15ede9f1da3f
more specific notion of partiality (cf. Scala version);
 
wenzelm 
parents: 
46840 
diff
changeset
 | 
342  | 
[tagged (the (get_index (fn f => SOME (f x) handle General.Match => NONE) fs))];  | 
| 43767 | 343  | 
|
| 26546 | 344  | 
end;  | 
| 43767 | 345  | 
|
346  | 
||
347  | 
structure Decode =  | 
|
348  | 
struct  | 
|
349  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
350  | 
type 'a A = string -> 'a;  | 
| 43767 | 351  | 
type 'a T = body -> 'a;  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
352  | 
type 'a V = string list * body -> 'a;  | 
| 70828 | 353  | 
type 'a P = string list -> 'a;  | 
| 43767 | 354  | 
|
355  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
356  | 
(* atomic values *)  | 
| 43767 | 357  | 
|
358  | 
fun int_atom s =  | 
|
| 63806 | 359  | 
Value.parse_int s  | 
| 
43797
 
fad7758421bf
more precise integer Markup.properties/XML.attributes: disallow ML-style ~ minus;
 
wenzelm 
parents: 
43791 
diff
changeset
 | 
360  | 
handle Fail _ => raise XML_ATOM s;  | 
| 43767 | 361  | 
|
362  | 
fun bool_atom "0" = false  | 
|
363  | 
| bool_atom "1" = true  | 
|
364  | 
| bool_atom s = raise XML_ATOM s;  | 
|
365  | 
||
366  | 
fun unit_atom "" = ()  | 
|
367  | 
| unit_atom s = raise XML_ATOM s;  | 
|
368  | 
||
369  | 
||
370  | 
(* structural nodes *)  | 
|
371  | 
||
372  | 
fun node (Elem ((":", []), ts)) = ts
 | 
|
373  | 
| node t = raise XML_BODY [t];  | 
|
374  | 
||
| 43783 | 375  | 
fun vector atts =  | 
| 
46839
 
f7232c078fa5
simplified -- plain map_index is sufficient (pointed out by Enrico Tassi);
 
wenzelm 
parents: 
46837 
diff
changeset
 | 
376  | 
map_index (fn (i, (a, x)) => if int_atom a = i then x else raise XML_ATOM a) atts;  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
377  | 
|
| 43844 | 378  | 
fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts))  | 
| 43767 | 379  | 
| tagged t = raise XML_BODY [t];  | 
380  | 
||
381  | 
||
382  | 
(* representation of standard types *)  | 
|
383  | 
||
| 65333 | 384  | 
fun tree [t] = t  | 
385  | 
| tree ts = raise XML_BODY ts;  | 
|
386  | 
||
| 43767 | 387  | 
fun properties [Elem ((":", props), [])] = props
 | 
388  | 
| properties ts = raise XML_BODY ts;  | 
|
389  | 
||
390  | 
fun string [] = ""  | 
|
391  | 
| string [Text s] = s  | 
|
392  | 
| string ts = raise XML_BODY ts;  | 
|
393  | 
||
394  | 
val int = int_atom o string;  | 
|
395  | 
||
396  | 
val bool = bool_atom o string;  | 
|
397  | 
||
398  | 
val unit = unit_atom o string;  | 
|
399  | 
||
400  | 
fun pair f g [t1, t2] = (f (node t1), g (node t2))  | 
|
401  | 
| pair _ _ ts = raise XML_BODY ts;  | 
|
402  | 
||
403  | 
fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3))  | 
|
404  | 
| triple _ _ _ ts = raise XML_BODY ts;  | 
|
405  | 
||
406  | 
fun list f ts = map (f o node) ts;  | 
|
407  | 
||
408  | 
fun option _ [] = NONE  | 
|
409  | 
| option f [t] = SOME (f (node t))  | 
|
410  | 
| option _ ts = raise XML_BODY ts;  | 
|
411  | 
||
| 43768 | 412  | 
fun variant fs [t] =  | 
413  | 
let  | 
|
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
414  | 
val (tag, (xs, ts)) = tagged t;  | 
| 43768 | 415  | 
val f = nth fs tag handle General.Subscript => raise XML_BODY [t];  | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
416  | 
in f (xs, ts) end  | 
| 43767 | 417  | 
| variant _ ts = raise XML_BODY ts;  | 
418  | 
||
419  | 
end;  | 
|
420  | 
||
421  | 
end;  |