author | wenzelm |
Sat, 31 Aug 2024 16:00:16 +0200 | |
changeset 80793 | 90f6e541e926 |
parent 80461 | 38d020af64aa |
child 80801 | 090adcdceaae |
permissions | -rw-r--r-- |
44698 | 1 |
(*  Title:      Pure/PIDE/xml.ML
    Author:     David Aspinall
    Author:     Stefan Berghofer
    Author:     Makarius

Untyped XML trees and representation of ML values.
*)
8 |
||
43767 | 9 |
(*Common interface of the XML data encoders and decoders.  The type
  constructors are left abstract here; structures XML.Encode and XML.Decode
  instantiate them in opposite directions (value -> XML vs. XML -> value).*)
signature XML_DATA_OPS =
sig
  (*operations on atomic values, full bodies, variant cases, property-like string lists*)
  type 'a A
  type 'a T
  type 'a V
  type 'a P

  (*atomic values*)
  val int_atom: int A
  val bool_atom: bool A
  val unit_atom: unit A

  (*XML passed through as-is*)
  val self: Output_Primitives.XML.body T
  val tree: Output_Primitives.XML.tree T

  (*standard types*)
  val properties: Properties.T T
  val string: string T
  val int: int T
  val bool: bool T
  val unit: unit T
  val pair: 'a T -> 'b T -> ('a * 'b) T
  val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
  val list: 'a T -> 'a list T
  val option: 'a T -> 'a option T

  (*tagged alternatives: the position within the list serves as tag*)
  val variant: 'a V list -> 'a T
end;
31 |
||
24264 | 32 |
(*Untyped XML trees: construction, text content, string output, parsing,
  and encoding/decoding of ML values as XML bodies.*)
signature XML =
sig
  (*trees*)
  type attributes = (string * string) list
  datatype tree =
      Elem of (string * attributes) * tree list
    | Text of string
  type body = tree list
  val blob: string list -> body
  val is_empty: tree -> bool
  val is_empty_body: body -> bool
  val string: string -> body
  val enclose: string -> string -> body -> body

  (*wrapped elements*)
  val xml_elemN: string
  val xml_nameN: string
  val xml_bodyN: string
  val wrap_elem: ((string * attributes) * tree list) * tree list -> tree
  val unwrap_elem: tree -> (((string * attributes) * tree list) * tree list) option

  (*text content*)
  val add_content: tree -> Buffer.T -> Buffer.T
  val content_of: body -> string
  val trim_blanks: body -> body

  (*string representation*)
  val header: string
  val text: string -> string
  val element: string -> attributes -> string list -> string
  val output_markup: Markup.T -> Markup.output
  val string_of: tree -> string
  val pretty: int -> tree -> Pretty.T

  (*parsing*)
  val parse_comments: string list -> unit * string list
  val parse_string: string -> string option
  val parse_element: string list -> tree * string list
  val parse_document: string list -> tree * string list
  val parse: string -> tree

  (*XML as data representation language*)
  exception XML_ATOM of string
  exception XML_BODY of body
  structure Encode: XML_DATA_OPS
  structure Decode: XML_DATA_OPS
end;
68 |
||
69 |
structure XML: XML =
struct

(** XML trees **)

open Output_Primitives.XML;

(*Turn a list of strings into Text nodes, preserving order.  Adjacent strings
  are concatenated into one node as long as the accumulated size stays within
  limit; a string whose own size is >= limit becomes a node of its own.
  Empty strings and empty buffers produce no node.*)
val blob =
  let
    val limit = 8000;

    val buffer = fn "" => I | s => cons s;
    val output1 = fn "" => I | s => cons (Text s);
    val output = fn [] => I | ss => cons (Text (implode ss));

    (*input is traversed in reverse, so consing onto buf/result restores the original order;
      m is the accumulated size of buf*)
    fun make [] _ buf result = output buf result
      | make (x :: xs) m buf result =
          let val l = size x in
            if l + m < limit then make xs (l + m) (buffer x buf) result
            else if l + m = limit then make xs 0 [] (output (buffer x buf) result)
            else if l >= limit then make xs 0 [] (output1 x (output buf result))
            else make xs l [x] (output buf result)
          end;
  in fn xs => make (rev xs) 0 [] [] end

(*only the empty Text node counts as empty*)
fun is_empty (Text "") = true
  | is_empty _ = false;

val is_empty_body = forall is_empty;

(*body consisting of a single Text node; the empty string yields the empty body*)
fun string "" = []
  | string s = [Text s];

(*surround body by text bg and en; empty strings contribute nothing*)
fun enclose bg en body = string bg @ body @ string en;


(* wrapped elements *)

val xml_elemN = "xml_elem";
val xml_nameN = "xml_name";
val xml_bodyN = "xml_body";

(*Represent element (a, atts) with body1 as an xml_elem node: the name a is
  stored as leading attribute xml_nameN, body1 as leading xml_body child,
  followed by body2.*)
fun wrap_elem (((a, atts), body1), body2) =
  Elem ((xml_elemN, (xml_nameN, a) :: atts), Elem ((xml_bodyN, []), body1) :: body2);

(*inverse of wrap_elem; NONE for any tree that lacks the exact wrapped shape*)
fun unwrap_elem (Elem ((name, (n, a) :: atts), Elem ((name', atts'), body1) :: body2)) =
      if name = xml_elemN andalso n = xml_nameN andalso name' = xml_bodyN andalso null atts'
      then SOME (((a, atts), body1), body2) else NONE
  | unwrap_elem _ = NONE;


(* text content *)

(*Add the text of all Text nodes below tree to the buffer.  For a wrapped
  element only the trailing body (body2 of wrap_elem) is visited, not the
  wrapped xml_body part.*)
fun add_content tree =
  (case unwrap_elem tree of
    SOME (_, ts) => fold add_content ts
  | NONE =>
      (case tree of
        Elem (_, ts) => fold add_content ts
      | Text s => Buffer.add s));

val content_of = Buffer.build_content o fold add_content;


(* trim blanks *)

(*apply trim Symbol.is_blank to every Text node, recursively; Text nodes that
  become empty disappear, because string "" is the empty body*)
fun trim_blanks trees =
  trees |> maps
    (fn Elem (markup, body) => [Elem (markup, trim_blanks body)]
      | Text s => s |> raw_explode |> trim Symbol.is_blank |> implode |> string);



(** string representation **)

val header = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";


(* escaped text *)

(*predefined XML entity reference to character; the parser below passes the
  complete reference including & and ; -- anything else is returned unchanged*)
fun decode "&lt;" = "<"
  | decode "&gt;" = ">"
  | decode "&amp;" = "&"
  | decode "&apos;" = "'"
  | decode "&quot;" = "\""
  | decode c = c;

(*character to predefined XML entity reference; other characters are unchanged*)
fun encode "<" = "&lt;"
  | encode ">" = "&gt;"
  | encode "&" = "&amp;"
  | encode "'" = "&apos;"
  | encode "\"" = "&quot;"
  | encode c = c;

val text = translate_string encode;


(* elements *)

(*inner part of a start tag: name followed by attributes a="x", with escaped values*)
fun elem name atts =
  space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);

(*full element as string; body consists of already rendered strings, an empty
  body yields the self-closing form*)
fun element name atts body =
  let val b = implode body in
    if b = "" then Library.enclose "<" "/>" (elem name atts)
    else Library.enclose "<" ">" (elem name atts) ^ b ^ Library.enclose "</" ">" name
  end;

(*start and end tag for markup; empty markup produces no output*)
fun output_markup (markup as (name, atts)) =
  if Markup.is_empty markup then Markup.no_output
  else (Library.enclose "<" ">" (elem name atts), Library.enclose "</" ">" name);


(* output content *)

(*Render a tree as string.  The depth counter is decremented for each level
  of nesting; a non-empty element body reached with counter 0 is shown as
  "...".  A negative depth never reaches 0, so nothing is elided.*)
fun content_depth depth =
  let
    fun traverse _ (Elem ((name, atts), [])) =
          Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"
      | traverse d (Elem ((name, atts), ts)) =
          Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>
          traverse_body d ts #>
          Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
      | traverse _ (Text s) = Buffer.add (text s)
    and traverse_body 0 _ = Buffer.add "..."
      | traverse_body d ts = fold (traverse (d - 1)) ts;
  in Buffer.build_content o traverse depth end;

(*complete output, without depth limit*)
val string_of = content_depth ~1;

(*depth-limited output; negative depth is treated as 0*)
fun pretty depth tree = Pretty.str (content_depth (Int.max (0, depth)) tree);

val _ = ML_system_pp (fn depth => fn _ => Pretty.to_polyml o pretty (FixedInt.toInt depth));



(** XML parsing **)

local

(*delayed error message, including the beginning of the remaining input*)
fun err msg (xs, _) =
  fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);

fun ignored _ = [];

(*names: ASCII letters, ":" and "_" at the start; additionally digits, "-" and "." afterwards*)
fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_";
fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = ".";
val parse_name = Scan.one name_start_char ::: Scan.many name_char;

val blanks = Scan.many Symbol.is_blank;
(*entity reference &name; -- handed to decode in its complete form*)
val special = $$ "&" ^^ (parse_name >> implode) ^^ $$ ";" >> decode;
val regular = Scan.one Symbol.not_eof;
fun regular_except x = Scan.one (fn c => Symbol.not_eof c andalso c <> x);

(*non-empty character data up to the next "<"*)
val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;

(*CDATA section: content is taken literally, without entity decoding*)
val parse_cdata =
  Scan.this_string "<![CDATA[" |--
  (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|
  Scan.this_string "]]>";

(*attribute name = value, the value delimited by matching double or single quotes*)
val parse_att =
  ((parse_name >> implode) --| (blanks -- $$ "=" -- blanks)) --
  (($$ "\"" || $$ "'") :|-- (fn s =>
    (Scan.repeat (special || regular_except s) >> implode) --| $$ s));

(*comments, processing instructions and doctype are recognized and discarded*)
val parse_comment =
  Scan.this_string "<!--" --
  Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
  Scan.this_string "-->" >> ignored;

val parse_processing_instruction =
  Scan.this_string "<?" --
  Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
  Scan.this_string "?>" >> ignored;

val parse_doctype =
  Scan.this_string "<!DOCTYPE" --
  Scan.repeat (Scan.unless ($$ ">") regular) --
  $$ ">" >> ignored;

val parse_misc =
  Scan.one Symbol.is_blank >> ignored ||
  parse_processing_instruction ||
  parse_comment;

val parse_optional_text =
  Scan.optional (parse_chars >> (single o Text)) [];

in

(*skip blanks and any number of comments*)
val parse_comments =
  blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();

(*decode character data given as plain string*)
val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode;

(*element content: text, nested elements and CDATA; processing instructions
  and comments are dropped*)
fun parse_content xs =
  (parse_optional_text @@@
    Scan.repeats
      ((parse_element >> single ||
        parse_cdata >> (single o Text) ||
        parse_processing_instruction ||
        parse_comment)
      @@@ parse_optional_text)) xs

(*single element: either self-closing, or with content and an end tag that
  must repeat the start tag name*)
and parse_element xs =
  ($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :--
    (fn (name, _) =>
      !! (err (fn () => "Expected > or />"))
       ($$ "/" -- $$ ">" >> ignored ||
        $$ ">" |-- parse_content --|
          !! (err (fn () => "Expected </" ^ implode name ^ ">"))
              ($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">")))
    >> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs;

(*document: optional misc items and doctype, followed by the root element*)
val parse_document =
  (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)
  |-- parse_element;

(*parse a complete document from a string; leftover input is an error*)
fun parse s =
  (case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element"))
      (blanks |-- parse_document --| blanks))) (raw_explode s) of
    (x, []) => x
  | (_, ys) => error ("XML parsing error: unprocessed input\n" ^ Symbol.beginning 100 ys));

end;



(** XML as data representation language **)

(*raised by Decode for an ill-formed atom or an unexpected body, respectively*)
exception XML_ATOM of string;
exception XML_BODY of tree list;


structure Encode =
struct

type 'a A = 'a -> string;
type 'a T = 'a -> body;
type 'a V = 'a -> string list * body;
type 'a P = 'a -> string list;


(* atomic values *)

fun int_atom i = Value.print_int i;

fun bool_atom false = "0"
  | bool_atom true = "1";

fun unit_atom () = "";


(* structural nodes *)

(*anonymous node with name ":" and no attributes*)
fun node ts = Elem ((":", []), ts);

(*list of atoms as attributes, named by their position 0, 1, ...*)
fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs;

(*element named by the tag number, with atoms as attributes*)
fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts);


(* representation of standard types *)

fun self (x: body) = x;

fun tree (t: tree) = [t];

fun properties props = [Elem ((":", props), [])];

fun string "" = []
  | string s = [Text s];

val int = string o int_atom;

val bool = string o bool_atom;

val unit = string o unit_atom;

fun pair f g (x, y) = [node (f x), node (g y)];

fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)];

fun list f xs = map (node o f) xs;

fun option _ NONE = []
  | option f (SOME x) = [node (f x)];

(*the first function in fs that applies to x without raising Match determines
  the tag (its position) and the encoded content; the is applied to the
  result, i.e. there is no fallback if no function applies*)
fun variant fs x =
  [tagged (the (get_index (fn f => SOME (f x) handle General.Match => NONE) fs))];

end;

structure Decode =
struct

type 'a A = string -> 'a;
type 'a T = body -> 'a;
type 'a V = string list * body -> 'a;
type 'a P = string list -> 'a;


(* atomic values *)

(*Fail from Value.parse_int is turned into XML_ATOM*)
fun int_atom s =
  Value.parse_int s
    handle Fail _ => raise XML_ATOM s;

fun bool_atom "0" = false
  | bool_atom "1" = true
  | bool_atom s = raise XML_ATOM s;

fun unit_atom "" = ()
  | unit_atom s = raise XML_ATOM s;


(* structural nodes *)

fun node (Elem ((":", []), ts)) = ts
  | node t = raise XML_BODY [t];

(*attribute values in order; each attribute name must be its own position*)
fun vector atts =
  map_index (fn (i, (a, x)) => if int_atom a = i then x else raise XML_ATOM a) atts;

fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts))
  | tagged t = raise XML_BODY [t];


(* representation of standard types *)

fun self (x: body) = x;

fun tree [t] = t
  | tree ts = raise XML_BODY ts;

fun properties [Elem ((":", props), [])] = props
  | properties ts = raise XML_BODY ts;

fun string [] = ""
  | string [Text s] = s
  | string ts = raise XML_BODY ts;

val int = int_atom o string;

val bool = bool_atom o string;

val unit = unit_atom o string;

fun pair f g [t1, t2] = (f (node t1), g (node t2))
  | pair _ _ ts = raise XML_BODY ts;

fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3))
  | triple _ _ _ ts = raise XML_BODY ts;

fun list f ts = map (f o node) ts;

fun option _ [] = NONE
  | option f [t] = SOME (f (node t))
  | option _ ts = raise XML_BODY ts;

(*select the decoder by tag number; a tag outside of fs is reported as XML_BODY*)
fun variant fs [t] =
      let
        val (tag, (xs, ts)) = tagged t;
        val f = nth fs tag handle General.Subscript => raise XML_BODY [t];
      in f (xs, ts) end
  | variant _ ts = raise XML_BODY ts;

end;

end;