haftmann@24584
|
1 |
(* Title: Pure/General/xml.ML
|
wenzelm@24264
|
2 |
Author: David Aspinall, Stefan Berghofer and Markus Wenzel
|
wenzelm@24264
|
3 |
|
wenzelm@38228
|
4 |
Simple XML tree values.
|
wenzelm@24264
|
5 |
*)
|
wenzelm@24264
|
6 |
|
wenzelm@24264
|
7 |
signature XML =
|
wenzelm@24264
|
8 |
sig
|
wenzelm@28017
|
9 |
type attributes = Properties.T
|
wenzelm@24264
|
10 |
datatype tree =
|
wenzelm@38228
|
11 |
Elem of Markup.T * tree list
|
wenzelm@24264
|
12 |
| Text of string
|
wenzelm@38266
|
13 |
type body = tree list
|
wenzelm@26546
|
14 |
val add_content: tree -> Buffer.T -> Buffer.T
|
wenzelm@26546
|
15 |
val header: string
|
wenzelm@26546
|
16 |
val text: string -> string
|
wenzelm@26546
|
17 |
val element: string -> attributes -> string list -> string
|
wenzelm@26546
|
18 |
val output_markup: Markup.T -> output * output
|
wenzelm@26539
|
19 |
val string_of: tree -> string
|
wenzelm@26546
|
20 |
val output: tree -> TextIO.outstream -> unit
|
wenzelm@26984
|
21 |
val parse_comments: string list -> unit * string list
|
wenzelm@24264
|
22 |
val parse_string : string -> string option
|
wenzelm@26546
|
23 |
val parse_element: string list -> tree * string list
|
wenzelm@26984
|
24 |
val parse_document: string list -> tree * string list
|
wenzelm@26539
|
25 |
val parse: string -> tree
|
wenzelm@24264
|
26 |
end;
|
wenzelm@24264
|
27 |
|
wenzelm@24264
|
28 |
structure XML: XML =
|
wenzelm@24264
|
29 |
struct
|
wenzelm@24264
|
30 |
|
wenzelm@26546
|
31 |
(** XML trees **)
|
wenzelm@26546
|
32 |
|
wenzelm@28017
|
33 |
type attributes = Properties.T;
|
wenzelm@26546
|
34 |
|
wenzelm@26546
|
35 |
datatype tree =
|
wenzelm@38228
|
36 |
Elem of Markup.T * tree list
|
wenzelm@28033
|
37 |
| Text of string;
|
wenzelm@26546
|
38 |
|
wenzelm@38266
|
39 |
type body = tree list;
|
wenzelm@38266
|
40 |
|
wenzelm@38228
|
41 |
fun add_content (Elem (_, ts)) = fold add_content ts
|
wenzelm@28033
|
42 |
| add_content (Text s) = Buffer.add s;
|
wenzelm@26546
|
43 |
|
wenzelm@26546
|
44 |
|
wenzelm@24264
|
45 |
|
wenzelm@26525
|
46 |
(** string representation **)
|
wenzelm@26525
|
47 |
|
wenzelm@24264
|
48 |
val header = "<?xml version=\"1.0\"?>\n";
|
wenzelm@24264
|
49 |
|
wenzelm@24264
|
50 |
|
wenzelm@26546
|
51 |
(* escaped text *)
|
wenzelm@24264
|
52 |
|
wenzelm@24264
|
53 |
fun decode "<" = "<"
|
wenzelm@24264
|
54 |
| decode ">" = ">"
|
wenzelm@24264
|
55 |
| decode "&" = "&"
|
wenzelm@24264
|
56 |
| decode "'" = "'"
|
wenzelm@24264
|
57 |
| decode """ = "\""
|
wenzelm@24264
|
58 |
| decode c = c;
|
wenzelm@24264
|
59 |
|
wenzelm@24264
|
60 |
fun encode "<" = "<"
|
wenzelm@24264
|
61 |
| encode ">" = ">"
|
wenzelm@24264
|
62 |
| encode "&" = "&"
|
wenzelm@24264
|
63 |
| encode "'" = "'"
|
wenzelm@24264
|
64 |
| encode "\"" = """
|
wenzelm@24264
|
65 |
| encode c = c;
|
wenzelm@24264
|
66 |
|
wenzelm@25838
|
67 |
val text = translate_string encode;
|
wenzelm@24264
|
68 |
|
wenzelm@24264
|
69 |
|
wenzelm@24264
|
70 |
(* elements *)
|
wenzelm@24264
|
71 |
|
wenzelm@26539
|
72 |
fun elem name atts =
|
wenzelm@26551
|
73 |
space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);
|
wenzelm@24264
|
74 |
|
wenzelm@26525
|
75 |
fun element name atts body =
|
wenzelm@26539
|
76 |
let val b = implode body in
|
wenzelm@26539
|
77 |
if b = "" then enclose "<" "/>" (elem name atts)
|
wenzelm@26539
|
78 |
else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name
|
wenzelm@24264
|
79 |
end;
|
wenzelm@24264
|
80 |
|
wenzelm@27884
|
81 |
fun output_markup (markup as (name, atts)) =
|
wenzelm@29325
|
82 |
if Markup.is_none markup then Markup.no_output
|
wenzelm@27884
|
83 |
else (enclose "<" ">" (elem name atts), enclose "</" ">" name);
|
wenzelm@26539
|
84 |
|
wenzelm@24264
|
85 |
|
wenzelm@26546
|
86 |
(* output *)
|
wenzelm@24264
|
87 |
|
wenzelm@26546
|
88 |
fun buffer_of tree =
|
wenzelm@24264
|
89 |
let
|
wenzelm@38228
|
90 |
fun traverse (Elem ((name, atts), [])) =
|
wenzelm@26539
|
91 |
Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"
|
wenzelm@38228
|
92 |
| traverse (Elem ((name, atts), ts)) =
|
wenzelm@26539
|
93 |
Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>
|
wenzelm@26546
|
94 |
fold traverse ts #>
|
wenzelm@26525
|
95 |
Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
|
wenzelm@28033
|
96 |
| traverse (Text s) = Buffer.add (text s);
|
wenzelm@26546
|
97 |
in Buffer.empty |> traverse tree end;
|
wenzelm@24264
|
98 |
|
wenzelm@26546
|
99 |
val string_of = Buffer.content o buffer_of;
|
wenzelm@26546
|
100 |
val output = Buffer.output o buffer_of;
|
wenzelm@25838
|
101 |
|
wenzelm@24264
|
102 |
|
wenzelm@24264
|
103 |
|
wenzelm@26546
|
104 |
(** XML parsing (slow) **)
|
wenzelm@26546
|
105 |
|
wenzelm@26546
|
106 |
local
|
wenzelm@24264
|
107 |
|
wenzelm@24264
|
108 |
fun err s (xs, _) =
|
wenzelm@24264
|
109 |
"XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
|
wenzelm@24264
|
110 |
|
wenzelm@26984
|
111 |
fun ignored _ = [];
|
wenzelm@26984
|
112 |
|
wenzelm@26551
|
113 |
val blanks = Scan.many Symbol.is_blank;
|
wenzelm@26551
|
114 |
val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
|
wenzelm@26551
|
115 |
val regular = Scan.one Symbol.is_regular;
|
wenzelm@26551
|
116 |
fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);
|
wenzelm@24264
|
117 |
|
wenzelm@26551
|
118 |
val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;
|
wenzelm@24264
|
119 |
|
wenzelm@26551
|
120 |
val parse_cdata =
|
wenzelm@26551
|
121 |
Scan.this_string "<![CDATA[" |--
|
wenzelm@26551
|
122 |
(Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|
|
wenzelm@26551
|
123 |
Scan.this_string "]]>";
|
wenzelm@24264
|
124 |
|
wenzelm@24264
|
125 |
val parse_att =
|
wenzelm@26551
|
126 |
(Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) --
|
wenzelm@26551
|
127 |
(($$ "\"" || $$ "'") :|-- (fn s =>
|
wenzelm@26551
|
128 |
(Scan.repeat (special || regular_except s) >> implode) --| $$ s));
|
wenzelm@24264
|
129 |
|
wenzelm@26551
|
130 |
val parse_comment =
|
wenzelm@26551
|
131 |
Scan.this_string "<!--" --
|
wenzelm@26551
|
132 |
Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
|
wenzelm@26984
|
133 |
Scan.this_string "-->" >> ignored;
|
wenzelm@24264
|
134 |
|
wenzelm@26551
|
135 |
val parse_processing_instruction =
|
wenzelm@26551
|
136 |
Scan.this_string "<?" --
|
wenzelm@26551
|
137 |
Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
|
wenzelm@26984
|
138 |
Scan.this_string "?>" >> ignored;
|
wenzelm@26984
|
139 |
|
wenzelm@26984
|
140 |
val parse_doctype =
|
wenzelm@26984
|
141 |
Scan.this_string "<!DOCTYPE" --
|
wenzelm@26984
|
142 |
Scan.repeat (Scan.unless ($$ ">") regular) --
|
wenzelm@26984
|
143 |
$$ ">" >> ignored;
|
wenzelm@26984
|
144 |
|
wenzelm@26984
|
145 |
val parse_misc =
|
wenzelm@26984
|
146 |
Scan.one Symbol.is_blank >> ignored ||
|
wenzelm@26984
|
147 |
parse_processing_instruction ||
|
wenzelm@26984
|
148 |
parse_comment;
|
wenzelm@26551
|
149 |
|
wenzelm@26551
|
150 |
val parse_optional_text =
|
wenzelm@26551
|
151 |
Scan.optional (parse_chars >> (single o Text)) [];
|
wenzelm@24264
|
152 |
|
wenzelm@26546
|
153 |
in
|
wenzelm@26546
|
154 |
|
wenzelm@26984
|
155 |
val parse_comments =
|
wenzelm@26984
|
156 |
blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
|
wenzelm@26984
|
157 |
|
wenzelm@26546
|
158 |
val parse_string = Scan.read Symbol.stopper parse_chars o explode;
|
wenzelm@26546
|
159 |
|
wenzelm@24264
|
160 |
fun parse_content xs =
|
wenzelm@26551
|
161 |
(parse_optional_text @@@
|
wenzelm@26551
|
162 |
(Scan.repeat
|
wenzelm@26551
|
163 |
((parse_element >> single ||
|
wenzelm@26551
|
164 |
parse_cdata >> (single o Text) ||
|
wenzelm@26984
|
165 |
parse_processing_instruction ||
|
wenzelm@26984
|
166 |
parse_comment)
|
wenzelm@26551
|
167 |
@@@ parse_optional_text) >> flat)) xs
|
wenzelm@24264
|
168 |
|
wenzelm@26546
|
169 |
and parse_element xs =
|
wenzelm@24264
|
170 |
($$ "<" |-- Symbol.scan_id --
|
wenzelm@26551
|
171 |
Scan.repeat (blanks |-- parse_att) --| blanks :-- (fn (s, _) =>
|
wenzelm@24264
|
172 |
!! (err "Expected > or />")
|
wenzelm@26984
|
173 |
(Scan.this_string "/>" >> ignored
|
wenzelm@24264
|
174 |
|| $$ ">" |-- parse_content --|
|
wenzelm@24264
|
175 |
!! (err ("Expected </" ^ s ^ ">"))
|
wenzelm@38228
|
176 |
(Scan.this_string ("</" ^ s) --| blanks --| $$ ">"))) >> Elem) xs;
|
wenzelm@24264
|
177 |
|
wenzelm@26984
|
178 |
val parse_document =
|
wenzelm@26984
|
179 |
(Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)
|
wenzelm@26984
|
180 |
|-- parse_element;
|
wenzelm@24264
|
181 |
|
wenzelm@26539
|
182 |
fun parse s =
|
wenzelm@24264
|
183 |
(case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element")
|
wenzelm@26984
|
184 |
(blanks |-- parse_document --| blanks))) (explode s) of
|
wenzelm@24264
|
185 |
(x, []) => x
|
wenzelm@24264
|
186 |
| (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
|
wenzelm@24264
|
187 |
|
wenzelm@24264
|
188 |
end;
|
wenzelm@26546
|
189 |
|
wenzelm@26546
|
190 |
end;
|