author | haftmann |
Mon, 16 Feb 2009 19:11:55 +0100 | |
changeset 29940 | 83b373f61d41 |
parent 29606 | fedb8be05f24 |
child 31469 | 40f815edbde4 |
permissions | -rw-r--r-- |
24584 | 1 |
(* Title: Pure/General/xml.ML |
24264 | 2 |
Author: David Aspinall, Stefan Berghofer and Markus Wenzel |
3 |
||
4 |
Basic support for XML. |
|
5 |
*) |
|
6 |
||
7 |
signature XML = |
|
8 |
sig |
|
28017 | 9 |
type attributes = Properties.T |
24264 | 10 |
datatype tree = |
11 |
Elem of string * attributes * tree list |
|
12 |
| Text of string |
|
26546 | 13 |
val add_content: tree -> Buffer.T -> Buffer.T |
14 |
val detect: string -> bool |
|
15 |
val header: string |
|
16 |
val text: string -> string |
|
17 |
val element: string -> attributes -> string list -> string |
|
18 |
val output_markup: Markup.T -> output * output |
|
26539
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
19 |
val string_of: tree -> string |
26546 | 20 |
val output: tree -> TextIO.outstream -> unit |
26984
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
21 |
val parse_comments: string list -> unit * string list |
24264 | 22 |
val parse_string : string -> string option |
26546 | 23 |
val parse_element: string list -> tree * string list |
26984
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
24 |
val parse_document: string list -> tree * string list |
26539
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
25 |
val parse: string -> tree |
24264 | 26 |
end; |
27 |
||
28 |
structure XML: XML = |
|
29 |
struct |
|
30 |
||
26546 | 31 |
(** XML trees **) |
32 |
||
28017 | 33 |
type attributes = Properties.T; |
26546 | 34 |
|
35 |
datatype tree = |
|
36 |
Elem of string * attributes * tree list |
|
28033 | 37 |
| Text of string; |
26546 | 38 |
|
39 |
fun add_content (Elem (_, _, ts)) = fold add_content ts |
|
28033 | 40 |
| add_content (Text s) = Buffer.add s; |
26546 | 41 |
|
42 |
||
24264 | 43 |
|
26525 | 44 |
(** string representation **) |
45 |
||
46 |
val detect = String.isPrefix "<?xml"; |
|
24264 | 47 |
val header = "<?xml version=\"1.0\"?>\n"; |
48 |
||
49 |
||
26546 | 50 |
(* escaped text *) |
24264 | 51 |
|
52 |
fun decode "<" = "<" |
|
53 |
| decode ">" = ">" |
|
54 |
| decode "&" = "&" |
|
55 |
| decode "'" = "'" |
|
56 |
| decode """ = "\"" |
|
57 |
| decode c = c; |
|
58 |
||
59 |
fun encode "<" = "<" |
|
60 |
| encode ">" = ">" |
|
61 |
| encode "&" = "&" |
|
62 |
| encode "'" = "'" |
|
63 |
| encode "\"" = """ |
|
64 |
| encode c = c; |
|
65 |
||
25838 | 66 |
val text = translate_string encode; |
24264 | 67 |
|
68 |
||
69 |
(* elements *) |
|
70 |
||
26539
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
71 |
fun elem name atts = |
26551 | 72 |
space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts); |
24264 | 73 |
|
26525 | 74 |
fun element name atts body = |
26539
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
75 |
let val b = implode body in |
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
76 |
if b = "" then enclose "<" "/>" (elem name atts) |
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
77 |
else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name |
24264 | 78 |
end; |
79 |
||
27884 | 80 |
fun output_markup (markup as (name, atts)) = |
29325 | 81 |
if Markup.is_none markup then Markup.no_output |
27884 | 82 |
else (enclose "<" ">" (elem name atts), enclose "</" ">" name); |
26539
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
83 |
|
24264 | 84 |
|
26546 | 85 |
(* output *) |
24264 | 86 |
|
26546 | 87 |
fun buffer_of tree = |
24264 | 88 |
let |
26546 | 89 |
fun traverse (Elem (name, atts, [])) = |
26539
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
90 |
Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>" |
26546 | 91 |
| traverse (Elem (name, atts, ts)) = |
26539
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
92 |
Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #> |
26546 | 93 |
fold traverse ts #> |
26525 | 94 |
Buffer.add "</" #> Buffer.add name #> Buffer.add ">" |
28033 | 95 |
| traverse (Text s) = Buffer.add (text s); |
26546 | 96 |
in Buffer.empty |> traverse tree end; |
24264 | 97 |
|
26546 | 98 |
val string_of = Buffer.content o buffer_of; |
99 |
val output = Buffer.output o buffer_of; |
|
25838 | 100 |
|
24264 | 101 |
|
102 |
||
26546 | 103 |
(** XML parsing (slow) **) |
104 |
||
105 |
local |
|
24264 | 106 |
|
107 |
fun err s (xs, _) = |
|
108 |
"XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs); |
|
109 |
||
26984
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
110 |
fun ignored _ = []; |
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
111 |
|
26551 | 112 |
val blanks = Scan.many Symbol.is_blank; |
113 |
val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode; |
|
114 |
val regular = Scan.one Symbol.is_regular; |
|
115 |
fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x); |
|
24264 | 116 |
|
26551 | 117 |
val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode; |
24264 | 118 |
|
26551 | 119 |
val parse_cdata = |
120 |
Scan.this_string "<![CDATA[" |-- |
|
121 |
(Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --| |
|
122 |
Scan.this_string "]]>"; |
|
24264 | 123 |
|
124 |
val parse_att = |
|
26551 | 125 |
(Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) -- |
126 |
(($$ "\"" || $$ "'") :|-- (fn s => |
|
127 |
(Scan.repeat (special || regular_except s) >> implode) --| $$ s)); |
|
24264 | 128 |
|
26551 | 129 |
val parse_comment = |
130 |
Scan.this_string "<!--" -- |
|
131 |
Scan.repeat (Scan.unless (Scan.this_string "-->") regular) -- |
|
26984
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
132 |
Scan.this_string "-->" >> ignored; |
24264 | 133 |
|
26551 | 134 |
val parse_processing_instruction = |
135 |
Scan.this_string "<?" -- |
|
136 |
Scan.repeat (Scan.unless (Scan.this_string "?>") regular) -- |
|
26984
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
137 |
Scan.this_string "?>" >> ignored; |
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
138 |
|
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
139 |
val parse_doctype = |
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
140 |
Scan.this_string "<!DOCTYPE" -- |
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
141 |
Scan.repeat (Scan.unless ($$ ">") regular) -- |
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
142 |
$$ ">" >> ignored; |
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
143 |
|
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
144 |
val parse_misc = |
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
145 |
Scan.one Symbol.is_blank >> ignored || |
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
146 |
parse_processing_instruction || |
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
147 |
parse_comment; |
26551 | 148 |
|
149 |
val parse_optional_text = |
|
150 |
Scan.optional (parse_chars >> (single o Text)) []; |
|
24264 | 151 |
|
26546 | 152 |
in |
153 |
||
26984
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
154 |
val parse_comments = |
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
155 |
blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K (); |
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
156 |
|
26546 | 157 |
val parse_string = Scan.read Symbol.stopper parse_chars o explode; |
158 |
||
24264 | 159 |
fun parse_content xs = |
26551 | 160 |
(parse_optional_text @@@ |
161 |
(Scan.repeat |
|
162 |
((parse_element >> single || |
|
163 |
parse_cdata >> (single o Text) || |
|
26984
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
164 |
parse_processing_instruction || |
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
165 |
parse_comment) |
26551 | 166 |
@@@ parse_optional_text) >> flat)) xs |
24264 | 167 |
|
26546 | 168 |
and parse_element xs = |
24264 | 169 |
($$ "<" |-- Symbol.scan_id -- |
26551 | 170 |
Scan.repeat (blanks |-- parse_att) --| blanks :-- (fn (s, _) => |
24264 | 171 |
!! (err "Expected > or />") |
26984
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
172 |
(Scan.this_string "/>" >> ignored |
24264 | 173 |
|| $$ ">" |-- parse_content --| |
174 |
!! (err ("Expected </" ^ s ^ ">")) |
|
26551 | 175 |
(Scan.this_string ("</" ^ s) --| blanks --| $$ ">"))) >> |
24264 | 176 |
(fn ((s, atts), ts) => Elem (s, atts, ts))) xs; |
177 |
||
26984
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
178 |
val parse_document = |
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
179 |
(Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc) |
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
180 |
|-- parse_element; |
24264 | 181 |
|
26539
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
182 |
fun parse s = |
24264 | 183 |
(case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element") |
26984
d0e098e206f3
added parse_document (optional unchecked header material);
wenzelm
parents:
26554
diff
changeset
|
184 |
(blanks |-- parse_document --| blanks))) (explode s) of |
24264 | 185 |
(x, []) => x |
186 |
| (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys)); |
|
187 |
||
188 |
end; |
|
26546 | 189 |
|
190 |
end; |