1 (* Title: Pure/General/xml.ML |
|
2 ID: $Id$ |
|
3 Author: David Aspinall, Stefan Berghofer and Markus Wenzel |
|
4 |
|
5 Basic support for XML. |
|
6 *) |
|
7 |
|
8 signature XML = |
|
9 sig |
|
10 (* string functions *) |
|
11 val header: string |
|
12 val text: string -> string |
|
13 val text_charref: string -> string |
|
14 val cdata: string -> string |
|
15 type attributes = (string * string) list |
|
16 val element: string -> attributes -> string list -> string |
|
17 (* tree functions *) |
|
18 datatype tree = |
|
19 Elem of string * attributes * tree list |
|
20 | Text of string (* native string, subject to XML.text on output *) |
|
21 | Rawtext of string (* XML string: output directly, not parsed *) |
|
22 type content = tree list |
|
23 type element = string * attributes * content |
|
24 val string_of_tree: tree -> string |
|
25 val buffer_of_tree: tree -> Buffer.T |
|
26 val parse_string : string -> string option |
|
27 val parse_content: string list -> tree list * string list |
|
28 val parse_elem: string list -> tree * string list |
|
29 val parse_document: string list -> (string option * tree) * string list |
|
30 val tree_of_string: string -> tree |
|
31 val scan_comment_whspc : string list -> unit * string list |
|
32 end; |
|
33 |
|
34 structure XML: XML = |
|
35 struct |
|
36 |
|
37 (** string based representation (small scale) **) |
|
38 |
|
39 val header = "<?xml version=\"1.0\"?>\n"; |
|
40 |
|
41 |
|
42 (* text and character data *) |
|
43 |
|
44 fun decode "<" = "<" |
|
45 | decode ">" = ">" |
|
46 | decode "&" = "&" |
|
47 | decode "'" = "'" |
|
48 | decode """ = "\"" |
|
49 | decode c = c; |
|
50 |
|
51 fun encode "<" = "<" |
|
52 | encode ">" = ">" |
|
53 | encode "&" = "&" |
|
54 | encode "'" = "'" |
|
55 | encode "\"" = """ |
|
56 | encode c = c; |
|
57 |
|
58 fun encode_charref c = "&#" ^ Int.toString (ord c) ^ ";" |
|
59 |
|
60 val text = Library.translate_string encode |
|
61 |
|
62 val text_charref = translate_string encode_charref; |
|
63 |
|
64 val cdata = enclose "<![CDATA[" "]]>\n" |
|
65 |
|
66 |
|
67 (* elements *) |
|
68 |
|
69 fun attribute (a, x) = a ^ " = \"" ^ text x ^ "\""; |
|
70 |
|
71 fun element name atts cs = |
|
72 let val elem = space_implode " " (name :: map attribute atts) in |
|
73 if null cs then enclose "<" "/>" elem |
|
74 else enclose "<" ">" elem ^ implode cs ^ enclose "</" ">" name |
|
75 end; |
|
76 |
|
77 |
|
78 |
|
79 |
|
80 (** explicit XML trees **) |
|
81 |
|
82 type attributes = (string * string) list |
|
83 |
|
84 datatype tree = |
|
85 Elem of string * attributes * tree list |
|
86 | Text of string |
|
87 | Rawtext of string; |
|
88 |
|
89 type content = tree list |
|
90 |
|
91 type element = string * attributes * content |
|
92 |
|
93 fun buffer_of_tree tree = |
|
94 let |
|
95 fun string_of (Elem (name, atts, ts)) buf = |
|
96 let val buf' = |
|
97 buf |> Buffer.add "<" |
|
98 |> fold Buffer.add (separate " " (name :: map attribute atts)) |
|
99 in |
|
100 if null ts then |
|
101 buf' |> Buffer.add "/>" |
|
102 else |
|
103 buf' |> Buffer.add ">" |
|
104 |> fold string_of ts |
|
105 |> Buffer.add "</" |> Buffer.add name |> Buffer.add ">" |
|
106 end |
|
107 | string_of (Text s) buf = Buffer.add (text s) buf |
|
108 | string_of (Rawtext s) buf = Buffer.add s buf; |
|
109 in string_of tree Buffer.empty end; |
|
110 |
|
111 val string_of_tree = Buffer.content o buffer_of_tree; |
|
112 |
|
113 |
|
114 |
|
115 (** XML parsing **) |
|
116 |
|
117 fun err s (xs, _) = |
|
118 "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs); |
|
119 |
|
120 val scan_whspc = Scan.many Symbol.is_blank; |
|
121 |
|
122 val scan_special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode; |
|
123 |
|
124 val parse_chars = Scan.repeat1 (Scan.unless ((* scan_whspc -- *)$$ "<") |
|
125 (scan_special || Scan.one Symbol.not_eof)) >> implode; |
|
126 |
|
127 val parse_string = Scan.read Symbol.stopper parse_chars o explode; |
|
128 |
|
129 val parse_cdata = Scan.this_string "<![CDATA[" |-- |
|
130 (Scan.repeat (Scan.unless (Scan.this_string "]]>") (Scan.one Symbol.not_eof)) >> |
|
131 implode) --| Scan.this_string "]]>"; |
|
132 |
|
133 val parse_att = |
|
134 Symbol.scan_id --| scan_whspc --| $$ "=" --| scan_whspc -- |
|
135 (($$ "\"" || $$ "'") :-- (fn s => (Scan.repeat (Scan.unless ($$ s) |
|
136 (scan_special || Scan.one Symbol.not_eof)) >> implode) --| $$ s) >> snd); |
|
137 |
|
138 val parse_comment = Scan.this_string "<!--" -- |
|
139 Scan.repeat (Scan.unless (Scan.this_string "-->") (Scan.one Symbol.not_eof)) -- |
|
140 Scan.this_string "-->"; |
|
141 |
|
142 val scan_comment_whspc = |
|
143 (scan_whspc >> K()) --| (Scan.repeat (parse_comment |-- (scan_whspc >> K()))); |
|
144 |
|
145 val parse_pi = Scan.this_string "<?" |-- |
|
146 Scan.repeat (Scan.unless (Scan.this_string "?>") (Scan.one Symbol.not_eof)) --| |
|
147 Scan.this_string "?>"; |
|
148 |
|
149 fun parse_content xs = |
|
150 ((Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) [] -- |
|
151 (Scan.repeat ((* scan_whspc |-- *) |
|
152 ( parse_elem >> single |
|
153 || parse_cdata >> (single o Text) |
|
154 || parse_pi >> K [] |
|
155 || parse_comment >> K []) -- |
|
156 Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) [] |
|
157 >> op @) >> flat) >> op @)(* --| scan_whspc*)) xs |
|
158 |
|
159 and parse_elem xs = |
|
160 ($$ "<" |-- Symbol.scan_id -- |
|
161 Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) => |
|
162 !! (err "Expected > or />") |
|
163 (Scan.this_string "/>" >> K [] |
|
164 || $$ ">" |-- parse_content --| |
|
165 !! (err ("Expected </" ^ s ^ ">")) |
|
166 (Scan.this_string ("</" ^ s) --| scan_whspc --| $$ ">"))) >> |
|
167 (fn ((s, atts), ts) => Elem (s, atts, ts))) xs; |
|
168 |
|
169 val parse_document = |
|
170 Scan.option (Scan.this_string "<!DOCTYPE" -- scan_whspc |-- |
|
171 (Scan.repeat (Scan.unless ($$ ">") |
|
172 (Scan.one Symbol.not_eof)) >> implode) --| $$ ">" --| scan_whspc) -- |
|
173 parse_elem; |
|
174 |
|
175 fun tree_of_string s = |
|
176 (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element") |
|
177 (scan_whspc |-- parse_elem --| scan_whspc))) (Symbol.explode s) of |
|
178 (x, []) => x |
|
179 | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys)); |
|
180 |
|
181 end; |
|