author | wenzelm |
Thu, 03 Apr 2008 18:42:37 +0200 | |
changeset 26539 | a0754be538ab |
parent 26525 | 14a56f013469 |
child 26546 | ba4cdf92c7c4 |
permissions | -rw-r--r-- |
24584 | 1 |
(*  Title:      Pure/General/xml.ML
    ID:         $Id$
    Author:     David Aspinall, Stefan Berghofer and Markus Wenzel

Basic support for XML.
*)
|
7 |
||
8 |
signature XML =
sig
  (*string functions*)

  (*test for the "<?xml" prefix*)
  val detect: string -> bool
  (*XML declaration line*)
  val header: string
  (*escape markup characters of plain text*)
  val text: string -> string

  type attributes = Markup.property list

  (*print an element from name, attributes and already printed body pieces*)
  val element: string -> attributes -> string list -> string
  (*opening and closing tag for the given markup*)
  val output_markup: Markup.T -> output * output

  (*tree functions*)

  datatype tree =
      Elem of string * attributes * tree list
    | Text of string
    | Output of output
  type content = tree list
  type element = string * attributes * content

  val string_of: tree -> string
  val plain_content: tree -> string

  (*parsers over exploded symbol lists*)
  val parse_string : string -> string option
  val parse_comment_whspc: string list -> unit * string list
  val parse_content: string list -> tree list * string list
  val parse_elem: string list -> tree * string list
  val parse_document: string list -> (string option * tree) * string list
  val parse: string -> tree
end;
33 |
||
34 |
structure XML: XML = |
|
35 |
struct |
|
36 |
||
37 |
||
26525 | 38 |
(** string representation **) |
39 |
||
40 |
(*true iff the given string starts with the literal "<?xml"*)
fun detect s = String.isPrefix "<?xml" s;

(*XML declaration, terminated by a newline*)
val header = "<?xml version=\"1.0\"?>\n";
42 |
||
43 |
||
44 |
(* text and character data *) |
|
45 |
||
46 |
(*Decode one predefined XML entity reference to the character it denotes.
  The argument is the complete reference as assembled by scan_special,
  i.e. including the leading "&" and the trailing ";".  Any other string
  is returned unchanged.*)
fun decode "&lt;" = "<"
  | decode "&gt;" = ">"
  | decode "&amp;" = "&"
  | decode "&apos;" = "'"
  | decode "&quot;" = "\""
  | decode c = c;
|
52 |
||
53 |
(*Encode a single character for use in XML text or attribute values:
  the five markup characters are replaced by their predefined entity
  references, everything else is passed through unchanged.*)
fun encode "<" = "&lt;"
  | encode ">" = "&gt;"
  | encode "&" = "&amp;"
  | encode "'" = "&apos;"
  | encode "\"" = "&quot;"
  | encode c = c;
|
59 |
||
25838 | 60 |
(*escape a whole string by applying encode via translate_string*)
fun text s = translate_string encode s;
24264 | 61 |
|
62 |
||
63 |
(* elements *) |
|
64 |
||
26539
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
65 |
type attributes = Markup.property list; |
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
66 |
|
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
67 |
(*interior of a tag: element name followed by its attributes, separated by
  single spaces; attribute values are escaped and double-quoted*)
fun elem name atts =
  let
    fun attribute (a, x) = a ^ " = \"" ^ text x ^ "\"";
  in space_implode " " (name :: map attribute atts) end;
24264 | 69 |
|
26525 | 70 |
(*print an element whose body pieces are already strings; an empty body
  yields the self-closing form*)
fun element name atts body =
  (case implode body of
    "" => enclose "<" "/>" (elem name atts)
  | b => enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name);
75 |
||
26539
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
76 |
(*pair of opening tag (with attributes) and closing tag for the markup*)
fun output_markup (name, atts) =
  let
    val bg = enclose "<" ">" (elem name atts);
    val en = enclose "</" ">" name;
  in (bg, en) end;
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
79 |
|
24264 | 80 |
|
81 |
||
82 |
(** explicit XML trees **) |
|
83 |
||
84 |
(*XML tree: Elem carries name, attributes and children; Text is escaped
  when printed by string_of, whereas Output is inserted as is*)
datatype tree =
    Elem of string * attributes * tree list
  | Text of string
  | Output of output;

type content = tree list;
type element = string * attributes * content;
|
91 |
||
26539
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
92 |
(*print a tree as XML text, accumulating the pieces in a Buffer*)
fun string_of t =
  let
    (*"<" followed by name and attributes, without the closing bracket*)
    fun open_tag name atts = Buffer.add "<" #> Buffer.add (elem name atts);

    fun tree (Elem (name, atts, [])) = open_tag name atts #> Buffer.add "/>"
      | tree (Elem (name, atts, ts)) =
          open_tag name atts #> Buffer.add ">" #>
          fold tree ts #>
          Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
      | tree (Text s) = Buffer.add (text s)
      | tree (Output s) = Buffer.add s;
  in Buffer.content (tree t Buffer.empty) end;
|
24264 | 103 |
|
25838 | 104 |
(*concatenate the unescaped Text leaves of a tree in document order;
  Output nodes contribute nothing*)
fun plain_content tree =
  let
    fun content (Text s) = Buffer.add s
      | content (Elem (_, _, ts)) = fold content ts
      | content (Output _) = I;  (* FIXME !? *)
  in Buffer.content (content tree Buffer.empty) end;
|
110 |
||
24264 | 111 |
|
112 |
||
113 |
(** XML parsing **) |
|
114 |
||
115 |
(*error message for the !! combinator: the given reason plus a quoted
  excerpt of the remaining input*)
fun err s (xs, _) =
  let val found = quote (Symbol.beginning 100 xs)
  in "XML parsing error: " ^ s ^ "\nfound: " ^ found end;
|
117 |
||
118 |
(*possibly empty run of blank symbols*)
fun scan_whspc xs = Scan.many Symbol.is_blank xs;

(*entity reference "&" ident ";" -- the complete text is passed to decode*)
val scan_special = ($$ "&" ^^ Symbol.scan_id ^^ $$ ";") >> decode;
|
121 |
||
122 |
(*non-empty character data up to the next "<"; entity references are decoded*)
val parse_chars =
  let
    val char = scan_special || Scan.one Symbol.is_regular;
  in Scan.repeat1 (Scan.unless ($$ "<") char) >> implode end;
|
124 |
||
125 |
(*run parse_chars over the exploded string via Scan.read*)
fun parse_string s = Scan.read Symbol.stopper parse_chars (explode s);
|
126 |
||
127 |
(*CDATA section: yields the text between the delimiters, without decoding*)
val parse_cdata =
  let
    val cdata_end = Scan.this_string "]]>";
    val cdata_char = Scan.unless cdata_end (Scan.one Symbol.is_regular);
  in Scan.this_string "<![CDATA[" |-- (Scan.repeat cdata_char >> implode) --| cdata_end end;
|
130 |
||
131 |
(*attribute: name, "=" with optional blanks around it, then a value enclosed
  in matching double or single quotes; entity references are decoded*)
val parse_att =
  let
    val att_name = Symbol.scan_id --| scan_whspc --| $$ "=" --| scan_whspc;
    val quote_mark = $$ "\"" || $$ "'";
    (*value terminated by the same quote symbol q that opened it*)
    fun quoted_value q =
      (Scan.repeat (Scan.unless ($$ q) (scan_special || Scan.one Symbol.is_regular)) >> implode)
        --| $$ q;
  in att_name -- (quote_mark :|-- quoted_value) end;
|
135 |
||
136 |
(*comment: opening delimiter, body symbols, closing delimiter -- all three
  parts are kept in the result*)
val parse_comment =
  let
    val comment_end = Scan.this_string "-->";
    val comment_char = Scan.unless comment_end (Scan.one Symbol.is_regular);
  in Scan.this_string "<!--" -- Scan.repeat comment_char -- comment_end end;
|
139 |
||
26539
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
140 |
(*skip blanks, then any number of comments each followed by blanks*)
val parse_comment_whspc =
  let
    val skip_blanks = scan_whspc >> K ();
  in skip_blanks --| Scan.repeat (parse_comment |-- skip_blanks) end;
142 |
||
143 |
(*processing instruction: yields the symbols between "<?" and "?>"*)
val parse_pi =
  let
    val pi_end = Scan.this_string "?>";
    val pi_char = Scan.unless pi_end (Scan.one Symbol.is_regular);
  in Scan.this_string "<?" |-- Scan.repeat pi_char --| pi_end end;
|
146 |
||
147 |
(*element content: optional leading text, then any number of items (nested
  element, CDATA, processing instruction, comment), each optionally followed
  by text.  Processing instructions and comments produce no tree nodes.
  Blanks are not skipped here.*)
fun parse_content xs =
  let
    val chars = Scan.optional (parse_chars >> (single o Text)) [];
    val item =
      parse_elem >> single ||
      parse_cdata >> (single o Text) ||
      parse_pi >> K [] ||
      parse_comment >> K [];
  in (chars -- (Scan.repeat (item -- chars >> op @) >> flat) >> op @) xs end

(*element: "<" name attributes, then either "/>" or ">" content and the
  matching closing tag; failures after the tag head are reported via err*)
and parse_elem xs =
  let
    val open_tag =
      $$ "<" |-- Symbol.scan_id -- Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc;

    fun close_tag name =
      !! (err ("Expected </" ^ name ^ ">"))
        (Scan.this_string ("</" ^ name) --| scan_whspc --| $$ ">");

    fun rest (name, _) =
      !! (err "Expected > or />")
        (Scan.this_string "/>" >> K [] ||
         $$ ">" |-- parse_content --| close_tag name);
  in (open_tag :-- rest >> (fn ((name, atts), ts) => Elem (name, atts, ts))) xs end;
|
166 |
||
167 |
(*document: optional DOCTYPE declaration (its text up to ">" is returned)
  followed by a single element*)
val parse_document =
  let
    val doctype_body =
      Scan.repeat (Scan.unless ($$ ">") (Scan.one Symbol.is_regular)) >> implode;
    val doctype =
      Scan.this_string "<!DOCTYPE" -- scan_whspc |-- doctype_body --| $$ ">" --| scan_whspc;
  in Scan.option doctype -- parse_elem end;
|
172 |
||
26539
a0754be538ab
added output_markup (from Tools/isabelle_process.ML);
wenzelm
parents:
26525
diff
changeset
|
173 |
(*parse a string as one element surrounded by optional blanks; leftover
  input is an error*)
fun parse s =
  let
    val top_elem = scan_whspc |-- parse_elem --| scan_whspc;
    val scan = Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element") top_elem));
  in
    (case scan (Symbol.explode s) of
      (x, []) => x
    | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys))
  end;
|
178 |
||
179 |
end; |