1 /* Title: Pure/General/xml.scala |
|
2 Author: Makarius |
|
3 |
|
4 Untyped XML trees. |
|
5 */ |
|
6 |
|
7 package isabelle |
|
8 |
|
9 import java.lang.System |
|
10 import java.util.WeakHashMap |
|
11 import java.lang.ref.WeakReference |
|
12 import javax.xml.parsers.DocumentBuilderFactory |
|
13 |
|
14 import scala.actors.Actor._ |
|
15 import scala.collection.mutable |
|
16 |
|
17 |
|
18 object XML |
|
19 { |
|
20 /** XML trees **/ |
|
21 |
|
22 /* datatype representation */ |
|
23 |
|
24 type Attributes = Properties.T |
|
25 |
|
26 sealed abstract class Tree { override def toString = string_of_tree(this) } |
|
27 case class Elem(markup: Markup, body: List[Tree]) extends Tree |
|
28 case class Text(content: String) extends Tree |
|
29 |
|
30 def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body) |
|
31 def elem(name: String) = Elem(Markup(name, Nil), Nil) |
|
32 |
|
33 type Body = List[Tree] |
|
34 |
|
35 |
|
36 /* string representation */ |
|
37 |
|
38 def string_of_body(body: Body): String = |
|
39 { |
|
40 val s = new StringBuilder |
|
41 |
|
42 def text(txt: String) { |
|
43 if (txt == null) s ++= txt |
|
44 else { |
|
45 for (c <- txt.iterator) c match { |
|
46 case '<' => s ++= "<" |
|
47 case '>' => s ++= ">" |
|
48 case '&' => s ++= "&" |
|
49 case '"' => s ++= """ |
|
50 case '\'' => s ++= "'" |
|
51 case _ => s += c |
|
52 } |
|
53 } |
|
54 } |
|
55 def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" } |
|
56 def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) } |
|
57 def tree(t: Tree): Unit = |
|
58 t match { |
|
59 case Elem(markup, Nil) => |
|
60 s ++= "<"; elem(markup); s ++= "/>" |
|
61 case Elem(markup, ts) => |
|
62 s ++= "<"; elem(markup); s ++= ">" |
|
63 ts.foreach(tree) |
|
64 s ++= "</"; s ++= markup.name; s ++= ">" |
|
65 case Text(txt) => text(txt) |
|
66 } |
|
67 body.foreach(tree) |
|
68 s.toString |
|
69 } |
|
70 |
|
71 def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree)) |
|
72 |
|
73 |
|
74 /* text content */ |
|
75 |
|
76 def content_stream(tree: Tree): Stream[String] = |
|
77 tree match { |
|
78 case Elem(_, body) => content_stream(body) |
|
79 case Text(content) => Stream(content) |
|
80 } |
|
81 def content_stream(body: Body): Stream[String] = |
|
82 body.toStream.flatten(content_stream(_)) |
|
83 |
|
84 def content(tree: Tree): Iterator[String] = content_stream(tree).iterator |
|
85 def content(body: Body): Iterator[String] = content_stream(body).iterator |
|
86 |
|
87 |
|
88 /* pipe-lined cache for partial sharing */ |
|
89 |
|
90 class Cache(initial_size: Int = 131071, max_string: Int = 100) |
|
91 { |
|
92 private val cache_actor = actor |
|
93 { |
|
94 val table = new WeakHashMap[Any, WeakReference[Any]](initial_size) |
|
95 |
|
96 def lookup[A](x: A): Option[A] = |
|
97 { |
|
98 val ref = table.get(x) |
|
99 if (ref == null) None |
|
100 else { |
|
101 val y = ref.asInstanceOf[WeakReference[A]].get |
|
102 if (y == null) None |
|
103 else Some(y) |
|
104 } |
|
105 } |
|
106 def store[A](x: A): A = |
|
107 { |
|
108 table.put(x, new WeakReference[Any](x)) |
|
109 x |
|
110 } |
|
111 |
|
112 def trim_bytes(s: String): String = new String(s.toCharArray) |
|
113 |
|
114 def cache_string(x: String): String = |
|
115 lookup(x) match { |
|
116 case Some(y) => y |
|
117 case None => |
|
118 val z = trim_bytes(x) |
|
119 if (z.length > max_string) z else store(z) |
|
120 } |
|
121 def cache_props(x: Properties.T): Properties.T = |
|
122 if (x.isEmpty) x |
|
123 else |
|
124 lookup(x) match { |
|
125 case Some(y) => y |
|
126 case None => store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2)))) |
|
127 } |
|
128 def cache_markup(x: Markup): Markup = |
|
129 lookup(x) match { |
|
130 case Some(y) => y |
|
131 case None => |
|
132 x match { |
|
133 case Markup(name, props) => |
|
134 store(Markup(cache_string(name), cache_props(props))) |
|
135 } |
|
136 } |
|
137 def cache_tree(x: XML.Tree): XML.Tree = |
|
138 lookup(x) match { |
|
139 case Some(y) => y |
|
140 case None => |
|
141 x match { |
|
142 case XML.Elem(markup, body) => |
|
143 store(XML.Elem(cache_markup(markup), cache_body(body))) |
|
144 case XML.Text(text) => store(XML.Text(cache_string(text))) |
|
145 } |
|
146 } |
|
147 def cache_body(x: XML.Body): XML.Body = |
|
148 if (x.isEmpty) x |
|
149 else |
|
150 lookup(x) match { |
|
151 case Some(y) => y |
|
152 case None => x.map(cache_tree(_)) |
|
153 } |
|
154 |
|
155 // main loop |
|
156 loop { |
|
157 react { |
|
158 case Cache_String(x, f) => f(cache_string(x)) |
|
159 case Cache_Markup(x, f) => f(cache_markup(x)) |
|
160 case Cache_Tree(x, f) => f(cache_tree(x)) |
|
161 case Cache_Body(x, f) => f(cache_body(x)) |
|
162 case Cache_Ignore(x, f) => f(x) |
|
163 case bad => System.err.println("XML.cache_actor: ignoring bad input " + bad) |
|
164 } |
|
165 } |
|
166 } |
|
167 |
|
168 private case class Cache_String(x: String, f: String => Unit) |
|
169 private case class Cache_Markup(x: Markup, f: Markup => Unit) |
|
170 private case class Cache_Tree(x: XML.Tree, f: XML.Tree => Unit) |
|
171 private case class Cache_Body(x: XML.Body, f: XML.Body => Unit) |
|
172 private case class Cache_Ignore[A](x: A, f: A => Unit) |
|
173 |
|
174 // main methods |
|
175 def cache_string(x: String)(f: String => Unit) { cache_actor ! Cache_String(x, f) } |
|
176 def cache_markup(x: Markup)(f: Markup => Unit) { cache_actor ! Cache_Markup(x, f) } |
|
177 def cache_tree(x: XML.Tree)(f: XML.Tree => Unit) { cache_actor ! Cache_Tree(x, f) } |
|
178 def cache_body(x: XML.Body)(f: XML.Body => Unit) { cache_actor ! Cache_Body(x, f) } |
|
179 def cache_ignore[A](x: A)(f: A => Unit) { cache_actor ! Cache_Ignore(x, f) } |
|
180 } |
|
181 |
|
182 |
|
183 |
|
184 /** document object model (W3C DOM) **/ |
|
185 |
|
186 def get_data(node: org.w3c.dom.Node): Option[XML.Tree] = |
|
187 node.getUserData(Markup.Data.name) match { |
|
188 case tree: XML.Tree => Some(tree) |
|
189 case _ => None |
|
190 } |
|
191 |
|
192 def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node = |
|
193 { |
|
194 def DOM(tr: Tree): org.w3c.dom.Node = tr match { |
|
195 case Elem(Markup.Data, List(data, t)) => |
|
196 val node = DOM(t) |
|
197 node.setUserData(Markup.Data.name, data, null) |
|
198 node |
|
199 case Elem(Markup(name, atts), ts) => |
|
200 if (name == Markup.Data.name) |
|
201 error("Malformed data element: " + tr.toString) |
|
202 val node = doc.createElement(name) |
|
203 for ((name, value) <- atts) node.setAttribute(name, value) |
|
204 for (t <- ts) node.appendChild(DOM(t)) |
|
205 node |
|
206 case Text(txt) => doc.createTextNode(txt) |
|
207 } |
|
208 DOM(tree) |
|
209 } |
|
210 |
|
211 |
|
212 |
|
213 /** XML as data representation language **/ |
|
214 |
|
215 class XML_Atom(s: String) extends Exception(s) |
|
216 class XML_Body(body: XML.Body) extends Exception |
|
217 |
|
218 object Encode |
|
219 { |
|
220 type T[A] = A => XML.Body |
|
221 |
|
222 |
|
223 /* atomic values */ |
|
224 |
|
225 def long_atom(i: Long): String = i.toString |
|
226 |
|
227 def int_atom(i: Int): String = i.toString |
|
228 |
|
229 def bool_atom(b: Boolean): String = if (b) "1" else "0" |
|
230 |
|
231 def unit_atom(u: Unit) = "" |
|
232 |
|
233 |
|
234 /* structural nodes */ |
|
235 |
|
236 private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts) |
|
237 |
|
238 private def vector(xs: List[String]): XML.Attributes = |
|
239 xs.zipWithIndex.map(p => (int_atom(p._2), p._1)) |
|
240 |
|
241 private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree = |
|
242 XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2) |
|
243 |
|
244 |
|
245 /* representation of standard types */ |
|
246 |
|
247 val properties: T[Properties.T] = |
|
248 (props => List(XML.Elem(Markup(":", props), Nil))) |
|
249 |
|
250 val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s))) |
|
251 |
|
252 val long: T[Long] = (x => string(long_atom(x))) |
|
253 |
|
254 val int: T[Int] = (x => string(int_atom(x))) |
|
255 |
|
256 val bool: T[Boolean] = (x => string(bool_atom(x))) |
|
257 |
|
258 val unit: T[Unit] = (x => string(unit_atom(x))) |
|
259 |
|
260 def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = |
|
261 (x => List(node(f(x._1)), node(g(x._2)))) |
|
262 |
|
263 def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = |
|
264 (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3)))) |
|
265 |
|
266 def list[A](f: T[A]): T[List[A]] = |
|
267 (xs => xs.map((x: A) => node(f(x)))) |
|
268 |
|
269 def option[A](f: T[A]): T[Option[A]] = |
|
270 { |
|
271 case None => Nil |
|
272 case Some(x) => List(node(f(x))) |
|
273 } |
|
274 |
|
275 def variant[A](fs: List[PartialFunction[A, (List[String], XML.Body)]]): T[A] = |
|
276 { |
|
277 case x => |
|
278 val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get |
|
279 List(tagged(tag, f(x))) |
|
280 } |
|
281 } |
|
282 |
|
283 object Decode |
|
284 { |
|
285 type T[A] = XML.Body => A |
|
286 type V[A] = (List[String], XML.Body) => A |
|
287 |
|
288 |
|
289 /* atomic values */ |
|
290 |
|
291 def long_atom(s: String): Long = |
|
292 try { java.lang.Long.parseLong(s) } |
|
293 catch { case e: NumberFormatException => throw new XML_Atom(s) } |
|
294 |
|
295 def int_atom(s: String): Int = |
|
296 try { Integer.parseInt(s) } |
|
297 catch { case e: NumberFormatException => throw new XML_Atom(s) } |
|
298 |
|
299 def bool_atom(s: String): Boolean = |
|
300 if (s == "1") true |
|
301 else if (s == "0") false |
|
302 else throw new XML_Atom(s) |
|
303 |
|
304 def unit_atom(s: String): Unit = |
|
305 if (s == "") () else throw new XML_Atom(s) |
|
306 |
|
307 |
|
308 /* structural nodes */ |
|
309 |
|
310 private def node(t: XML.Tree): XML.Body = |
|
311 t match { |
|
312 case XML.Elem(Markup(":", Nil), ts) => ts |
|
313 case _ => throw new XML_Body(List(t)) |
|
314 } |
|
315 |
|
316 private def vector(atts: XML.Attributes): List[String] = |
|
317 { |
|
318 val xs = new mutable.ListBuffer[String] |
|
319 var i = 0 |
|
320 for ((a, x) <- atts) { |
|
321 if (int_atom(a) == i) { xs += x; i = i + 1 } |
|
322 else throw new XML_Atom(a) |
|
323 } |
|
324 xs.toList |
|
325 } |
|
326 |
|
327 private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) = |
|
328 t match { |
|
329 case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts)) |
|
330 case _ => throw new XML_Body(List(t)) |
|
331 } |
|
332 |
|
333 |
|
334 /* representation of standard types */ |
|
335 |
|
336 val properties: T[Properties.T] = |
|
337 { |
|
338 case List(XML.Elem(Markup(":", props), Nil)) => props |
|
339 case ts => throw new XML_Body(ts) |
|
340 } |
|
341 |
|
342 val string: T[String] = |
|
343 { |
|
344 case Nil => "" |
|
345 case List(XML.Text(s)) => s |
|
346 case ts => throw new XML_Body(ts) |
|
347 } |
|
348 |
|
349 val long: T[Long] = (x => long_atom(string(x))) |
|
350 |
|
351 val int: T[Int] = (x => int_atom(string(x))) |
|
352 |
|
353 val bool: T[Boolean] = (x => bool_atom(string(x))) |
|
354 |
|
355 val unit: T[Unit] = (x => unit_atom(string(x))) |
|
356 |
|
357 def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = |
|
358 { |
|
359 case List(t1, t2) => (f(node(t1)), g(node(t2))) |
|
360 case ts => throw new XML_Body(ts) |
|
361 } |
|
362 |
|
363 def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = |
|
364 { |
|
365 case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3))) |
|
366 case ts => throw new XML_Body(ts) |
|
367 } |
|
368 |
|
369 def list[A](f: T[A]): T[List[A]] = |
|
370 (ts => ts.map(t => f(node(t)))) |
|
371 |
|
372 def option[A](f: T[A]): T[Option[A]] = |
|
373 { |
|
374 case Nil => None |
|
375 case List(t) => Some(f(node(t))) |
|
376 case ts => throw new XML_Body(ts) |
|
377 } |
|
378 |
|
379 def variant[A](fs: List[V[A]]): T[A] = |
|
380 { |
|
381 case List(t) => |
|
382 val (tag, (xs, ts)) = tagged(t) |
|
383 val f = |
|
384 try { fs(tag) } |
|
385 catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) } |
|
386 f(xs, ts) |
|
387 case ts => throw new XML_Body(ts) |
|
388 } |
|
389 } |
|
390 } |
|