author | wenzelm |
Wed, 11 Aug 2010 00:42:01 +0200 | |
changeset 38268 | beb86b805590 |
parent 38267 | e50c283dd125 |
child 38446 | 9d59dab38fef |
permissions | -rw-r--r-- |
27931 | 1 |
/*  Title:      Pure/General/xml.scala
    Author:     Makarius

Simple XML tree values.
*/
6 |
||
7 |
package isabelle |
|
8 |
||
34108 | 9 |
import java.util.WeakHashMap |
10 |
import java.lang.ref.WeakReference |
|
11 |
import javax.xml.parsers.DocumentBuilderFactory |
|
12 |
||
27947 | 13 |
|
29203 | 14 |
object XML |
15 |
{ |
|
27947 | 16 |
/* datatype representation */ |
17 |
||
27931 | 18 |
// List of string pairs (presumably attribute name/value pairs -- confirm
// against users of this alias).
type Attributes = List[(String, String)]

// XML tree: either an element (Elem) or a text leaf (Text).
// The printed form of a tree is produced by string_of_tree.
sealed abstract class Tree
{
  override def toString: String = string_of_tree(this)
}

// Element node: markup together with the list of child trees.
case class Elem(markup: Markup, body: List[Tree]) extends Tree

// Text leaf holding its content string.
case class Text(content: String) extends Tree
29203 | 23 |
|
38230
ed147003de4b
simplified type XML.Tree: embed Markup directly, avoid slightly odd triple;
wenzelm
parents:
36817
diff
changeset
|
24 |
// Element with the given name, no properties, and the given body.
def elem(name: String, body: List[Tree]): Elem =
  Elem(Markup(name, Nil), body)

// Element with the given name, no properties, and an empty body.
def elem(name: String): Elem =
  elem(name, Nil)

// A body is a list of trees.
type Body = List[Tree]
e50c283dd125
type XML.Body as basic data representation language (Scala version);
wenzelm
parents:
38263
diff
changeset
|
28 |
|
29203 | 29 |
|
30 |
/* string representation */ |
|
31 |
||
38268
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
wenzelm
parents:
38267
diff
changeset
|
32 |
// Render a list of trees as XML text.
//
// Elements with an empty body are written in the short form <name .../>,
// all other elements as <name ...>body</name>.  Text content and
// attribute values are escaped via the predefined XML entity references.
def string_of_body(body: Body): String =
{
  // Output buffer shared by all local helpers below.
  val s = new StringBuilder

  // Append txt, replacing each of the five XML special characters by its
  // predefined entity reference.  A null string is handed to the builder
  // as is, without any escaping.
  def text(txt: String): Unit =
  {
    if (txt == null) s ++= txt
    else {
      for (c <- txt.iterator) c match {
        case '<' => s ++= "&lt;"
        case '>' => s ++= "&gt;"
        case '&' => s ++= "&amp;"
        case '"' => s ++= "&quot;"
        case '\'' => s ++= "&apos;"
        case _ => s += c
      }
    }
  }

  // Append one attribute as: space, name, ="escaped value".
  def attrib(p: (String, String)): Unit =
  {
    s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\""
  }

  // Append the element name followed by all of its properties.
  def elem(markup: Markup): Unit =
  {
    s ++= markup.name; markup.properties.foreach(attrib)
  }

  // Append a single tree, recursively.
  def tree(t: Tree): Unit =
    t match {
      case Elem(markup, Nil) =>
        s ++= "<"; elem(markup); s ++= "/>"
      case Elem(markup, ts) =>
        s ++= "<"; elem(markup); s ++= ">"
        ts.foreach(tree)
        s ++= "</"; s ++= markup.name; s ++= ">"
      case Text(txt) => text(txt)
    }

  body.foreach(tree)
  s.toString
}
64 |
||
38268
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
wenzelm
parents:
38267
diff
changeset
|
65 |
// Render a single tree: string_of_body applied to the one-element body.
def string_of_tree(tree: XML.Tree): String =
  string_of_body(tree :: Nil)
27941 | 66 |
|
67 |
||
27942 | 68 |
/* iterate over content */ |
27941 | 69 |
|
70 |
// Iteration state: the next text content together with the trees that
// still have to be visited afterwards; None when nothing is left.
private type State = Option[(String, List[Tree])]

// First text content found in the given tree (if any), paired with the
// trees remaining after it.
private def get_next(tree: Tree): State =
  tree match {
    case Text(content) => Some((content, Nil))
    case Elem(_, body) => get_nexts(body)
  }
|
76 |
// First text content found in the given list of trees, scanning from the
// left; the trees following the hit are appended to its remaining work.
private def get_nexts(trees: List[Tree]): State =
  trees match {
    case first :: others =>
      get_next(first)
        .map({ case (str, pending) => (str, pending ++ others) })
        .orElse(get_nexts(others))
    case Nil => None
  }
|
83 |
||
84 |
// Iterator over the text contents of a tree, in the order in which
// get_next / get_nexts find them.
def content(tree: Tree): Iterator[String] =
  new Iterator[String] {
    // Upcoming text plus remaining trees; None once exhausted.
    private var state = get_next(tree)

    def hasNext: Boolean = state.isDefined

    def next(): String =
      state match {
        case None => throw new NoSuchElementException("next on empty iterator")
        case Some((str, pending)) =>
          state = get_nexts(pending)
          str
      }
  }
|
92 |
||
27947 | 93 |
|
34108 | 94 |
/* cache for partial sharing -- NOT THREAD SAFE */ |
95 |
||
96 |
// Cache that maps values to previously stored equal instances, so that
// equal strings, property lists, markup and trees can share one object.
// See the section comment above: this is NOT thread safe.
class Cache(initial_size: Int)
{
  // Each stored value is both the key and (weakly referenced) the value
  // of its table entry.
  private val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)

  // Previously stored instance for x, if the table has an entry whose
  // weak reference has not been cleared.
  private def lookup[A](x: A): Option[A] =
    table.get(x) match {
      case null => None
      case ref => Option(ref.get.asInstanceOf[A])
    }

  // Enter x into the table and return it.
  private def store[A](x: A): A =
  {
    val entry = new WeakReference[Any](x)
    table.put(x, entry)
    x
  }

  // Cached instance of a string; a miss stores a fresh copy built from the
  // character array (trim string value).
  def cache_string(x: String): String =
    lookup(x).getOrElse(store(new String(x.toCharArray)))

  // Cached instance of a property list; the empty list is returned as is.
  // On a miss, both components of every pair go through cache_string and
  // the resulting list is stored.
  def cache_props(x: List[(String, String)]): List[(String, String)] =
    if (x.isEmpty) x
    else lookup(x).getOrElse(store(x.map(p => (cache_string(p._1), cache_string(p._2)))))

  // Cached instance of markup; on a miss, name and properties are cached
  // individually and the rebuilt markup is stored.
  def cache_markup(x: Markup): Markup =
    lookup(x).getOrElse({
      val Markup(name, props) = x
      store(Markup(cache_string(name), cache_props(props)))
    })

  // Cached instance of a tree; on a miss, the tree is rebuilt from cached
  // components and stored.
  def cache_tree(x: XML.Tree): XML.Tree =
    lookup(x) match {
      case None =>
        x match {
          case XML.Text(text) =>
            store(XML.Text(cache_string(text)))
          case XML.Elem(markup, body) =>
            store(XML.Elem(cache_markup(markup), cache_trees(body)))
        }
      case Some(y) => y
    }

  // Cached version of a list of trees; the empty list is returned as is.
  // On a miss, every member goes through cache_tree; the resulting list
  // itself is not stored.
  def cache_trees(x: List[XML.Tree]): List[XML.Tree] =
    if (x.isEmpty) x
    else lookup(x).getOrElse(x.map(cache_tree))
}
155 |
||
156 |
||
33953 | 157 |
/* document object model (W3C DOM) */ |
27948
2638b611d3ce
renamed DOM to document, add xml version and optional stylesheets;
wenzelm
parents:
27947
diff
changeset
|
158 |
|
34871
e596a0b71f3c
incorporate "proofdocument" part into main Isabelle/Pure.jar -- except for html_panel.scala, which depends on external library (Lobo/Cobra browser);
wenzelm
parents:
34133
diff
changeset
|
159 |
// Retrieve the XML tree attached to a DOM node as user data under the key
// Markup.Data.name; None if the user data is absent or not an XML.Tree.
def get_data(node: org.w3c.dom.Node): Option[XML.Tree] =
  PartialFunction.condOpt(node.getUserData(Markup.Data.name)) {
    case tree: XML.Tree => tree
  }
|
164 |
||
34871
e596a0b71f3c
incorporate "proofdocument" part into main Isabelle/Pure.jar -- except for html_panel.scala, which depends on external library (Lobo/Cobra browser);
wenzelm
parents:
34133
diff
changeset
|
165 |
// Convert a tree into a DOM node owned by the given document.
//
// An element with markup Markup.Data and exactly two children (data, t) is
// not turned into a DOM element of its own: t is converted and data is
// attached to the resulting node as user data under Markup.Data.name.
// Any other element named Markup.Data.name is reported via error.
def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node =
{
  def to_node(tr: Tree): org.w3c.dom.Node =
    tr match {
      case Text(txt) => doc.createTextNode(txt)

      case Elem(Markup.Data, List(data, t)) =>
        val result = to_node(t)
        result.setUserData(Markup.Data.name, data, null)
        result

      case Elem(Markup(name, atts), ts) =>
        if (name == Markup.Data.name)
          error("Malformed data element: " + tr.toString)
        val result = doc.createElement(name)
        for ((att_name, att_value) <- atts) result.setAttribute(att_name, att_value)
        ts.foreach(t => result.appendChild(to_node(t)))
        result
    }

  to_node(tree)
}
|
27931 | 183 |
} |