author | wenzelm |
Sun, 11 Mar 2018 21:08:47 +0100 | |
changeset 67827 | b027c97c77c9 |
parent 67818 | 2457bea123e4 |
child 68169 | 395432e7516e |
permissions | -rw-r--r-- |
44698 | 1 |
/* Title: Pure/PIDE/xml.scala |
27931 | 2 |
Author: Makarius |
3 |
||
44698 | 4 |
Untyped XML trees and basic data representation. |
27931 | 5 |
*/ |
6 |
||
7 |
package isabelle |
|
8 |
||
55618 | 9 |
|
67109
5fce3a24e476
proper synchronized Map: this may be used on multiple threads;
wenzelm
parents:
66196
diff
changeset
|
10 |
import java.util.{Collections, WeakHashMap} |
34108 | 11 |
import java.lang.ref.WeakReference |
12 |
import javax.xml.parsers.DocumentBuilderFactory |
|
13 |
||
27947 | 14 |
|
29203 | 15 |
object XML |
16 |
{ |
|
43767 | 17 |
/** XML trees **/ |
18 |
||
27947 | 19 |
/* datatype representation */ |
20 |
||
65753 | 21 |
// One attribute is a single property entry; the attributes of an element
// form a property list.
type Attribute = Properties.Entry
type Attributes = Properties.T

// Untyped XML tree.  Printing a tree delegates to string_of_tree.
sealed abstract class Tree
{
  override def toString: String = string_of_tree(this)
}

// A body is a list of sibling trees.
type Body = List[Tree]

// Element node: markup together with the child trees.
case class Elem(markup: Markup, body: Body) extends Tree
{
  // the element name is the name of its markup
  def name: String = markup.name

  // Element whose markup is markup.update_properties(more_attributes);
  // for an empty argument this very element is returned.
  def update_attributes(more_attributes: Attributes): Elem =
    if (more_attributes.nonEmpty) Elem(markup.update_properties(more_attributes), body)
    else this

  // element whose markup is markup + att, with unchanged body
  def + (att: Attribute): Elem = Elem(markup + att, body)
}

// Text node.
case class Text(content: String) extends Tree
29203 | 37 |
|
66196 | 38 |
// Element constructors; a missing body or missing properties default to Nil.
def elem(markup: Markup): XML.Elem = XML.Elem(markup, Nil)
def elem(name: String, body: Body): XML.Elem = XML.Elem(Markup(name, Nil), body)
def elem(name: String): XML.Elem = elem(name, Nil)
|
38267
e50c283dd125
type XML.Body as basic data representation language (Scala version);
wenzelm
parents:
38263
diff
changeset
|
41 |
|
29203 | 42 |
|
49650
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
43 |
/* wrapped elements */ |
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
44 |
|
60215 | 45 |
// Reserved names for wrapped elements: XML_ELEM is the name of the wrapper
// element, XML_NAME the attribute that holds the wrapped markup name, and
// XML_BODY the name of the element that holds the wrapped body.
val XML_ELEM: String = "xml_elem"
val XML_NAME: String = "xml_name"
val XML_BODY: String = "xml_body"
|
49650
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
48 |
|
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
49 |
object Wrapped_Elem
{
  // Wrap: the markup name becomes the leading XML_NAME attribute of an
  // XML_ELEM element, body1 is put into a leading XML_BODY child, and body2
  // follows as the remaining children.
  def apply(markup: Markup, body1: Body, body2: Body): XML.Elem =
  {
    val wrapper = Markup(XML_ELEM, (XML_NAME, markup.name) :: markup.properties)
    val wrapped_body = XML.Elem(Markup(XML_BODY, Nil), body1)
    XML.Elem(wrapper, wrapped_body :: body2)
  }

  // Unwrap: recovers (markup, body1, body2) from the shape produced by apply;
  // any other tree yields None.
  def unapply(tree: Tree): Option[(Markup, Body, Body)] =
    tree match {
      case XML.Elem(Markup(XML_ELEM, (XML_NAME, name) :: props),
          XML.Elem(Markup(XML_BODY, Nil), body1) :: body2) =>
        Some((Markup(name, props), body1, body2))
      case _ => None
    }
}
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
64 |
|
67818
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
wenzelm
parents:
67113
diff
changeset
|
65 |
object Root_Elem
{
  // A body is represented as the children of one XML_ELEM element that has
  // no properties.
  def apply(body: Body): XML.Elem = XML.Elem(Markup(XML_ELEM, Nil), body)

  // Only an XML_ELEM element with empty properties is accepted; its children
  // are the result.
  def unapply(tree: Tree): Option[Body] =
    Some(tree).collect({ case XML.Elem(Markup(XML_ELEM, Nil), children) => children })
}
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
wenzelm
parents:
67113
diff
changeset
|
74 |
|
49650
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
75 |
|
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
76 |
/* traverse text */ |
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
77 |
|
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
78 |
// Left-to-right fold of op over all text nodes below body, starting from a.
// For a wrapped element only its trailing body (third component of
// Wrapped_Elem) is visited.
def traverse_text[A](body: Body)(a: A)(op: (A, String) => A): A =
{
  def visit(acc: A, node: Tree): A =
    node match {
      case XML.Wrapped_Elem(_, _, children) => children.foldLeft(acc)(visit)
      case XML.Elem(_, children) => children.foldLeft(acc)(visit)
      case XML.Text(str) => op(acc, str)
    }
  body.foldLeft(a)(visit)
}

// Sum of the lengths of all text nodes visited by traverse_text.
def text_length(body: Body): Int = traverse_text(body)(0)((len, str) => len + str.length)
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
90 |
|
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
91 |
|
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
92 |
/* text content */ |
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
93 |
|
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
94 |
// Concatenation of all text nodes visited by traverse_text.
def content(body: Body): String =
{
  // a first pass determines the capacity of the buffer
  val result = new StringBuilder(text_length(body))
  traverse_text(body)(()) { (_, str) => result.append(str); () }
  result.toString
}

// Text content of a single tree.
def content(tree: Tree): String = content(tree :: Nil)
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
102 |
|
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
103 |
|
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
104 |
|
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents:
49613
diff
changeset
|
105 |
/** string representation **/ |
29203 | 106 |
|
65990 | 107 |
// Append character c to s.  The five XML special characters are replaced by
// their predefined entities, so that the output stays well-formed both in
// text and inside double-quoted attribute values; any other character is
// appended unchanged.
def output_char(c: Char, s: StringBuilder): Unit =
{
  c match {
    case '<' => s ++= "&lt;"
    case '>' => s ++= "&gt;"
    case '&' => s ++= "&amp;"
    case '"' => s ++= "&quot;"
    case '\'' => s ++= "&apos;"
    case _ => s += c
  }
}
|
118 |
||
119 |
// Append str to s, one character at a time via output_char.  A null
// reference is handed to the builder's ++= as it is, without going through
// output_char.
def output_string(str: String, s: StringBuilder): Unit =
{
  if (str != null) { for (c <- str.iterator) output_char(c, s) }
  else { s ++= str }
}
|
124 |
||
38268
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
wenzelm
parents:
38267
diff
changeset
|
125 |
// Render body as XML source text: elements without children use the
// self-closing form, attribute values and text nodes go through
// output_string.
def string_of_body(body: Body): String =
{
  val out = new StringBuilder

  def add_text(txt: String): Unit = output_string(txt, out)

  // element name followed by each property as  name="value"
  def add_markup(markup: Markup): Unit =
  {
    out ++= markup.name
    markup.properties.foreach { case (a, b) =>
      out += ' '; out ++= a; out += '='; out += '"'; add_text(b); out += '"'
    }
  }

  def add_tree(t: Tree): Unit =
    t match {
      case XML.Text(txt) => add_text(txt)
      case XML.Elem(markup, Nil) =>
        out += '<'; add_markup(markup); out ++= "/>"
      case XML.Elem(markup, ts) =>
        out += '<'; add_markup(markup); out += '>'
        ts.foreach(add_tree)
        out ++= "</"; out ++= markup.name; out += '>'
    }

  body.foreach(add_tree)
  out.toString
}

// String representation of a single tree.
def string_of_tree(tree: XML.Tree): String = string_of_body(tree :: Nil)
27941 | 152 |
|
153 |
||
44808 | 154 |
|
155 |
/** cache for partial sharing (weak table) **/ |
|
34108 | 156 |
|
43745 | 157 |
// Weak table of values seen so far, used to return one shared instance for
// equal strings, properties, markup and trees.  initial_size is the initial
// capacity of the table; strings longer than max_string are returned without
// being stored.
class Cache(initial_size: Int = 131071, max_string: Int = 100)
{
  // every stored value is its own key; keys and values are both held weakly
  private val table =
    Collections.synchronizedMap(new WeakHashMap[Any, WeakReference[Any]](initial_size))

  def size: Int = table.size

  // stored value equal to x, provided its weak reference is still alive
  private def lookup[A](x: A): Option[A] =
    Option(table.get(x)).flatMap(ref => Option(ref.asInstanceOf[WeakReference[A]].get))

  // enter x into the table and return it
  private def store[A](x: A): A =
  {
    table.put(x, new WeakReference[Any](x))
    x
  }

  // A few special values are mapped to literals and small integers are
  // re-created via Library.signed_string_of_int; neither touches the table.
  private def cache_string(x: String): String =
    if (x == "") ""
    else if (x == "true") "true"
    else if (x == "false") "false"
    else if (x == "0.0") "0.0"
    else if (Library.is_small_int(x)) Library.signed_string_of_int(Integer.parseInt(x))
    else
      lookup(x).getOrElse {
        val z = Library.isolate_substring(x)
        if (z.length > max_string) z else store(z)
      }

  // property names are interned, property values go through cache_string
  private def cache_props(x: Properties.T): Properties.T =
    if (x.isEmpty) x
    else
      lookup(x).getOrElse(
        store(x.map(p => (Library.isolate_substring(p._1).intern, cache_string(p._2)))))

  private def cache_markup(x: Markup): Markup =
    lookup(x).getOrElse {
      val Markup(name, props) = x
      store(Markup(cache_string(name), cache_props(props)))
    }

  private def cache_tree(x: XML.Tree): XML.Tree =
    lookup(x).getOrElse {
      x match {
        case XML.Text(text) => store(XML.Text(cache_string(text)))
        case XML.Elem(markup, body) =>
          store(XML.Elem(cache_markup(markup), cache_body(body)))
      }
    }

  // the trees of a body are cached one by one; the resulting list itself is
  // not stored
  private def cache_body(x: XML.Body): XML.Body =
    if (x.isEmpty) x
    else lookup(x).getOrElse(x.map(cache_tree(_)))

  // main methods
  def string(x: String): String = synchronized { cache_string(x) }
  def props(x: Properties.T): Properties.T = synchronized { cache_props(x) }
  def markup(x: Markup): Markup = synchronized { cache_markup(x) }
  def tree(x: XML.Tree): XML.Tree = synchronized { cache_tree(x) }
  def body(x: XML.Body): XML.Body = synchronized { cache_body(x) }
  def elem(x: XML.Elem): XML.Elem = synchronized { cache_tree(x).asInstanceOf[XML.Elem] }
}
235 |
||
236 |
||
43767 | 237 |
|
238 |
/** XML as data representation language **/ |
|
239 |
||
51987 | 240 |
// Errors of the data representation language.  XML_Atom carries the
// offending atom as its message; XML_Body has an empty message and does not
// retain the body it was given.
abstract class Error(s: String) extends Exception(s)

class XML_Atom(s: String) extends Error(s)

class XML_Body(body: XML.Body) extends Error("")
|
43767 | 243 |
|
244 |
object Encode
{
  // T: encoder of a value as XML body.
  // V: one variant case, yielding atomic strings plus a body where defined.
  type T[A] = A => XML.Body
  type V[A] = PartialFunction[A, (List[String], XML.Body)]


  /* atomic values */

  def long_atom(i: Long): String = Library.signed_string_of_long(i)

  def int_atom(i: Int): String = Library.signed_string_of_int(i)

  def bool_atom(b: Boolean): String = if (b) "1" else "0"

  def unit_atom(u: Unit): String = ""


  /* structural nodes */

  // anonymous node: element named ":" without properties
  private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)

  // atomic values as attributes, keyed by their position
  private def vector(xs: List[String]): XML.Attributes =
    xs.zipWithIndex.map({ case (x, i) => (int_atom(i), x) })

  // element named by the tag, with atoms as properties and the given body
  private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
  {
    val (atoms, body) = data
    XML.Elem(Markup(int_atom(tag), vector(atoms)), body)
  }


  /* representation of standard types */

  val tree: T[XML.Tree] = (t => t :: Nil)

  val properties: T[Properties.T] =
    (props => XML.Elem(Markup(":", props), Nil) :: Nil)

  // the empty string is represented by the empty body
  val string: T[String] = (s => if (s.isEmpty) Nil else XML.Text(s) :: Nil)

  val long: T[Long] = (i => string(long_atom(i)))

  val int: T[Int] = (i => string(int_atom(i)))

  val bool: T[Boolean] = (b => string(bool_atom(b)))

  val unit: T[Unit] = (u => string(unit_atom(u)))

  def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
    (x => node(f(x._1)) :: node(g(x._2)) :: Nil)

  def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
    (x => node(f(x._1)) :: node(g(x._2)) :: node(h(x._3)) :: Nil)

  def list[A](f: T[A]): T[List[A]] =
    (xs => xs.map(x => node(f(x))))

  def option[A](f: T[A]): T[Option[A]] =
  {
    case Some(x) => node(f(x)) :: Nil
    case None => Nil
  }

  // The first case defined at x determines the tag (its position in fs);
  // if no case is defined the .get on the search result fails.
  def variant[A](fs: List[V[A]]): T[A] =
    (x =>
      {
        val (f, tag) = fs.iterator.zipWithIndex.find({ case (g, _) => g.isDefinedAt(x) }).get
        tagged(tag, f(x)) :: Nil
      })
}
|
311 |
||
312 |
object Decode
{
  // T: decoder of an XML body.
  // V: one variant case, applied to atomic strings plus a body.
  type T[A] = XML.Body => A
  type V[A] = (List[String], XML.Body) => A


  /* atomic values */

  // malformed atoms are reported as XML_Atom

  def long_atom(s: String): Long =
    try { java.lang.Long.parseLong(s) }
    catch { case _: NumberFormatException => throw new XML_Atom(s) }

  def int_atom(s: String): Int =
    try { Integer.parseInt(s) }
    catch { case _: NumberFormatException => throw new XML_Atom(s) }

  def bool_atom(s: String): Boolean =
    if (s == "0") false
    else if (s == "1") true
    else throw new XML_Atom(s)

  def unit_atom(s: String): Unit =
    if (s != "") throw new XML_Atom(s)


  /* structural nodes */

  // body of an anonymous node: element named ":" without properties
  private def node(t: XML.Tree): XML.Body =
    t match {
      case XML.Elem(Markup(":", Nil), body) => body
      case _ => throw new XML_Body(List(t))
    }

  // attribute values in order; each attribute name must be its own position
  private def vector(atts: XML.Attributes): List[String] =
    atts.iterator.zipWithIndex.map(
      { case ((a, x), i) => if (int_atom(a) != i) throw new XML_Atom(a) else x }).toList

  // tag (from the element name), atoms (from the properties) and body
  private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
    t match {
      case XML.Elem(Markup(name, atts), body) => (int_atom(name), (vector(atts), body))
      case _ => throw new XML_Body(List(t))
    }


  /* representation of standard types */

  // a body of unexpected shape is reported as XML_Body

  val tree: T[XML.Tree] =
  {
    case t :: Nil => t
    case body => throw new XML_Body(body)
  }

  val properties: T[Properties.T] =
  {
    case XML.Elem(Markup(":", props), Nil) :: Nil => props
    case body => throw new XML_Body(body)
  }

  // the empty body is the empty string
  val string: T[String] =
  {
    case Nil => ""
    case XML.Text(s) :: Nil => s
    case body => throw new XML_Body(body)
  }

  val long: T[Long] = (body => long_atom(string(body)))

  val int: T[Int] = (body => int_atom(string(body)))

  val bool: T[Boolean] = (body => bool_atom(string(body)))

  val unit: T[Unit] = (body => unit_atom(string(body)))

  def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
  {
    case t1 :: t2 :: Nil => (f(node(t1)), g(node(t2)))
    case body => throw new XML_Body(body)
  }

  def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
  {
    case t1 :: t2 :: t3 :: Nil => (f(node(t1)), g(node(t2)), h(node(t3)))
    case body => throw new XML_Body(body)
  }

  def list[A](f: T[A]): T[List[A]] =
    (body => body.map(t => f(node(t))))

  def option[A](f: T[A]): T[Option[A]] =
  {
    case Nil => None
    case t :: Nil => Some(f(node(t)))
    case body => throw new XML_Body(body)
  }

  // The tag selects the case by its position in fs; a tag outside of fs is
  // reported as XML_Body of the offending tree.
  def variant[A](fs: List[V[A]]): T[A] =
  {
    case t :: Nil =>
      val (tag, (xs, ts)) = tagged(t)
      val f = fs.lift(tag).getOrElse(throw new XML_Body(List(t)))
      f(xs, ts)
    case body => throw new XML_Body(body)
  }
}
|
27931 | 418 |
} |