| author | wenzelm | 
| Fri, 27 Mar 2020 22:01:27 +0100 | |
| changeset 71601 | 97ccf48c2f0c | 
| parent 70828 | cb70d84a9f5e | 
| child 73024 | 337e1b135d2f | 
| permissions | -rw-r--r-- | 
| 44698 | 1  | 
/* Title: Pure/PIDE/xml.scala  | 
| 27931 | 2  | 
Author: Makarius  | 
3  | 
||
| 44698 | 4  | 
Untyped XML trees and basic data representation.  | 
| 27931 | 5  | 
*/  | 
6  | 
||
7  | 
package isabelle  | 
|
8  | 
||
| 55618 | 9  | 
|
| 29203 | 10  | 
object XML  | 
11  | 
{
 | 
|
| 43767 | 12  | 
/** XML trees **/  | 
13  | 
||
| 27947 | 14  | 
/* datatype representation */  | 
15  | 
||
| 65753 | 16  | 
// A single XML attribute: alias for Properties.Entry.
type Attribute = Properties.Entry  | 
| 43780 | 17  | 
// The attributes of an element: alias for Properties.T.
type Attributes = Properties.T  | 
| 27931 | 18  | 
|
| 57912 | 19  | 
  // Closed base type of XML nodes; toString renders the node via string_of_tree.
  sealed abstract class Tree { override def toString: String = string_of_tree(this) }
 | 
| 64354 | 20  | 
// A list of sibling XML nodes.
type Body = List[Tree]  | 
21  | 
/* Element node: markup (name plus properties) with a body of child nodes. */
case class Elem(markup: Markup, body: Body) extends Tree
{
  /* element name, as given by the markup */
  def name: String = markup.name

  /* merge further attributes into the markup; the very same instance is
     returned when there is nothing to add */
  def update_attributes(more_attributes: Attributes): Elem =
    if (more_attributes.nonEmpty) Elem(markup.update_properties(more_attributes), body)
    else this

  /* element with one attribute added to its markup */
  def + (att: Attribute): Elem =
  {
    val extended = markup + att
    Elem(extended, body)
  }
}
| 29204 | 31  | 
// Text node wrapping a plain string.
case class Text(content: String) extends Tree  | 
| 29203 | 32  | 
|
| 66196 | 33  | 
/* Convenience constructors for elements; omitted properties and body are Nil. */
def elem(markup: Markup): XML.Elem = XML.Elem(markup, Nil)
def elem(name: String, body: Body): XML.Elem =
{
  val markup = Markup(name, Nil)
  XML.Elem(markup, body)
}
def elem(name: String): XML.Elem = elem(name, Nil)
|
| 
38267
 
e50c283dd125
type XML.Body as basic data representation language (Scala version);
 
wenzelm 
parents: 
38263 
diff
changeset
 | 
36  | 
|
| 69867 | 37  | 
  // Text node holding a single line-feed character.
  val newline: Text = Text("\n")
 | 
38  | 
||
| 29203 | 39  | 
|
| 69805 | 40  | 
/* name space */  | 
41  | 
||
42  | 
/* Companion: public factory for the class below, whose constructor is private. */
object Namespace
{
  def apply(prefix: String, target: String): Namespace =
    new Namespace(prefix, target)
}

/* XML name space: a prefix bound to a target string. */
final class Namespace private(prefix: String, target: String)
{
  /* qualify a name as "prefix:name" */
  def apply(name: String): String = s"$prefix:$name"

  /* attribute "xmlns:prefix" carrying the target as its value */
  val attribute: XML.Attribute = (s"xmlns:$prefix", target)

  /* printed form is that of the attribute pair */
  override def toString: String = attribute.toString
}
|
55  | 
||
56  | 
||
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
57  | 
/* wrapped elements */  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
58  | 
|
| 60215 | 59  | 
// Markup name of a wrapped element (also used for root elements).
val XML_ELEM = "xml_elem"  | 
60  | 
// Property key under which a wrapped element keeps the original element name.
val XML_NAME = "xml_name"  | 
|
61  | 
// Markup name of the child element that carries the first body of a wrapped element.
val XML_BODY = "xml_body"  | 
|
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
62  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
63  | 
/* Wrapped element: the original markup and a first body packed into an
   XML_ELEM element, followed by a second body. */
object Wrapped_Elem
{
  /* XML_ELEM element whose properties start with (XML_NAME, name) and whose
     first child is an XML_BODY element holding body1; body2 follows it */
  def apply(markup: Markup, body1: Body, body2: Body): XML.Elem =
  {
    val props = (XML_NAME, markup.name) :: markup.properties
    val inner = XML.Elem(Markup(XML_BODY, Nil), body1)
    XML.Elem(Markup(XML_ELEM, props), inner :: body2)
  }

  /* inverse of apply; None for a tree of any other shape */
  def unapply(tree: Tree): Option[(Markup, Body, Body)] =
    tree match {
      case XML.Elem(
          Markup(XML_ELEM, (XML_NAME, name) :: props),
          XML.Elem(Markup(XML_BODY, Nil), body1) :: body2) =>
        Some((Markup(name, props), body1, body2))
      case _ => None
    }
}
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
78  | 
|
| 
67818
 
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
 
wenzelm 
parents: 
67113 
diff
changeset
 | 
79  | 
/* Root element: a whole body represented as a single XML_ELEM element
   without properties. */
object Root_Elem
{
  def apply(body: Body): XML.Elem =
  {
    val root = Markup(XML_ELEM, Nil)
    XML.Elem(root, body)
  }

  /* body of such a root element; None for anything else */
  def unapply(tree: Tree): Option[Body] =
    tree match {
      case XML.Elem(Markup(XML_ELEM, Nil), ts) => Some(ts)
      case _ => None
    }
}
| 
 
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
 
wenzelm 
parents: 
67113 
diff
changeset
 | 
88  | 
|
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
89  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
90  | 
/* traverse text */  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
91  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
92  | 
/* Fold op over all text nodes of a body, from left to right, starting
   with a.  For a wrapped element only the second body is visited. */
def traverse_text[A](body: Body)(a: A)(op: (A, String) => A): A =
{
  def visit(acc: A, node: Tree): A =
    node match {
      case XML.Wrapped_Elem(_, _, rest) => rest.foldLeft(acc)(visit)
      case XML.Elem(_, children) => children.foldLeft(acc)(visit)
      case XML.Text(txt) => op(acc, txt)
    }
  body.foldLeft(a)(visit)
}
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
102  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
103  | 
  /* total number of characters in the text nodes visited by traverse_text */
  def text_length(body: Body): Int =
    traverse_text(body)(0)((total, txt) => total + txt.length)
 | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
104  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
105  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
106  | 
/* text content */  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
107  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
108  | 
/* Concatenated text of a body, in traversal order. */
def content(body: Body): String =
{
  // buffer is created with the total text length as initial capacity
  val buffer = new StringBuilder(text_length(body))
  traverse_text(body)(()) { (_, txt) => buffer.append(txt); () }
  buffer.toString
}
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
114  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
115  | 
/* text content of a single tree */
def content(tree: Tree): String = content(tree :: Nil)
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
116  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
117  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
118  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
119  | 
/** string representation **/  | 
| 29203 | 120  | 
|
| 69804 | 121  | 
// XML declaration line (version 1.0, utf-8 encoding), terminated by a newline.
val header: String = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"  | 
122  | 
||
| 65990 | 123  | 
/* Append character c to s, replacing each of the five XML special
   characters by its predefined entity reference:

     <  becomes  &lt;      >  becomes  &gt;      &  becomes  &amp;
     "  becomes  &quot;    '  becomes  &apos;

   Every other character is appended unchanged.  Without this escaping,
   text or attribute values containing markup characters would produce
   ill-formed output in string_of_body. */
def output_char(c: Char, s: StringBuilder): Unit =
{
  c match {
    case '<' => s ++= "&lt;"
    case '>' => s ++= "&gt;"
    case '&' => s ++= "&amp;"
    case '"' => s ++= "&quot;"
    case '\'' => s ++= "&apos;"
    case _ => s += c
  }
}
|
134  | 
||
135  | 
/* Append str to s with every character passed through output_char.
   A null reference is handed to the builder directly, without escaping. */
def output_string(str: String, s: StringBuilder): Unit =
{
  if (str != null) { for (c <- str) output_char(c, s) }
  else s ++= str
}
|
140  | 
||
| 
38268
 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 
wenzelm 
parents: 
38267 
diff
changeset
 | 
141  | 
/* XML source text of a body: elements as tags with attributes, text and
   attribute values escaped via output_string.  Elements with empty body
   use the short form <name .../>. */
def string_of_body(body: Body): String =
{
  val out = new StringBuilder

  def escaped(txt: String): Unit = output_string(txt, out)

  // element name followed by its attributes in the form  key="value"
  def open_tag(markup: Markup): Unit =
  {
    out ++= markup.name
    for ((key, value) <- markup.properties) {
      out += ' '
      out ++= key
      out += '='
      out += '"'
      escaped(value)
      out += '"'
    }
  }

  def render(t: Tree): Unit =
    t match {
      case XML.Text(txt) => escaped(txt)
      case XML.Elem(markup, Nil) =>
        out += '<'
        open_tag(markup)
        out ++= "/>"
      case XML.Elem(markup, children) =>
        out += '<'
        open_tag(markup)
        out += '>'
        children.foreach(render)
        out ++= "</"
        out ++= markup.name
        out += '>'
    }

  body.foreach(render)
  out.toString
}
166  | 
||
| 
38268
 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 
wenzelm 
parents: 
38267 
diff
changeset
 | 
167  | 
/* XML source text of a single tree */
def string_of_tree(tree: XML.Tree): String = string_of_body(tree :: Nil)
| 27941 | 168  | 
|
169  | 
||
| 44808 | 170  | 
|
| 68265 | 171  | 
/** cache **/  | 
| 34108 | 172  | 
|
| 68169 | 173  | 
/* Fresh XML cache; both arguments are passed on unchanged to the Cache constructor. */
def make_cache(initial_size: Int = 131071, max_string: Int = 100): Cache =
{
  val cache = new Cache(initial_size, max_string)
  cache
}
|
175  | 
||
176  | 
/* Cache for XML values on top of isabelle.Cache.  The protected cache_*
   operations do no locking of their own; the public methods at the end
   run them inside synchronized. */
class Cache private[XML](initial_size: Int, max_string: Int)
  extends isabelle.Cache(initial_size, max_string)
{
  /* properties: keys are isolated and interned, values go through cache_string */
  protected def cache_props(x: Properties.T): Properties.T =
    if (x.isEmpty) x
    else
      lookup(x) match {
        case Some(cached) => cached
        case None =>
          store(x.map(entry =>
            (Library.isolate_substring(entry._1).intern, cache_string(entry._2))))
      }

  protected def cache_markup(x: Markup): Markup =
    lookup(x) match {
      case Some(cached) => cached
      case None =>
        x match {
          case Markup(name, props) =>
            val cached_name = cache_string(name)
            store(Markup(cached_name, cache_props(props)))
        }
    }

  protected def cache_tree(x: XML.Tree): XML.Tree =
    lookup(x) match {
      case Some(cached) => cached
      case None =>
        x match {
          case XML.Text(text) => store(XML.Text(cache_string(text)))
          case XML.Elem(markup, body) =>
            val cached_markup = cache_markup(markup)
            store(XML.Elem(cached_markup, cache_body(body)))
        }
    }

  /* body: the elements are cached one by one; the resulting list itself
     is returned without being stored */
  protected def cache_body(x: XML.Body): XML.Body =
    if (x.isEmpty) x
    else
      lookup(x) match {
        case Some(cached) => cached
        case None => x.map(cache_tree)
      }

  // main methods
  def props(x: Properties.T): Properties.T = synchronized { cache_props(x) }
  def markup(x: Markup): Markup = synchronized { cache_markup(x) }
  def tree(x: XML.Tree): XML.Tree = synchronized { cache_tree(x) }
  def body(x: XML.Body): XML.Body = synchronized { cache_body(x) }
  def elem(x: XML.Elem): XML.Elem =
    synchronized {
      val cached = cache_tree(x)
      cached.asInstanceOf[XML.Elem]
    }
}
231  | 
||
232  | 
||
| 43767 | 233  | 
|
234  | 
/** XML as data representation language **/  | 
|
235  | 
||
| 51987 | 236  | 
// Base class of the exceptions declared for the XML data representation.
abstract class Error(s: String) extends Exception(s)  | 
237  | 
// Error about an atomic value; the given string becomes the exception message.
class XML_Atom(s: String) extends Error(s)  | 
|
238  | 
  // Error about a body; the exception message is empty and the body argument is not kept as a member.
  class XML_Body(body: XML.Body) extends Error("")
 | 
|
| 43767 | 239  | 
|
240  | 
/* Encoders: functions from values to XML bodies. */
object Encode
{
  type T[A] = A => XML.Body
  type V[A] = PartialFunction[A, (List[String], XML.Body)]
  type P[A] = PartialFunction[A, List[String]]


  /* atomic values */

  def long_atom(i: Long): String = Library.signed_string_of_long(i)

  def int_atom(i: Int): String = Library.signed_string_of_int(i)

  def bool_atom(b: Boolean): String =
    b match {
      case true => "1"
      case false => "0"
    }

  def unit_atom(u: Unit): String = ""


  /* structural nodes */

  // element named ":" without properties, wrapping the given body
  private def node(ts: XML.Body): XML.Tree =
  {
    val markup = Markup(":", Nil)
    XML.Elem(markup, ts)
  }

  // attributes keyed by position: (int_atom(index), value)
  private def vector(xs: List[String]): XML.Attributes =
    for ((x, i) <- xs.zipWithIndex) yield (int_atom(i), x)

  // element named by the tag, with the atoms as attributes and the given body
  private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
  {
    val markup = Markup(int_atom(tag), vector(data._1))
    XML.Elem(markup, data._2)
  }


  /* representation of standard types */

  val tree: T[XML.Tree] = (t => t :: Nil)

  val properties: T[Properties.T] =
    (props => XML.Elem(Markup(":", props), Nil) :: Nil)

  // the empty string is encoded as the empty body
  val string: T[String] = (s => if (s.nonEmpty) List(XML.Text(s)) else Nil)

  val long: T[Long] = (i => string(long_atom(i)))

  val int: T[Int] = (i => string(int_atom(i)))

  val bool: T[Boolean] = (b => string(bool_atom(b)))

  val unit: T[Unit] = (u => string(unit_atom(u)))

  def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
    (x => {
      val fst = node(f(x._1))
      val snd = node(g(x._2))
      List(fst, snd)
    })

  def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
    (x => {
      val fst = node(f(x._1))
      val snd = node(g(x._2))
      val thd = node(h(x._3))
      List(fst, snd, thd)
    })

  def list[A](f: T[A]): T[List[A]] =
    (xs => for (x <- xs) yield node(f(x)))

  def option[A](f: T[A]): T[Option[A]] =
  {
    case Some(x) => List(node(f(x)))
    case None => Nil
  }

  /* the first alternative defined at the value is applied; its position
     in fs becomes the tag */
  def variant[A](fs: List[V[A]]): T[A] =
    (x => {
      val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get
      tagged(tag, f(x)) :: Nil
    })
}
|
308  | 
||
309  | 
/* Decoders: functions from XML bodies back to values.  Input of
   unexpected shape raises XML_Atom or XML_Body. */
object Decode
{
  type T[A] = XML.Body => A
  type V[A] = (List[String], XML.Body) => A
  type P[A] = PartialFunction[List[String], A]


  /* atomic values */

  def long_atom(s: String): Long =
    try { java.lang.Long.parseLong(s) }
    catch { case _: NumberFormatException => throw new XML_Atom(s) }

  def int_atom(s: String): Int =
    try { Integer.parseInt(s) }
    catch { case _: NumberFormatException => throw new XML_Atom(s) }

  def bool_atom(s: String): Boolean =
    s match {
      case "1" => true
      case "0" => false
      case _ => throw new XML_Atom(s)
    }

  def unit_atom(s: String): Unit =
    if (s != "") throw new XML_Atom(s)


  /* structural nodes */

  // body of an element named ":" without properties
  private def node(t: XML.Tree): XML.Body =
    t match {
      case XML.Elem(Markup(":", Nil), body) => body
      case _ => throw new XML_Body(List(t))
    }

  // attribute values in order; each key must be the decimal position of its entry
  private def vector(atts: XML.Attributes): List[String] =
    (for (((key, value), index) <- atts.iterator.zipWithIndex) yield {
      if (int_atom(key) == index) value else throw new XML_Atom(key)
    }).toList

  // tag (from the element name), atoms (from the attributes) and body
  private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
    t match {
      case XML.Elem(Markup(name, atts), body) =>
        val tag = int_atom(name)
        (tag, (vector(atts), body))
      case _ => throw new XML_Body(List(t))
    }


  /* representation of standard types */

  val tree: T[XML.Tree] =
  {
    case List(single) => single
    case other => throw new XML_Body(other)
  }

  val properties: T[Properties.T] =
  {
    case List(XML.Elem(Markup(":", props), Nil)) => props
    case other => throw new XML_Body(other)
  }

  val string: T[String] =
  {
    case Nil => ""
    case List(XML.Text(s)) => s
    case other => throw new XML_Body(other)
  }

  val long: T[Long] = (body => long_atom(string(body)))

  val int: T[Int] = (body => int_atom(string(body)))

  val bool: T[Boolean] = (body => bool_atom(string(body)))

  val unit: T[Unit] = (body => unit_atom(string(body)))

  def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
  {
    case List(t1, t2) =>
      val a = f(node(t1))
      val b = g(node(t2))
      (a, b)
    case other => throw new XML_Body(other)
  }

  def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
  {
    case List(t1, t2, t3) =>
      val a = f(node(t1))
      val b = g(node(t2))
      val c = h(node(t3))
      (a, b, c)
    case other => throw new XML_Body(other)
  }

  def list[A](f: T[A]): T[List[A]] =
    (ts => for (t <- ts) yield f(node(t)))

  def option[A](f: T[A]): T[Option[A]] =
  {
    case Nil => None
    case List(t) => Some(f(node(t)))
    case other => throw new XML_Body(other)
  }

  /* the tag selects the decoder by position in fs; a tag outside of the
     list is reported as XML_Body */
  def variant[A](fs: List[V[A]]): T[A] =
  {
    case List(t) =>
      val (tag, (atoms, body)) = tagged(t)
      val decoder =
        try { fs(tag) }
        catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) }
      decoder(atoms, body)
    case other => throw new XML_Body(other)
  }
}
|
| 27931 | 416  | 
}  |