| author | wenzelm | 
| Mon, 08 May 2017 11:00:20 +0200 | |
| changeset 65768 | b8da621a3297 | 
| parent 65753 | 787e5ee6ef53 | 
| child 65772 | 368399c5d87f | 
| permissions | -rw-r--r-- | 
| 44698 | 1  | 
/* Title: Pure/PIDE/xml.scala  | 
| 27931 | 2  | 
Author: Makarius  | 
3  | 
||
| 44698 | 4  | 
Untyped XML trees and basic data representation.  | 
| 27931 | 5  | 
*/  | 
6  | 
||
7  | 
package isabelle  | 
|
8  | 
||
| 55618 | 9  | 
|
| 34108 | 10  | 
import java.util.WeakHashMap  | 
11  | 
import java.lang.ref.WeakReference  | 
|
12  | 
import javax.xml.parsers.DocumentBuilderFactory  | 
|
13  | 
||
| 27947 | 14  | 
|
| 29203 | 15  | 
object XML  | 
16  | 
{
 | 
|
| 43767 | 17  | 
/** XML trees **/  | 
18  | 
||
| 27947 | 19  | 
/* datatype representation */  | 
20  | 
||
| 65753 | 21  | 
type Attribute = Properties.Entry  | 
| 43780 | 22  | 
type Attributes = Properties.T  | 
| 27931 | 23  | 
|
| 57912 | 24  | 
  /** Common base of XML trees; the printed form is produced by string_of_tree. */
  sealed abstract class Tree
  {
    override def toString: String = string_of_tree(this)
  }
 | 
| 64354 | 25  | 
type Body = List[Tree]  | 
26  | 
/** XML element: markup together with a body of sub-trees. */
case class Elem(markup: Markup, body: Body) extends Tree
{
  /** Element name, as given by the markup. */
  def name: String = markup.name

  /**
   * Element with `more_attributes` merged into the markup via
   * `markup.update_properties`; this very element is returned when there
   * is nothing to merge.
   */
  def update_attributes(more_attributes: Attributes): Elem =
    if (more_attributes.isEmpty) this
    else Elem(markup.update_properties(more_attributes), body)

  /**
   * Element with the single attribute `att` added to the markup.  The result
   * is always an element, hence the result type `Elem` (a subtype of the
   * previously declared `Tree`, so existing callers keep working).
   */
  def + (att: Attribute): Elem = Elem(markup + att, body)
}
| 29204 | 36  | 
case class Text(content: String) extends Tree  | 
| 29203 | 37  | 
|
| 64354 | 38  | 
/** Element `name` without attributes, wrapping the given body. */
def elem(name: String, body: Body): XML.Elem =
{
  val markup = Markup(name, Nil)
  XML.Elem(markup, body)
}

/** Element `name` without attributes and with empty body. */
def elem(name: String): XML.Elem = elem(name, Nil)
|
| 
38267
 
e50c283dd125
type XML.Body as basic data representation language (Scala version);
 
wenzelm 
parents: 
38263 
diff
changeset
 | 
40  | 
|
| 29203 | 41  | 
|
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
42  | 
/* wrapped elements */  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
43  | 
|
| 60215 | 44  | 
val XML_ELEM = "xml_elem"  | 
45  | 
val XML_NAME = "xml_name"  | 
|
46  | 
val XML_BODY = "xml_body"  | 
|
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
47  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
48  | 
/**
 * Wrapped element: an XML_ELEM element whose properties start with the
 * XML_NAME entry (the original element name) followed by the original
 * properties, and whose body starts with an XML_BODY element holding
 * `body1`, followed by `body2`.
 */
object Wrapped_Elem
{
  /** Build the wrapped representation of `markup` with bodies `body1` and `body2`. */
  def apply(markup: Markup, body1: Body, body2: Body): XML.Elem =
  {
    val wrapper = Markup(XML_ELEM, (XML_NAME, markup.name) :: markup.properties)
    val inner = XML.Elem(Markup(XML_BODY, Nil), body1)
    XML.Elem(wrapper, inner :: body2)
  }

  /** Recover (markup, body1, body2) from a tree of exactly the shape built by `apply`. */
  def unapply(tree: Tree): Option[(Markup, Body, Body)] =
    tree match {
      case XML.Elem(
          Markup(XML_ELEM, (XML_NAME, name) :: props),
          XML.Elem(Markup(XML_BODY, Nil), body1) :: body2) =>
        Some((Markup(name, props), body1, body2))
      case _ =>
        None
    }
}
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
63  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
64  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
65  | 
/* traverse text */  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
66  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
67  | 
/**
 * Left-to-right fold of `op` over the text nodes of `body`, starting from `a`.
 * For a wrapped element only its second body is visited; for an ordinary
 * element its whole body.
 */
def traverse_text[A](body: Body)(a: A)(op: (A, String) => A): A =
{
  def visit(acc: A, tree: Tree): A =
    tree match {
      // order matters: a wrapped element is also an ordinary XML.Elem
      case XML.Wrapped_Elem(_, _, inner) => inner.foldLeft(acc)(visit)
      case XML.Elem(_, children) => children.foldLeft(acc)(visit)
      case XML.Text(txt) => op(acc, txt)
    }
  body.foldLeft(a)(visit)
}
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
77  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
78  | 
  /** Sum of the lengths of all text nodes visited by traverse_text. */
  def text_length(body: Body): Int =
    traverse_text(body)(0)((count, txt) => count + txt.length)
 | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
79  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
80  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
81  | 
/* text content */  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
82  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
83  | 
/**
 * Concatenation of all text nodes visited by traverse_text.  The builder is
 * allocated with the exact text length up front and threaded through the fold.
 */
def content(body: Body): String =
{
  val buffer = new StringBuilder(text_length(body))
  traverse_text(body)(buffer)((acc, txt) => acc.append(txt)).toString
}
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
89  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
90  | 
/** Text content of a single tree, treated as a one-element body. */
def content(tree: Tree): String = content(tree :: Nil)
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
91  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
92  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
93  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
94  | 
/** string representation **/  | 
| 29203 | 95  | 
|
| 
38268
 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 
wenzelm 
parents: 
38267 
diff
changeset
 | 
96  | 
/**
 * Render `body` as XML source text.  Elements with empty body are written in
 * the short form `<name .../>`; text nodes and attribute values have the five
 * XML special characters replaced by their predefined entities.
 */
def string_of_body(body: Body): String =
{
  val s = new StringBuilder

  // Append `txt`, escaping < > & " ' so that the output stays well-formed.
  def text(txt: String): Unit =
  {
    if (txt == null) s ++= txt  // null is handed to the builder unchanged, as before
    else {
      for (c <- txt.iterator) c match {
        case '<' => s ++= "&lt;"
        case '>' => s ++= "&gt;"
        case '&' => s ++= "&amp;"
        case '"' => s ++= "&quot;"
        case '\'' => s ++= "&apos;"
        case _ => s += c
      }
    }
  }

  // Append one attribute as ` name="escaped value"`.
  def attrib(p: (String, String)): Unit =
  {
    s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\""
  }

  // Append element name followed by all attributes (without angle brackets).
  def elem(markup: Markup): Unit =
  {
    s ++= markup.name; markup.properties.foreach(attrib)
  }

  def tree(t: Tree): Unit =
    t match {
      case XML.Elem(markup, Nil) =>
        s ++= "<"; elem(markup); s ++= "/>"
      case XML.Elem(markup, ts) =>
        s ++= "<"; elem(markup); s ++= ">"
        ts.foreach(tree)
        s ++= "</"; s ++= markup.name; s ++= ">"
      case XML.Text(txt) => text(txt)
    }

  body.foreach(tree)
  s.toString
}
128  | 
||
| 
38268
 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 
wenzelm 
parents: 
38267 
diff
changeset
 | 
129  | 
/** XML source text of a single tree, rendered as a one-element body. */
def string_of_tree(tree: XML.Tree): String = string_of_body(tree :: Nil)
| 27941 | 130  | 
|
131  | 
||
| 44808 | 132  | 
|
133  | 
/** cache for partial sharing (weak table) **/  | 
|
| 34108 | 134  | 
|
| 43745 | 135  | 
/**
 * Cache for partial sharing of strings, properties, markup and trees, backed
 * by a weak table that maps each stored value to a weak reference to itself.
 *
 * @param initial_size  initial capacity of the underlying WeakHashMap
 * @param max_string    strings longer than this are returned without being stored
 */
class Cache(initial_size: Int = 131071, max_string: Int = 100)
{
  // mutated in place only, never reassigned
  private val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)

  // Previously stored value equal to `x`, if its weak reference is still alive.
  private def lookup[A](x: A): Option[A] =
    for {
      ref <- Option(table.get(x))
      y <- Option(ref.get)
    } yield y.asInstanceOf[A]

  // Register `x` as the shared representative of its equality class.
  private def store[A](x: A): A =
  {
    table.put(x, new WeakReference[Any](x))
    x
  }

  // Special values are mapped to literals (or to Library.signed_string_of_int
  // for small integers) and bypass the table altogether.
  private def cache_string(x: String): String =
    if (x == "true") "true"
    else if (x == "false") "false"
    else if (x == "0.0") "0.0"
    else if (Library.is_small_int(x)) Library.signed_string_of_int(Integer.parseInt(x))
    else
      lookup(x).getOrElse {
        val z = Library.trim_substring(x)
        if (z.length > max_string) z else store(z)
      }

  // Property names are trimmed and interned, property values go through cache_string.
  private def cache_props(x: Properties.T): Properties.T =
    if (x.isEmpty) x
    else
      lookup(x).getOrElse(
        store(x.map(p => (Library.trim_substring(p._1).intern, cache_string(p._2)))))

  private def cache_markup(x: Markup): Markup =
    lookup(x).getOrElse(
      x match {
        case Markup(name, props) =>
          store(Markup(cache_string(name), cache_props(props)))
      })

  private def cache_tree(x: XML.Tree): XML.Tree =
    lookup(x).getOrElse(
      x match {
        case XML.Elem(markup, body) =>
          store(XML.Elem(cache_markup(markup), cache_body(body)))
        case XML.Text(text) =>
          store(XML.Text(cache_string(text)))
      })

  // NOTE(review): the rebuilt list itself is not stored, only its elements are
  // (via cache_tree) -- kept as in the original; confirm this is intended.
  private def cache_body(x: XML.Body): XML.Body =
    if (x.isEmpty) x
    else lookup(x).getOrElse(x.map(cache_tree(_)))

  // main methods: each one runs under the lock of this cache instance
  def string(x: String): String = synchronized { cache_string(x) }
  def props(x: Properties.T): Properties.T = synchronized { cache_props(x) }
  def markup(x: Markup): Markup = synchronized { cache_markup(x) }
  def tree(x: XML.Tree): XML.Tree = synchronized { cache_tree(x) }
  def body(x: XML.Body): XML.Body = synchronized { cache_body(x) }
  def elem(x: XML.Elem): XML.Elem = synchronized { cache_tree(x).asInstanceOf[XML.Elem] }
}
209  | 
||
210  | 
||
| 43767 | 211  | 
|
212  | 
/** XML as data representation language **/  | 
|
213  | 
||
| 51987 | 214  | 
abstract class Error(s: String) extends Exception(s)  | 
215  | 
class XML_Atom(s: String) extends Error(s)  | 
|
216  | 
  class XML_Body(body: XML.Body) extends Error("")
 | 
|
| 43767 | 217  | 
|
218  | 
/**
 * Encoders from values to XML bodies.  `T[A]` encodes a value of type `A`;
 * `V[A]` is one case of a variant: a partial function yielding a vector of
 * atomic strings plus a body.
 */
object Encode
{
  type T[A] = A => XML.Body
  type V[A] = PartialFunction[A, (List[String], XML.Body)]


  /* atomic values */

  def long_atom(i: Long): String = Library.signed_string_of_long(i)

  def int_atom(i: Int): String = Library.signed_string_of_int(i)

  def bool_atom(b: Boolean): String = if (b) "1" else "0"

  def unit_atom(u: Unit): String = ""


  /* structural nodes */

  // anonymous node: element named ":" without properties
  private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)

  // vector of atomic values: properties keyed by position 0, 1, 2, ...
  private def vector(xs: List[String]): XML.Attributes =
    xs.zipWithIndex.map({ case (x, i) => (int_atom(i), x) })

  // tagged node: element named by the tag number, carrying vector and body
  private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
    XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2)


  /* representation of standard types */

  val tree: T[XML.Tree] = (t => List(t))

  val properties: T[Properties.T] =
    (props => List(XML.Elem(Markup(":", props), Nil)))

  val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s)))

  val long: T[Long] = (x => string(long_atom(x)))

  val int: T[Int] = (x => string(int_atom(x)))

  val bool: T[Boolean] = (x => string(bool_atom(x)))

  val unit: T[Unit] = (x => string(unit_atom(x)))

  def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
    (x => List(node(f(x._1)), node(g(x._2))))

  def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
    (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3))))

  def list[A](f: T[A]): T[List[A]] =
    (xs => xs.map((x: A) => node(f(x))))

  def option[A](f: T[A]): T[Option[A]] =
  {
    case None => Nil
    case Some(x) => List(node(f(x)))
  }

  /**
   * Encode via the first case of `fs` that is defined for the value; its
   * position in the list becomes the tag.  Throws NoSuchElementException
   * when no case applies.
   */
  def variant[A](fs: List[V[A]]): T[A] =
  {
    case x =>
      val (f, tag) =
        fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).getOrElse(
          throw new NoSuchElementException("XML.Encode.variant: no matching case"))
      List(tagged(tag, f(x)))
  }
}
|
285  | 
||
286  | 
object Decode
{
  /* decoder types: T reads a plain body, V reads the attribute vector and body
     of a tagged element (see variant) */

  type T[A] = XML.Body => A
  type V[A] = (List[String], XML.Body) => A


  /* atomic values */

  // shared wrapper for numerals: NumberFormatException is turned into XML_Atom
  private def numeric_atom[A](s: String)(parse: String => A): A =
    try { parse(s) }
    catch { case _: NumberFormatException => throw new XML_Atom(s) }

  def long_atom(s: String): Long =
    numeric_atom[Long](s)(str => java.lang.Long.parseLong(str))

  def int_atom(s: String): Int =
    numeric_atom[Int](s)(str => Integer.parseInt(str))

  // "1" is true, "0" is false, anything else is rejected
  def bool_atom(s: String): Boolean =
    s match {
      case "1" => true
      case "0" => false
      case _ => throw new XML_Atom(s)
    }

  // only the empty string denotes the unit value
  def unit_atom(s: String): Unit =
    if (s != "") throw new XML_Atom(s)


  /* structural nodes */

  // body of a ":" element without attributes; any other tree is rejected
  private def node(t: XML.Tree): XML.Body =
    t match {
      case XML.Elem(Markup(":", Nil), body) => body
      case _ => throw new XML_Body(t :: Nil)
    }

  // attribute values in order; the name of each attribute must be its own position
  private def vector(atts: XML.Attributes): List[String] =
    atts.iterator.zipWithIndex.map(
      { case ((index, value), pos) =>
          if (int_atom(index) != pos) throw new XML_Atom(index)
          else value
      }).toList

  // element name as numeric tag, together with attribute vector and body
  private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
    t match {
      case XML.Elem(Markup(name, atts), body) =>
        val tag = int_atom(name)
        val args = vector(atts)
        (tag, (args, body))
      case _ => throw new XML_Body(t :: Nil)
    }


  /* representation of standard types */

  // exactly one tree
  val tree: T[XML.Tree] =
  {
    case t :: Nil => t
    case body => throw new XML_Body(body)
  }

  // attributes of a single ":" element with empty body
  val properties: T[Properties.T] =
  {
    case XML.Elem(Markup(":", props), Nil) :: Nil => props
    case body => throw new XML_Body(body)
  }

  // empty body is the empty string, otherwise exactly one text node
  val string: T[String] =
  {
    case Nil => ""
    case XML.Text(s) :: Nil => s
    case body => throw new XML_Body(body)
  }

  val long: T[Long] = string.andThen(long_atom)

  val int: T[Int] = string.andThen(int_atom)

  val bool: T[Boolean] = string.andThen(bool_atom)

  val unit: T[Unit] = string.andThen(unit_atom)

  def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
  {
    case t1 :: t2 :: Nil =>
      val a = f(node(t1))
      val b = g(node(t2))
      (a, b)
    case body => throw new XML_Body(body)
  }

  def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
  {
    case t1 :: t2 :: t3 :: Nil =>
      val a = f(node(t1))
      val b = g(node(t2))
      val c = h(node(t3))
      (a, b, c)
    case body => throw new XML_Body(body)
  }

  def list[A](f: T[A]): T[List[A]] =
    (body => for (t <- body) yield f(node(t)))

  def option[A](f: T[A]): T[Option[A]] =
  {
    case Nil => None
    case t :: Nil => Some(f(node(t)))
    case body => throw new XML_Body(body)
  }

  // the numeric tag of the single element selects the decoder from fs by position;
  // a tag outside the range of fs is rejected as XML_Body
  def variant[A](fs: List[V[A]]): T[A] =
  {
    case t :: Nil =>
      val (tag, (args, body)) = tagged(t)
      val decode = fs.lift(tag).getOrElse(throw new XML_Body(t :: Nil))
      decode(args, body)
    case body => throw new XML_Body(body)
  }
}
|
| 27931 | 392  | 
}  |