| author | wenzelm | 
| Tue, 22 Oct 2024 12:45:38 +0200 | |
| changeset 81229 | e18600daa904 | 
| parent 80818 | da2557168da7 | 
| child 81428 | 257ec066b360 | 
| permissions | -rw-r--r-- | 
| 44698 | 1  | 
/* Title: Pure/PIDE/xml.scala  | 
| 27931 | 2  | 
Author: Makarius  | 
3  | 
||
| 44698 | 4  | 
Untyped XML trees and basic data representation.  | 
| 27931 | 5  | 
*/  | 
6  | 
||
7  | 
package isabelle  | 
|
8  | 
||
| 80430 | 9  | 
import scala.annotation.tailrec  | 
10  | 
||
| 55618 | 11  | 
|
| 75393 | 12  | 
object XML {
 | 
| 43767 | 13  | 
/** XML trees **/  | 
14  | 
||
| 27947 | 15  | 
/* datatype representation */  | 
16  | 
||
| 65753 | 17  | 
type Attribute = Properties.Entry  | 
| 43780 | 18  | 
type Attributes = Properties.T  | 
| 27931 | 19  | 
|
| 80430 | 20  | 
// Common supertype of everything that can appear on the work list of
// Traversal.traverse: proper trees and End markers.
trait Trav

// Work-list marker: the element called `name` is still waiting for its end tag.
case class End(name: String) extends Trav

// Proper XML tree nodes (Elem and Text below).
sealed abstract class Tree extends Trav {
  // Printed via string_of_tree.
  override def toString: String = XML.string_of_tree(this)
}
|
| 64354 | 26  | 
type Body = List[Tree]  | 
| 80430 | 27  | 
  // Element node: markup (name plus properties) around a body of child trees.
  case class Elem(markup: Markup, body: Body) extends Tree with Trav {
    // Hash of (markup, body); lazy, so it is computed only on first demand.
    private lazy val hash: Int = (markup, body).hashCode()
    override def hashCode(): Int = hash

    // Element name, as given by the markup.
    def name: String = markup.name

    // Update the markup properties; an empty update returns this very instance.
    def update_attributes(more_attributes: Attributes): Elem =
      if (!more_attributes.isEmpty) copy(markup = markup.update_properties(more_attributes))
      else this

    // Add one attribute to the markup, keeping the body.
    def + (att: Attribute): Elem = copy(markup = markup + att)
  }
| 80430 | 39  | 
  // Text node holding a plain string.
  case class Text(content: String) extends Tree with Trav {
    // The hash of the content string; lazy, so it is computed only on first demand.
    private lazy val hash: Int = content.hashCode()
    override def hashCode(): Int = hash
  }
|
| 29203 | 43  | 
|
| 80429 | 44  | 
  // Event-style visitor over XML bodies: implementations are told about text,
  // opening (or self-closing) elements and closing elements.
  trait Traversal {
    def text(s: String): Unit
    def elem(markup: Markup, end: Boolean = false): Unit
    def end_elem(name: String): Unit

    // Visit the given trees using an explicit work list rather than recursion
    // over the tree depth; both local loops are tail-recursive.
    def traverse(trees: List[Tree]): Unit = {
      // Consume leading items that need no descent (text, childless elements,
      // End markers) and return the work list from the first item that does.
      @tailrec def flush_atomic(pending: List[Trav]): List[Trav] =
        pending match {
          case Text(s) :: more =>
            text(s)
            flush_atomic(more)
          case Elem(markup, Nil) :: more =>
            // a childless element with empty markup produces no event at all
            if (!markup.is_empty) elem(markup, end = true)
            flush_atomic(more)
          case End(name) :: more =>
            end_elem(name)
            flush_atomic(more)
          case _ => pending
        }

      // After flush_atomic the list is either exhausted or headed by an element
      // with children, hence the @unchecked two-case match.
      @tailrec def descend(pending: List[Trav]): Unit =
        (flush_atomic(pending) : @unchecked) match {
          case Nil =>
          case Elem(markup, body) :: more if body.nonEmpty =>
            // empty markup is transparent: only the children are visited
            if (markup.is_empty) descend(flush_atomic(body) ::: more)
            else {
              elem(markup)
              // schedule the end tag right after the children
              descend(flush_atomic(body) ::: End(markup.name) :: more)
            }
        }

      descend(trees)
    }
  }
71  | 
||
| 80434 | 72  | 
// Element constructors: markup without body, name with body, bare name.
def elem(markup: Markup): XML.Elem = XML.Elem(markup, Nil)
def elem(name: String, body: Body): XML.Elem = XML.Elem(Markup(name, Nil), body)
def elem(name: String): XML.Elem = elem(name, Nil)

val no_text: Text = Text("")
val newline: Text = Text("\n")

// Body consisting of one text node; the empty string gives the empty body.
def string(s: String): Body = if (s.nonEmpty) List(Text(s)) else Nil

// Surround a body with opening and closing text (each omitted when empty).
def enclose(bg: String, en:String, body: Body): Body = {
  val opening = string(bg)
  opening ::: body ::: string(en)
}
|
83  | 
||
| 29203 | 84  | 
|
| 69805 | 85  | 
/* name space */  | 
86  | 
||
| 75393 | 87  | 
  // Public factory for the private Namespace constructor.
  object Namespace {
    def apply(prefix: String, target: String): Namespace =
      new Namespace(prefix, target)
  }

  // A name space given by a prefix and the target it is bound to.
  final class Namespace private(prefix: String, target: String) {
    // Qualified name of the form "prefix:name".
    def apply(name: String): String = s"${prefix}:${name}"

    // Attribute "xmlns:prefix" with the target as its value.
    val attribute: XML.Attribute = (s"xmlns:${prefix}", target)

    override def toString: String = attribute.toString
  }
|
98  | 
||
99  | 
||
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
100  | 
/* wrapped elements */  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
101  | 
|
| 60215 | 102  | 
// Reserved names used by Wrapped_Elem, Wrapped_Elem_Body and Root_Elem.
val XML_ELEM: String = "xml_elem"
val XML_NAME: String = "xml_name"
val XML_BODY: String = "xml_body"
|
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
105  | 
|
| 75393 | 106  | 
  // Wrapped element: an XML_ELEM node whose properties start with the original
  // name (under XML_NAME) followed by the original properties. Its first child
  // is an XML_BODY element holding body1; the remaining children are body2.
  object Wrapped_Elem {
    def apply(markup: Markup, body1: Body, body2: Body): XML.Elem = {
      val wrapper = Markup(XML_ELEM, (XML_NAME, markup.name) :: markup.properties)
      val first = XML.Elem(Markup(XML_BODY, Nil), body1)
      XML.Elem(wrapper, first :: body2)
    }

    // Inverse of apply; any tree that is not of the wrapped shape gives None.
    def unapply(tree: Tree): Option[(Markup, Body, Body)] =
      PartialFunction.condOpt(tree) {
        case XML.Elem(Markup(XML_ELEM, (XML_NAME, name) :: props),
          XML.Elem(Markup(XML_BODY, Nil), body1) :: body2) =>
            (Markup(name, props), body1, body2)
      }
  }
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
120  | 
|
| 80455 | 121  | 
  // Extractor for wrapped elements that yields only the children following the
  // leading XML_BODY element; the wrapped markup and that first child are ignored.
  object Wrapped_Elem_Body {
    def unapply(tree: Tree): Option[Body] =
      PartialFunction.condOpt(tree) {
        case XML.Elem(Markup(XML_ELEM, (XML_NAME, _) :: _),
          XML.Elem(Markup(XML_BODY, Nil), _) :: rest) => rest
      }
  }
|
131  | 
||
| 75393 | 132  | 
  // A whole body represented as one XML_ELEM element without properties.
  object Root_Elem {
    def apply(body: Body): XML.Elem = XML.Elem(Markup(XML_ELEM, Nil), body)

    // Matches only XML_ELEM elements with no properties at all.
    def unapply(tree: Tree): Option[Body] =
      PartialFunction.condOpt(tree) {
        case XML.Elem(Markup(XML_ELEM, Nil), body) => body
      }
  }
| 
 
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
 
wenzelm 
parents: 
67113 
diff
changeset
 | 
140  | 
|
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
141  | 
|
| 80818 | 142  | 
/* filter markup elements */  | 
| 80816 | 143  | 
|
| 80818 | 144  | 
// Filter a body by element name:
//   - names in `remove` drop the element together with its children;
//   - names in `expose` replace the element by its (filtered) children;
//   - all other elements are kept, with filtered children.
// `remove` is checked before `expose`. For wrapped elements the wrapped markup
// name decides, only body2 is filtered, and body1 is kept as it is.
def filter_elements(xml: XML.Body,
  remove: Markup.Elements = Markup.Elements.empty,
  expose: Markup.Elements = Markup.Elements.empty
): XML.Body = {
  // Decide about one element, given its name, its children and a way to
  // rebuild it around the filtered children.
  def select(name: String, inner: XML.Body)(rebuild: XML.Body => XML.Tree): XML.Body =
    if (remove(name)) Nil
    else if (expose(name)) filter(inner)
    else List(rebuild(filter(inner)))

  def filter(ts: XML.Body): XML.Body =
    ts.flatMap {
      case XML.Wrapped_Elem(markup, body1, body2) =>
        select(markup.name, body2)(XML.Wrapped_Elem(markup, body1, _))
      case XML.Elem(markup, body) =>
        select(markup.name, body)(XML.Elem(markup, _))
      case t => List(t)
    }

  filter(xml)
}
162  | 
||
163  | 
||
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
164  | 
/* traverse text */  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
165  | 
|
| 80439 | 166  | 
  // Fold `op` over all text nodes of the body, starting from `a`, with an
  // explicit work list (tail-recursive). For wrapped elements only the children
  // after the XML_BODY child are visited (see Wrapped_Elem_Body).
  def traverse_text[A](body: Body, a: A, op: (A, String) => A): A = {
    @tailrec def loop(acc: A, todo: List[Tree]): A =
      todo match {
        case XML.Text(s) :: more => loop(op(acc, s), more)
        // must precede the plain Elem case, which would match wrapped elements too
        case XML.Wrapped_Elem_Body(inner) :: more => loop(acc, inner ::: more)
        case XML.Elem(_, inner) :: more => loop(acc, inner ::: more)
        case Nil => acc
      }
    loop(a, body)
  }
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
176  | 
|
| 80439 | 177  | 
// Sum of String.length over all text visited by traverse_text.
def text_length(body: Body): Int =
  traverse_text[Int](body, 0, (total, s) => total + s.length)

// Sum of Symbol.length over all text visited by traverse_text.
def symbol_length(body: Body): Int =
  traverse_text[Int](body, 0, (total, s) => total + Symbol.length(s))
|
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
179  | 
|
| 80457 | 180  | 
// True iff every text node visited by traverse_text is the empty string
// (in particular for a body without any text).
def content_is_empty(body: Body): Boolean =
  traverse_text[Boolean](body, true, (empty_so_far, s) => empty_so_far && s.isEmpty)
|
182  | 
||
183  | 
  // Number of lines of the text content: 0 for empty content, otherwise the
  // newline count (via Library.count_newlines) plus one.
  def content_lines(body: Body): Int = {
    val newlines = traverse_text[Int](body, 0, (k, s) => k + Library.count_newlines(s))
    if (newlines != 0 || !content_is_empty(body)) newlines + 1 else 0
  }
|
| 80456 | 187  | 
|
| 80441 | 188  | 
// Concatenation of all text visited by traverse_text; text_length(body) is
// passed to Library.string_builder as its `hint`.
def content(body: Body): String =
  Library.string_builder(hint = text_length(body)) { buf =>
    traverse_text[Unit](body, (), (_, s) => buf.append(s))
  }

// Text content of a single tree.
def content(tree: Tree): String = content(tree :: Nil)
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
194  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
195  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
196  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
197  | 
/** string representation **/  | 
| 29203 | 198  | 
|
| 69804 | 199  | 
val header: String = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"  | 
200  | 
||
| 80429 | 201  | 
  // Serializer to XML source text: a Traversal that appends escaped text and
  // tags to the given builder.
  class Output(builder: StringBuilder) extends Traversal {
    // Append `str` with XML special characters replaced by the predefined
    // entities: '<', '>' and '&' always; '"' and '\'' only when `permissive`
    // is false. A null string is handed to the builder as it is
    // (NOTE(review): presumably rendered as "null" by the builder -- confirm).
    def string(str: String, permissive: Boolean = false): Unit = {
      if (str == null) { builder ++= str }
      else {
        str.foreach {
          case '<' => builder ++= "&lt;"
          case '>' => builder ++= "&gt;"
          case '&' => builder ++= "&amp;"
          case '"' if !permissive => builder ++= "&quot;"
          case '\'' if !permissive => builder ++= "&apos;"
          case c => builder += c
        }
      }
    }

    // Text nodes are written fully escaped.
    override def text(str: String): Unit = string(str)

    // Write a start tag with its properties as attributes; with `end` the tag
    // is self-closing. Attribute values are escaped (including quotes), names
    // are written as they are.
    override def elem(markup: Markup, end: Boolean = false): Unit = {
      builder += '<'
      builder ++= markup.name
      for ((a, b) <- markup.properties) {
        builder += ' '
        builder ++= a
        builder += '='
        builder += '"'
        string(b)
        builder += '"'
      }
      if (end) builder += '/'
      builder += '>'
    }

    // Write the end tag for the given element name.
    override def end_elem(name: String): Unit = {
      builder += '<'
      builder += '/'
      builder ++= name
      builder += '>'
    }

    // Traverse the trees and return everything accumulated in the builder so far.
    def result(ts: List[Tree]): String = { traverse(ts); builder.toString }
  }
242  | 
||
| 80429 | 243  | 
// XML source text of a body; the empty body gives "" without creating an Output.
def string_of_body(body: Body): String =
  if (body.nonEmpty) new Output(new StringBuilder).result(body)
  else ""

// XML source text of a single tree.
def string_of_tree(tree: XML.Tree): String = string_of_body(tree :: Nil)

// XML source text of a plain string, i.e. the string with Output escaping applied.
def text(s: String): String = string_of_tree(XML.Text(s))
| 27941 | 250  | 
|
| 44808 | 251  | 
|
| 80429 | 252  | 
|
| 68265 | 253  | 
/** cache **/  | 
| 34108 | 254  | 
|
| 75393 | 255  | 
  object Cache {
    // Create a cache; defaults come from Compress.Cache.make() and isabelle.Cache.
    def make(
      compress: Compress.Cache = Compress.Cache.make(),
      max_string: Int = isabelle.Cache.default_max_string,
      initial_size: Int = isabelle.Cache.default_initial_size
    ): Cache = new Cache(compress, max_string, initial_size)

    // Instance built from Compress.Cache.none and max_string = 0
    // (NOTE(review): presumably this disables caching in isabelle.Cache -- confirm).
    val none: Cache = make(compress = Compress.Cache.none, max_string = 0)
  }
264  | 
||
| 
76351
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
75436 
diff
changeset
 | 
265  | 
// Cache for XML values on top of isabelle.Cache (lookup, store, cache_string and
// no_cache are not defined in this class -- presumably inherited from there).
// The protected cache_* operations do no locking themselves; the public methods
// below wrap them in `synchronized` on this instance.
class Cache(val compress: Compress.Cache, max_string: Int, initial_size: Int)
extends isabelle.Cache(max_string, initial_size) {
  // Empty properties are returned as they are; otherwise keys are isolated and
  // interned, values go through cache_string, and the new list is stored.
  protected def cache_props(x: Properties.T): Properties.T =
    if (x.isEmpty) x
    else
      lookup(x).getOrElse(
        store(x.map(p => (Library.isolate_substring(p._1).intern, cache_string(p._2)))))

  // Rebuild the markup from cached name and cached properties, then store it.
  protected def cache_markup(x: Markup): Markup =
    lookup(x).getOrElse(store(Markup(cache_string(x.name), cache_props(x.properties))))

  // Rebuild the tree from cached components, then store it.
  protected def cache_tree(x: XML.Tree): XML.Tree =
    lookup(x).getOrElse {
      x match {
        case XML.Elem(markup, body) =>
          store(XML.Elem(cache_markup(markup), cache_body(body)))
        case XML.Text(text) =>
          store(XML.Text(cache_string(text)))
      }
    }

  // Cache every tree of the body; the resulting list itself is not stored.
  protected def cache_body(x: XML.Body): XML.Body =
    if (x.isEmpty) x
    else lookup(x).getOrElse(x.map(cache_tree))

  // support hash-consing: store the tree as given, without rebuilding its components
  def tree0(x: XML.Tree): XML.Tree =
    if (no_cache) x else synchronized { lookup(x).getOrElse(store(x)) }

  // main methods
  def props(x: Properties.T): Properties.T =
    if (no_cache) x else synchronized { cache_props(x) }

  def markup(x: Markup): Markup =
    if (no_cache) x else synchronized { cache_markup(x) }

  def tree(x: XML.Tree): XML.Tree =
    if (no_cache) x else synchronized { cache_tree(x) }

  def body(x: XML.Body): XML.Body =
    if (no_cache) x else synchronized { cache_body(x) }

  // cache_tree is declared to return XML.Tree, hence the cast back to XML.Elem
  def elem(x: XML.Elem): XML.Elem =
    if (no_cache) x else synchronized { cache_tree(x).asInstanceOf[XML.Elem] }
}
324  | 
||
325  | 
||
| 43767 | 326  | 
|
327  | 
/** XML as data representation language **/  | 
|
328  | 
||
| 51987 | 329  | 
// Errors raised by the Decode operations.
abstract class Error(s: String) extends Exception(s)

// An atomic value that cannot be parsed; the message is the offending string.
class XML_Atom(s: String) extends Error(s)

// A body of unexpected shape; the message is always empty.
class XML_Body(body: XML.Body) extends Error("")
 | 
|
| 43767 | 332  | 
|
| 75393 | 333  | 
  // Encoders from values to XML bodies.
  object Encode {
    type T[A] = A => XML.Body
    type V[A] = PartialFunction[A, (List[String], XML.Body)]
    type P[A] = PartialFunction[A, List[String]]


    /* atomic values */

    def long_atom(i: Long): String = Library.signed_string_of_long(i)

    def int_atom(i: Int): String = Library.signed_string_of_int(i)

    def bool_atom(b: Boolean): String = if (b) "1" else "0"

    def unit_atom(u: Unit) = ""


    /* structural nodes */

    // anonymous node: element ":" without properties
    private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)

    // strings as attributes, keyed by their position 0, 1, ...
    private def vector(xs: List[String]): XML.Attributes =
      for ((x, i) <- xs.zipWithIndex) yield (int_atom(i), x)

    // element named by the tag number, with atoms as attributes
    private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree = {
      val (atoms, body) = data
      XML.Elem(Markup(int_atom(tag), vector(atoms)), body)
    }


    /* representation of standard types */

    val self: T[XML.Body] = identity

    val tree: T[XML.Tree] = (t => t :: Nil)

    val properties: T[Properties.T] =
      (props => List(XML.Elem(Markup(":", props), Nil)))

    val string: T[String] = XML.string

    val long: T[Long] = (x => string(long_atom(x)))

    val int: T[Int] = (x => string(int_atom(x)))

    val bool: T[Boolean] = (x => string(bool_atom(x)))

    val unit: T[Unit] = (x => string(unit_atom(x)))

    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
      x => {
        val first = node(f(x._1))
        val second = node(g(x._2))
        List(first, second)
      }

    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
      x => {
        val first = node(f(x._1))
        val second = node(g(x._2))
        val third = node(h(x._3))
        List(first, second, third)
      }

    def list[A](f: T[A]): T[List[A]] =
      xs => xs.map(x => node(f(x)))

    def option[A](f: T[A]): T[Option[A]] =
      opt =>
        opt match {
          case Some(x) => List(node(f(x)))
          case None => Nil
        }

    // The first variant defined at the value determines the tag (its position
    // in fs); if none is defined, `.get` fails with NoSuchElementException.
    def variant[A](fs: List[V[A]]): T[A] =
      x => {
        val (f, tag) = fs.iterator.zipWithIndex.find(_._1.isDefinedAt(x)).get
        List(tagged(tag, f(x)))
      }
  }
|
400  | 
||
| 75393 | 401  | 
  // Decoders from XML bodies to values: bad atoms raise XML_Atom, bodies of
  // unexpected shape raise XML_Body.
  object Decode {
    type T[A] = XML.Body => A
    type V[A] = PartialFunction[(List[String], XML.Body), A]
    type P[A] = PartialFunction[List[String], A]


    /* atomic values */

    def long_atom(s: String): Long =
      try { java.lang.Long.parseLong(s) }
      catch { case _: NumberFormatException => throw new XML_Atom(s) }

    def int_atom(s: String): Int =
      try { Integer.parseInt(s) }
      catch { case _: NumberFormatException => throw new XML_Atom(s) }

    def bool_atom(s: String): Boolean =
      s match {
        case "1" => true
        case "0" => false
        case _ => throw new XML_Atom(s)
      }

    def unit_atom(s: String): Unit =
      if (s != "") throw new XML_Atom(s)


    /* structural nodes */

    // body of an anonymous node: element ":" without properties
    private def node(t: XML.Tree): XML.Body =
      t match {
        case XML.Elem(Markup(":", Nil), body) => body
        case _ => throw new XML_Body(List(t))
      }

    // attribute values, after checking that the keys are exactly 0, 1, ...
    private def vector(atts: XML.Attributes): List[String] =
      atts.iterator.zipWithIndex.map({
        case ((a, x), i) => if (int_atom(a) != i) throw new XML_Atom(a) else x
      }).toList

    // tag number from the element name, atoms from its attributes
    private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
      t match {
        case XML.Elem(Markup(name, atts), body) => (int_atom(name), (vector(atts), body))
        case _ => throw new XML_Body(List(t))
      }


    /* representation of standard types */

    val self: T[XML.Body] = identity

    val tree: T[XML.Tree] = {
      case List(t) => t
      case body => throw new XML_Body(body)
    }

    val properties: T[Properties.T] = {
      case List(XML.Elem(Markup(":", props), Nil)) => props
      case body => throw new XML_Body(body)
    }

    val string: T[String] = {
      case Nil => ""
      case List(XML.Text(s)) => s
      case body => throw new XML_Body(body)
    }

    val long: T[Long] = (x => long_atom(string(x)))

    val int: T[Int] = (x => int_atom(string(x)))

    val bool: T[Boolean] = (x => bool_atom(string(x)))

    val unit: T[Unit] = (x => unit_atom(string(x)))

    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = {
      case List(first, second) => (f(node(first)), g(node(second)))
      case body => throw new XML_Body(body)
    }

    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = {
      case List(first, second, third) => (f(node(first)), g(node(second)), h(node(third)))
      case body => throw new XML_Body(body)
    }

    def list[A](f: T[A]): T[List[A]] =
      body => body.map(t => f(node(t)))

    def option[A](f: T[A]): T[Option[A]] = {
      case Nil => None
      case List(t) => Some(f(node(t)))
      case body => throw new XML_Body(body)
    }

    // The tag number selects the variant by its position in fs; a tag outside
    // the list raises XML_Body for the offending tree.
    def variant[A](fs: List[V[A]]): T[A] = {
      case List(t) =>
        val (tag, (atoms, body)) = tagged(t)
        val f = fs.lift(tag).getOrElse(throw new XML_Body(List(t)))
        f((atoms, body))
      case other => throw new XML_Body(other)
    }
  }
|
| 27931 | 502  | 
}  |