| author | blanchet | 
| Wed, 06 Dec 2023 12:03:56 +0100 | |
| changeset 79140 | 2413181b10bb | 
| parent 76351 | 2cee31cd92f0 | 
| child 80429 | 6f4d5d922da7 | 
| permissions | -rw-r--r-- | 
| 44698 | 1  | 
/*  Title:      Pure/PIDE/xml.scala
    Author:     Makarius

Untyped XML trees and basic data representation.
*/
6  | 
||
7  | 
package isabelle  | 
|
8  | 
||
| 55618 | 9  | 
|
| 75393 | 10  | 
object XML {
 | 
| 43767 | 11  | 
/** XML trees **/  | 
12  | 
||
| 27947 | 13  | 
/* datatype representation */  | 
14  | 
||
| 65753 | 15  | 
/** A single XML attribute, represented as one properties entry. */
type Attribute = Properties.Entry

/** The attribute list of an element, represented as properties. */
type Attributes = Properties.T
| 27931 | 17  | 
|
| 57912 | 18  | 
  /** Base type of XML trees; `toString` renders the tree via `string_of_tree`. */
  sealed abstract class Tree {
    override def toString: String = string_of_tree(this)
  }
 | 
| 64354 | 19  | 
/** A sequence of XML trees, e.g. the children of an element. */
type Body = List[Tree]
| 75393 | 20  | 
  /** XML element: markup (name and properties) together with a list of child trees. */
  case class Elem(markup: Markup, body: Body) extends Tree {
    // hash of (markup, body): computed on first use, then reused
    private lazy val elem_hash: Int = (markup, body).hashCode()
    override def hashCode(): Int = elem_hash

    /** Element name, as given by the markup. */
    def name: String = markup.name

    /** Update the markup properties with `more_attributes`; yields `this` when there are none. */
    def update_attributes(more_attributes: Attributes): Elem =
      if (more_attributes.nonEmpty) Elem(markup.update_properties(more_attributes), body)
      else this

    /** Element with attribute `att` added to its markup; the body is kept. */
    def + (att: Attribute): Elem = copy(markup = markup + att)
  }
| 75393 | 32  | 
  /** XML text node holding a plain string. */
  case class Text(content: String) extends Tree {
    // hash code of the content: computed on first use, then reused
    private lazy val content_hash: Int = content.hashCode()
    override def hashCode(): Int = content_hash
  }
|
| 29203 | 36  | 
|
| 66196 | 37  | 
/** Element with the given markup and an empty body. */
def elem(markup: Markup): XML.Elem = XML.Elem(markup, Nil)

/** Element with the given name, no properties, and the given body. */
def elem(name: String, body: Body): XML.Elem = XML.Elem(Markup(name, Nil), body)

/** Element with the given name, no properties, and an empty body. */
def elem(name: String): XML.Elem = elem(name, Nil)
|
| 
38267
 
e50c283dd125
type XML.Body as basic data representation language (Scala version);
 
wenzelm 
parents: 
38263 
diff
changeset
 | 
40  | 
|
| 73028 | 41  | 
  /** Text node with empty content. */
  val no_text: Text = XML.Text("")

  /** Text node consisting of a single newline character. */
  val newline: Text = XML.Text("\n")
 | 
43  | 
||
| 74785 | 44  | 
/** Body for plain text: empty for the empty string, otherwise a single text node. */
def string(s: String): Body =
  if (s.isEmpty) Nil
  else Text(s) :: Nil
45  | 
||
| 74789 | 46  | 
/** Put text `bg` before and text `en` after `body`; an empty string contributes no node. */
def enclose(bg: String, en: String, body: Body): Body = {
  val opening = string(bg)
  val closing = string(en)
  opening ::: body ::: closing
}
|
48  | 
||
| 29203 | 49  | 
|
| 69805 | 50  | 
/* name space */  | 
51  | 
||
| 75393 | 52  | 
  /** Factory for `Namespace` values (the class constructor is private). */
  object Namespace {
    def apply(prefix: String, target: String): Namespace = new Namespace(prefix, target)
  }
|
56  | 
||
| 75393 | 57  | 
  /** XML name space, given by a `prefix` and the `target` it stands for. */
  final class Namespace private(prefix: String, target: String) {
    /** Qualified name: the prefix, a colon, then `name`. */
    def apply(name: String): String = s"$prefix:$name"

    /** The `xmlns:<prefix>` attribute whose value is `target`. */
    val attribute: XML.Attribute = (s"xmlns:$prefix", target)

    override def toString: String = attribute.toString
  }
|
63  | 
||
64  | 
||
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
65  | 
/* wrapped elements */  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
66  | 
|
| 60215 | 67  | 
// markup names used by the Wrapped_Elem and Root_Elem encodings
val XML_ELEM: String = "xml_elem"
val XML_NAME: String = "xml_name"
val XML_BODY: String = "xml_body"
|
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
70  | 
|
| 75393 | 71  | 
  /**
   * Wrapped element: the given markup is stored in the properties of an
   * `XML_ELEM` element (its name under key `XML_NAME`, followed by its own
   * properties); `body1` is kept inside a leading `XML_BODY` child and `body2`
   * forms the remaining children.
   */
  object Wrapped_Elem {
    def apply(markup: Markup, body1: Body, body2: Body): XML.Elem = {
      val wrapper = Markup(XML_ELEM, (XML_NAME, markup.name) :: markup.properties)
      val inner = XML.Elem(Markup(XML_BODY, Nil), body1)
      XML.Elem(wrapper, inner :: body2)
    }

    /** Inverse of `apply`: recover the markup and both bodies, or `None` for any other tree. */
    def unapply(tree: Tree): Option[(Markup, Body, Body)] =
      tree match {
        case XML.Elem(
              Markup(XML_ELEM, (XML_NAME, name) :: props),
              XML.Elem(Markup(XML_BODY, Nil), body1) :: body2) =>
          Some((Markup(name, props), body1, body2))
        case _ =>
          None
      }
  }
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
85  | 
|
| 75393 | 86  | 
  /** Represent a whole body as one `XML_ELEM` element without properties. */
  object Root_Elem {
    def apply(body: Body): XML.Elem = {
      val root = Markup(XML_ELEM, Nil)
      XML.Elem(root, body)
    }

    /** The body of such a root element, or `None` for any other tree. */
    def unapply(tree: Tree): Option[Body] =
      tree match {
        case XML.Elem(Markup(XML_ELEM, Nil), body) => Some(body)
        case _ => None
      }
  }
| 
 
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
 
wenzelm 
parents: 
67113 
diff
changeset
 | 
94  | 
|
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
95  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
96  | 
/* traverse text */  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
97  | 
|
| 75393 | 98  | 
  /**
   * Fold `op` over all text nodes of `body`, left to right, starting from `a`.
   * For a wrapped element only its third component (the second body) is
   * visited; for any other element all children are visited.
   */
  def traverse_text[A](body: Body)(a: A)(op: (A, String) => A): A = {
    def visit_all(acc: A, trees: Body): A = trees.foldLeft(acc)(visit)

    def visit(acc: A, tree: Tree): A =
      tree match {
        // must come before the general Elem case, which would also match
        case XML.Wrapped_Elem(_, _, rest) => visit_all(acc, rest)
        case XML.Elem(_, children) => visit_all(acc, children)
        case XML.Text(text) => op(acc, text)
      }

    visit_all(a, body)
  }
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
107  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
108  | 
  /** Sum of the string lengths of all text nodes visited by `traverse_text`. */
  def text_length(body: Body): Int =
    traverse_text(body)(0)((total, s) => total + s.length)
 | 
| 
74683
 
c8327efc7af1
clarified signature: more direct XML.symbol_length;
 
wenzelm 
parents: 
73528 
diff
changeset
 | 
109  | 
  /** Sum of `Symbol.length` over all text nodes visited by `traverse_text`. */
  def symbol_length(body: Body): Int =
    traverse_text(body)(0)((total, s) => total + Symbol.length(s))
 | 
| 
49650
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
110  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
111  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
112  | 
/* text content */  | 
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
113  | 
|
| 75393 | 114  | 
  /** Concatenation of all text nodes of `body`, in traversal order. */
  def content(body: Body): String = {
    // pre-size the builder with the total text length
    val result = new StringBuilder(text_length(body))
    traverse_text(body)(result)((buf, s) => buf.append(s)).toString
  }
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
119  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
120  | 
/** Text content of a single tree. */
def content(tree: Tree): String = content(tree :: Nil)
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
121  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
122  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
123  | 
|
| 
 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 
wenzelm 
parents: 
49613 
diff
changeset
 | 
124  | 
/** string representation **/  | 
| 29203 | 125  | 
|
| 69804 | 126  | 
/** XML declaration line (version 1.0, utf-8), terminated by a newline. */
val header: String = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
127  | 
||
| 75393 | 128  | 
  /**
   * Append character `c` to `s`, replacing XML special characters by the
   * predefined entity references.
   *
   * `<`, `>` and `&` are always escaped.  Double and single quotes are escaped
   * unless `permissive` is true, in which case they are appended unchanged.
   * Every other character is appended as it is.
   */
  def output_char(s: StringBuilder, c: Char, permissive: Boolean = false): Unit = {
    c match {
      case '<' => s ++= "&lt;"
      case '>' => s ++= "&gt;"
      case '&' => s ++= "&amp;"
      case '"' if !permissive => s ++= "&quot;"
      case '\'' if !permissive => s ++= "&apos;"
      case _ => s += c
    }
  }
|
138  | 
||
| 75393 | 139  | 
  /**
   * Append `str` to `s`, passing each character through `output_char` with the
   * given `permissive` flag.  A null `str` is handed to the builder unchanged.
   */
  def output_string(s: StringBuilder, str: String, permissive: Boolean = false): Unit = {
    if (str != null) {
      for (c <- str.iterator) output_char(s, c, permissive = permissive)
    }
    else s ++= str
  }
143  | 
||
| 75393 | 144  | 
  /**
   * Append the opening tag for `markup` to `s`: name, then each property as
   * ` key="value"`.  Values go through `output_string`; the element name and
   * the keys are written verbatim.  With `end = true` the tag is closed as an
   * empty element (`/>`).
   */
  def output_elem(s: StringBuilder, markup: Markup, end: Boolean = false): Unit = {
    s += '<'
    s ++= markup.name
    markup.properties.foreach { case (key, value) =>
      s += ' '
      s ++= key
      s ++= "=\""
      output_string(s, value)
      s += '"'
    }
    if (end) s += '/'
    s += '>'
  }
| 
 
aa3d4cf7825a
clarified signature: no symbol markup within XML attributes;
 
wenzelm 
parents: 
73203 
diff
changeset
 | 
158  | 
|
| 75393 | 159  | 
  /** Append the closing tag for `name` to `s`; the name is written verbatim. */
  def output_elem_end(s: StringBuilder, name: String): Unit = {
    s ++= "</"
    s ++= name
    s += '>'
  }
| 
 
aa3d4cf7825a
clarified signature: no symbol markup within XML attributes;
 
wenzelm 
parents: 
73203 
diff
changeset
 | 
165  | 
|
| 75393 | 166  | 
  /**
   * Render `body` as XML text.  An element without children becomes an empty
   * element tag; any other element becomes an opening tag, its rendered
   * children, and a closing tag.  Text nodes are written via `output_string`.
   */
  def string_of_body(body: Body): String = {
    val out = new StringBuilder

    def render(t: Tree): Unit =
      t match {
        case XML.Elem(markup, children) =>
          if (children == Nil) output_elem(out, markup, end = true)
          else {
            output_elem(out, markup)
            children.foreach(render)
            output_elem_end(out, markup.name)
          }
        case XML.Text(txt) =>
          output_string(out, txt)
      }

    body.foreach(render)
    out.toString
  }
182  | 
||
| 
38268
 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 
wenzelm 
parents: 
38267 
diff
changeset
 | 
183  | 
/** Render a single tree as XML text. */
def string_of_tree(tree: XML.Tree): String = string_of_body(tree :: Nil)
| 27941 | 184  | 
|
| 
73528
 
c337c798f64c
clarified HTML template (see also 04cb7e02ca38): avoid odd patching of sources;
 
wenzelm 
parents: 
73359 
diff
changeset
 | 
185  | 
/** XML text for string `s`, i.e. the rendering of a single text node. */
def text(s: String): String = {
  val node = XML.Text(s)
  string_of_tree(node)
}
| 27941 | 186  | 
|
| 44808 | 187  | 
|
| 68265 | 188  | 
/** cache **/  | 
| 34108 | 189  | 
|
| 75393 | 190  | 
  /** Construction of `XML.Cache` instances. */
  object Cache {
    /** New cache; every argument has a default. */
    def make(
      compress: Compress.Cache = Compress.Cache.make(),
      max_string: Int = isabelle.Cache.default_max_string,
      initial_size: Int = isabelle.Cache.default_initial_size
    ): Cache = new Cache(compress, max_string, initial_size)

    /** Cache built from `Compress.Cache.none` with `max_string` 0. */
    val none: Cache = make(compress = Compress.Cache.none, max_string = 0)
  }
199  | 
||
| 
76351
 
2cee31cd92f0
generic support for XZ and Zstd compression in Isabelle/Scala;
 
wenzelm 
parents: 
75436 
diff
changeset
 | 
200  | 
/**
 * Cache for XML values.
 *
 * NOTE(review): `lookup`, `store`, `cache_string` and `no_cache` are not
 * defined in this class -- presumably inherited from `isabelle.Cache`;
 * confirm there.
 */
class Cache(val compress: Compress.Cache, max_string: Int, initial_size: Int)
extends isabelle.Cache(max_string, initial_size) {
  // Empty properties are returned as they are; otherwise reuse a stored value
  // or store a copy whose keys are isolated + interned and whose values went
  // through cache_string.
  protected def cache_props(x: Properties.T): Properties.T =
    if (x.isEmpty) x
    else
      lookup(x).getOrElse(
        store(x.map { case (a, b) => (Library.isolate_substring(a).intern, cache_string(b)) }))

  // Reuse a stored markup or store one rebuilt from cached name and properties.
  protected def cache_markup(x: Markup): Markup =
    lookup(x).getOrElse {
      val Markup(name, props) = x
      store(Markup(cache_string(name), cache_props(props)))
    }

  // Reuse a stored tree or store one rebuilt from cached components.
  protected def cache_tree(x: XML.Tree): XML.Tree =
    lookup(x).getOrElse {
      x match {
        case XML.Elem(markup, body) =>
          store(XML.Elem(cache_markup(markup), cache_body(body)))
        case XML.Text(text) =>
          store(XML.Text(cache_string(text)))
      }
    }

  // Empty bodies are returned as they are; a body that is not found is mapped
  // through cache_tree (the resulting list itself is not stored here).
  protected def cache_body(x: XML.Body): XML.Body =
    if (x.isEmpty) x
    else lookup(x).getOrElse(x.map(cache_tree))

  // support hash-consing
  def tree0(x: XML.Tree): XML.Tree =
    if (no_cache) x
    else
      synchronized {
        lookup(x) match {
          case Some(y) => y
          case None => store(x)
        }
      }

  // main methods: return the argument when no_cache holds, otherwise run the
  // corresponding cache operation inside synchronized
  def props(x: Properties.T): Properties.T =
    if (no_cache) x
    else synchronized { cache_props(x) }

  def markup(x: Markup): Markup =
    if (no_cache) x
    else synchronized { cache_markup(x) }

  def tree(x: XML.Tree): XML.Tree =
    if (no_cache) x
    else synchronized { cache_tree(x) }

  def body(x: XML.Body): XML.Body =
    if (no_cache) x
    else synchronized { cache_body(x) }

  def elem(x: XML.Elem): XML.Elem =
    if (no_cache) x
    else synchronized { cache_tree(x).asInstanceOf[XML.Elem] }
}
259  | 
||
260  | 
||
| 43767 | 261  | 
|
262  | 
/** XML as data representation language **/  | 
|
263  | 
||
| 51987 | 264  | 
/** Base class of the exceptions for XML data representation; `s` is the exception message. */
abstract class Error(s: String) extends Exception(s)

/** Error for an atomic value; the message is the given string. */
class XML_Atom(s: String) extends Error(s)

/** Error for a body; the exception message is empty. */
class XML_Body(body: XML.Body) extends Error("")
 | 
|
| 43767 | 267  | 
|
| 75393 | 268  | 
  /** Encoders from Scala values to `XML.Body`. */
  object Encode {
    type T[A] = A => XML.Body
    type V[A] = PartialFunction[A, (List[String], XML.Body)]
    type P[A] = PartialFunction[A, List[String]]


    /* atomic values */

    def long_atom(i: Long): String = Library.signed_string_of_long(i)

    def int_atom(i: Int): String = Library.signed_string_of_int(i)

    // true is "1", false is "0"
    def bool_atom(b: Boolean): String =
      b match {
        case true => "1"
        case false => "0"
      }

    // the unit value is the empty string
    def unit_atom(u: Unit): String = ""


    /* structural nodes */

    // element named ":" without properties, wrapping the given body
    private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)

    // strings as attributes keyed by their position: 0, 1, 2, ...
    private def vector(xs: List[String]): XML.Attributes =
      for ((x, i) <- xs.zipWithIndex) yield (int_atom(i), x)

    // element named by the tag number, with the strings as positional
    // attributes and the body as children
    private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree = {
      val (atoms, body) = data
      XML.Elem(Markup(int_atom(tag), vector(atoms)), body)
    }


    /* representation of standard types */

    val tree: T[XML.Tree] = (t => t :: Nil)

    val properties: T[Properties.T] =
      (props => XML.Elem(Markup(":", props), Nil) :: Nil)

    // the empty string is the empty body, anything else a single text node
    val string: T[String] = (s => if (s.isEmpty) Nil else XML.Text(s) :: Nil)

    val long: T[Long] = (i => string(long_atom(i)))

    val int: T[Int] = (i => string(int_atom(i)))

    val bool: T[Boolean] = (b => string(bool_atom(b)))

    val unit: T[Unit] = (u => string(unit_atom(u)))

    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
      (p => List(node(f(p._1)), node(g(p._2))))

    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
      (p => List(node(f(p._1)), node(g(p._2)), node(h(p._3))))

    // one node per list element
    def list[A](f: T[A]): T[List[A]] =
      (xs => xs.map((x: A) => node(f(x))))

    // None is the empty body, Some(x) a single node
    def option[A](f: T[A]): T[Option[A]] =
      (opt: Option[A]) =>
        opt match {
          case None => Nil
          case Some(x) => node(f(x)) :: Nil
        }

    // the first partial function defined at x determines the tag (its index
    // in fs); fails via Option.get when none is defined
    def variant[A](fs: List[V[A]]): T[A] =
      (x: A) => {
        val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get
        tagged(tag, f(x)) :: Nil
      }
  }
|
333  | 
||
| 75393 | 334  | 
  object Decode {
 | 
| 43767 | 335  | 
type T[A] = XML.Body => A  | 
| 75436 | 336  | 
type V[A] = PartialFunction[(List[String], XML.Body), A]  | 
| 70828 | 337  | 
type P[A] = PartialFunction[List[String], A]  | 
| 43767 | 338  | 
|
339  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
340  | 
/* atomic values */  | 
| 43767 | 341  | 
|
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
342  | 
/* Parse a Long via java.lang.Long.parseLong; a NumberFormatException is
   re-raised as XML_Atom carrying the offending string. */
def long_atom(s: String): Long = {
  try java.lang.Long.parseLong(s)
  catch {
    case _: NumberFormatException => throw new XML_Atom(s)
  }
}
 | 
|
345  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
346  | 
/* Parse an Int via Integer.parseInt; a NumberFormatException is re-raised
   as XML_Atom carrying the offending string. */
def int_atom(s: String): Int = {
  try Integer.parseInt(s)
  catch {
    case _: NumberFormatException => throw new XML_Atom(s)
  }
}
 | 
|
349  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
350  | 
/* Booleans are written as "1" (true) and "0" (false); any other string
   raises XML_Atom. */
def bool_atom(s: String): Boolean =
  s match {
    case "1" => true
    case "0" => false
    case _ => throw new XML_Atom(s)
  }
|
354  | 
||
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
355  | 
/* The unit value is written as the empty string; anything else raises XML_Atom. */
def unit_atom(s: String): Unit = {
  if (s != "") throw new XML_Atom(s)
}
357  | 
||
358  | 
||
359  | 
/* structural nodes */  | 
|
360  | 
||
361  | 
/* Unwrap a structural node: an element with markup name ":" and no
   properties yields its body; any other tree raises XML_Body. */
private def node(t: XML.Tree): XML.Body =
  t match {
    case XML.Elem(Markup(":", Nil), children) => children
    case other => throw new XML_Body(other :: Nil)
  }
|
366  | 
||
| 43781 | 367  | 
/* Read attributes as a vector of strings: the name of each attribute must
   be its own position (0, 1, 2, ...), and the values are returned in order.
   A name that is not an integer, or not the expected position, raises XML_Atom. */
private def vector(atts: XML.Attributes): List[String] =
  atts.iterator.zipWithIndex.map({ case ((key, value), position) =>
    if (int_atom(key) != position) throw new XML_Atom(key)
    else value
  }).toList
 | 
| 
43778
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
370  | 
|
| 
 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 
wenzelm 
parents: 
43768 
diff
changeset
 | 
371  | 
/* Split a tagged element into its integer tag (the markup name), the
   attribute vector and the body; a tree that is not an element raises XML_Body. */
private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
  t match {
    case XML.Elem(Markup(name, atts), body) =>
      val tag = int_atom(name)
      val atoms = vector(atts)
      (tag, (atoms, body))
    case other => throw new XML_Body(other :: Nil)
  }
|
376  | 
||
377  | 
||
378  | 
/* representation of standard types */  | 
|
379  | 
||
| 75393 | 380  | 
    /* tree: a body of exactly one tree yields that tree; any other body raises XML_Body */
    val tree: T[XML.Tree] = {
      case single :: Nil => single
      case other => throw new XML_Body(other)
    }
|
384  | 
||
| 75393 | 385  | 
    /* properties: a single element with markup name ":" and empty body
       yields its properties; any other body raises XML_Body */
    val properties: T[Properties.T] = {
      case XML.Elem(Markup(":", props), Nil) :: Nil => props
      case other => throw new XML_Body(other)
    }
|
389  | 
||
| 75393 | 390  | 
    /* string: a single text node yields its content, the empty body yields
       the empty string; any other body raises XML_Body */
    val string: T[String] = {
      case XML.Text(content) :: Nil => content
      case Nil => ""
      case other => throw new XML_Body(other)
    }
|
395  | 
||
396  | 
/* atomic values: decode the body as a string, then parse it with the matching *_atom function */

val long: T[Long] = { body => long_atom(string(body)) }

val int: T[Int] = { body => int_atom(string(body)) }

val bool: T[Boolean] = { body => bool_atom(string(body)) }

val unit: T[Unit] = { body => unit_atom(string(body)) }
|
403  | 
||
| 75393 | 404  | 
    /* pair: a body of exactly two nodes, decoded by f and g respectively;
       any other body raises XML_Body */
    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = {
      case first :: second :: Nil =>
        val a = f(node(first))
        val b = g(node(second))
        (a, b)
      case other => throw new XML_Body(other)
    }
|
408  | 
||
| 75393 | 409  | 
    /* triple: a body of exactly three nodes, decoded by f, g and h
       respectively; any other body raises XML_Body */
    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = {
      case first :: second :: third :: Nil =>
        val a = f(node(first))
        val b = g(node(second))
        val c = h(node(third))
        (a, b, c)
      case other => throw new XML_Body(other)
    }
|
413  | 
||
414  | 
/* list: every tree of the body is unwrapped via node and decoded by f, preserving order */
def list[A](f: T[A]): T[List[A]] = { body =>
  for (item <- body) yield f(node(item))
}
|
416  | 
||
| 75393 | 417  | 
    /* option: the empty body is None, a single node is Some of its decoded
       content; any other body raises XML_Body */
    def option[A](f: T[A]): T[Option[A]] = {
      case Nil => None
      case single :: Nil => Some(f(node(single)))
      case other => throw new XML_Body(other)
    }
|
422  | 
||
| 75393 | 423  | 
    /* variant: the body must be a single tagged element. Its tag selects the
       alternative at that position in fs, which is then applied to the
       attribute vector and the element body. A body that is not a single
       tree, or a tag without corresponding alternative, raises XML_Body. */
    def variant[A](fs: List[V[A]]): T[A] = {
      case single :: Nil =>
        val (index, (atoms, body)) = tagged(single)
        val alternative = fs.lift(index).getOrElse(throw new XML_Body(single :: Nil))
        alternative((atoms, body))
      case other => throw new XML_Body(other)
    }
|
432  | 
}  | 
|
| 27931 | 433  | 
}  |