| author | ballarin | 
| Tue, 03 Sep 2013 22:12:48 +0200 | |
| changeset 53370 | 7a41ec2cc522 | 
| parent 52890 | 36e2c0c308eb | 
| child 55618 | 995162143ef4 | 
| permissions | -rw-r--r-- | 
| 44698 | 1 | /* Title: Pure/PIDE/xml.scala | 
| 45673 
cd41e3903fbf
separate compilation of PIDE vs. Pure sources, which enables independent Scala library;
 wenzelm parents: 
45667diff
changeset | 2 | Module: PIDE | 
| 27931 | 3 | Author: Makarius | 
| 4 | ||
| 44698 | 5 | Untyped XML trees and basic data representation. | 
| 27931 | 6 | */ | 
| 7 | ||
| 8 | package isabelle | |
| 9 | ||
| 34108 | 10 | import java.util.WeakHashMap | 
| 11 | import java.lang.ref.WeakReference | |
| 12 | import javax.xml.parsers.DocumentBuilderFactory | |
| 13 | ||
| 27947 | 14 | |
| 29203 | 15 | object XML | 
| 16 | {
 | |
| 43767 | 17 | /** XML trees **/ | 
| 18 | ||
| 27947 | 19 | /* datatype representation */ | 
| 20 | ||
| 43780 | 21 | type Attributes = Properties.T | 
| 27931 | 22 | |
| 38268 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 23 |   sealed abstract class Tree { override def toString = string_of_tree(this) }
 | 
| 38230 
ed147003de4b
simplified type XML.Tree: embed Markup directly, avoid slightly odd triple;
 wenzelm parents: 
36817diff
changeset | 24 | case class Elem(markup: Markup, body: List[Tree]) extends Tree | 
| 52890 | 25 |   {
 | 
| 26 | def name: String = markup.name | |
| 27 | } | |
| 29204 | 28 | case class Text(content: String) extends Tree | 
| 29203 | 29 | |
| 38230 
ed147003de4b
simplified type XML.Tree: embed Markup directly, avoid slightly odd triple;
 wenzelm parents: 
36817diff
changeset | 30 | def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body) | 
| 
ed147003de4b
simplified type XML.Tree: embed Markup directly, avoid slightly odd triple;
 wenzelm parents: 
36817diff
changeset | 31 | def elem(name: String) = Elem(Markup(name, Nil), Nil) | 
| 33999 | 32 | |
| 38267 
e50c283dd125
type XML.Body as basic data representation language (Scala version);
 wenzelm parents: 
38263diff
changeset | 33 | type Body = List[Tree] | 
| 
e50c283dd125
type XML.Body as basic data representation language (Scala version);
 wenzelm parents: 
38263diff
changeset | 34 | |
| 29203 | 35 | |
| 49650 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 36 | /* wrapped elements */ | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 37 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 38 | val XML_ELEM = "xml_elem"; | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 39 | val XML_NAME = "xml_name"; | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 40 | val XML_BODY = "xml_body"; | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 41 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 42 | object Wrapped_Elem | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 43 |   {
 | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 44 | def apply(markup: Markup, body1: Body, body2: Body): XML.Elem = | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 45 | Elem(Markup(XML_ELEM, (XML_NAME, markup.name) :: markup.properties), | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 46 | Elem(Markup(XML_BODY, Nil), body1) :: body2) | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 47 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 48 | def unapply(tree: Tree): Option[(Markup, Body, Body)] = | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 49 |       tree match {
 | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 50 | case | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 51 | Elem(Markup(XML_ELEM, (XML_NAME, name) :: props), | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 52 | Elem(Markup(XML_BODY, Nil), body1) :: body2) => | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 53 | Some(Markup(name, props), body1, body2) | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 54 | case _ => None | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 55 | } | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 56 | } | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 57 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 58 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 59 | /* traverse text */ | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 60 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 61 | def traverse_text[A](body: Body)(a: A)(op: (A, String) => A): A = | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 62 |   {
 | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 63 | def traverse(x: A, t: Tree): A = | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 64 |       t match {
 | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 65 | case Wrapped_Elem(_, _, ts) => (x /: ts)(traverse) | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 66 | case Elem(_, ts) => (x /: ts)(traverse) | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 67 | case Text(s) => op(x, s) | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 68 | } | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 69 | (a /: body)(traverse) | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 70 | } | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 71 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 72 |   def text_length(body: Body): Int = traverse_text(body)(0) { case (n, s) => n + s.length }
 | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 73 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 74 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 75 | /* text content */ | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 76 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 77 | def content(body: Body): String = | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 78 |   {
 | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 79 | val text = new StringBuilder(text_length(body)) | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 80 |     traverse_text(body)(()) { case (_, s) => text.append(s) }
 | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 81 | text.toString | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 82 | } | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 83 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 84 | def content(tree: Tree): String = content(List(tree)) | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 85 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 86 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 87 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 88 | /** string representation **/ | 
| 29203 | 89 | |
| 38268 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 90 | def string_of_body(body: Body): String = | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 91 |   {
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 92 | val s = new StringBuilder | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 93 | |
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 94 |     def text(txt: String) {
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 95 | if (txt == null) s ++= txt | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 96 |       else {
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 97 |         for (c <- txt.iterator) c match {
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 98 | case '<' => s ++= "&lt;" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 99 | case '>' => s ++= "&gt;" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 100 | case '&' => s ++= "&amp;" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 101 | case '"' => s ++= "&quot;" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 102 | case '\'' => s ++= "&apos;" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 103 | case _ => s += c | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 104 | } | 
| 34005 | 105 | } | 
| 29203 | 106 | } | 
| 38268 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 107 |     def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" }
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 108 |     def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) }
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 109 | def tree(t: Tree): Unit = | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 110 |       t match {
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 111 | case Elem(markup, Nil) => | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 112 | s ++= "<"; elem(markup); s ++= "/>" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 113 | case Elem(markup, ts) => | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 114 | s ++= "<"; elem(markup); s ++= ">" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 115 | ts.foreach(tree) | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 116 | s ++= "</"; s ++= markup.name; s ++= ">" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 117 | case Text(txt) => text(txt) | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 118 | } | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 119 | body.foreach(tree) | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 120 | s.toString | 
| 29203 | 121 | } | 
| 122 | ||
| 38268 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 123 | def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree)) | 
| 27941 | 124 | |
| 125 | ||
| 44808 | 126 | |
| 127 | /** cache for partial sharing (weak table) **/ | |
| 34108 | 128 | |
| 43745 | 129 | class Cache(initial_size: Int = 131071, max_string: Int = 100) | 
| 34108 | 130 |   {
 | 
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 131 | private var table = new WeakHashMap[Any, WeakReference[Any]](initial_size) | 
| 38446 
9d59dab38fef
XML.Cache: pipe-lined (thread-safe) version using actor;
 wenzelm parents: 
38268diff
changeset | 132 | |
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 133 | private def lookup[A](x: A): Option[A] = | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 134 |     {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 135 | val ref = table.get(x) | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 136 | if (ref == null) None | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 137 |       else {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 138 | val y = ref.asInstanceOf[WeakReference[A]].get | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 139 | if (y == null) None | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 140 | else Some(y) | 
| 38446 
9d59dab38fef
XML.Cache: pipe-lined (thread-safe) version using actor;
 wenzelm parents: 
38268diff
changeset | 141 | } | 
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 142 | } | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 143 | private def store[A](x: A): A = | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 144 |     {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 145 | table.put(x, new WeakReference[Any](x)) | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 146 | x | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 147 | } | 
| 34108 | 148 | |
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 149 | private def trim_bytes(s: String): String = new String(s.toCharArray) | 
| 38869 | 150 | |
| 51663 | 151 | private def cache_string(x: String): String = | 
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 152 |       lookup(x) match {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 153 | case Some(y) => y | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 154 | case None => | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 155 | val z = trim_bytes(x) | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 156 | if (z.length > max_string) z else store(z) | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 157 | } | 
| 51663 | 158 | private def cache_props(x: Properties.T): Properties.T = | 
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 159 | if (x.isEmpty) x | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 160 | else | 
| 34133 | 161 |         lookup(x) match {
 | 
| 162 | case Some(y) => y | |
| 51663 | 163 | case None => store(x.map(p => (trim_bytes(p._1).intern, cache_string(p._2)))) | 
| 34133 | 164 | } | 
| 51663 | 165 | private def cache_markup(x: Markup): Markup = | 
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 166 |       lookup(x) match {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 167 | case Some(y) => y | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 168 | case None => | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 169 |           x match {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 170 | case Markup(name, props) => | 
| 51663 | 171 | store(Markup(cache_string(name), cache_props(props))) | 
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 172 | } | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 173 | } | 
| 51663 | 174 | private def cache_tree(x: XML.Tree): XML.Tree = | 
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 175 |       lookup(x) match {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 176 | case Some(y) => y | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 177 | case None => | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 178 |           x match {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 179 | case XML.Elem(markup, body) => | 
| 51663 | 180 | store(XML.Elem(cache_markup(markup), cache_body(body))) | 
| 181 | case XML.Text(text) => store(XML.Text(cache_string(text))) | |
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 182 | } | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 183 | } | 
| 51663 | 184 | private def cache_body(x: XML.Body): XML.Body = | 
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 185 | if (x.isEmpty) x | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 186 | else | 
| 34133 | 187 |         lookup(x) match {
 | 
| 188 | case Some(y) => y | |
| 51663 | 189 | case None => x.map(cache_tree(_)) | 
| 34133 | 190 | } | 
| 38446 
9d59dab38fef
XML.Cache: pipe-lined (thread-safe) version using actor;
 wenzelm parents: 
38268diff
changeset | 191 | |
| 
9d59dab38fef
XML.Cache: pipe-lined (thread-safe) version using actor;
 wenzelm parents: 
38268diff
changeset | 192 | // main methods | 
| 51663 | 193 |     def string(x: String): String = synchronized { cache_string(x) }
 | 
| 194 |     def props(x: Properties.T): Properties.T = synchronized { cache_props(x) }
 | |
| 195 |     def markup(x: Markup): Markup = synchronized { cache_markup(x) }
 | |
| 196 |     def tree(x: XML.Tree): XML.Tree = synchronized { cache_tree(x) }
 | |
| 197 |     def body(x: XML.Body): XML.Body = synchronized { cache_body(x) }
 | |
| 198 |     def elem(x: XML.Elem): XML.Elem = synchronized { cache_tree(x).asInstanceOf[XML.Elem] }
 | |
| 34108 | 199 | } | 
| 200 | ||
| 201 | ||
| 43767 | 202 | |
| 203 | /** XML as data representation language **/ | |
| 204 | ||
| 51987 | 205 | abstract class Error(s: String) extends Exception(s) | 
| 206 | class XML_Atom(s: String) extends Error(s) | |
| 207 |   class XML_Body(body: XML.Body) extends Error("")
 | |
| 43767 | 208 | |
| 209 | object Encode | |
| 210 |   {
 | |
| 211 | type T[A] = A => XML.Body | |
| 212 | ||
| 213 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 214 | /* atomic values */ | 
| 43767 | 215 | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 216 | def long_atom(i: Long): String = i.toString | 
| 43767 | 217 | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 218 | def int_atom(i: Int): String = i.toString | 
| 43767 | 219 | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 220 | def bool_atom(b: Boolean): String = if (b) "1" else "0" | 
| 43767 | 221 | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 222 | def unit_atom(u: Unit) = "" | 
| 43767 | 223 | |
| 224 | ||
| 225 | /* structural nodes */ | |
| 226 | ||
| 227 |     private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)
 | |
| 228 | ||
| 43781 | 229 | private def vector(xs: List[String]): XML.Attributes = | 
| 46839 
f7232c078fa5
simplified -- plain map_index is sufficient (pointed out by Enrico Tassi);
 wenzelm parents: 
45673diff
changeset | 230 |       xs.zipWithIndex.map({ case (x, i) => (int_atom(i), x) })
 | 
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 231 | |
| 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 232 | private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree = | 
| 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 233 | XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2) | 
| 43767 | 234 | |
| 235 | ||
| 236 | /* representation of standard types */ | |
| 237 | ||
| 43780 | 238 | val properties: T[Properties.T] = | 
| 43767 | 239 |       (props => List(XML.Elem(Markup(":", props), Nil)))
 | 
| 240 | ||
| 241 | val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s))) | |
| 242 | ||
| 243 | val long: T[Long] = (x => string(long_atom(x))) | |
| 244 | ||
| 245 | val int: T[Int] = (x => string(int_atom(x))) | |
| 246 | ||
| 247 | val bool: T[Boolean] = (x => string(bool_atom(x))) | |
| 248 | ||
| 249 | val unit: T[Unit] = (x => string(unit_atom(x))) | |
| 250 | ||
| 251 | def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = | |
| 252 | (x => List(node(f(x._1)), node(g(x._2)))) | |
| 253 | ||
| 254 | def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = | |
| 255 | (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3)))) | |
| 256 | ||
| 257 | def list[A](f: T[A]): T[List[A]] = | |
| 258 | (xs => xs.map((x: A) => node(f(x)))) | |
| 259 | ||
| 260 | def option[A](f: T[A]): T[Option[A]] = | |
| 261 |     {
 | |
| 262 | case None => Nil | |
| 263 | case Some(x) => List(node(f(x))) | |
| 264 | } | |
| 265 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 266 | def variant[A](fs: List[PartialFunction[A, (List[String], XML.Body)]]): T[A] = | 
| 43767 | 267 |     {
 | 
| 268 | case x => | |
| 269 | val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get | |
| 270 | List(tagged(tag, f(x))) | |
| 271 | } | |
| 272 | } | |
| 273 | ||
object Decode
{
  /* decoder types: T reads a value from an XML body,
     V reads a value from a list of strings plus an XML body */

  type T[A] = XML.Body => A
  type V[A] = (List[String], XML.Body) => A


  /* atomic values */

  // Parse a Long; a malformed string is reported as XML_Atom carrying that string.
  def long_atom(s: String): Long =
  {
    try java.lang.Long.parseLong(s)
    catch { case _: NumberFormatException => throw new XML_Atom(s) }
  }

  // Parse an Int; a malformed string is reported as XML_Atom carrying that string.
  def int_atom(s: String): Int =
  {
    try Integer.parseInt(s)
    catch { case _: NumberFormatException => throw new XML_Atom(s) }
  }

  // "1" is true, "0" is false, anything else is rejected with XML_Atom.
  def bool_atom(s: String): Boolean =
    s match {
      case "1" => true
      case "0" => false
      case _ => throw new XML_Atom(s)
    }

  // Only the empty string denotes the unit value; anything else is rejected with XML_Atom.
  def unit_atom(s: String): Unit =
    if (s != "") throw new XML_Atom(s)


  /* structural nodes */

  // Unwrap a ":" element without attributes and return its body; reject any other tree.
  private def node(t: XML.Tree): XML.Body =
    t match {
      case XML.Elem(Markup(":", Nil), body) => body
      case _ => throw new XML_Body(List(t))
    }

  // Read attributes as a vector of values: the i-th attribute name must be the
  // number i (parsed by int_atom), otherwise XML_Atom is thrown for that name.
  private def vector(atts: XML.Attributes): List[String] =
    atts.zipWithIndex.map { case ((key, value), index) =>
      if (int_atom(key) != index) throw new XML_Atom(key)
      value
    }

  // Split an element into its numeric tag (the element name), the attribute
  // vector and the body; a non-element tree is rejected with XML_Body.
  private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
    t match {
      case XML.Elem(Markup(name, atts), body) =>
        val tag = int_atom(name)
        val values = vector(atts)
        (tag, (values, body))
      case _ => throw new XML_Body(List(t))
    }


  /* representation of standard types */

  // A property list is carried by the attributes of a single ":" element with empty body.
  val properties: T[Properties.T] =
    (body: XML.Body) =>
      body match {
        case List(XML.Elem(Markup(":", props), Nil)) => props
        case _ => throw new XML_Body(body)
      }

  // The empty body is the empty string; a single text node yields its content.
  val string: T[String] =
    (body: XML.Body) =>
      body match {
        case Nil => ""
        case List(XML.Text(s)) => s
        case _ => throw new XML_Body(body)
      }

  // Atomic decoders: read the body as a string, then parse it with the matching *_atom.
  val long: T[Long] = string andThen (long_atom _)

  val int: T[Int] = string andThen (int_atom _)

  val bool: T[Boolean] = string andThen (bool_atom _)

  val unit: T[Unit] = string andThen (unit_atom _)

  // Exactly two trees, each unwrapped by node and decoded by f resp. g.
  def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
    (body: XML.Body) =>
      body match {
        case List(t1, t2) =>
          val first = f(node(t1))
          val second = g(node(t2))
          (first, second)
        case _ => throw new XML_Body(body)
      }

  // Exactly three trees, each unwrapped by node and decoded by f, g, h in order.
  def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
    (body: XML.Body) =>
      body match {
        case List(t1, t2, t3) =>
          val first = f(node(t1))
          val second = g(node(t2))
          val third = h(node(t3))
          (first, second, third)
        case _ => throw new XML_Body(body)
      }

  // Every tree of the body is unwrapped by node and decoded by f, in order.
  def list[A](f: T[A]): T[List[A]] =
    (body: XML.Body) =>
      for (t <- body) yield f(node(t))

  // The empty body is None; a single tree is unwrapped by node and decoded by f.
  def option[A](f: T[A]): T[Option[A]] =
    (body: XML.Body) =>
      body match {
        case Nil => None
        case List(t) => Some(f(node(t)))
        case _ => throw new XML_Body(body)
      }

  // A single tagged tree selects the decoder at position `tag` in fs; a tag
  // outside the index range of fs is rejected with XML_Body for that tree.
  def variant[A](fs: List[V[A]]): T[A] =
    (body: XML.Body) =>
      body match {
        case List(t) =>
          val (tag, (xs, ts)) = tagged(t)
          fs.lift(tag) match {
            case Some(f) => f(xs, ts)
            case None => throw new XML_Body(List(t))
          }
        case _ => throw new XML_Body(body)
      }
}
| 27931 | 374 | } |