| author | wenzelm | 
| Wed, 25 Apr 2012 14:30:42 +0200 | |
| changeset 47747 | 47d1ffdbb6e7 | 
| parent 46839 | f7232c078fa5 | 
| child 49416 | 1053a564dd25 | 
| permissions | -rw-r--r-- | 
| 44698 | 1 | /* Title: Pure/PIDE/xml.scala | 
| 45673 
cd41e3903fbf
separate compilation of PIDE vs. Pure sources, which enables independent Scala library;
 wenzelm parents: 
45667diff
changeset | 2 | Module: PIDE | 
| 27931 | 3 | Author: Makarius | 
| 4 | ||
| 44698 | 5 | Untyped XML trees and basic data representation. | 
| 27931 | 6 | */ | 
| 7 | ||
| 8 | package isabelle | |
| 9 | ||
| 43520 
cec9b95fa35d
explicit import java.lang.System to prevent odd scope problems;
 wenzelm parents: 
38869diff
changeset | 10 | import java.lang.System | 
| 34108 | 11 | import java.util.WeakHashMap | 
| 12 | import java.lang.ref.WeakReference | |
| 13 | import javax.xml.parsers.DocumentBuilderFactory | |
| 14 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 15 | import scala.collection.mutable | 
| 38446 
9d59dab38fef
XML.Cache: pipe-lined (thread-safe) version using actor;
 wenzelm parents: 
38268diff
changeset | 16 | |
| 27947 | 17 | |
| 29203 | 18 | object XML | 
| 19 | {
 | |
| 43767 | 20 | /** XML trees **/ | 
| 21 | ||
| 27947 | 22 | /* datatype representation */ | 
| 23 | ||
| 43780 | 24 | type Attributes = Properties.T | 
| 27931 | 25 | |
| 38268 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 26 |   sealed abstract class Tree { override def toString = string_of_tree(this) }
 | 
| 38230 
ed147003de4b
simplified type XML.Tree: embed Markup directly, avoid slightly odd triple;
 wenzelm parents: 
36817diff
changeset | 27 | case class Elem(markup: Markup, body: List[Tree]) extends Tree | 
| 29204 | 28 | case class Text(content: String) extends Tree | 
| 29203 | 29 | |
| 38230 
ed147003de4b
simplified type XML.Tree: embed Markup directly, avoid slightly odd triple;
 wenzelm parents: 
36817diff
changeset | 30 | def elem(name: String, body: List[Tree]) = Elem(Markup(name, Nil), body) | 
| 
ed147003de4b
simplified type XML.Tree: embed Markup directly, avoid slightly odd triple;
 wenzelm parents: 
36817diff
changeset | 31 | def elem(name: String) = Elem(Markup(name, Nil), Nil) | 
| 33999 | 32 | |
| 38267 
e50c283dd125
type XML.Body as basic data representation language (Scala version);
 wenzelm parents: 
38263diff
changeset | 33 | type Body = List[Tree] | 
| 
e50c283dd125
type XML.Body as basic data representation language (Scala version);
 wenzelm parents: 
38263diff
changeset | 34 | |
| 29203 | 35 | |
| 36 | /* string representation */ | |
| 37 | ||
| 38268 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 38 | def string_of_body(body: Body): String = | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 39 |   {
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 40 | val s = new StringBuilder | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 41 | |
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 42 |     def text(txt: String) {
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 43 | if (txt == null) s ++= txt | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 44 |       else {
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 45 |         for (c <- txt.iterator) c match {
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 46 | case '<' => s ++= "<" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 47 | case '>' => s ++= ">" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 48 | case '&' => s ++= "&" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 49 | case '"' => s ++= """ | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 50 | case '\'' => s ++= "'" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 51 | case _ => s += c | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 52 | } | 
| 34005 | 53 | } | 
| 29203 | 54 | } | 
| 38268 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 55 |     def attrib(p: (String, String)) { s ++= " "; s ++= p._1; s ++= "=\""; text(p._2); s ++= "\"" }
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 56 |     def elem(markup: Markup) { s ++= markup.name; markup.properties.foreach(attrib) }
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 57 | def tree(t: Tree): Unit = | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 58 |       t match {
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 59 | case Elem(markup, Nil) => | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 60 | s ++= "<"; elem(markup); s ++= "/>" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 61 | case Elem(markup, ts) => | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 62 | s ++= "<"; elem(markup); s ++= ">" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 63 | ts.foreach(tree) | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 64 | s ++= "</"; s ++= markup.name; s ++= ">" | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 65 | case Text(txt) => text(txt) | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 66 | } | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 67 | body.foreach(tree) | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 68 | s.toString | 
| 29203 | 69 | } | 
| 70 | ||
| 38268 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 71 | def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree)) | 
| 27941 | 72 | |
| 73 | ||
| 38484 
9c1fde4e2487
tuned XML.content: Stream based iteration is supposed to be declarative *and* efficient;
 wenzelm parents: 
38446diff
changeset | 74 | /* text content */ | 
| 27941 | 75 | |
| 38484 
9c1fde4e2487
tuned XML.content: Stream based iteration is supposed to be declarative *and* efficient;
 wenzelm parents: 
38446diff
changeset | 76 | def content_stream(tree: Tree): Stream[String] = | 
| 
9c1fde4e2487
tuned XML.content: Stream based iteration is supposed to be declarative *and* efficient;
 wenzelm parents: 
38446diff
changeset | 77 |     tree match {
 | 
| 43747 | 78 | case Elem(_, body) => content_stream(body) | 
| 38484 
9c1fde4e2487
tuned XML.content: Stream based iteration is supposed to be declarative *and* efficient;
 wenzelm parents: 
38446diff
changeset | 79 | case Text(content) => Stream(content) | 
| 27941 | 80 | } | 
| 43747 | 81 | def content_stream(body: Body): Stream[String] = | 
| 82 | body.toStream.flatten(content_stream(_)) | |
| 27941 | 83 | |
| 38484 
9c1fde4e2487
tuned XML.content: Stream based iteration is supposed to be declarative *and* efficient;
 wenzelm parents: 
38446diff
changeset | 84 | def content(tree: Tree): Iterator[String] = content_stream(tree).iterator | 
| 43747 | 85 | def content(body: Body): Iterator[String] = content_stream(body).iterator | 
| 27941 | 86 | |
| 27947 | 87 | |
| 44808 | 88 | |
| 89 | /** cache for partial sharing (weak table) **/ | |
| 34108 | 90 | |
| 43745 | 91 | class Cache(initial_size: Int = 131071, max_string: Int = 100) | 
| 34108 | 92 |   {
 | 
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 93 | private var table = new WeakHashMap[Any, WeakReference[Any]](initial_size) | 
| 38446 
9d59dab38fef
XML.Cache: pipe-lined (thread-safe) version using actor;
 wenzelm parents: 
38268diff
changeset | 94 | |
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 95 | private def lookup[A](x: A): Option[A] = | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 96 |     {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 97 | val ref = table.get(x) | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 98 | if (ref == null) None | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 99 |       else {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 100 | val y = ref.asInstanceOf[WeakReference[A]].get | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 101 | if (y == null) None | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 102 | else Some(y) | 
| 38446 
9d59dab38fef
XML.Cache: pipe-lined (thread-safe) version using actor;
 wenzelm parents: 
38268diff
changeset | 103 | } | 
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 104 | } | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 105 | private def store[A](x: A): A = | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 106 |     {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 107 | table.put(x, new WeakReference[Any](x)) | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 108 | x | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 109 | } | 
| 34108 | 110 | |
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 111 | private def trim_bytes(s: String): String = new String(s.toCharArray) | 
| 38869 | 112 | |
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 113 | private def _cache_string(x: String): String = | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 114 |       lookup(x) match {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 115 | case Some(y) => y | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 116 | case None => | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 117 | val z = trim_bytes(x) | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 118 | if (z.length > max_string) z else store(z) | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 119 | } | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 120 | private def _cache_props(x: Properties.T): Properties.T = | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 121 | if (x.isEmpty) x | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 122 | else | 
| 34133 | 123 |         lookup(x) match {
 | 
| 124 | case Some(y) => y | |
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 125 | case None => store(x.map(p => (trim_bytes(p._1).intern, _cache_string(p._2)))) | 
| 34133 | 126 | } | 
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 127 | private def _cache_markup(x: Markup): Markup = | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 128 |       lookup(x) match {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 129 | case Some(y) => y | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 130 | case None => | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 131 |           x match {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 132 | case Markup(name, props) => | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 133 | store(Markup(_cache_string(name), _cache_props(props))) | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 134 | } | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 135 | } | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 136 | private def _cache_tree(x: XML.Tree): XML.Tree = | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 137 |       lookup(x) match {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 138 | case Some(y) => y | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 139 | case None => | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 140 |           x match {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 141 | case XML.Elem(markup, body) => | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 142 | store(XML.Elem(_cache_markup(markup), _cache_body(body))) | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 143 | case XML.Text(text) => store(XML.Text(_cache_string(text))) | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 144 | } | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 145 | } | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 146 | private def _cache_body(x: XML.Body): XML.Body = | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 147 | if (x.isEmpty) x | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 148 | else | 
| 34133 | 149 |         lookup(x) match {
 | 
| 150 | case Some(y) => y | |
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 151 | case None => x.map(_cache_tree(_)) | 
| 34133 | 152 | } | 
| 38446 
9d59dab38fef
XML.Cache: pipe-lined (thread-safe) version using actor;
 wenzelm parents: 
38268diff
changeset | 153 | |
| 
9d59dab38fef
XML.Cache: pipe-lined (thread-safe) version using actor;
 wenzelm parents: 
38268diff
changeset | 154 | // main methods | 
| 44705 | 155 |     def cache_string(x: String): String = synchronized { _cache_string(x) }
 | 
| 156 |     def cache_markup(x: Markup): Markup = synchronized { _cache_markup(x) }
 | |
| 157 |     def cache_tree(x: XML.Tree): XML.Tree = synchronized { _cache_tree(x) }
 | |
| 158 |     def cache_body(x: XML.Body): XML.Body = synchronized { _cache_body(x) }
 | |
| 34108 | 159 | } | 
| 160 | ||
| 161 | ||
| 43767 | 162 | |
| 163 | /** document object model (W3C DOM) **/ | |
| 27948 
2638b611d3ce
renamed DOM to document, add xml version and optional stylesheets;
 wenzelm parents: 
27947diff
changeset | 164 | |
| 34871 
e596a0b71f3c
incorporate "proofdocument" part into main Isabelle/Pure.jar -- except for html_panel.scala, which depends on external library (Lobo/Cobra browser);
 wenzelm parents: 
34133diff
changeset | 165 | def get_data(node: org.w3c.dom.Node): Option[XML.Tree] = | 
| 38231 | 166 |     node.getUserData(Markup.Data.name) match {
 | 
| 34047 | 167 | case tree: XML.Tree => Some(tree) | 
| 168 | case _ => None | |
| 169 | } | |
| 170 | ||
| 34871 
e596a0b71f3c
incorporate "proofdocument" part into main Isabelle/Pure.jar -- except for html_panel.scala, which depends on external library (Lobo/Cobra browser);
 wenzelm parents: 
34133diff
changeset | 171 | def document_node(doc: org.w3c.dom.Document, tree: Tree): org.w3c.dom.Node = | 
| 33953 | 172 |   {
 | 
| 34871 
e596a0b71f3c
incorporate "proofdocument" part into main Isabelle/Pure.jar -- except for html_panel.scala, which depends on external library (Lobo/Cobra browser);
 wenzelm parents: 
34133diff
changeset | 173 |     def DOM(tr: Tree): org.w3c.dom.Node = tr match {
 | 
| 38231 | 174 | case Elem(Markup.Data, List(data, t)) => | 
| 34046 | 175 | val node = DOM(t) | 
| 38231 | 176 | node.setUserData(Markup.Data.name, data, null) | 
| 34046 | 177 | node | 
| 38230 
ed147003de4b
simplified type XML.Tree: embed Markup directly, avoid slightly odd triple;
 wenzelm parents: 
36817diff
changeset | 178 | case Elem(Markup(name, atts), ts) => | 
| 38231 | 179 | if (name == Markup.Data.name) | 
| 34046 | 180 |           error("Malformed data element: " + tr.toString)
 | 
| 27947 | 181 | val node = doc.createElement(name) | 
| 182 | for ((name, value) <- atts) node.setAttribute(name, value) | |
| 27952 | 183 | for (t <- ts) node.appendChild(DOM(t)) | 
| 27947 | 184 | node | 
| 185 | case Text(txt) => doc.createTextNode(txt) | |
| 186 | } | |
| 33953 | 187 | DOM(tree) | 
| 188 | } | |
| 43767 | 189 | |
| 190 | ||
| 191 | ||
| 192 | /** XML as data representation language **/ | |
| 193 | ||
| 194 | class XML_Atom(s: String) extends Exception(s) | |
| 195 | class XML_Body(body: XML.Body) extends Exception | |
| 196 | ||
| 197 | object Encode | |
| 198 |   {
 | |
| 199 | type T[A] = A => XML.Body | |
| 200 | ||
| 201 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 202 | /* atomic values */ | 
| 43767 | 203 | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 204 | def long_atom(i: Long): String = i.toString | 
| 43767 | 205 | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 206 | def int_atom(i: Int): String = i.toString | 
| 43767 | 207 | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 208 | def bool_atom(b: Boolean): String = if (b) "1" else "0" | 
| 43767 | 209 | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 210 | def unit_atom(u: Unit) = "" | 
| 43767 | 211 | |
| 212 | ||
| 213 | /* structural nodes */ | |
| 214 | ||
| 215 |     private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)
 | |
| 216 | ||
| 43781 | 217 | private def vector(xs: List[String]): XML.Attributes = | 
| 46839 
f7232c078fa5
simplified -- plain map_index is sufficient (pointed out by Enrico Tassi);
 wenzelm parents: 
45673diff
changeset | 218 |       xs.zipWithIndex.map({ case (x, i) => (int_atom(i), x) })
 | 
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 219 | |
| 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 220 | private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree = | 
| 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 221 | XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2) | 
| 43767 | 222 | |
| 223 | ||
| 224 | /* representation of standard types */ | |
| 225 | ||
| 43780 | 226 | val properties: T[Properties.T] = | 
| 43767 | 227 |       (props => List(XML.Elem(Markup(":", props), Nil)))
 | 
| 228 | ||
| 229 | val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s))) | |
| 230 | ||
| 231 | val long: T[Long] = (x => string(long_atom(x))) | |
| 232 | ||
| 233 | val int: T[Int] = (x => string(int_atom(x))) | |
| 234 | ||
| 235 | val bool: T[Boolean] = (x => string(bool_atom(x))) | |
| 236 | ||
| 237 | val unit: T[Unit] = (x => string(unit_atom(x))) | |
| 238 | ||
| 239 | def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = | |
| 240 | (x => List(node(f(x._1)), node(g(x._2)))) | |
| 241 | ||
| 242 | def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = | |
| 243 | (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3)))) | |
| 244 | ||
| 245 | def list[A](f: T[A]): T[List[A]] = | |
| 246 | (xs => xs.map((x: A) => node(f(x)))) | |
| 247 | ||
| 248 | def option[A](f: T[A]): T[Option[A]] = | |
| 249 |     {
 | |
| 250 | case None => Nil | |
| 251 | case Some(x) => List(node(f(x))) | |
| 252 | } | |
| 253 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 254 | def variant[A](fs: List[PartialFunction[A, (List[String], XML.Body)]]): T[A] = | 
| 43767 | 255 |     {
 | 
| 256 | case x => | |
| 257 | val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get | |
| 258 | List(tagged(tag, f(x))) | |
| 259 | } | |
| 260 | } | |
| 261 | ||
| 262 | object Decode | |
| 263 |   {
 | |
| 264 | type T[A] = XML.Body => A | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 265 | type V[A] = (List[String], XML.Body) => A | 
| 43767 | 266 | |
| 267 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 268 | /* atomic values */ | 
| 43767 | 269 | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 270 | def long_atom(s: String): Long = | 
| 43767 | 271 |       try { java.lang.Long.parseLong(s) }
 | 
| 272 |       catch { case e: NumberFormatException => throw new XML_Atom(s) }
 | |
| 273 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 274 | def int_atom(s: String): Int = | 
| 43767 | 275 |       try { Integer.parseInt(s) }
 | 
| 276 |       catch { case e: NumberFormatException => throw new XML_Atom(s) }
 | |
| 277 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 278 | def bool_atom(s: String): Boolean = | 
| 43767 | 279 | if (s == "1") true | 
| 280 | else if (s == "0") false | |
| 281 | else throw new XML_Atom(s) | |
| 282 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 283 | def unit_atom(s: String): Unit = | 
| 43767 | 284 | if (s == "") () else throw new XML_Atom(s) | 
| 285 | ||
| 286 | ||
| 287 | /* structural nodes */ | |
| 288 | ||
| 289 | private def node(t: XML.Tree): XML.Body = | |
| 290 |       t match {
 | |
| 291 |         case XML.Elem(Markup(":", Nil), ts) => ts
 | |
| 292 | case _ => throw new XML_Body(List(t)) | |
| 293 | } | |
| 294 | ||
| 43781 | 295 | private def vector(atts: XML.Attributes): List[String] = | 
| 46839 
f7232c078fa5
simplified -- plain map_index is sufficient (pointed out by Enrico Tassi);
 wenzelm parents: 
45673diff
changeset | 296 | atts.iterator.zipWithIndex.map( | 
| 
f7232c078fa5
simplified -- plain map_index is sufficient (pointed out by Enrico Tassi);
 wenzelm parents: 
45673diff
changeset | 297 |         { case ((a, x), i) => if (int_atom(a) == i) x else throw new XML_Atom(a) }).toList
 | 
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 298 | |
| 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 299 | private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) = | 
| 43767 | 300 |       t match {
 | 
| 43781 | 301 | case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts)) | 
| 43767 | 302 | case _ => throw new XML_Body(List(t)) | 
| 303 | } | |
| 304 | ||
| 305 | ||
| 306 | /* representation of standard types */ | |
| 307 | ||
| 43780 | 308 | val properties: T[Properties.T] = | 
| 43767 | 309 |     {
 | 
| 310 |       case List(XML.Elem(Markup(":", props), Nil)) => props
 | |
| 311 | case ts => throw new XML_Body(ts) | |
| 312 | } | |
| 313 | ||
| 314 | val string: T[String] = | |
| 315 |     {
 | |
| 316 | case Nil => "" | |
| 317 | case List(XML.Text(s)) => s | |
| 318 | case ts => throw new XML_Body(ts) | |
| 319 | } | |
| 320 | ||
| 321 | val long: T[Long] = (x => long_atom(string(x))) | |
| 322 | ||
| 323 | val int: T[Int] = (x => int_atom(string(x))) | |
| 324 | ||
| 325 | val bool: T[Boolean] = (x => bool_atom(string(x))) | |
| 326 | ||
| 327 | val unit: T[Unit] = (x => unit_atom(string(x))) | |
| 328 | ||
| 329 | def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = | |
| 330 |     {
 | |
| 331 | case List(t1, t2) => (f(node(t1)), g(node(t2))) | |
| 332 | case ts => throw new XML_Body(ts) | |
| 333 | } | |
| 334 | ||
| 335 | def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = | |
| 336 |     {
 | |
| 337 | case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3))) | |
| 338 | case ts => throw new XML_Body(ts) | |
| 339 | } | |
| 340 | ||
| 341 | def list[A](f: T[A]): T[List[A]] = | |
| 342 | (ts => ts.map(t => f(node(t)))) | |
| 343 | ||
| 344 | def option[A](f: T[A]): T[Option[A]] = | |
| 345 |     {
 | |
| 346 | case Nil => None | |
| 347 | case List(t) => Some(f(node(t))) | |
| 348 | case ts => throw new XML_Body(ts) | |
| 349 | } | |
| 350 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 351 | def variant[A](fs: List[V[A]]): T[A] = | 
| 43767 | 352 |     {
 | 
| 353 | case List(t) => | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 354 | val (tag, (xs, ts)) = tagged(t) | 
| 43768 | 355 | val f = | 
| 356 |           try { fs(tag) }
 | |
| 357 |           catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) }
 | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 358 | f(xs, ts) | 
| 43767 | 359 | case ts => throw new XML_Body(ts) | 
| 360 | } | |
| 361 | } | |
| 27931 | 362 | } |