| author | wenzelm | 
| Tue, 28 Aug 2018 11:40:11 +0200 | |
| changeset 68829 | 1a4fa494a4a8 | 
| parent 68265 | f0899dad4877 | 
| child 69804 | 9efccbad7d42 | 
| permissions | -rw-r--r-- | 
| 44698 | 1 | /* Title: Pure/PIDE/xml.scala | 
| 27931 | 2 | Author: Makarius | 
| 3 | ||
| 44698 | 4 | Untyped XML trees and basic data representation. | 
| 27931 | 5 | */ | 
| 6 | ||
| 7 | package isabelle | |
| 8 | ||
| 55618 | 9 | |
| 29203 | 10 | object XML | 
| 11 | {
 | |
| 43767 | 12 | /** XML trees **/ | 
| 13 | ||
| 27947 | 14 | /* datatype representation */ | 
| 15 | ||
| 65753 | 16 | type Attribute = Properties.Entry | 
| 43780 | 17 | type Attributes = Properties.T | 
| 27931 | 18 | |
| 57912 | 19 |   sealed abstract class Tree { override def toString: String = string_of_tree(this) }
 | 
| 64354 | 20 | type Body = List[Tree] | 
| 21 | case class Elem(markup: Markup, body: Body) extends Tree | |
| 52890 | 22 |   {
 | 
| 23 | def name: String = markup.name | |
| 65753 | 24 | |
| 64358 | 25 | def update_attributes(more_attributes: Attributes): Elem = | 
| 26 | if (more_attributes.isEmpty) this | |
| 27 | else Elem(markup.update_properties(more_attributes), body) | |
| 65753 | 28 | |
| 65772 | 29 | def + (att: Attribute): Elem = Elem(markup + att, body) | 
| 52890 | 30 | } | 
| 29204 | 31 | case class Text(content: String) extends Tree | 
| 29203 | 32 | |
| 66196 | 33 | def elem(markup: Markup): XML.Elem = XML.Elem(markup, Nil) | 
| 64354 | 34 | def elem(name: String, body: Body): XML.Elem = XML.Elem(Markup(name, Nil), body) | 
| 35 | def elem(name: String): XML.Elem = XML.Elem(Markup(name, Nil), Nil) | |
| 38267 
e50c283dd125
type XML.Body as basic data representation language (Scala version);
 wenzelm parents: 
38263diff
changeset | 36 | |
| 29203 | 37 | |
| 49650 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 38 | /* wrapped elements */ | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 39 | |
| 60215 | 40 | val XML_ELEM = "xml_elem" | 
| 41 | val XML_NAME = "xml_name" | |
| 42 | val XML_BODY = "xml_body" | |
| 49650 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 43 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 44 | object Wrapped_Elem | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 45 |   {
 | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 46 | def apply(markup: Markup, body1: Body, body2: Body): XML.Elem = | 
| 61026 | 47 | XML.Elem(Markup(XML_ELEM, (XML_NAME, markup.name) :: markup.properties), | 
| 48 | XML.Elem(Markup(XML_BODY, Nil), body1) :: body2) | |
| 49650 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 49 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 50 | def unapply(tree: Tree): Option[(Markup, Body, Body)] = | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 51 |       tree match {
 | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 52 | case | 
| 61026 | 53 | XML.Elem(Markup(XML_ELEM, (XML_NAME, name) :: props), | 
| 54 | XML.Elem(Markup(XML_BODY, Nil), body1) :: body2) => | |
| 49650 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 55 | Some(Markup(name, props), body1, body2) | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 56 | case _ => None | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 57 | } | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 58 | } | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 59 | |
| 67818 
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
 wenzelm parents: 
67113diff
changeset | 60 | object Root_Elem | 
| 
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
 wenzelm parents: 
67113diff
changeset | 61 |   {
 | 
| 
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
 wenzelm parents: 
67113diff
changeset | 62 | def apply(body: Body): XML.Elem = XML.Elem(Markup(XML_ELEM, Nil), body) | 
| 
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
 wenzelm parents: 
67113diff
changeset | 63 | def unapply(tree: Tree): Option[Body] = | 
| 
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
 wenzelm parents: 
67113diff
changeset | 64 |       tree match {
 | 
| 
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
 wenzelm parents: 
67113diff
changeset | 65 | case XML.Elem(Markup(XML_ELEM, Nil), body) => Some(body) | 
| 
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
 wenzelm parents: 
67113diff
changeset | 66 | case _ => None | 
| 
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
 wenzelm parents: 
67113diff
changeset | 67 | } | 
| 
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
 wenzelm parents: 
67113diff
changeset | 68 | } | 
| 
2457bea123e4
convenience to represent XML.Body as single XML.Elem;
 wenzelm parents: 
67113diff
changeset | 69 | |
| 49650 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 70 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 71 | /* traverse text */ | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 72 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 73 | def traverse_text[A](body: Body)(a: A)(op: (A, String) => A): A = | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 74 |   {
 | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 75 | def traverse(x: A, t: Tree): A = | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 76 |       t match {
 | 
| 61026 | 77 | case XML.Wrapped_Elem(_, _, ts) => (x /: ts)(traverse) | 
| 78 | case XML.Elem(_, ts) => (x /: ts)(traverse) | |
| 79 | case XML.Text(s) => op(x, s) | |
| 49650 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 80 | } | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 81 | (a /: body)(traverse) | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 82 | } | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 83 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 84 |   def text_length(body: Body): Int = traverse_text(body)(0) { case (n, s) => n + s.length }
 | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 85 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 86 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 87 | /* text content */ | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 88 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 89 | def content(body: Body): String = | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 90 |   {
 | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 91 | val text = new StringBuilder(text_length(body)) | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 92 |     traverse_text(body)(()) { case (_, s) => text.append(s) }
 | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 93 | text.toString | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 94 | } | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 95 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 96 | def content(tree: Tree): String = content(List(tree)) | 
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 97 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 98 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 99 | |
| 
9fad6480300d
support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
 wenzelm parents: 
49613diff
changeset | 100 | /** string representation **/ | 
| 29203 | 101 | |
| 65990 | 102 | def output_char(c: Char, s: StringBuilder) | 
| 103 |   {
 | |
| 104 |     c match {
 | |
| 105 | case '<' => s ++= "<" | |
| 106 | case '>' => s ++= ">" | |
| 107 | case '&' => s ++= "&" | |
| 108 | case '"' => s ++= """ | |
| 109 | case '\'' => s ++= "'" | |
| 110 | case _ => s += c | |
| 111 | } | |
| 112 | } | |
| 113 | ||
| 114 | def output_string(str: String, s: StringBuilder) | |
| 115 |   {
 | |
| 116 | if (str == null) s ++= str | |
| 117 | else str.iterator.foreach(c => output_char(c, s)) | |
| 118 | } | |
| 119 | ||
| 38268 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 120 | def string_of_body(body: Body): String = | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 121 |   {
 | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 122 | val s = new StringBuilder | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 123 | |
| 65990 | 124 |     def text(txt: String) { output_string(txt, s) }
 | 
| 65991 | 125 | def elem(markup: Markup) | 
| 126 |     {
 | |
| 127 | s ++= markup.name | |
| 128 |       for ((a, b) <- markup.properties) {
 | |
| 129 | s += ' '; s ++= a; s += '='; s += '"'; text(b); s += '"' | |
| 130 | } | |
| 131 | } | |
| 38268 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 132 | def tree(t: Tree): Unit = | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 133 |       t match {
 | 
| 61026 | 134 | case XML.Elem(markup, Nil) => | 
| 65991 | 135 | s += '<'; elem(markup); s ++= "/>" | 
| 61026 | 136 | case XML.Elem(markup, ts) => | 
| 65991 | 137 | s += '<'; elem(markup); s += '>' | 
| 38268 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 138 | ts.foreach(tree) | 
| 65991 | 139 | s ++= "</"; s ++= markup.name; s += '>' | 
| 61026 | 140 | case XML.Text(txt) => text(txt) | 
| 38268 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 141 | } | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 142 | body.foreach(tree) | 
| 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 143 | s.toString | 
| 29203 | 144 | } | 
| 145 | ||
| 38268 
beb86b805590
more uniform XML/YXML string_of_body/string_of_tree;
 wenzelm parents: 
38267diff
changeset | 146 | def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree)) | 
| 27941 | 147 | |
| 148 | ||
| 44808 | 149 | |
| 68265 | 150 | /** cache **/ | 
| 34108 | 151 | |
| 68169 | 152 | def make_cache(initial_size: Int = 131071, max_string: Int = 100): Cache = | 
| 153 | new Cache(initial_size, max_string) | |
| 154 | ||
| 155 | class Cache private[XML](initial_size: Int, max_string: Int) | |
| 68265 | 156 | extends isabelle.Cache(initial_size, max_string) | 
| 34108 | 157 |   {
 | 
| 68265 | 158 | protected def cache_props(x: Properties.T): Properties.T = | 
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 159 |     {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 160 | if (x.isEmpty) x | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 161 | else | 
| 34133 | 162 |         lookup(x) match {
 | 
| 163 | case Some(y) => y | |
| 65903 | 164 | case None => store(x.map(p => (Library.isolate_substring(p._1).intern, cache_string(p._2)))) | 
| 34133 | 165 | } | 
| 68265 | 166 | } | 
| 167 | ||
| 168 | protected def cache_markup(x: Markup): Markup = | |
| 169 |     {
 | |
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 170 |       lookup(x) match {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 171 | case Some(y) => y | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 172 | case None => | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 173 |           x match {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 174 | case Markup(name, props) => | 
| 51663 | 175 | store(Markup(cache_string(name), cache_props(props))) | 
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 176 | } | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 177 | } | 
| 68265 | 178 | } | 
| 179 | ||
| 180 | protected def cache_tree(x: XML.Tree): XML.Tree = | |
| 181 |     {
 | |
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 182 |       lookup(x) match {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 183 | case Some(y) => y | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 184 | case None => | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 185 |           x match {
 | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 186 | case XML.Elem(markup, body) => | 
| 51663 | 187 | store(XML.Elem(cache_markup(markup), cache_body(body))) | 
| 188 | case XML.Text(text) => store(XML.Text(cache_string(text))) | |
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 189 | } | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 190 | } | 
| 68265 | 191 | } | 
| 192 | ||
| 193 | protected def cache_body(x: XML.Body): XML.Body = | |
| 194 |     {
 | |
| 44704 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 195 | if (x.isEmpty) x | 
| 
528d635ef6f0
synchronous XML.Cache without actor -- potentially more efficient on machines with few cores;
 wenzelm parents: 
44698diff
changeset | 196 | else | 
| 34133 | 197 |         lookup(x) match {
 | 
| 198 | case Some(y) => y | |
| 51663 | 199 | case None => x.map(cache_tree(_)) | 
| 34133 | 200 | } | 
| 68265 | 201 | } | 
| 38446 
9d59dab38fef
XML.Cache: pipe-lined (thread-safe) version using actor;
 wenzelm parents: 
38268diff
changeset | 202 | |
| 
9d59dab38fef
XML.Cache: pipe-lined (thread-safe) version using actor;
 wenzelm parents: 
38268diff
changeset | 203 | // main methods | 
| 51663 | 204 |     def props(x: Properties.T): Properties.T = synchronized { cache_props(x) }
 | 
| 205 |     def markup(x: Markup): Markup = synchronized { cache_markup(x) }
 | |
| 206 |     def tree(x: XML.Tree): XML.Tree = synchronized { cache_tree(x) }
 | |
| 207 |     def body(x: XML.Body): XML.Body = synchronized { cache_body(x) }
 | |
| 208 |     def elem(x: XML.Elem): XML.Elem = synchronized { cache_tree(x).asInstanceOf[XML.Elem] }
 | |
| 34108 | 209 | } | 
| 210 | ||
| 211 | ||
| 43767 | 212 | |
| 213 | /** XML as data representation language **/ | |
| 214 | ||
| 51987 | 215 | abstract class Error(s: String) extends Exception(s) | 
| 216 | class XML_Atom(s: String) extends Error(s) | |
| 217 |   class XML_Body(body: XML.Body) extends Error("")
 | |
| 43767 | 218 | |
| 219 | object Encode | |
| 220 |   {
 | |
| 221 | type T[A] = A => XML.Body | |
| 65334 | 222 | type V[A] = PartialFunction[A, (List[String], XML.Body)] | 
| 43767 | 223 | |
| 224 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 225 | /* atomic values */ | 
| 43767 | 226 | |
| 57909 
0fb331032f02
more compact representation of special string values;
 wenzelm parents: 
55618diff
changeset | 227 | def long_atom(i: Long): String = Library.signed_string_of_long(i) | 
| 43767 | 228 | |
| 57909 
0fb331032f02
more compact representation of special string values;
 wenzelm parents: 
55618diff
changeset | 229 | def int_atom(i: Int): String = Library.signed_string_of_int(i) | 
| 43767 | 230 | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 231 | def bool_atom(b: Boolean): String = if (b) "1" else "0" | 
| 43767 | 232 | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 233 | def unit_atom(u: Unit) = "" | 
| 43767 | 234 | |
| 235 | ||
| 236 | /* structural nodes */ | |
| 237 | ||
| 238 |     private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)
 | |
| 239 | ||
| 43781 | 240 | private def vector(xs: List[String]): XML.Attributes = | 
| 46839 
f7232c078fa5
simplified -- plain map_index is sufficient (pointed out by Enrico Tassi);
 wenzelm parents: 
45673diff
changeset | 241 |       xs.zipWithIndex.map({ case (x, i) => (int_atom(i), x) })
 | 
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 242 | |
| 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 243 | private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree = | 
| 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 244 | XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2) | 
| 43767 | 245 | |
| 246 | ||
| 247 | /* representation of standard types */ | |
| 248 | ||
| 65333 | 249 | val tree: T[XML.Tree] = (t => List(t)) | 
| 250 | ||
| 43780 | 251 | val properties: T[Properties.T] = | 
| 43767 | 252 |       (props => List(XML.Elem(Markup(":", props), Nil)))
 | 
| 253 | ||
| 254 | val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s))) | |
| 255 | ||
| 256 | val long: T[Long] = (x => string(long_atom(x))) | |
| 257 | ||
| 258 | val int: T[Int] = (x => string(int_atom(x))) | |
| 259 | ||
| 260 | val bool: T[Boolean] = (x => string(bool_atom(x))) | |
| 261 | ||
| 262 | val unit: T[Unit] = (x => string(unit_atom(x))) | |
| 263 | ||
| 264 | def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = | |
| 265 | (x => List(node(f(x._1)), node(g(x._2)))) | |
| 266 | ||
| 267 | def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = | |
| 268 | (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3)))) | |
| 269 | ||
| 270 | def list[A](f: T[A]): T[List[A]] = | |
| 271 | (xs => xs.map((x: A) => node(f(x)))) | |
| 272 | ||
| 273 | def option[A](f: T[A]): T[Option[A]] = | |
| 274 |     {
 | |
| 275 | case None => Nil | |
| 276 | case Some(x) => List(node(f(x))) | |
| 277 | } | |
| 278 | ||
| 65334 | 279 | def variant[A](fs: List[V[A]]): T[A] = | 
| 43767 | 280 |     {
 | 
| 281 | case x => | |
| 282 | val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get | |
| 283 | List(tagged(tag, f(x))) | |
| 284 | } | |
| 285 | } | |
| 286 | ||
| 287 | object Decode | |
| 288 |   {
 | |
| 289 | type T[A] = XML.Body => A | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 290 | type V[A] = (List[String], XML.Body) => A | 
| 43767 | 291 | |
| 292 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 293 | /* atomic values */ | 
| 43767 | 294 | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 295 | def long_atom(s: String): Long = | 
| 43767 | 296 |       try { java.lang.Long.parseLong(s) }
 | 
| 297 |       catch { case e: NumberFormatException => throw new XML_Atom(s) }
 | |
| 298 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 299 | def int_atom(s: String): Int = | 
| 43767 | 300 |       try { Integer.parseInt(s) }
 | 
| 301 |       catch { case e: NumberFormatException => throw new XML_Atom(s) }
 | |
| 302 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 303 | def bool_atom(s: String): Boolean = | 
| 43767 | 304 | if (s == "1") true | 
| 305 | else if (s == "0") false | |
| 306 | else throw new XML_Atom(s) | |
| 307 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 308 | def unit_atom(s: String): Unit = | 
| 43767 | 309 | if (s == "") () else throw new XML_Atom(s) | 
| 310 | ||
| 311 | ||
| 312 | /* structural nodes */ | |
| 313 | ||
| 314 | private def node(t: XML.Tree): XML.Body = | |
| 315 |       t match {
 | |
| 316 |         case XML.Elem(Markup(":", Nil), ts) => ts
 | |
| 317 | case _ => throw new XML_Body(List(t)) | |
| 318 | } | |
| 319 | ||
| 43781 | 320 | private def vector(atts: XML.Attributes): List[String] = | 
| 46839 
f7232c078fa5
simplified -- plain map_index is sufficient (pointed out by Enrico Tassi);
 wenzelm parents: 
45673diff
changeset | 321 | atts.iterator.zipWithIndex.map( | 
| 
f7232c078fa5
simplified -- plain map_index is sufficient (pointed out by Enrico Tassi);
 wenzelm parents: 
45673diff
changeset | 322 |         { case ((a, x), i) => if (int_atom(a) == i) x else throw new XML_Atom(a) }).toList
 | 
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 323 | |
| 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 324 | private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) = | 
| 43767 | 325 |       t match {
 | 
| 43781 | 326 | case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts)) | 
| 43767 | 327 | case _ => throw new XML_Body(List(t)) | 
| 328 | } | |
| 329 | ||
| 330 | ||
| 331 | /* representation of standard types */ | |
| 332 | ||
| 65333 | 333 | val tree: T[XML.Tree] = | 
| 334 |     {
 | |
| 335 | case List(t) => t | |
| 336 | case ts => throw new XML_Body(ts) | |
| 337 | } | |
| 338 | ||
| 43780 | 339 | val properties: T[Properties.T] = | 
| 43767 | 340 |     {
 | 
| 341 |       case List(XML.Elem(Markup(":", props), Nil)) => props
 | |
| 342 | case ts => throw new XML_Body(ts) | |
| 343 | } | |
| 344 | ||
| 345 | val string: T[String] = | |
| 346 |     {
 | |
| 347 | case Nil => "" | |
| 348 | case List(XML.Text(s)) => s | |
| 349 | case ts => throw new XML_Body(ts) | |
| 350 | } | |
| 351 | ||
| 352 | val long: T[Long] = (x => long_atom(string(x))) | |
| 353 | ||
| 354 | val int: T[Int] = (x => int_atom(string(x))) | |
| 355 | ||
| 356 | val bool: T[Boolean] = (x => bool_atom(string(x))) | |
| 357 | ||
| 358 | val unit: T[Unit] = (x => unit_atom(string(x))) | |
| 359 | ||
| 360 | def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = | |
| 361 |     {
 | |
| 362 | case List(t1, t2) => (f(node(t1)), g(node(t2))) | |
| 363 | case ts => throw new XML_Body(ts) | |
| 364 | } | |
| 365 | ||
| 366 | def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = | |
| 367 |     {
 | |
| 368 | case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3))) | |
| 369 | case ts => throw new XML_Body(ts) | |
| 370 | } | |
| 371 | ||
| 372 | def list[A](f: T[A]): T[List[A]] = | |
| 373 | (ts => ts.map(t => f(node(t)))) | |
| 374 | ||
| 375 | def option[A](f: T[A]): T[Option[A]] = | |
| 376 |     {
 | |
| 377 | case Nil => None | |
| 378 | case List(t) => Some(f(node(t))) | |
| 379 | case ts => throw new XML_Body(ts) | |
| 380 | } | |
| 381 | ||
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 382 | def variant[A](fs: List[V[A]]): T[A] = | 
| 43767 | 383 |     {
 | 
| 384 | case List(t) => | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 385 | val (tag, (xs, ts)) = tagged(t) | 
| 43768 | 386 | val f = | 
| 387 |           try { fs(tag) }
 | |
| 388 |           catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) }
 | |
| 43778 
ce9189450447
more compact representation of XML data (notably sort/typ/term), using properties as vector of atomic values;
 wenzelm parents: 
43768diff
changeset | 389 | f(xs, ts) | 
| 43767 | 390 | case ts => throw new XML_Body(ts) | 
| 391 | } | |
| 392 | } | |
| 27931 | 393 | } |