src/Pure/PIDE/xml.scala
author wenzelm
Thu, 10 Apr 2025 14:12:33 +0200
changeset 82475 0a6d57c4d58b
parent 81428 257ec066b360
permissions -rw-r--r--
more explicit build stages;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
44698
0385292321a0 moved XML/YXML to src/Pure/PIDE;
wenzelm
parents: 44697
diff changeset
     1
/*  Title:      Pure/PIDE/xml.scala
27931
b533a9de87a7 Minimalistic XML tree values.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
b533a9de87a7 Minimalistic XML tree values.
wenzelm
parents:
diff changeset
     3
44698
0385292321a0 moved XML/YXML to src/Pure/PIDE;
wenzelm
parents: 44697
diff changeset
     4
Untyped XML trees and basic data representation.
27931
b533a9de87a7 Minimalistic XML tree values.
wenzelm
parents:
diff changeset
     5
*/
b533a9de87a7 Minimalistic XML tree values.
wenzelm
parents:
diff changeset
     6
b533a9de87a7 Minimalistic XML tree values.
wenzelm
parents:
diff changeset
     7
package isabelle
b533a9de87a7 Minimalistic XML tree values.
wenzelm
parents:
diff changeset
     8
80430
89cd8fedefa7 more robust: prefer tail-recursive traversal;
wenzelm
parents: 80429
diff changeset
     9
import scala.annotation.tailrec
89cd8fedefa7 more robust: prefer tail-recursive traversal;
wenzelm
parents: 80429
diff changeset
    10
55618
995162143ef4 tuned imports;
wenzelm
parents: 52890
diff changeset
    11
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 74789
diff changeset
    12
object XML {
43767
e0219ef7f84c tuned XML modules;
wenzelm
parents: 43747
diff changeset
    13
  /** XML trees **/
e0219ef7f84c tuned XML modules;
wenzelm
parents: 43747
diff changeset
    14
27947
b6dc0a396857 tuned comments;
wenzelm
parents: 27942
diff changeset
    15
  /* datatype representation */
b6dc0a396857 tuned comments;
wenzelm
parents: 27942
diff changeset
    16
65753
787e5ee6ef53 more operations;
wenzelm
parents: 65334
diff changeset
    17
  // one attribute entry of an element (alias of Properties.Entry)
  type Attribute = Properties.Entry
43780
2cb2310d68b6 more uniform Properties in ML and Scala;
wenzelm
parents: 43778
diff changeset
    18
  // attribute collection of an element (alias of Properties.T)
  type Attributes = Properties.T
27931
b533a9de87a7 Minimalistic XML tree values.
wenzelm
parents:
diff changeset
    19
80430
89cd8fedefa7 more robust: prefer tail-recursive traversal;
wenzelm
parents: 80429
diff changeset
    20
  // common supertype of the items on a traversal work list: trees and End markers
  trait Trav
89cd8fedefa7 more robust: prefer tail-recursive traversal;
wenzelm
parents: 80429
diff changeset
    21
  // work-list marker: Traversal.traverse calls end_elem(name) when it reaches it
  case class End(name: String) extends Trav
89cd8fedefa7 more robust: prefer tail-recursive traversal;
wenzelm
parents: 80429
diff changeset
    22
89cd8fedefa7 more robust: prefer tail-recursive traversal;
wenzelm
parents: 80429
diff changeset
    23
  /* XML tree node (see Elem and Text); printed via string_of_tree */
  sealed abstract class Tree extends Trav {
    override def toString: String = XML.string_of_tree(this)
  }
64354
wenzelm
parents: 61026
diff changeset
    26
  // a body is an ordered list of sibling trees
  type Body = List[Tree]
80430
89cd8fedefa7 more robust: prefer tail-recursive traversal;
wenzelm
parents: 80429
diff changeset
    27
  /* element node: markup together with the nested body */
  case class Elem(markup: Markup, body: Body) extends Tree with Trav {
    // hash code over (markup, body): computed on first demand, then reused
    private lazy val hash: Int = (markup, body).hashCode()
    override def hashCode(): Int = hash

    // element name, taken from the markup
    def name: String = markup.name

    // merge further attributes into the markup; an empty update returns this very element
    def update_attributes(more_attributes: Attributes): Elem =
      if (more_attributes.nonEmpty) Elem(markup.update_properties(more_attributes), body)
      else this

    // add a single attribute to the markup
    def + (att: Attribute): Elem = Elem(markup + att, body)
  }
80430
89cd8fedefa7 more robust: prefer tail-recursive traversal;
wenzelm
parents: 80429
diff changeset
    39
  /* text node: plain string content */
  case class Text(content: String) extends Tree with Trav {
    // hash code of the content: computed on first demand, then reused
    private lazy val hash: Int = content.hashCode()
    override def hashCode(): Int = hash
  }
29203
0c4effb73518 override toString method;
wenzelm
parents: 29140
diff changeset
    43
80429
6f4d5d922da7 clarified signature: more explicit types;
wenzelm
parents: 76351
diff changeset
    44
  /* event-style traversal of XML trees, driven by an explicit work list */
  trait Traversal {
    // callbacks: text node, element start (end = true for an element without body),
    // element end
    def text(s: String): Unit
    def elem(markup: Markup, end: Boolean = false): Unit
    def end_elem(name: String): Unit

    // Visit all trees in document order, using tail-recursive loops over a
    // work list of pending items instead of recursion over the tree structure.
    def traverse(trees: List[Tree]): Unit = {
      // Consume the leading atomic items (text, elements with empty body, End
      // markers) and return the remaining work list.
      @tailrec def emit_leading(pending: List[Trav]): List[Trav] =
        pending match {
          case Text(s) :: more =>
            text(s)
            emit_leading(more)
          case Elem(markup, Nil) :: more =>
            // elements with empty markup produce no output at all
            if (!markup.is_empty) elem(markup, end = true)
            emit_leading(more)
          case End(name) :: more =>
            end_elem(name)
            emit_leading(more)
          case _ => pending
        }

      // After emit_leading the head can only be an element with non-empty body:
      // open it (unless its markup is empty) and schedule its body, followed by
      // the matching End marker, in front of the remaining items.
      @tailrec def loop(pending: List[Trav]): Unit =
        (emit_leading(pending) : @unchecked) match {
          case Nil =>
          case Elem(markup, body) :: more if body.nonEmpty =>
            if (markup.is_empty) loop(emit_leading(body) ::: more)
            else {
              elem(markup)
              loop(emit_leading(body) ::: End(markup.name) :: more)
            }
        }

      loop(trees)
    }
  }
6f4d5d922da7 clarified signature: more explicit types;
wenzelm
parents: 76351
diff changeset
    71
80434
6f1c8084f672 tuned module structure;
wenzelm
parents: 80432
diff changeset
    72
  // element with the given markup and empty body
  def elem(markup: Markup): XML.Elem = XML.Elem(markup, Nil)
6f1c8084f672 tuned module structure;
wenzelm
parents: 80432
diff changeset
    73
  // element with the given name, no properties, and the given body
  def elem(name: String, body: Body): XML.Elem = XML.Elem(Markup(name, Nil), body)
6f1c8084f672 tuned module structure;
wenzelm
parents: 80432
diff changeset
    74
  // element with the given name, no properties, and empty body
  def elem(name: String): XML.Elem = XML.Elem(Markup(name, Nil), Nil)
6f1c8084f672 tuned module structure;
wenzelm
parents: 80432
diff changeset
    75
6f1c8084f672 tuned module structure;
wenzelm
parents: 80432
diff changeset
    76
  // text node with empty content
  val no_text: Text = Text("")
6f1c8084f672 tuned module structure;
wenzelm
parents: 80432
diff changeset
    77
  // text node holding a single newline character
  val newline: Text = Text("\n")
6f1c8084f672 tuned module structure;
wenzelm
parents: 80432
diff changeset
    78
6f1c8084f672 tuned module structure;
wenzelm
parents: 80432
diff changeset
    79
  // body for plain text: a single text node, or no node at all for the empty string
  def string(s: String): Body = if (!s.isEmpty) List(Text(s)) else Nil
6f1c8084f672 tuned module structure;
wenzelm
parents: 80432
diff changeset
    80
6f1c8084f672 tuned module structure;
wenzelm
parents: 80432
diff changeset
    81
  // surround body with text for bg and en (empty strings contribute no node)
  def enclose(bg: String, en: String, body: Body): Body = {
    val before = string(bg)
    val after = string(en)
    before ::: body ::: after
  }
6f1c8084f672 tuned module structure;
wenzelm
parents: 80432
diff changeset
    83
29203
0c4effb73518 override toString method;
wenzelm
parents: 29140
diff changeset
    84
69805
a8debe27c36c support for XML name spaces;
wenzelm
parents: 69804
diff changeset
    85
  /* name space */
a8debe27c36c support for XML name spaces;
wenzelm
parents: 69804
diff changeset
    86
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 74789
diff changeset
    87
  object Namespace {
    // public constructor function for the class with private constructor
    def apply(prefix: String, target: String): Namespace = new Namespace(prefix, target)
  }
a8debe27c36c support for XML name spaces;
wenzelm
parents: 69804
diff changeset
    91
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 74789
diff changeset
    92
  final class Namespace private(prefix: String, target: String) {
    // qualified name "prefix:name"
    def apply(name: String): String = s"${prefix}:${name}"

    // declaration of this name space as attribute "xmlns:prefix" with value target
    val attribute: XML.Attribute = ("xmlns:" + prefix) -> target

    override def toString: String = attribute.toString
  }
a8debe27c36c support for XML name spaces;
wenzelm
parents: 69804
diff changeset
    98
a8debe27c36c support for XML name spaces;
wenzelm
parents: 69804
diff changeset
    99
49650
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   100
  /* wrapped elements */
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   101
60215
5fb4990dfc73 misc tuning, based on warnings by IntelliJ IDEA;
wenzelm
parents: 57912
diff changeset
   102
  // element name used by Wrapped_Elem and Root_Elem (also matched as pattern constant)
  val XML_ELEM = "xml_elem"
5fb4990dfc73 misc tuning, based on warnings by IntelliJ IDEA;
wenzelm
parents: 57912
diff changeset
   103
  // property key that holds the original element name inside a wrapped element
  val XML_NAME = "xml_name"
5fb4990dfc73 misc tuning, based on warnings by IntelliJ IDEA;
wenzelm
parents: 57912
diff changeset
   104
  // name of the first child element of a wrapped element, which carries body1
  val XML_BODY = "xml_body"
49650
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   105
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 74789
diff changeset
   106
  object Wrapped_Elem {
    // Encode markup and body1 within a plain element: the original name becomes
    // the leading XML_NAME property, body1 is put into a leading XML_BODY child,
    // and body2 follows as regular content.
    def apply(markup: Markup, body1: Body, body2: Body): XML.Elem = {
      val wrapper = Markup(XML_ELEM, (XML_NAME, markup.name) :: markup.properties)
      val inner = XML.Elem(Markup(XML_BODY, Nil), body1)
      XML.Elem(wrapper, inner :: body2)
    }

    // inverse of apply: recover (markup, body1, body2) from the encoded form
    def unapply(tree: Tree): Option[(Markup, Body, Body)] =
      tree match {
        case XML.Elem(Markup(XML_ELEM, (XML_NAME, name) :: props),
            XML.Elem(Markup(XML_BODY, Nil), body1) :: body2) =>
          Some((Markup(name, props), body1, body2))
        case _ => None
      }
  }
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   120
80455
99e276c44121 minor performance tuning;
wenzelm
parents: 80454
diff changeset
   121
  object Wrapped_Elem_Body {
    // Match the same shape as Wrapped_Elem, but extract only the trailing body
    // (body2), without rebuilding the markup.
    def unapply(tree: Tree): Option[Body] =
      tree match {
        case XML.Elem(Markup(XML_ELEM, (XML_NAME, _) :: _),
            XML.Elem(Markup(XML_BODY, Nil), _) :: trailing) =>
          Some(trailing)
        case _ => None
      }
  }
99e276c44121 minor performance tuning;
wenzelm
parents: 80454
diff changeset
   131
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 74789
diff changeset
   132
  object Root_Elem {
    // represent a whole body as one element: XML_ELEM markup without properties
    def apply(body: Body): XML.Elem = XML.Elem(Markup(XML_ELEM, Nil), body)

    // inverse of apply: matches only XML_ELEM elements without any properties
    def unapply(tree: Tree): Option[Body] =
      tree match {
        case XML.Elem(Markup(XML_ELEM, Nil), inner) => Some(inner)
        case _ => None
      }
  }
2457bea123e4 convenience to represent XML.Body as single XML.Elem;
wenzelm
parents: 67113
diff changeset
   140
49650
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   141
80818
da2557168da7 clarified signature;
wenzelm
parents: 80816
diff changeset
   142
  /* filter markup elements */
80816
774e5a0c4c9e misc tuning and clarification;
wenzelm
parents: 80461
diff changeset
   143
80818
da2557168da7 clarified signature;
wenzelm
parents: 80816
diff changeset
   144
  // Filter markup elements by name:
  //   remove: the element is dropped together with its content
  //   expose: the element is replaced by its (filtered) content
  // Other elements are kept with filtered content. For wrapped elements the
  // name of the wrapped markup is tested, and only body2 is filtered.
  def filter_elements(
    xml: XML.Body,
    remove: Markup.Elements = Markup.Elements.empty,
    expose: Markup.Elements = Markup.Elements.empty
  ): XML.Body = {
    def filter_tree(tree: XML.Tree): XML.Body =
      tree match {
        case XML.Wrapped_Elem(markup, body1, body2) =>
          val name = markup.name
          if (remove(name)) Nil
          else if (expose(name)) filter_body(body2)
          else List(XML.Wrapped_Elem(markup, body1, filter_body(body2)))
        case XML.Elem(markup, body) =>
          val name = markup.name
          if (remove(name)) Nil
          else if (expose(name)) filter_body(body)
          else List(XML.Elem(markup, filter_body(body)))
        case other => List(other)
      }

    def filter_body(trees: XML.Body): XML.Body = trees.flatMap(filter_tree)

    filter_body(xml)
  }
774e5a0c4c9e misc tuning and clarification;
wenzelm
parents: 80461
diff changeset
   162
774e5a0c4c9e misc tuning and clarification;
wenzelm
parents: 80461
diff changeset
   163
49650
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   164
  /* traverse text */
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   165
80439
2990f341e0c6 tuned signature;
wenzelm
parents: 80438
diff changeset
   166
  // Fold op over all text nodes of body in document order, starting from a.
  // Elements are replaced by their content on the work list; for wrapped
  // elements only the trailing body is visited.
  def traverse_text[A](body: Body, a: A, op: (A, String) => A): A = {
    @tailrec def loop(acc: A, pending: List[Tree]): A =
      pending match {
        case Nil => acc
        case head :: more =>
          head match {
            case XML.Wrapped_Elem_Body(inner) => loop(acc, inner ::: more)
            case XML.Elem(_, inner) => loop(acc, inner ::: more)
            case XML.Text(s) => loop(op(acc, s), more)
          }
      }
    loop(a, body)
  }
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   176
80439
2990f341e0c6 tuned signature;
wenzelm
parents: 80438
diff changeset
   177
  // total String.length over all text nodes
  def text_length(body: Body): Int = traverse_text[Int](body, 0, (len, s) => len + s.length)
2990f341e0c6 tuned signature;
wenzelm
parents: 80438
diff changeset
   178
  // total Symbol.length over all text nodes
  def symbol_length(body: Body): Int =
    traverse_text[Int](body, 0, (len, s) => len + Symbol.length(s))
49650
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   179
80441
wenzelm
parents: 80439
diff changeset
   180
  // concatenated text of body; the builder gets the text length as size hint
  def content(body: Body): String = {
    val size_hint = text_length(body)
    Library.string_builder(hint = size_hint) { builder =>
      traverse_text(body, (), (_, s) => builder.append(s))
    }
  }
49650
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   184
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   185
  // concatenated text of a single tree
  def content(tree: Tree): String = content(tree :: Nil)
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   186
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   187
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   188
9fad6480300d support for wrapped XML elements, which allows to preserve full markup tree information in to_XML/from_XML conversion;
wenzelm
parents: 49613
diff changeset
   189
  /** string representation **/
29203
0c4effb73518 override toString method;
wenzelm
parents: 29140
diff changeset
   190
69804
9efccbad7d42 uniform XML header;
wenzelm
parents: 68265
diff changeset
   191
  // XML declaration line (version 1.0, encoding utf-8), terminated by newline
  val header: String = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
9efccbad7d42 uniform XML header;
wenzelm
parents: 68265
diff changeset
   192
80429
6f4d5d922da7 clarified signature: more explicit types;
wenzelm
parents: 76351
diff changeset
   193
  /* XML text output: a Traversal that appends markup to the given builder */
  class Output(builder: StringBuilder) extends Traversal {
    // Append str with the characters < > & escaped as entities; both kinds of
    // quotes are escaped as well, unless permissive is set.
    def string(str: String, permissive: Boolean = false): Unit = {
      if (str == null) {
        // null is handed to the builder as it is; presumably this shows up as
        // the literal text "null" (java.lang.StringBuilder semantics) -- confirm
        builder ++= str
      }
      else {
        for (c <- str) {
          c match {
            case '<' => builder ++= "&lt;"
            case '>' => builder ++= "&gt;"
            case '&' => builder ++= "&amp;"
            case '"' if !permissive => builder ++= "&quot;"
            case '\'' if !permissive => builder ++= "&apos;"
            case other => builder += other
          }
        }
      }
    }

    // text nodes are always fully escaped (non-permissive)
    override def text(str: String): Unit = string(str)

    // start tag with attributes in double quotes: <name a="b" ...>, or
    // <name a="b" .../> if end is set; attribute values are escaped
    override def elem(markup: Markup, end: Boolean = false): Unit = {
      builder += '<'
      builder ++= markup.name
      for ((key, value) <- markup.properties) {
        builder += ' '
        builder ++= key
        builder += '='
        builder += '"'
        string(value)
        builder += '"'
      }
      if (end) builder += '/'
      builder += '>'
    }

    // end tag: </name>
    def end_elem(name: String): Unit = {
      builder += '<'
      builder += '/'
      builder ++= name
      builder += '>'
    }

    // traverse ts and return everything accumulated in the builder so far
    def result(ts: List[Tree]): String = {
      traverse(ts)
      builder.toString
    }
  }
868089ee9d60 uniform output of HTML as XML;
wenzelm
parents: 65903
diff changeset
   234
80429
6f4d5d922da7 clarified signature: more explicit types;
wenzelm
parents: 76351
diff changeset
   235
  // XML text of body; the empty body is handled without allocating any builder
  def string_of_body(body: Body): String =
    if (body.nonEmpty) new Output(new StringBuilder).result(body)
    else ""
29203
0c4effb73518 override toString method;
wenzelm
parents: 29140
diff changeset
   238
38268
beb86b805590 more uniform XML/YXML string_of_body/string_of_tree;
wenzelm
parents: 38267
diff changeset
   239
  // XML text of a single tree
  def string_of_tree(tree: XML.Tree): String = string_of_body(tree :: Nil)
27941
b4656b671cce added iterator over content;
wenzelm
parents: 27931
diff changeset
   240
73528
c337c798f64c clarified HTML template (see also 04cb7e02ca38): avoid odd patching of sources;
wenzelm
parents: 73359
diff changeset
   241
  // XML text of plain string s, i.e. output of a single Text node via string_of_tree
  def text(s: String): String = string_of_tree(XML.Text(s))
27941
b4656b671cce added iterator over content;
wenzelm
parents: 27931
diff changeset
   242
44808
05b8997899a2 XML.cache for partial sharing (strings only);
wenzelm
parents: 44721
diff changeset
   243
80429
6f4d5d922da7 clarified signature: more explicit types;
wenzelm
parents: 76351
diff changeset
   244
68265
f0899dad4877 more general cache, also for term substructures;
wenzelm
parents: 68169
diff changeset
   245
  /** cache **/
34108
54d48ca8708f cache for partial sharing;
wenzelm
parents: 34047
diff changeset
   246
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 74789
diff changeset
   247
  object Cache {
    // Create a cache; each argument falls back to the default given in the signature.
    def make(
      compress: Compress.Cache = Compress.Cache.make(),
      max_string: Int = isabelle.Cache.default_max_string,
      initial_size: Int = isabelle.Cache.default_initial_size
    ): Cache =
      new Cache(compress = compress, max_string = max_string, initial_size = initial_size)

    // Instance built from Compress.Cache.none with max_string = 0
    // (initial_size keeps its default).
    val none: Cache = make(compress = Compress.Cache.none, max_string = 0)
  }
337e1b135d2f clarified signature --- internal Cache.none;
wenzelm
parents: 71601
diff changeset
   256
76351
2cee31cd92f0 generic support for XZ and Zstd compression in Isabelle/Scala;
wenzelm
parents: 75436
diff changeset
   257
  class Cache(val compress: Compress.Cache, max_string: Int, initial_size: Int)
  extends isabelle.Cache(max_string, initial_size) {
    // NOTE: lookup, store, cache_string and no_cache are not defined in this class;
    // presumably they are inherited from isabelle.Cache.

    // Internal operations: none of these synchronizes on its own, the public
    // methods further down wrap them in `synchronized`.

    // Properties: names pass through Library.isolate_substring + intern,
    // values through cache_string; the rebuilt list is stored.
    protected def cache_props(x: Properties.T): Properties.T =
      if (x.isEmpty) x
      else
        lookup(x).getOrElse(
          store(x.map { case (a, b) => (Library.isolate_substring(a).intern, cache_string(b)) }))

    // Markup: rebuilt from its cached name and cached properties, then stored.
    protected def cache_markup(x: Markup): Markup =
      lookup(x) match {
        case None =>
          x match {
            case Markup(name, props) =>
              store(Markup(cache_string(name), cache_props(props)))
          }
        case Some(y) => y
      }

    // Tree: elements are rebuilt from cached markup and cached body,
    // text nodes from their cached string; the result is stored.
    protected def cache_tree(x: XML.Tree): XML.Tree =
      lookup(x) match {
        case None =>
          x match {
            case XML.Text(text) => store(XML.Text(cache_string(text)))
            case XML.Elem(markup, body) =>
              store(XML.Elem(cache_markup(markup), cache_body(body)))
          }
        case Some(y) => y
      }

    // Body: each tree is cached individually; the resulting list itself
    // is not passed to store.
    protected def cache_body(x: XML.Body): XML.Body =
      x match {
        case Nil => x
        case _ =>
          lookup(x) match {
            case None => x.map(cache_tree)
            case Some(y) => y
          }
      }

    // support hash-consing: the tree is looked up / stored as a whole,
    // without descending into its substructure
    def tree0(x: XML.Tree): XML.Tree =
      if (no_cache) x
      else synchronized {
        lookup(x) match {
          case Some(y) => y
          case None => store(x)
        }
      }

    // main methods: return the argument unchanged when no_cache holds,
    // otherwise run the internal operation while synchronized on this cache
    def props(x: Properties.T): Properties.T =
      if (!no_cache) synchronized { cache_props(x) } else x

    def markup(x: Markup): Markup =
      if (!no_cache) synchronized { cache_markup(x) } else x

    def tree(x: XML.Tree): XML.Tree =
      if (!no_cache) synchronized { cache_tree(x) } else x

    def body(x: XML.Body): XML.Body =
      if (!no_cache) synchronized { cache_body(x) } else x

    def elem(x: XML.Elem): XML.Elem =
      if (!no_cache) synchronized { cache_tree(x).asInstanceOf[XML.Elem] } else x
  }
54d48ca8708f cache for partial sharing;
wenzelm
parents: 34047
diff changeset
   316
54d48ca8708f cache for partial sharing;
wenzelm
parents: 34047
diff changeset
   317
43767
e0219ef7f84c tuned XML modules;
wenzelm
parents: 43747
diff changeset
   318
e0219ef7f84c tuned XML modules;
wenzelm
parents: 43747
diff changeset
   319
  /** XML as data representation language **/
e0219ef7f84c tuned XML modules;
wenzelm
parents: 43747
diff changeset
   320
51987
7d8e0e3c553b tuned signature;
wenzelm
parents: 51663
diff changeset
   321
  // Common base class of the XML data representation failures declared below;
  // `s` is passed on as the exception message.
  abstract class Error(s: String) extends Exception(s)
7d8e0e3c553b tuned signature;
wenzelm
parents: 51663
diff changeset
   322
  // Malformed atomic value: thrown by the Decode atom parsers with the offending
  // string as exception message.
  class XML_Atom(s: String) extends Error(s)
7d8e0e3c553b tuned signature;
wenzelm
parents: 51663
diff changeset
   323
  // Unexpected XML body: thrown by Decode when a body does not have the required shape.
  // NOTE: the `body` argument is not passed on; the exception message is always "".
  class XML_Body(body: XML.Body) extends Error("")
43767
e0219ef7f84c tuned XML modules;
wenzelm
parents: 43747
diff changeset
   324
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 74789
diff changeset
   325
  object Encode {
    // T: total encoder of A into an XML body.
    // V: one case of a variant, yielding a vector of atoms plus a body.
    // P: partial function yielding a vector of atoms only.
    type T[A] = A => XML.Body
    type V[A] = PartialFunction[A, (List[String], XML.Body)]
    type P[A] = PartialFunction[A, List[String]]


    /* atomic values */

    def long_atom(i: Long): String = Library.signed_string_of_long(i)

    def int_atom(i: Int): String = Library.signed_string_of_int(i)

    // true is written as "1", false as "0"
    def bool_atom(b: Boolean): String = if (b) "1" else "0"

    // unit is written as the empty string
    def unit_atom(u: Unit): String = ""


    /* structural nodes */

    // Wrap a body into an element named ":" without properties.
    private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)

    // Attributes whose names are the positions 0, 1, ... (as int atoms) of the given strings.
    private def vector(xs: List[String]): XML.Attributes =
      xs.zipWithIndex.map { case (x, i) => (int_atom(i), x) }

    // Element named by the tag (as int atom), with the atoms as attributes and the given body.
    private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
      XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2)


    /* representation of standard types */

    val self: T[XML.Body] = identity

    val tree: T[XML.Tree] = (t => List(t))

    // Properties become the attributes of a single empty element named ":".
    val properties: T[Properties.T] =
      (props => List(XML.Elem(Markup(":", props), Nil)))

    val string: T[String] = XML.string

    val long: T[Long] = (x => string(long_atom(x)))

    val int: T[Int] = (x => string(int_atom(x)))

    val bool: T[Boolean] = (x => string(bool_atom(x)))

    val unit: T[Unit] = (x => string(unit_atom(x)))

    // A pair is a body of two nodes, one per component.
    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
      (x => List(node(f(x._1)), node(g(x._2))))

    // A triple is a body of three nodes, one per component.
    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
      (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3))))

    // A list is a body with one node per element, in order.
    def list[A](f: T[A]): T[List[A]] =
      (xs => xs.map(x => node(f(x))))

    // None is the empty body, Some(x) a body with a single node.
    def option[A](f: T[A]): T[Option[A]] = {
      case None => Nil
      case Some(x) => List(node(f(x)))
    }

    // Encode via the first case of fs that is defined at the value; the position of
    // that case in fs becomes the tag of the resulting element.
    // Throws NoSuchElementException if no case is defined at the value.
    def variant[A](fs: List[V[A]]): T[A] = {
      case x =>
        val (f, tag) =
          fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).getOrElse(
            // same exception type as a failed Option.get, but with a usable message
            throw new NoSuchElementException("XML.Encode.variant: no matching case"))
        List(tagged(tag, f(x)))
    }
  }
e0219ef7f84c tuned XML modules;
wenzelm
parents: 43747
diff changeset
   392
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 74789
diff changeset
   393
  object Decode {
    // T: decoder of an XML body into A.
    // V: one case of a variant, taking a vector of atoms plus a body.
    // P: partial function taking a vector of atoms only.
    type T[A] = XML.Body => A
    type V[A] = PartialFunction[(List[String], XML.Body), A]
    type P[A] = PartialFunction[List[String], A]


    /* atomic values */

    // All atom parsers throw XML_Atom (carrying the input string) on malformed input.

    def long_atom(s: String): Long =
      try { java.lang.Long.parseLong(s) }
      catch { case _: NumberFormatException => throw new XML_Atom(s) }

    def int_atom(s: String): Int =
      try { Integer.parseInt(s) }
      catch { case _: NumberFormatException => throw new XML_Atom(s) }

    // "1" is true, "0" is false, anything else is malformed
    def bool_atom(s: String): Boolean =
      s match {
        case "1" => true
        case "0" => false
        case _ => throw new XML_Atom(s)
      }

    // only the empty string is a valid unit atom
    def unit_atom(s: String): Unit =
      if (s == "") () else throw new XML_Atom(s)


    /* structural nodes */

    // Unwrap an element named ":" without properties; anything else is an XML_Body failure.
    private def node(t: XML.Tree): XML.Body =
      t match {
        case XML.Elem(Markup(":", Nil), ts) => ts
        case _ => throw new XML_Body(List(t))
      }

    // Attribute values in order; every attribute name must be the int atom of its
    // own position, otherwise XML_Atom is thrown with that name.
    private def vector(atts: XML.Attributes): List[String] =
      atts.iterator.zipWithIndex.map {
        case ((a, x), i) => if (int_atom(a) == i) x else throw new XML_Atom(a)
      }.toList

    // Split an element into its tag (markup name as int atom), atoms and body.
    private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
      t match {
        case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts))
        case _ => throw new XML_Body(List(t))
      }


    /* representation of standard types */

    // Decoders below throw XML_Body (given the offending body) when the shape is wrong.

    val self: T[XML.Body] = identity

    val tree: T[XML.Tree] = {
      case List(t) => t
      case ts => throw new XML_Body(ts)
    }

    // Properties are the attributes of a single empty element named ":".
    val properties: T[Properties.T] = {
      case List(XML.Elem(Markup(":", props), Nil)) => props
      case ts => throw new XML_Body(ts)
    }

    // The empty body is the empty string; otherwise exactly one text node is expected.
    val string: T[String] = {
      case Nil => ""
      case List(XML.Text(s)) => s
      case ts => throw new XML_Body(ts)
    }

    val long: T[Long] = (x => long_atom(string(x)))

    val int: T[Int] = (x => int_atom(string(x)))

    val bool: T[Boolean] = (x => bool_atom(string(x)))

    val unit: T[Unit] = (x => unit_atom(string(x)))

    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = {
      case List(t1, t2) => (f(node(t1)), g(node(t2)))
      case ts => throw new XML_Body(ts)
    }

    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = {
      case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3)))
      case ts => throw new XML_Body(ts)
    }

    def list[A](f: T[A]): T[List[A]] =
      (ts => ts.map(t => f(node(t))))

    def option[A](f: T[A]): T[Option[A]] = {
      case Nil => None
      case List(t) => Some(f(node(t)))
      case ts => throw new XML_Body(ts)
    }

    // Decode a single tagged element with the case of fs found at position `tag`.
    // A tag outside the range of fs is an XML_Body failure. The selected case is
    // applied directly, so a failure of that partial function propagates as is.
    def variant[A](fs: List[V[A]]): T[A] = {
      case List(t) =>
        val (tag, (xs, ts)) = tagged(t)
        // lift covers negative and too large tags without relying on an exception
        val f = fs.lift(tag).getOrElse(throw new XML_Body(List(t)))
        f(xs, ts)
      case ts => throw new XML_Body(ts)
    }
  }
27931
b533a9de87a7 Minimalistic XML tree values.
wenzelm
parents:
diff changeset
   494
}