src/Pure/PIDE/yxml.scala
author wenzelm
Thu, 15 Aug 2024 12:43:17 +0200
changeset 80712 05b16602a683
parent 80562 d55cdb87f332
permissions -rw-r--r--
clarified signature;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
44698
0385292321a0 moved XML/YXML to src/Pure/PIDE;
wenzelm
parents: 44181
diff changeset
     1
/*  Title:      Pure/PIDE/yxml.scala
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
     3
44698
0385292321a0 moved XML/YXML to src/Pure/PIDE;
wenzelm
parents: 44181
diff changeset
     4
Efficient text representation of XML trees.  Suitable for direct
0385292321a0 moved XML/YXML to src/Pure/PIDE;
wenzelm
parents: 44181
diff changeset
     5
inlining into plain text.
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
     6
*/
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
     7
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
     8
package isabelle
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
     9
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
    10
40453
wenzelm
parents: 38475
diff changeset
    11
import scala.collection.mutable
36684
943f1ca7b375 misc tuning -- accumulate body via ListBuffer;
wenzelm
parents: 34201
diff changeset
    12
943f1ca7b375 misc tuning -- accumulate body via ListBuffer;
wenzelm
parents: 34201
diff changeset
    13
75393
87ebf5a50283 clarified formatting, for the sake of scala3;
wenzelm
parents: 73712
diff changeset
    14
object YXML {
27943
f34ff5e7728f replaced Pattern.split by chunks iterator (more efficient, resembles ML version more closely);
wenzelm
parents: 27930
diff changeset
    15
  /* chunk markers */
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
    16
80425
efa212a6699a clarified signature;
wenzelm
parents: 75393
diff changeset
    17
  val X_byte: Byte = 5
efa212a6699a clarified signature;
wenzelm
parents: 75393
diff changeset
    18
  val Y_byte: Byte = 6
efa212a6699a clarified signature;
wenzelm
parents: 75393
diff changeset
    19
80487
e25c6d4c219c clarified signature;
wenzelm
parents: 80486
diff changeset
    20
  val X_char: Char = X_byte.toChar
e25c6d4c219c clarified signature;
wenzelm
parents: 80486
diff changeset
    21
  val Y_char: Char = Y_byte.toChar
56600
628e039cc34d more specific support for sequence of words;
wenzelm
parents: 55554
diff changeset
    22
80487
e25c6d4c219c clarified signature;
wenzelm
parents: 80486
diff changeset
    23
  val X_string: String = X_char.toString
e25c6d4c219c clarified signature;
wenzelm
parents: 80486
diff changeset
    24
  val Y_string: String = Y_char.toString
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
    25
80487
e25c6d4c219c clarified signature;
wenzelm
parents: 80486
diff changeset
    26
  def detect(s: String): Boolean = s.exists(c => c == X_char || c == Y_char)
80488
wenzelm
parents: 80487
diff changeset
    27
  def detect_elem(s: String): Boolean = s.length >= 2 && s(0) == X_char && s(1) == Y_char
43782
834de29572d5 clarified YXML.detect;
wenzelm
parents: 43774
diff changeset
    28
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
    29
73029
wenzelm
parents: 73028
diff changeset
    30
  /* string representation */
38268
beb86b805590 more uniform XML/YXML string_of_body/string_of_tree;
wenzelm
parents: 38267
diff changeset
    31
80433
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    32
  trait Output extends XML.Traversal {
80425
efa212a6699a clarified signature;
wenzelm
parents: 75393
diff changeset
    33
    def byte(b: Byte): Unit
efa212a6699a clarified signature;
wenzelm
parents: 75393
diff changeset
    34
    def string(s: String): Unit
80433
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    35
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    36
    // XML.Traversal
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    37
    override def text(s: String): Unit = string(s)
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    38
    override def elem(markup: Markup, end: Boolean = false): Unit = {
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    39
      byte(X_byte)
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    40
      byte(Y_byte)
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    41
      string(markup.name)
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    42
      for ((a, b) <- markup.properties) {
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    43
        byte(Y_byte)
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    44
        string(a)
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    45
        byte('='.toByte)
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    46
        string(b)
38268
beb86b805590 more uniform XML/YXML string_of_body/string_of_tree;
wenzelm
parents: 38267
diff changeset
    47
      }
80433
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    48
      byte(X_byte)
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    49
      if (end) end_elem(markup.name)
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    50
    }
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    51
    override def end_elem(name: String): Unit = {
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    52
      byte(X_byte)
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    53
      byte(Y_byte)
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    54
      byte(X_byte)
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    55
    }
71534
f10bffaa2bae more scalable output of YXML files;
wenzelm
parents: 67820
diff changeset
    56
  }
f10bffaa2bae more scalable output of YXML files;
wenzelm
parents: 67820
diff changeset
    57
80437
2c07b9b2f9f4 minor performance tuning: more direct Bytes with Symbol.encode;
wenzelm
parents: 80433
diff changeset
    58
  class Output_String(builder: StringBuilder, recode: String => String = identity) extends Output {
80425
efa212a6699a clarified signature;
wenzelm
parents: 75393
diff changeset
    59
    override def byte(b: Byte): Unit = { builder += b.toChar }
80437
2c07b9b2f9f4 minor performance tuning: more direct Bytes with Symbol.encode;
wenzelm
parents: 80433
diff changeset
    60
    override def string(s: String): Unit = { builder ++= recode(s) }
80433
48776d779d94 more robust: prefer tail-recursive XML.Traversal;
wenzelm
parents: 80427
diff changeset
    61
    def result(ts: List[XML.Tree]): String = { traverse(ts); builder.toString }
38268
beb86b805590 more uniform XML/YXML string_of_body/string_of_tree;
wenzelm
parents: 38267
diff changeset
    62
  }
beb86b805590 more uniform XML/YXML string_of_body/string_of_tree;
wenzelm
parents: 38267
diff changeset
    63
80437
2c07b9b2f9f4 minor performance tuning: more direct Bytes with Symbol.encode;
wenzelm
parents: 80433
diff changeset
    64
  class Output_Bytes(builder: Bytes.Builder, recode: String => String = identity) extends Output {
80427
c92356464bb3 more operations;
wenzelm
parents: 80425
diff changeset
    65
    override def byte(b: Byte): Unit = { builder += b }
80437
2c07b9b2f9f4 minor performance tuning: more direct Bytes with Symbol.encode;
wenzelm
parents: 80433
diff changeset
    66
    override def string(s: String): Unit = { builder += recode(s) }
80427
c92356464bb3 more operations;
wenzelm
parents: 80425
diff changeset
    67
  }
c92356464bb3 more operations;
wenzelm
parents: 80425
diff changeset
    68
80437
2c07b9b2f9f4 minor performance tuning: more direct Bytes with Symbol.encode;
wenzelm
parents: 80433
diff changeset
    69
  def string_of_body(body: XML.Body, recode: String => String = identity): String =
2c07b9b2f9f4 minor performance tuning: more direct Bytes with Symbol.encode;
wenzelm
parents: 80433
diff changeset
    70
    new Output_String(new StringBuilder, recode = recode).result(body)
80425
efa212a6699a clarified signature;
wenzelm
parents: 75393
diff changeset
    71
80437
2c07b9b2f9f4 minor performance tuning: more direct Bytes with Symbol.encode;
wenzelm
parents: 80433
diff changeset
    72
  def string_of_tree(tree: XML.Tree, recode: String => String = identity): String =
2c07b9b2f9f4 minor performance tuning: more direct Bytes with Symbol.encode;
wenzelm
parents: 80433
diff changeset
    73
    string_of_body(List(tree), recode = recode)
38268
beb86b805590 more uniform XML/YXML string_of_body/string_of_tree;
wenzelm
parents: 38267
diff changeset
    74
80437
2c07b9b2f9f4 minor performance tuning: more direct Bytes with Symbol.encode;
wenzelm
parents: 80433
diff changeset
    75
  def bytes_of_body(body: XML.Body, recode: String => String = identity): Bytes =
2c07b9b2f9f4 minor performance tuning: more direct Bytes with Symbol.encode;
wenzelm
parents: 80433
diff changeset
    76
    Bytes.Builder.use()(builder => new Output_Bytes(builder, recode = recode).traverse(body))
80427
c92356464bb3 more operations;
wenzelm
parents: 80425
diff changeset
    77
80437
2c07b9b2f9f4 minor performance tuning: more direct Bytes with Symbol.encode;
wenzelm
parents: 80433
diff changeset
    78
  def bytes_of_tree(tree: XML.Tree, recode: String => String = identity): Bytes =
2c07b9b2f9f4 minor performance tuning: more direct Bytes with Symbol.encode;
wenzelm
parents: 80433
diff changeset
    79
    bytes_of_body(List(tree), recode = recode)
80427
c92356464bb3 more operations;
wenzelm
parents: 80425
diff changeset
    80
38268
beb86b805590 more uniform XML/YXML string_of_body/string_of_tree;
wenzelm
parents: 38267
diff changeset
    81
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
    82
  /* parsing */
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
    83
27993
6dd90ef9f927 simplified exceptions: use plain error function / RuntimeException;
wenzelm
parents: 27971
diff changeset
    84
  private def err(msg: String) = error("Malformed YXML: " + msg)
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
    85
  private def err_attribute() = err("bad attribute")
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
    86
  private def err_element() = err("bad element")
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
    87
  private def err_unbalanced(name: String) =
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
    88
    if (name == "") err("unbalanced element")
44181
wenzelm
parents: 43841
diff changeset
    89
    else err("unbalanced element " + quote(name))
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
    90
80480
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
    91
  object Source {
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
    92
    def apply(s: String): Source = new Source_String(s)
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
    93
  }
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
    94
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
    95
  trait Source {
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
    96
    def is_empty: Boolean
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
    97
    def is_X: Boolean
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
    98
    def is_Y: Boolean
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
    99
    def iterator_X: Iterator[Source]
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   100
    def iterator_Y: Iterator[Source]
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   101
    def text: String
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   102
  }
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   103
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   104
  class Source_String(source: String) extends Source {
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   105
    override def is_empty: Boolean = source.isEmpty
80487
e25c6d4c219c clarified signature;
wenzelm
parents: 80486
diff changeset
   106
    override def is_X: Boolean = source.length == 1 && source(0) == X_char
e25c6d4c219c clarified signature;
wenzelm
parents: 80486
diff changeset
   107
    override def is_Y: Boolean = source.length == 1 && source(0) == Y_char
80480
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   108
    override def iterator_X: Iterator[Source] =
80487
e25c6d4c219c clarified signature;
wenzelm
parents: 80486
diff changeset
   109
      Library.separated_chunks(X_char, source).map(Source.apply)
80480
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   110
    override def iterator_Y: Iterator[Source] =
80487
e25c6d4c219c clarified signature;
wenzelm
parents: 80486
diff changeset
   111
      Library.separated_chunks(Y_char, source).map(Source.apply)
80480
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   112
    override def text: String = source
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   113
  }
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   114
80447
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   115
  def parse_body(
80480
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   116
    source: Source,
80447
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   117
    recode: String => String = identity,
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   118
    cache: XML.Cache = XML.Cache.none
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   119
  ): XML.Body = {
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   120
    /* parse + recode */
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   121
80480
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   122
    def parse_string(s: Source): String = recode(s.text)
80447
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   123
80480
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   124
    def parse_attrib(s: Source): (String, String) =
80447
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   125
      Properties.Eq.unapply(parse_string(s)) getOrElse err_attribute()
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   126
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   127
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   128
    /* stack operations */
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   129
40453
wenzelm
parents: 38475
diff changeset
   130
    def buffer(): mutable.ListBuffer[XML.Tree] = new mutable.ListBuffer[XML.Tree]
wenzelm
parents: 38475
diff changeset
   131
    var stack: List[(Markup, mutable.ListBuffer[XML.Tree])] = List((Markup.Empty, buffer()))
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   132
73340
0ffcad1f6130 tuned --- fewer warnings;
wenzelm
parents: 73030
diff changeset
   133
    def add(x: XML.Tree): Unit =
73029
wenzelm
parents: 73028
diff changeset
   134
      (stack: @unchecked) match { case (_, body) :: _ => body += x }
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   135
73340
0ffcad1f6130 tuned --- fewer warnings;
wenzelm
parents: 73030
diff changeset
   136
    def push(name: String, atts: XML.Attributes): Unit =
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   137
      if (name == "") err_element()
73030
72a8fdfa185d support more direct hash-consing via XML.Cache;
wenzelm
parents: 73029
diff changeset
   138
      else stack = (cache.markup(Markup(name, atts)), buffer()) :: stack
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   139
73340
0ffcad1f6130 tuned --- fewer warnings;
wenzelm
parents: 73030
diff changeset
   140
    def pop(): Unit =
36684
943f1ca7b375 misc tuning -- accumulate body via ListBuffer;
wenzelm
parents: 34201
diff changeset
   141
      (stack: @unchecked) match {
73029
wenzelm
parents: 73028
diff changeset
   142
        case (Markup.Empty, _) :: _ => err_unbalanced("")
wenzelm
parents: 73028
diff changeset
   143
        case (markup, body) :: pending =>
38230
ed147003de4b simplified type XML.Tree: embed Markup directly, avoid slightly odd triple;
wenzelm
parents: 36685
diff changeset
   144
          stack = pending
73030
72a8fdfa185d support more direct hash-consing via XML.Cache;
wenzelm
parents: 73029
diff changeset
   145
          add(cache.tree0(XML.Elem(markup, body.toList)))
36684
943f1ca7b375 misc tuning -- accumulate body via ListBuffer;
wenzelm
parents: 34201
diff changeset
   146
      }
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   147
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   148
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   149
    /* parse chunks */
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   150
80562
wenzelm
parents: 80488
diff changeset
   151
    for (chunk <- source.iterator_X) {
wenzelm
parents: 80488
diff changeset
   152
      if (!chunk.is_empty) {
wenzelm
parents: 80488
diff changeset
   153
        if (chunk.is_Y) pop()
wenzelm
parents: 80488
diff changeset
   154
        else {
wenzelm
parents: 80488
diff changeset
   155
          chunk.iterator_Y.toList match {
wenzelm
parents: 80488
diff changeset
   156
            case ch :: name :: atts if ch.is_empty =>
wenzelm
parents: 80488
diff changeset
   157
              push(parse_string(name), atts.map(parse_attrib))
wenzelm
parents: 80488
diff changeset
   158
            case txts =>
wenzelm
parents: 80488
diff changeset
   159
              for (txt <- txts) add(cache.tree0(XML.Text(cache.string(parse_string(txt)))))
wenzelm
parents: 80488
diff changeset
   160
          }
27945
d2dc5a1903e8 tuned parse performance: avoid splitting terminal Y chunk;
wenzelm
parents: 27944
diff changeset
   161
        }
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   162
      }
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   163
    }
42719
wenzelm
parents: 40453
diff changeset
   164
    (stack: @unchecked) match {
38475
wenzelm
parents: 38268
diff changeset
   165
      case List((Markup.Empty, body)) => body.toList
38230
ed147003de4b simplified type XML.Tree: embed Markup directly, avoid slightly odd triple;
wenzelm
parents: 36685
diff changeset
   166
      case (Markup(name, _), _) :: _ => err_unbalanced(name)
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   167
    }
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   168
  }
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   169
80447
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   170
  def parse(
80480
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   171
    source: Source,
80447
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   172
    recode: String => String = identity,
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   173
    cache: XML.Cache = XML.Cache.none
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   174
  ): XML.Tree =
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   175
    parse_body(source, recode = recode, cache = cache) match {
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   176
      case List(result) => result
73028
95e0f014cd24 tuned signature;
wenzelm
parents: 71601
diff changeset
   177
      case Nil => XML.no_text
67820
e30d6368c7c8 clarified argument formats: explicit Unit, allow XML.Elem as well;
wenzelm
parents: 64370
diff changeset
   178
      case _ => err("multiple XML trees")
e30d6368c7c8 clarified argument formats: explicit Unit, allow XML.Elem as well;
wenzelm
parents: 64370
diff changeset
   179
    }
e30d6368c7c8 clarified argument formats: explicit Unit, allow XML.Elem as well;
wenzelm
parents: 64370
diff changeset
   180
80447
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   181
  def parse_elem(
80480
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   182
    source: Source,
80447
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   183
    recode: String => String = identity,
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   184
    cache: XML.Cache = XML.Cache.none
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   185
  ): XML.Tree =
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   186
    parse_body(source, recode = recode, cache = cache) match {
67820
e30d6368c7c8 clarified argument formats: explicit Unit, allow XML.Elem as well;
wenzelm
parents: 64370
diff changeset
   187
      case List(elem: XML.Elem) => elem
e30d6368c7c8 clarified argument formats: explicit Unit, allow XML.Elem as well;
wenzelm
parents: 64370
diff changeset
   188
      case _ => err("single XML element expected")
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   189
    }
27960
65b10d8ef0c6 added parse_failsafe;
wenzelm
parents: 27946
diff changeset
   190
29521
736bf7117153 added parse_body_failsafe;
wenzelm
parents: 29180
diff changeset
   191
736bf7117153 added parse_body_failsafe;
wenzelm
parents: 29180
diff changeset
   192
  /* failsafe parsing */
736bf7117153 added parse_body_failsafe;
wenzelm
parents: 29180
diff changeset
   193
80480
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   194
  private def markup_broken(source: Source) =
55551
4a5f65df29fa tuned signature;
wenzelm
parents: 48996
diff changeset
   195
    XML.Elem(Markup.Broken, List(XML.Text(source.toString)))
29521
736bf7117153 added parse_body_failsafe;
wenzelm
parents: 29180
diff changeset
   196
80447
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   197
  def parse_body_failsafe(
80480
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   198
    source: Source,
80447
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   199
    recode: String => String = identity,
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   200
    cache: XML.Cache = XML.Cache.none
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   201
  ): XML.Body = {
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   202
    try { parse_body(source, recode = recode, cache = cache) }
45666
d83797ef0d2d separate module for concrete Isabelle markup;
wenzelm
parents: 44698
diff changeset
   203
    catch { case ERROR(_) => List(markup_broken(source)) }
29521
736bf7117153 added parse_body_failsafe;
wenzelm
parents: 29180
diff changeset
   204
  }
736bf7117153 added parse_body_failsafe;
wenzelm
parents: 29180
diff changeset
   205
80447
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   206
  def parse_failsafe(
80480
972f7a4cdc0e clarified YXML.Source: more direct support for String and Bytes, instead of CharSequence;
wenzelm
parents: 80447
diff changeset
   207
    source: Source,
80447
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   208
    recode: String => String = identity,
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   209
    cache: XML.Cache = XML.Cache.none
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   210
  ): XML.Tree = {
325907d85977 minor performance tuning: allow recode operation during YXML parsing;
wenzelm
parents: 80437
diff changeset
   211
    try { parse(source, recode = recode, cache = cache) }
45666
d83797ef0d2d separate module for concrete Isabelle markup;
wenzelm
parents: 44698
diff changeset
   212
    catch { case ERROR(_) => markup_broken(source) }
27960
65b10d8ef0c6 added parse_failsafe;
wenzelm
parents: 27946
diff changeset
   213
  }
27930
2b44df907cc2 Efficient text representation of XML trees.
wenzelm
parents:
diff changeset
   214
}