src/Pure/PIDE/xml.scala
author wenzelm
Sun Mar 10 14:19:30 2019 +0100 (4 months ago)
changeset 70070 be04e9a053a7
parent 70048 3fd9298dd200
permissions -rw-r--r--
markup and document markers for some meta data from "Dublin Core Metadata Element Set";
wenzelm@44698
     1
/*  Title:      Pure/PIDE/xml.scala
wenzelm@27931
     2
    Author:     Makarius
wenzelm@27931
     3
wenzelm@44698
     4
Untyped XML trees and basic data representation.
wenzelm@27931
     5
*/
wenzelm@27931
     6
wenzelm@27931
     7
package isabelle
wenzelm@27931
     8
wenzelm@55618
     9
wenzelm@29203
    10
object XML
wenzelm@29203
    11
{
wenzelm@43767
    12
  /** XML trees **/
wenzelm@43767
    13
wenzelm@27947
    14
  /* datatype representation */
wenzelm@27947
    15
wenzelm@65753
    16
  // A single XML attribute; Namespace.attribute below builds one as a pair of strings.
  type Attribute = Properties.Entry
wenzelm@43780
    17
  // The attributes of an element, represented as Properties.T
  // (the type passed to markup.update_properties in Elem.update_attributes).
  type Attributes = Properties.T
wenzelm@27931
    18
wenzelm@57912
    19
  // Base class of XML trees (Elem or Text); toString renders via string_of_tree.
  sealed abstract class Tree
  {
    override def toString: String = string_of_tree(this)
  }
wenzelm@64354
    20
  // An XML body is a plain list of trees.
  type Body = List[Tree]
wenzelm@64354
    21
  // Element node: markup together with the list of child trees.
  case class Elem(markup: Markup, body: Body) extends Tree
  {
    // name of the element, as given by its markup
    def name: String = markup.name

    // Hand additional attributes to markup.update_properties; the element
    // itself is returned when there is nothing to add.
    def update_attributes(more_attributes: Attributes): Elem =
      if (more_attributes.nonEmpty) Elem(markup.update_properties(more_attributes), body)
      else this

    // add a single attribute via the "+" operation of the markup
    def + (att: Attribute): Elem = copy(markup = markup + att)
  }
wenzelm@29204
    31
  // Text node holding a plain string; string_of_body escapes the content on output.
  case class Text(content: String) extends Tree
wenzelm@29203
    32
wenzelm@66196
    33
  // Convenience constructors for elements: an omitted body or omitted
  // properties are taken to be Nil.
  def elem(markup: Markup): XML.Elem = Elem(markup, Nil)
  def elem(name: String, body: Body): XML.Elem = Elem(Markup(name, Nil), body)
  def elem(name: String): XML.Elem = elem(name, Nil)
wenzelm@38267
    36
wenzelm@70048
    37
  // text node consisting of a single line feed character
  val newline: Text = Text("\n")
wenzelm@70048
    38
wenzelm@29203
    39
wenzelm@69984
    40
  /* name space */
wenzelm@69984
    41
wenzelm@69984
    42
  // Factory for name spaces; the class constructor itself is private.
  object Namespace
  {
    def apply(prefix: String, target: String): Namespace = new Namespace(prefix, target)
  }
wenzelm@69984
    47
wenzelm@69984
    48
  // XML name space, given by a prefix and a target.
  final class Namespace private(prefix: String, target: String)
  {
    // qualified name of the form "prefix:name"
    def apply(name: String): String = s"$prefix:$name"

    // the declaration "xmlns:prefix" paired with the target, as an attribute
    val attribute: XML.Attribute = (s"xmlns:$prefix", target)

    // printed like the attribute pair
    override def toString: String = attribute.toString
  }
wenzelm@69984
    55
wenzelm@69984
    56
wenzelm@49650
    57
  /* wrapped elements */
wenzelm@49650
    58
wenzelm@60215
    59
  // Reserved names used by Wrapped_Elem and Root_Elem: the wrapper element,
  // the property that carries the original element name, and the element that
  // holds the wrapped body.
  val XML_ELEM: String = "xml_elem"
  val XML_NAME: String = "xml_name"
  val XML_BODY: String = "xml_body"
wenzelm@49650
    62
wenzelm@49650
    63
  // Encoding of an element with two bodies: the original markup is kept in the
  // properties of an XML_ELEM element (name stored under XML_NAME in front),
  // body1 is put into a leading XML_BODY child, and body2 follows as the
  // remaining children.
  object Wrapped_Elem
  {
    def apply(markup: Markup, body1: Body, body2: Body): XML.Elem =
    {
      val wrapper = Markup(XML_ELEM, (XML_NAME, markup.name) :: markup.properties)
      val wrapped_body = XML.Elem(Markup(XML_BODY, Nil), body1)
      XML.Elem(wrapper, wrapped_body :: body2)
    }

    // inverse of apply; None for any tree that does not have the wrapped shape
    def unapply(tree: Tree): Option[(Markup, Body, Body)] =
      tree match {
        case XML.Elem(
            Markup(XML_ELEM, (XML_NAME, name) :: props),
            XML.Elem(Markup(XML_BODY, Nil), body1) :: body2) =>
          Some((Markup(name, props), body1, body2))
        case _ => None
      }
  }
wenzelm@49650
    78
wenzelm@67818
    79
  // Root element: an XML_ELEM element without any properties around a body.
  object Root_Elem
  {
    def apply(body: Body): XML.Elem =
    {
      val root = Markup(XML_ELEM, Nil)
      XML.Elem(root, body)
    }

    // yields the body of a root element, None for every other tree
    def unapply(tree: Tree): Option[Body] =
      tree match {
        case XML.Elem(Markup(XML_ELEM, Nil), ts) => Some(ts)
        case _ => None
      }
  }
wenzelm@67818
    88
wenzelm@49650
    89
wenzelm@49650
    90
  /* traverse text */
wenzelm@49650
    91
wenzelm@49650
    92
  // Fold op over the strings of all text nodes of body, from left to right,
  // starting with a.  For a wrapped element only its second body is visited;
  // the wrapped first body is skipped.
  def traverse_text[A](body: Body)(a: A)(op: (A, String) => A): A =
  {
    def visit(acc: A, node: Tree): A =
      node match {
        // must be tested before the general Elem case
        case XML.Wrapped_Elem(_, _, rest) => rest.foldLeft(acc)(visit)
        case XML.Elem(_, children) => children.foldLeft(acc)(visit)
        case XML.Text(str) => op(acc, str)
      }
    body.foldLeft(a)(visit)
  }
wenzelm@49650
   102
wenzelm@49650
   103
  // sum of the lengths of all strings visited by traverse_text
  def text_length(body: Body): Int =
    traverse_text(body)(0)((total, str) => total + str.length)
wenzelm@49650
   104
wenzelm@49650
   105
wenzelm@49650
   106
  /* text content */
wenzelm@49650
   107
wenzelm@49650
   108
  // Concatenation of all strings visited by traverse_text; the buffer is
  // created with the capacity computed by text_length.
  def content(body: Body): String =
  {
    val result = new StringBuilder(text_length(body))
    traverse_text(body)(())((_, str) => { result.append(str); () })
    result.toString
  }
wenzelm@49650
   114
wenzelm@49650
   115
  // text content of a single tree, via the singleton body
  def content(tree: Tree): String = content(tree :: Nil)
wenzelm@49650
   116
wenzelm@49650
   117
wenzelm@49650
   118
wenzelm@49650
   119
  /** string representation **/
wenzelm@29203
   120
wenzelm@69983
   121
  // XML declaration (version 1.0, encoding utf-8) followed by a newline
  val header: String = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
wenzelm@69983
   122
wenzelm@65992
   123
  // Append one character to s, replacing the five XML special characters
  // < > & " ' by their entity references.
  def output_char(c: Char, s: StringBuilder): Unit =
  {
    if (c == '<') s.append("&lt;")
    else if (c == '>') s.append("&gt;")
    else if (c == '&') s.append("&amp;")
    else if (c == '"') s.append("&quot;")
    else if (c == '\'') s.append("&apos;")
    else s.append(c)
  }
wenzelm@65992
   134
wenzelm@65992
   135
  // Append str to s with every character escaped by output_char.  A null
  // string is handed to the builder as it is, without escaping.
  def output_string(str: String, s: StringBuilder): Unit =
  {
    if (str != null) {
      for (c <- str.iterator) output_char(c, s)
    }
    else s.append(str)
  }
wenzelm@65992
   140
wenzelm@38268
   141
  // Serialize a body as XML source text: attribute values and text nodes are
  // escaped via output_string, elements without children are written in the
  // short form <name .../>.
  def string_of_body(body: Body): String =
  {
    val out = new StringBuilder

    def escaped(str: String): Unit = output_string(str, out)

    // element name followed by its attributes as name="value"
    def name_and_attributes(markup: Markup): Unit =
    {
      out ++= markup.name
      for ((key, value) <- markup.properties) {
        out += ' '
        out ++= key
        out ++= "=\""
        escaped(value)
        out += '"'
      }
    }

    def render(node: Tree): Unit =
      node match {
        case XML.Elem(markup, children) =>
          out += '<'
          name_and_attributes(markup)
          if (children.isEmpty) out ++= "/>"
          else {
            out += '>'
            children.foreach(render)
            out ++= "</"
            out ++= markup.name
            out += '>'
          }
        case XML.Text(str) => escaped(str)
      }

    body.foreach(render)
    out.toString
  }
wenzelm@29203
   166
wenzelm@38268
   167
  // XML source text of a single tree, via the singleton body
  def string_of_tree(tree: XML.Tree): String = string_of_body(tree :: Nil)
wenzelm@27941
   168
wenzelm@27941
   169
wenzelm@44808
   170
wenzelm@68267
   171
  /** cache **/
wenzelm@34108
   172
wenzelm@68169
   173
  // Create a fresh cache; both parameters are passed on unchanged to the
  // Cache constructor (defaults: initial_size 131071, max_string 100).
  def make_cache(initial_size: Int = 131071, max_string: Int = 100): Cache =
    new Cache(initial_size = initial_size, max_string = max_string)
wenzelm@68169
   175
wenzelm@68169
   176
  // Cache for properties, markup and XML trees.  The operations lookup, store
  // and cache_string are not defined in this class (presumably inherited from
  // isabelle.Cache -- to be confirmed there).  Each cache_* operation first
  // consults lookup and only rebuilds the value from cached components when
  // nothing is found.
  class Cache private[XML](initial_size: Int, max_string: Int)
    extends isabelle.Cache(initial_size, max_string)
  {
    // empty properties are returned as they are; otherwise every name is
    // isolated and interned, every value goes through cache_string
    protected def cache_props(x: Properties.T): Properties.T =
      if (x.isEmpty) x
      else
        lookup(x).getOrElse {
          store(x.map(entry => {
            val name = Library.isolate_substring(entry._1).intern
            val value = cache_string(entry._2)
            (name, value)
          }))
        }

    protected def cache_markup(x: Markup): Markup =
      lookup(x).getOrElse {
        val Markup(name, props) = x
        store(Markup(cache_string(name), cache_props(props)))
      }

    protected def cache_tree(x: XML.Tree): XML.Tree =
      lookup(x).getOrElse {
        x match {
          case XML.Elem(markup, body) =>
            store(XML.Elem(cache_markup(markup), cache_body(body)))
          case XML.Text(text) => store(XML.Text(cache_string(text)))
        }
      }

    // note: the freshly mapped list is returned without a call of store
    protected def cache_body(x: XML.Body): XML.Body =
      if (x.isEmpty) x
      else lookup(x).getOrElse(x.map(cache_tree))

    // main methods: public entry points, each one synchronized on this cache
    def props(x: Properties.T): Properties.T = synchronized { cache_props(x) }
    def markup(x: Markup): Markup = synchronized { cache_markup(x) }
    def tree(x: XML.Tree): XML.Tree = synchronized { cache_tree(x) }
    def body(x: XML.Body): XML.Body = synchronized { cache_body(x) }
    def elem(x: XML.Elem): XML.Elem = synchronized { cache_tree(x).asInstanceOf[XML.Elem] }
  }
wenzelm@34108
   231
wenzelm@34108
   232
wenzelm@43767
   233
wenzelm@43767
   234
  /** XML as data representation language **/
wenzelm@43767
   235
wenzelm@51987
   236
  // Common base class of the exceptions thrown by Decode; s is the exception message.
  abstract class Error(s: String) extends Exception(s)
wenzelm@51987
   237
  // Thrown by Decode for a malformed atomic value; the offending string is the message.
  class XML_Atom(s: String) extends Error(s)
wenzelm@51987
   238
  // Thrown by Decode for a body of unexpected shape.  NOTE: the body argument
  // is neither stored nor shown; the exception message is the empty string.
  class XML_Body(body: XML.Body) extends Error("")
wenzelm@43767
   239
wenzelm@43767
   240
  // Encoding of values as XML bodies.  T[A] is an encoder for type A; V[A] is
  // one case of a variant encoder: a partial function that yields a list of
  // atomic strings together with a body.
  object Encode
  {
    type T[A] = A => XML.Body
    type V[A] = PartialFunction[A, (List[String], XML.Body)]


    /* atomic values */

    def long_atom(i: Long): String = Library.signed_string_of_long(i)

    def int_atom(i: Int): String = Library.signed_string_of_int(i)

    // booleans are written as "1" and "0"
    def bool_atom(b: Boolean): String =
      b match {
        case true => "1"
        case false => "0"
      }

    // the unit value is written as the empty string
    def unit_atom(u: Unit): String = ""


    /* structural nodes */

    // anonymous node: element named ":" without properties
    private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)

    // atomic strings become attributes whose names are their positions 0, 1, ...
    private def vector(xs: List[String]): XML.Attributes =
      xs.zipWithIndex.map(p => (int_atom(p._2), p._1))

    // element named by the tag, with the atomic strings as attributes
    private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
    {
      val name = int_atom(tag)
      val atts = vector(data._1)
      XML.Elem(Markup(name, atts), data._2)
    }


    /* representation of standard types */

    val tree: T[XML.Tree] = (t => t :: Nil)

    val properties: T[Properties.T] =
      (props => XML.Elem(Markup(":", props), Nil) :: Nil)

    // the empty string is encoded as the empty body
    val string: T[String] = (s => if (!s.isEmpty) List(XML.Text(s)) else Nil)

    val long: T[Long] = (long_atom _).andThen(string)

    val int: T[Int] = (int_atom _).andThen(string)

    val bool: T[Boolean] = (bool_atom _).andThen(string)

    val unit: T[Unit] = (unit_atom _).andThen(string)

    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
      (p => node(f(p._1)) :: node(g(p._2)) :: Nil)

    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
      (p => node(f(p._1)) :: node(g(p._2)) :: node(h(p._3)) :: Nil)

    // one node per list element
    def list[A](f: T[A]): T[List[A]] =
      (_.map(x => node(f(x))))

    // None is the empty body, Some a single node
    def option[A](f: T[A]): T[Option[A]] =
    {
      case Some(x) => node(f(x)) :: Nil
      case None => Nil
    }

    // The first case that is defined for the value is used, with its position
    // in fs as tag; a value without matching case makes the ".get" fail.
    def variant[A](fs: List[V[A]]): T[A] =
      (x => {
        val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get
        tagged(tag, f(x)) :: Nil
      })
  }
wenzelm@43767
   307
wenzelm@43767
   308
  // Decoding of XML bodies into values.  T[A] is a decoder for type A; V[A] is
  // one case of a variant decoder, applied to a list of atomic strings and a
  // body.  Malformed atoms raise XML_Atom, bodies of unexpected shape XML_Body.
  object Decode
  {
    type T[A] = XML.Body => A
    type V[A] = (List[String], XML.Body) => A


    /* atomic values */

    def long_atom(s: String): Long =
      try { java.lang.Long.parseLong(s) }
      catch { case _: NumberFormatException => throw new XML_Atom(s) }

    def int_atom(s: String): Int =
      try { java.lang.Integer.parseInt(s) }
      catch { case _: NumberFormatException => throw new XML_Atom(s) }

    // only "1" and "0" are accepted
    def bool_atom(s: String): Boolean =
      s match {
        case "1" => true
        case "0" => false
        case _ => throw new XML_Atom(s)
      }

    // only the empty string is accepted
    def unit_atom(s: String): Unit =
      if (s != "") throw new XML_Atom(s)


    /* structural nodes */

    // body of an anonymous node: element named ":" without properties
    private def node(t: XML.Tree): XML.Body =
      t match {
        case XML.Elem(Markup(":", Nil), body) => body
        case _ => throw new XML_Body(List(t))
      }

    // attribute names must be the positions 0, 1, ... in this order
    private def vector(atts: XML.Attributes): List[String] =
      atts.iterator.zipWithIndex.map({
        case ((a, x), i) =>
          if (int_atom(a) != i) throw new XML_Atom(a)
          x
      }).toList

    // tag (element name as integer) with atomic strings and body
    private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
      t match {
        case XML.Elem(Markup(name, atts), body) => (int_atom(name), (vector(atts), body))
        case _ => throw new XML_Body(List(t))
      }


    /* representation of standard types */

    val tree: T[XML.Tree] =
    {
      case List(single) => single
      case other => throw new XML_Body(other)
    }

    val properties: T[Properties.T] =
    {
      case List(XML.Elem(Markup(":", props), Nil)) => props
      case other => throw new XML_Body(other)
    }

    // the empty body is the empty string
    val string: T[String] =
    {
      case Nil => ""
      case List(XML.Text(str)) => str
      case other => throw new XML_Body(other)
    }

    val long: T[Long] = string.andThen(long_atom _)

    val int: T[Int] = string.andThen(int_atom _)

    val bool: T[Boolean] = string.andThen(bool_atom _)

    val unit: T[Unit] = string.andThen(unit_atom _)

    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
    {
      case List(fst, snd) => (f(node(fst)), g(node(snd)))
      case other => throw new XML_Body(other)
    }

    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
    {
      case List(fst, snd, thd) => (f(node(fst)), g(node(snd)), h(node(thd)))
      case other => throw new XML_Body(other)
    }

    // one node per list element
    def list[A](f: T[A]): T[List[A]] =
      (_.map(t => f(node(t))))

    // the empty body is None, a single node is Some
    def option[A](f: T[A]): T[Option[A]] =
    {
      case Nil => None
      case List(single) => Some(f(node(single)))
      case other => throw new XML_Body(other)
    }

    // The tag of the single tree selects the case from fs by position; a tag
    // outside of fs is reported as XML_Body.
    def variant[A](fs: List[V[A]]): T[A] =
    {
      case List(t) =>
        val (tag, (atoms, body)) = tagged(t)
        val decode = fs.lift(tag).getOrElse(throw new XML_Body(List(t)))
        decode(atoms, body)
      case other => throw new XML_Body(other)
    }
  }
wenzelm@27931
   414
}