src/Pure/PIDE/xml.scala
author wenzelm
Sat Mar 01 22:46:31 2014 +0100 (2014-03-01)
changeset 55828 42ac3cfb89f6
parent 55618 995162143ef4
child 57909 0fb331032f02
permissions -rw-r--r--
clarified language markup: added "delimited" property;
type Symbol_Pos.source preserves information about delimited outer tokens (e.g. string, cartouche);
observe Completion.Language_Context only for delimited languages, which is important to complete keywords after undelimited inner tokens, e.g. "lemma A pro";
wenzelm@44698
     1
/*  Title:      Pure/PIDE/xml.scala
wenzelm@45673
     2
    Module:     PIDE
wenzelm@27931
     3
    Author:     Makarius
wenzelm@27931
     4
wenzelm@44698
     5
Untyped XML trees and basic data representation.
wenzelm@27931
     6
*/
wenzelm@27931
     7
wenzelm@27931
     8
package isabelle
wenzelm@27931
     9
wenzelm@55618
    10
wenzelm@34108
    11
import java.util.WeakHashMap
wenzelm@34108
    12
import java.lang.ref.WeakReference
wenzelm@34108
    13
import javax.xml.parsers.DocumentBuilderFactory
wenzelm@34108
    14
wenzelm@27947
    15
wenzelm@29203
    16
object XML
wenzelm@29203
    17
{
wenzelm@43767
    18
  /** XML trees **/
wenzelm@43767
    19
wenzelm@27947
    20
  /* datatype representation */
wenzelm@27947
    21
wenzelm@43780
    22
  /* attributes of an element: alias for Properties.T */
  type Attributes = Properties.T
wenzelm@27931
    23
wenzelm@38268
    24
  /* Common supertype of all XML nodes; toString delegates to string_of_tree. */
  sealed abstract class Tree
  {
    override def toString: String = string_of_tree(this)
  }

  /* Element node: a markup together with its list of child trees. */
  case class Elem(markup: Markup, body: List[Tree]) extends Tree { def name: String = markup.name }

  /* Text node holding its string content. */
  case class Text(content: String) extends Tree
wenzelm@29203
    30
wenzelm@38230
    31
  /* Build an element whose markup has the given name and no properties. */
  def elem(name: String, body: List[Tree]): Elem = Elem(Markup(name, Nil), body)

  /* Same as the two-argument variant, with an empty body. */
  def elem(name: String): Elem = elem(name, Nil)
wenzelm@33999
    33
wenzelm@38267
    34
  /* a body is a plain list of trees */
  type Body = List[Tree]
wenzelm@38267
    35
wenzelm@29203
    36
wenzelm@49650
    37
  /* wrapped elements */
wenzelm@49650
    38
wenzelm@49650
    39
  /* names used by Wrapped_Elem: outer element, name property, inner body element */
  val XML_ELEM = "xml_elem"
  val XML_NAME = "xml_name"
  val XML_BODY = "xml_body"
wenzelm@49650
    42
wenzelm@49650
    43
  object Wrapped_Elem
  {
    /* Wrap markup and body1 into an XML_ELEM element:
       - the original markup name becomes the leading XML_NAME property,
         followed by the original properties;
       - body1 is placed inside a leading XML_BODY element, body2 follows it. */
    def apply(markup: Markup, body1: Body, body2: Body): XML.Elem =
    {
      val wrapped_markup = Markup(XML_ELEM, (XML_NAME, markup.name) :: markup.properties)
      val wrapped_body = Elem(Markup(XML_BODY, Nil), body1)
      Elem(wrapped_markup, wrapped_body :: body2)
    }

    /* Inverse of apply: recover (markup, body1, body2) from a tree of exactly
       that shape; any other tree yields None. */
    def unapply(tree: Tree): Option[(Markup, Body, Body)] =
      tree match {
        case Elem(
            Markup(XML_ELEM, (XML_NAME, name) :: props),
            Elem(Markup(XML_BODY, Nil), body1) :: body2) =>
          Some((Markup(name, props), body1, body2))
        case _ => None
      }
  }
wenzelm@49650
    58
wenzelm@49650
    59
wenzelm@49650
    60
  /* traverse text */
wenzelm@49650
    61
wenzelm@49650
    62
  /* Left-to-right fold over all text nodes of body, starting from a and
     combining via op.  For a Wrapped_Elem only the trailing body (its third
     component) is visited; for any other element all children are visited. */
  def traverse_text[A](body: Body)(a: A)(op: (A, String) => A): A =
  {
    def visit(acc: A, tree: Tree): A =
      tree match {
        case Wrapped_Elem(_, _, children) => children.foldLeft(acc)(visit)
        case Elem(_, children) => children.foldLeft(acc)(visit)
        case Text(txt) => op(acc, txt)
      }
    body.foldLeft(a)(visit)
  }
wenzelm@49650
    72
wenzelm@49650
    73
  /* Sum of the lengths of all text nodes visited by traverse_text. */
  def text_length(body: Body): Int =
    traverse_text(body)(0)((total, s) => total + s.length)
wenzelm@49650
    74
wenzelm@49650
    75
wenzelm@49650
    76
  /* text content */
wenzelm@49650
    77
wenzelm@49650
    78
  /* Concatenation of all text nodes visited by traverse_text; the builder is
     pre-sized with text_length so that it need not grow. */
  def content(body: Body): String =
  {
    val buffer = new StringBuilder(text_length(body))
    traverse_text(body)(()) { (_, s) => buffer.append(s); () }
    buffer.toString
  }

  /* Text content of a single tree. */
  def content(tree: Tree): String = content(tree :: Nil)
wenzelm@49650
    86
wenzelm@49650
    87
wenzelm@49650
    88
wenzelm@49650
    89
  /** string representation **/
wenzelm@29203
    90
wenzelm@38268
    91
  /* Render a body as XML text: elements become tags (self-closing when the
     body is empty), text and attribute values are entity-escaped. */
  def string_of_body(body: Body): String =
  {
    val out = new StringBuilder

    /* append one character, replacing the XML special characters by entities */
    def escape(c: Char): Unit =
      c match {
        case '<' => out ++= "&lt;"
        case '>' => out ++= "&gt;"
        case '&' => out ++= "&amp;"
        case '"' => out ++= "&quot;"
        case '\'' => out ++= "&apos;"
        case _ => out += c
      }

    /* append escaped text; a null string is handed to the builder unchanged */
    def text(txt: String): Unit =
    {
      if (txt == null) out ++= txt
      else txt.foreach(escape)
    }

    /* append one attribute as: space, name, ="escaped value" */
    def attrib(p: (String, String)): Unit =
    {
      out ++= " "
      out ++= p._1
      out ++= "=\""
      text(p._2)
      out ++= "\""
    }

    /* append element name followed by all of its attributes */
    def open_tag(markup: Markup): Unit =
    {
      out ++= markup.name
      markup.properties.foreach(attrib)
    }

    def tree(t: Tree): Unit =
      t match {
        case Text(txt) => text(txt)
        case Elem(markup, children) =>
          out ++= "<"
          open_tag(markup)
          if (children.isEmpty) out ++= "/>"
          else {
            out ++= ">"
            children.foreach(tree)
            out ++= "</"
            out ++= markup.name
            out ++= ">"
          }
      }

    body.foreach(tree)
    out.toString
  }
wenzelm@29203
   123
wenzelm@38268
   124
  /* String representation of a single tree, via string_of_body. */
  def string_of_tree(tree: XML.Tree): String =
    string_of_body(tree :: Nil)
wenzelm@27941
   125
wenzelm@27941
   126
wenzelm@44808
   127
wenzelm@44808
   128
  /** cache for partial sharing (weak table) **/
wenzelm@34108
   129
wenzelm@43745
   130
  /* Cache that replaces strings, properties, markup and trees by previously
     stored equal values.  The public methods at the end are synchronized on
     this cache instance.

     initial_size: initial capacity of the underlying WeakHashMap.
     max_string: strings longer than this are copied but not stored. */
  class Cache(initial_size: Int = 131071, max_string: Int = 100)
  {
    /* Each stored value is its own key; the key is held weakly by the
       WeakHashMap and the value is wrapped in a WeakReference.  The table
       reference itself is never reassigned. */
    private val table = new WeakHashMap[Any, WeakReference[Any]](initial_size)

    /* Find a stored value equal to x whose referent has not been cleared. */
    private def lookup[A](x: A): Option[A] =
      for {
        ref <- Option(table.get(x))
        y <- Option(ref.asInstanceOf[WeakReference[A]].get)
      } yield y

    /* Register x in the table and return it. */
    private def store[A](x: A): A =
    {
      table.put(x, new WeakReference[Any](x))
      x
    }

    /* Fresh copy of s built from its characters.
       NOTE(review): presumably meant to detach s from a larger shared
       backing array -- confirm against the targeted JVM versions. */
    private def trim_bytes(s: String): String = new String(s.toCharArray)

    /* Strings: reuse a stored equal string; otherwise copy, and store the
       copy only if it is not longer than max_string. */
    private def cache_string(x: String): String =
      lookup(x) match {
        case Some(y) => y
        case None =>
          val copy = trim_bytes(x)
          if (copy.length > max_string) copy else store(copy)
      }

    /* Properties: names are copied and interned, values go through
       cache_string; the empty list is returned unchanged. */
    private def cache_props(x: Properties.T): Properties.T =
      if (x.isEmpty) x
      else
        lookup(x) match {
          case Some(y) => y
          case None =>
            store(x.map({ case (a, b) => (trim_bytes(a).intern, cache_string(b)) }))
        }

    private def cache_markup(x: Markup): Markup =
      lookup(x) match {
        case Some(y) => y
        case None =>
          x match {
            case Markup(name, props) =>
              store(Markup(cache_string(name), cache_props(props)))
          }
      }

    private def cache_tree(x: XML.Tree): XML.Tree =
      lookup(x) match {
        case Some(y) => y
        case None =>
          x match {
            case XML.Elem(markup, body) =>
              store(XML.Elem(cache_markup(markup), cache_body(body)))
            case XML.Text(text) => store(XML.Text(cache_string(text)))
          }
      }

    /* Bodies: the mapped list itself is not stored, only its elements are.
       NOTE(review): as nothing stores a body, the lookup below looks like it
       can only miss -- confirm whether storing the result was intended. */
    private def cache_body(x: XML.Body): XML.Body =
      if (x.isEmpty) x
      else
        lookup(x) match {
          case Some(y) => y
          case None => x.map(cache_tree(_))
        }


    // main methods
    def string(x: String): String = synchronized { cache_string(x) }
    def props(x: Properties.T): Properties.T = synchronized { cache_props(x) }
    def markup(x: Markup): Markup = synchronized { cache_markup(x) }
    def tree(x: XML.Tree): XML.Tree = synchronized { cache_tree(x) }
    def body(x: XML.Body): XML.Body = synchronized { cache_body(x) }
    // cache_tree yields a value equal to its argument, hence the cast
    def elem(x: XML.Elem): XML.Elem = synchronized { cache_tree(x).asInstanceOf[XML.Elem] }
  }
wenzelm@34108
   201
wenzelm@34108
   202
wenzelm@43767
   203
wenzelm@43767
   204
  /** XML as data representation language **/
wenzelm@43767
   205
wenzelm@51987
   206
  /* common base class of the XML data representation errors below */
  abstract class Error(s: String) extends Exception(s)
wenzelm@51987
   207
  /* malformed atomic value; the offending string is the exception message */
  class XML_Atom(s: String) extends Error(s)
wenzelm@51987
   208
  /* unexpected body structure; the exception message is empty */
  class XML_Body(body: XML.Body) extends Error("")
wenzelm@43767
   209
wenzelm@43767
   210
  /* Encoders from values to XML bodies. */
  object Encode
  {
    type T[A] = A => XML.Body


    /* atomic values */

    def long_atom(i: Long): String = i.toString

    def int_atom(i: Int): String = i.toString

    def bool_atom(b: Boolean): String = if (b) "1" else "0"

    def unit_atom(u: Unit): String = ""


    /* structural nodes */

    // anonymous node: element named ":" without properties
    private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts)

    // strings as attributes, named by their position "0", "1", ...
    private def vector(xs: List[String]): XML.Attributes =
      xs.zipWithIndex.map({ case (x, i) => (int_atom(i), x) })

    // element named by the tag number, with the strings as attributes
    private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree =
      XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2)


    /* representation of standard types */

    val properties: T[Properties.T] =
      (props => List(XML.Elem(Markup(":", props), Nil)))

    // the empty string is encoded as the empty body
    val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s)))

    val long: T[Long] = (x => string(long_atom(x)))

    val int: T[Int] = (x => string(int_atom(x)))

    val bool: T[Boolean] = (x => string(bool_atom(x)))

    val unit: T[Unit] = (x => string(unit_atom(x)))

    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
      (x => List(node(f(x._1)), node(g(x._2))))

    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
      (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3))))

    def list[A](f: T[A]): T[List[A]] =
      (xs => xs.map((x: A) => node(f(x))))

    def option[A](f: T[A]): T[Option[A]] =
    {
      case None => Nil
      case Some(x) => List(node(f(x)))
    }

    /* Encode x via the first partial function in fs that is defined on it;
       the position of that function in fs becomes the tag.
       Throws NoSuchElementException if no function in fs is defined on x. */
    def variant[A](fs: List[PartialFunction[A, (List[String], XML.Body)]]): T[A] =
      (x =>
        fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)) match {
          case Some((f, tag)) => List(tagged(tag, f(x)))
          case None =>
            throw new NoSuchElementException("XML.Encode.variant: no matching case")
        })
  }
wenzelm@43767
   274
wenzelm@43767
   275
  /* Decoders from XML bodies to values.  Malformed atoms raise XML_Atom,
     unexpected tree structure raises XML_Body. */
  object Decode
  {
    type T[A] = XML.Body => A
    type V[A] = (List[String], XML.Body) => A


    /* atomic values */

    def long_atom(s: String): Long =
      try { java.lang.Long.parseLong(s) }
      catch { case _: NumberFormatException => throw new XML_Atom(s) }

    def int_atom(s: String): Int =
      try { java.lang.Integer.parseInt(s) }
      catch { case _: NumberFormatException => throw new XML_Atom(s) }

    def bool_atom(s: String): Boolean =
      s match {
        case "1" => true
        case "0" => false
        case _ => throw new XML_Atom(s)
      }

    def unit_atom(s: String): Unit =
      if (s != "") throw new XML_Atom(s)


    /* structural nodes */

    // body of an anonymous node, i.e. an element named ":" without properties
    private def node(t: XML.Tree): XML.Body =
      t match {
        case XML.Elem(Markup(":", Nil), children) => children
        case _ => throw new XML_Body(List(t))
      }

    // attribute values, provided the attribute names are exactly 0, 1, 2, ...
    private def vector(atts: XML.Attributes): List[String] =
      atts.iterator.zipWithIndex.map({
        case ((a, x), i) =>
          if (int_atom(a) != i) throw new XML_Atom(a)
          x
      }).toList

    // tag number (from the element name), attribute strings and body
    private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) =
      t match {
        case XML.Elem(Markup(name, atts), children) =>
          (int_atom(name), (vector(atts), children))
        case _ => throw new XML_Body(List(t))
      }


    /* representation of standard types */

    val properties: T[Properties.T] =
    {
      case List(XML.Elem(Markup(":", props), Nil)) => props
      case ts => throw new XML_Body(ts)
    }

    // the empty body is decoded as the empty string
    val string: T[String] =
    {
      case Nil => ""
      case List(XML.Text(s)) => s
      case ts => throw new XML_Body(ts)
    }

    val long: T[Long] = (body => long_atom(string(body)))

    val int: T[Int] = (body => int_atom(string(body)))

    val bool: T[Boolean] = (body => bool_atom(string(body)))

    val unit: T[Unit] = (body => unit_atom(string(body)))

    def pair[A, B](f: T[A], g: T[B]): T[(A, B)] =
    {
      case t1 :: t2 :: Nil => (f(node(t1)), g(node(t2)))
      case ts => throw new XML_Body(ts)
    }

    def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] =
    {
      case t1 :: t2 :: t3 :: Nil => (f(node(t1)), g(node(t2)), h(node(t3)))
      case ts => throw new XML_Body(ts)
    }

    def list[A](f: T[A]): T[List[A]] =
      (ts => ts.map(t => f(node(t))))

    def option[A](f: T[A]): T[Option[A]] =
    {
      case Nil => None
      case t :: Nil => Some(f(node(t)))
      case ts => throw new XML_Body(ts)
    }

    /* Decode a single tagged element with the function of fs found at the
       position given by the tag; a tag outside of fs raises XML_Body. */
    def variant[A](fs: List[V[A]]): T[A] =
    {
      case t :: Nil =>
        val (tag, (xs, ts)) = tagged(t)
        val f = fs.lift(tag).getOrElse(throw new XML_Body(List(t)))
        f(xs, ts)
      case ts => throw new XML_Body(ts)
    }
  }
wenzelm@27931
   375
}