Efficient text representation of XML trees.
authorwenzelm
Sun, 17 Aug 2008 21:11:06 +0200
changeset 27930 2b44df907cc2
parent 27929 ae23b2d5d2ca
child 27931 b533a9de87a7
Efficient text representation of XML trees.
src/Pure/General/yxml.scala
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/General/yxml.scala	Sun Aug 17 21:11:06 2008 +0200
@@ -0,0 +1,92 @@
+/*  Title:      Pure/General/yxml.scala
+    ID:         $Id$
+    Author:     Makarius
+
+Efficient text representation of XML trees.
+*/
+
+package isabelle
+
+import java.util.regex.Pattern
+
+
+object YXML {
+
+  /* markers */
+
+  private val X = '\5'
+  private val Y = '\6'
+
+  def detect(source: CharSequence) = {
+    source.length >= 2 &&
+    source.charAt(0) == X &&
+    source.charAt(1) == Y
+  }
+
+
+  /* parsing */
+
+  class BadYXML(msg: String) extends Exception
+
+  private def err(msg: String) = throw new BadYXML(msg)
+  private def err_attribute() = err("bad attribute")
+  private def err_element() = err("bad element")
+  private def err_unbalanced(name: String) =
+    if (name == "") err("unbalanced element")
+    else err("unbalanced element \"" + name + "\"")
+
+  private val X_pattern = Pattern.compile(Pattern.quote(X.toString))
+  private val Y_pattern = Pattern.compile(Pattern.quote(Y.toString))
+  private val eq_pattern = Pattern.compile(Pattern.quote("="))
+
+  private def parse_attrib(s: String) =
+    eq_pattern.split(s, 2).toList match {
+      case List(x, y) => if (x != "") (x, y) else err_attribute()
+      case _ => err_attribute()
+    }
+
+
+  def parse_body(source: CharSequence) = {
+
+    /* stack operations */
+
+    var stack: List[((String, XML.Attributes), List[XML.Tree])] = null
+
+    def add(x: XML.Tree) = stack match {
+      case ((elem, body) :: pending) => stack = (elem, x :: body) :: pending
+    }
+
+    def push(name: String, atts: XML.Attributes) =
+      if (name == "") err_element()
+      else stack = ((name, atts), Nil) :: stack
+
+    def pop() = stack match {
+      case ((("", _), _) :: _) => err_unbalanced("")
+      case (((name, atts), body) :: pending) =>
+        stack = pending; add(XML.Elem(name, atts, body.reverse))
+    }
+
+
+    /* parse chunks */
+
+    stack = List((("", Nil), Nil))
+    for (chunk <- X_pattern.split(source) if chunk != "") {
+      Y_pattern.split(chunk).toList match {
+        case Nil => pop()
+        case "" :: name :: atts => push(name, atts.map(parse_attrib))
+        case txts => for (txt <- txts) add(XML.Text(txt))
+      }
+    }
+    stack match {
+      case List((("", _), result)) => result.reverse
+      case ((name, _), _) :: _ => err_unbalanced(name)
+    }
+  }
+
+  def parse(source: CharSequence) =
+    parse_body(source) match {
+      case List(result) => result
+      case Nil => XML.Text("")
+      case _ => err("multiple results")
+    }
+}