diff -r ae23b2d5d2ca -r 2b44df907cc2 src/Pure/General/yxml.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/Pure/General/yxml.scala Sun Aug 17 21:11:06 2008 +0200 @@ -0,0 +1,92 @@ +/* Title: Pure/General/yxml.scala + ID: $Id$ + Author: Makarius + +Efficient text representation of XML trees. +*/ + +package isabelle + +import java.util.regex.Pattern + + +object YXML { + + /* markers */ + + private val X = '\5' + private val Y = '\6' + + def detect(source: CharSequence) = { + source.length >= 2 && + source.charAt(0) == X && + source.charAt(1) == Y + } + + + /* parsing */ + + class BadYXML(msg: String) extends Exception + + private def err(msg: String) = throw new BadYXML(msg) + private def err_attribute() = err("bad attribute") + private def err_element() = err("bad element") + private def err_unbalanced(name: String) = + if (name == "") err("unbalanced element") + else err("unbalanced element \"" + name + "\"") + + private val X_pattern = Pattern.compile(Pattern.quote(X.toString)) + private val Y_pattern = Pattern.compile(Pattern.quote(Y.toString)) + private val eq_pattern = Pattern.compile(Pattern.quote("=")) + + private def parse_attrib(s: String) = + eq_pattern.split(s, 2).toList match { + case List(x, y) => if (x != "") (x, y) else err_attribute() + case _ => err_attribute() + } + + + def parse_body(source: CharSequence) = { + + /* stack operations */ + + var stack: List[((String, XML.Attributes), List[XML.Tree])] = null + + def add(x: XML.Tree) = stack match { + case ((elem, body) :: pending) => stack = (elem, x :: body) :: pending + } + + def push(name: String, atts: XML.Attributes) = + if (name == "") err_element() + else stack = ((name, atts), Nil) :: stack + + def pop() = stack match { + case ((("", _), _) :: _) => err_unbalanced("") + case (((name, atts), body) :: pending) => + stack = pending; add(XML.Elem(name, atts, body.reverse)) + } + + + /* parse chunks */ + + stack = List((("", Nil), Nil)) + for (chunk <- X_pattern.split(source) if chunk != "") { + Y_pattern.split(chunk).toList match { + case Nil => pop() + case "" :: name :: atts => push(name, atts.map(parse_attrib)) + case txts => for (txt <- txts) add(XML.Text(txt)) + } + } + stack match { + case List((("", _), result)) => result.reverse + case ((name, _), _) :: _ => err_unbalanced(name) + } + } + + def parse(source: CharSequence) = + parse_body(source) match { + case List(result) => result + case Nil => XML.Text("") + case _ => err("multiple results") + } +}