1 /* Title: Pure/General/yxml.scala |
|
2 Author: Makarius |
|
3 |
|
4 Efficient text representation of XML trees. |
|
5 */ |
|
6 |
|
7 package isabelle |
|
8 |
|
9 |
|
10 import scala.collection.mutable |
|
11 |
|
12 |
|
13 object YXML |
|
14 { |
|
15 /* chunk markers */ |
|
16 |
|
17 val X = '\5' |
|
18 val Y = '\6' |
|
19 val X_string = X.toString |
|
20 val Y_string = Y.toString |
|
21 |
|
22 def detect(s: String): Boolean = s.exists(c => c == X || c == Y) |
|
23 |
|
24 |
|
25 /* string representation */ // FIXME byte array version with pseudo-utf-8 (!?) |
|
26 |
|
27 def string_of_body(body: XML.Body): String = |
|
28 { |
|
29 val s = new StringBuilder |
|
30 def attrib(p: (String, String)) { s += Y; s ++= p._1; s += '='; s ++= p._2 } |
|
31 def tree(t: XML.Tree): Unit = |
|
32 t match { |
|
33 case XML.Elem(Markup(name, atts), ts) => |
|
34 s += X; s += Y; s++= name; atts.foreach(attrib); s += X |
|
35 ts.foreach(tree) |
|
36 s += X; s += Y; s += X |
|
37 case XML.Text(text) => s ++= text |
|
38 } |
|
39 body.foreach(tree) |
|
40 s.toString |
|
41 } |
|
42 |
|
43 def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree)) |
|
44 |
|
45 |
|
46 /* parsing */ |
|
47 |
|
48 private def err(msg: String) = error("Malformed YXML: " + msg) |
|
49 private def err_attribute() = err("bad attribute") |
|
50 private def err_element() = err("bad element") |
|
51 private def err_unbalanced(name: String) = |
|
52 if (name == "") err("unbalanced element") |
|
53 else err("unbalanced element " + quote(name)) |
|
54 |
|
55 private def parse_attrib(source: CharSequence) = { |
|
56 val s = source.toString |
|
57 val i = s.indexOf('=') |
|
58 if (i <= 0) err_attribute() |
|
59 (s.substring(0, i), s.substring(i + 1)) |
|
60 } |
|
61 |
|
62 |
|
63 def parse_body(source: CharSequence): XML.Body = |
|
64 { |
|
65 /* stack operations */ |
|
66 |
|
67 def buffer(): mutable.ListBuffer[XML.Tree] = new mutable.ListBuffer[XML.Tree] |
|
68 var stack: List[(Markup, mutable.ListBuffer[XML.Tree])] = List((Markup.Empty, buffer())) |
|
69 |
|
70 def add(x: XML.Tree) |
|
71 { |
|
72 (stack: @unchecked) match { case ((_, body) :: _) => body += x } |
|
73 } |
|
74 |
|
75 def push(name: String, atts: XML.Attributes) |
|
76 { |
|
77 if (name == "") err_element() |
|
78 else stack = (Markup(name, atts), buffer()) :: stack |
|
79 } |
|
80 |
|
81 def pop() |
|
82 { |
|
83 (stack: @unchecked) match { |
|
84 case ((Markup.Empty, _) :: _) => err_unbalanced("") |
|
85 case ((markup, body) :: pending) => |
|
86 stack = pending |
|
87 add(XML.Elem(markup, body.toList)) |
|
88 } |
|
89 } |
|
90 |
|
91 |
|
92 /* parse chunks */ |
|
93 |
|
94 for (chunk <- Library.chunks(source, X) if chunk.length != 0) { |
|
95 if (chunk.length == 1 && chunk.charAt(0) == Y) pop() |
|
96 else { |
|
97 Library.chunks(chunk, Y).toList match { |
|
98 case ch :: name :: atts if ch.length == 0 => |
|
99 push(name.toString, atts.map(parse_attrib)) |
|
100 case txts => for (txt <- txts) add(XML.Text(txt.toString)) |
|
101 } |
|
102 } |
|
103 } |
|
104 (stack: @unchecked) match { |
|
105 case List((Markup.Empty, body)) => body.toList |
|
106 case (Markup(name, _), _) :: _ => err_unbalanced(name) |
|
107 } |
|
108 } |
|
109 |
|
110 def parse(source: CharSequence): XML.Tree = |
|
111 parse_body(source) match { |
|
112 case List(result) => result |
|
113 case Nil => XML.Text("") |
|
114 case _ => err("multiple results") |
|
115 } |
|
116 |
|
117 |
|
118 /* failsafe parsing */ |
|
119 |
|
120 private def markup_malformed(source: CharSequence) = |
|
121 XML.elem(Markup.MALFORMED, List(XML.Text(source.toString))) |
|
122 |
|
123 def parse_body_failsafe(source: CharSequence): XML.Body = |
|
124 { |
|
125 try { parse_body(source) } |
|
126 catch { case ERROR(_) => List(markup_malformed(source)) } |
|
127 } |
|
128 |
|
129 def parse_failsafe(source: CharSequence): XML.Tree = |
|
130 { |
|
131 try { parse(source) } |
|
132 catch { case ERROR(_) => markup_malformed(source) } |
|
133 } |
|
134 } |
|