|
1 (* Title: Pure/PIDE/yxml.ML |
|
2 Author: Makarius |
|
3 |
|
4 Efficient text representation of XML trees using extra characters X |
|
5 and Y -- no escaping, may nest marked text verbatim. Suitable for |
|
6 direct inlining into plain text. |
|
7 |
|
8 Markup <elem att="val" ...>...body...</elem> is encoded as: |
|
9 |
|
10 X Y name Y att=val ... X |
|
11 ... |
|
12 body |
|
13 ... |
|
14 X Y X |
|
15 *) |
|
16 |
|
17 signature YXML = |
|
18 sig |
|
19 val X: Symbol.symbol |
|
20 val Y: Symbol.symbol |
|
21 val embed_controls: string -> string |
|
22 val detect: string -> bool |
|
23 val output_markup: Markup.T -> string * string |
|
24 val element: string -> XML.attributes -> string list -> string |
|
25 val string_of_body: XML.body -> string |
|
26 val string_of: XML.tree -> string |
|
27 val parse_body: string -> XML.body |
|
28 val parse: string -> XML.tree |
|
29 end; |
|
30 |
|
31 structure YXML: YXML = |
|
32 struct |
|
33 |
|
34 (** string representation **) |
|
35 |
|
36 (* idempotent recoding of certain low ASCII control characters *) |
|
37 |
|
38 fun pseudo_utf8 c = |
|
39 if Symbol.is_ascii_control c |
|
40 then chr 192 ^ chr (128 + ord c) |
|
41 else c; |
|
42 |
|
43 fun embed_controls str = |
|
44 if exists_string Symbol.is_ascii_control str |
|
45 then translate_string pseudo_utf8 str |
|
46 else str; |
|
47 |
|
48 |
|
49 (* markers *) |
|
50 |
|
51 val X = chr 5; |
|
52 val Y = chr 6; |
|
53 val XY = X ^ Y; |
|
54 val XYX = XY ^ X; |
|
55 |
|
56 val detect = exists_string (fn s => s = X orelse s = Y); |
|
57 |
|
58 |
|
59 (* output *) |
|
60 |
|
61 fun output_markup (markup as (name, atts)) = |
|
62 if Markup.is_empty markup then Markup.no_output |
|
63 else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX); |
|
64 |
|
65 fun element name atts body = |
|
66 let val (pre, post) = output_markup (name, atts) |
|
67 in pre ^ implode body ^ post end; |
|
68 |
|
69 fun string_of_body body = |
|
70 let |
|
71 fun attrib (a, x) = |
|
72 Buffer.add Y #> |
|
73 Buffer.add a #> Buffer.add "=" #> Buffer.add x; |
|
74 fun tree (XML.Elem ((name, atts), ts)) = |
|
75 Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #> |
|
76 trees ts #> |
|
77 Buffer.add XYX |
|
78 | tree (XML.Text s) = Buffer.add s |
|
79 and trees ts = fold tree ts; |
|
80 in Buffer.empty |> trees body |> Buffer.content end; |
|
81 |
|
82 val string_of = string_of_body o single; |
|
83 |
|
84 |
|
85 |
|
86 (** efficient YXML parsing **) |
|
87 |
|
88 local |
|
89 |
|
90 (* splitting *) |
|
91 |
|
92 fun is_char s c = ord s = Char.ord c; |
|
93 |
|
94 val split_string = |
|
95 Substring.full #> |
|
96 Substring.tokens (is_char X) #> |
|
97 map (Substring.fields (is_char Y) #> map Substring.string); |
|
98 |
|
99 |
|
100 (* structural errors *) |
|
101 |
|
102 fun err msg = raise Fail ("Malformed YXML encoding: " ^ msg); |
|
103 fun err_attribute () = err "bad attribute"; |
|
104 fun err_element () = err "bad element"; |
|
105 fun err_unbalanced "" = err "unbalanced element" |
|
106 | err_unbalanced name = err ("unbalanced element " ^ quote name); |
|
107 |
|
108 |
|
109 (* stack operations *) |
|
110 |
|
111 fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending; |
|
112 |
|
113 fun push "" _ _ = err_element () |
|
114 | push name atts pending = ((name, atts), []) :: pending; |
|
115 |
|
116 fun pop ((("", _), _) :: _) = err_unbalanced "" |
|
117 | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending; |
|
118 |
|
119 |
|
120 (* parsing *) |
|
121 |
|
122 fun parse_attrib s = |
|
123 (case first_field "=" s of |
|
124 NONE => err_attribute () |
|
125 | SOME ("", _) => err_attribute () |
|
126 | SOME att => att); |
|
127 |
|
128 fun parse_chunk ["", ""] = pop |
|
129 | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts) |
|
130 | parse_chunk txts = fold (add o XML.Text) txts; |
|
131 |
|
132 in |
|
133 |
|
134 fun parse_body source = |
|
135 (case fold parse_chunk (split_string source) [(("", []), [])] of |
|
136 [(("", _), result)] => rev result |
|
137 | ((name, _), _) :: _ => err_unbalanced name); |
|
138 |
|
139 fun parse source = |
|
140 (case parse_body source of |
|
141 [result] => result |
|
142 | [] => XML.Text "" |
|
143 | _ => err "multiple results"); |
|
144 |
|
145 end; |
|
146 |
|
147 end; |
|
148 |