Why XML notation?
author wenzelm
Thu Apr 03 16:03:59 2008 +0200 (2008-04-03)
changeset 26528 944f9bf26d2d
parent 26527 c392354a1b79
child 26529 03ad378ed5f0
Why XML notation?
src/Pure/General/yxml.ML
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/Pure/General/yxml.ML	Thu Apr 03 16:03:59 2008 +0200
     1.3 @@ -0,0 +1,118 @@
     1.4 +(*  Title:      Pure/General/yxml.ML
     1.5 +    ID:         $Id$
     1.6 +    Author:     Makarius
     1.7 +
     1.8 +Why XML notation?  Efficient text representation of XML trees, using
     1.9 +extra characters ETX and EOT -- no escaping, may nest marked text
    1.10 +verbatim.  Markup <elem att="val" ...>...body...</elem> is encoded as:
    1.11 +
    1.12 +  ETX EOT name EOT att=val ... ETX
    1.13 +  ...
    1.14 +  body
    1.15 +  ...
    1.16 +  ETX EOT ETX
    1.17 +*)
    1.18 +
    1.19 +signature YXML =
    1.20 +sig
    1.21 +  val detect: string -> bool  (*test the beginning of a string for YXML markup*)
    1.22 +  val element: string -> XML.attributes -> string list -> string  (*encode name and attributes around body strings that are pasted verbatim*)
    1.23 +  val string_of: XML.tree -> string  (*encode a whole tree*)
    1.24 +  val parse_body: string -> XML.tree list  (*decode a sequence of trees; raises Fail on malformed input*)
    1.25 +  val parse: string -> XML.tree  (*decode exactly one root element; raises Fail otherwise*)
    1.26 +end;
    1.27 +
    1.28 +structure YXML: YXML =
    1.29 +struct
    1.30 +
    1.31 +(* string representation *)
    1.32 +
    1.33 +val ETX = Symbol.ETX;  (*delimiter that opens and closes every markup chunk*)
    1.34 +val EOT = Symbol.EOT;  (*delimiter in front of the element name and of each attribute inside a markup chunk*)
    1.35 +(*true iff s starts with ETX EOT, the prefix of every encoded element; the prefix test is also well-defined for the empty string*)
    1.36 +fun detect s = String.isPrefix (ETX ^ EOT) s;
    1.37 +
    1.38 +
    1.39 +(*naive pasting of strings: ETX EOT name {EOT att=val} ETX, then the body strings verbatim, then ETX EOT ETX*)
    1.40 +fun element name atts body =
    1.41 +  let val attribs = map (fn (key, value) => EOT ^ key ^ "=" ^ value) atts in
    1.42 +    ETX ^ EOT ^ name ^ implode attribs ^ ETX ^ implode body ^ ETX ^ EOT ^ ETX
    1.43 +  end;
    1.44 +
    1.45 +(*scalable buffer output: same text as element, accumulated in a single Buffer for the whole tree*)
    1.46 +fun string_of t =
    1.47 +  let
    1.48 +    (*EOT key=value for one attribute*)
    1.49 +    fun add_attrib (key, value) buf =
    1.50 +      buf |> Buffer.add EOT |> Buffer.add key |> Buffer.add "=" |> Buffer.add value;
    1.51 +    (*opening chunk: ETX EOT name attributes ETX*)
    1.52 +    fun add_open name atts buf =
    1.53 +      buf |> Buffer.add ETX |> Buffer.add EOT |> Buffer.add name
    1.54 +      |> fold add_attrib atts |> Buffer.add ETX;
    1.55 +    (*closing chunk: ETX EOT ETX*)
    1.56 +    fun add_close buf = buf |> Buffer.add ETX |> Buffer.add EOT |> Buffer.add ETX;
    1.57 +    (*elements are wrapped recursively; Text and Output strings are added as they are*)
    1.58 +    fun add_tree (XML.Elem (name, atts, ts)) buf =
    1.59 +          buf |> add_open name atts |> fold add_tree ts |> add_close
    1.60 +      | add_tree (XML.Text s) buf = Buffer.add s buf
    1.61 +      | add_tree (XML.Output s) buf = Buffer.add s buf;
    1.62 +  in Buffer.content (add_tree t Buffer.empty) end;
    1.63 +
    1.64 +
    1.65 +(* efficient YXML parsing *)
    1.66 +
    1.67 +local
    1.68 +
    1.69 +(* errors *)
    1.70 +
    1.71 +fun err details = raise Fail ("Malformed YXML encoding: " ^ details);  (*every parse error is a Fail with this common prefix*)
    1.72 +fun err_attribute () = err "bad attribute";
    1.73 +fun err_element () = err "bad element";
    1.74 +fun err_unbalanced name =  (*the name is quoted in the message unless it is empty*)
    1.75 +  err (if name = "" then "unbalanced element" else "unbalanced element " ^ quote name);
    1.76 +
    1.77 +
    1.78 +(* stack operations *)
    1.79 +
    1.80 +fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending;  (*prepend x to the body of the top entry; there is no clause for an empty stack*)
    1.81 +(*open a new entry with empty body on top of the stack; an empty element name is rejected as "bad element"*)
    1.82 +fun push "" _ _ = err_element ()
    1.83 +  | push name atts pending = ((name, atts), []) :: pending;
    1.84 +(*close the top entry: its body was collected in reverse, so it is reversed and added as XML.Elem to the entry below; an entry named "" (the bottom entry set up by parse_body) cannot be closed*)
    1.85 +fun pop ((("", _), _) :: _) = err_unbalanced ""
    1.86 +  | pop (((name, atts), body) :: pending) = add (XML.Elem (name, atts, rev body)) pending;
    1.87 +
    1.88 +
    1.89 +(* parsers *)
    1.90 +
    1.91 +fun is_char s c = ord s = Char.ord c;  (*character predicate for String.fields/String.tokens: compares the code of c with ord s -- presumably the code of the first character of s, confirm against the library's ord*)
    1.92 +(*split "att=val" at the first "="; any further "=" stays in the value; an empty name or a missing "=" raises "bad attribute"*)
    1.93 +fun parse_attrib s =
    1.94 +  (case String.fields (is_char "=") s of
    1.95 +    "" :: _ => err_attribute ()
    1.96 +  | a :: (xs as _ :: _) => (a, space_implode "=" xs)
    1.97 +  | _ => err_attribute ());
    1.98 +(*turn one ETX-delimited chunk into a stack operation, decided by its EOT-separated fields; the order of the patterns matters*)
    1.99 +fun parse_chunk chunk =
   1.100 +  (case String.fields (is_char EOT) chunk of
   1.101 +    ["", ""] => pop  (*chunk is a lone EOT: end of element*)
   1.102 +  | "" :: name :: atts => push name (map parse_attrib atts)  (*EOT name {EOT att=val}: start of element*)
   1.103 +  | [_] => add (XML.Text chunk)  (*no EOT at all: plain text*)
   1.104 +  | _ => err "bad text");  (*EOT occurs in a chunk that does not start with EOT*)
   1.105 +
   1.106 +in
   1.107 +(*decode a sequence of trees: split source at ETX (String.tokens, so empty chunks are dropped) and fold the chunks over a stack whose bottom entry is named ""*)
   1.108 +fun parse_body source =
   1.109 +  (case fold parse_chunk (String.tokens (is_char ETX) source) [(("", []), [])] of
   1.110 +    [(("", _), result)] => rev result  (*only the bottom entry is left: its reversed body is the result*)
   1.111 +  | ((name, _), _) :: _ => err_unbalanced name);  (*an element is still open at the end of input*)
   1.112 +(*decode a complete document: the parsed body must consist of exactly one element, which is returned*)
   1.113 +fun parse source =
   1.114 +  let fun root [tree as XML.Elem _] = tree
   1.115 +        | root _ = err "no root element"
   1.116 +  in root (parse_body source) end;
   1.117 +
   1.118 +end;
   1.119 +
   1.120 +end;
   1.121 +