replaced ETX/EOT by ENQ/ACK, which are less likely to be interpreted by tty etc.;
authorwenzelm
Thu Apr 03 18:42:38 2008 +0200 (2008-04-03)
changeset 26540173d548ce9d2
parent 26539 a0754be538ab
child 26541 14b268974c4b
replaced ETX/EOT by ENQ/ACK, which are less likely to be interpreted by tty etc.;
added output_markup, parse_element;
detect: check XY;
tuned comments;
tuned;
src/Pure/General/yxml.ML
     1.1 --- a/src/Pure/General/yxml.ML	Thu Apr 03 18:42:37 2008 +0200
     1.2 +++ b/src/Pure/General/yxml.ML	Thu Apr 03 18:42:38 2008 +0200
     1.3 @@ -2,68 +2,84 @@
     1.4      ID:         $Id$
     1.5      Author:     Makarius
     1.6  
     1.7 -Why XML notation?  Efficient text representation of XML trees, using
     1.8 -extra characters ETX and EOT -- no escaping, may nest marked text
     1.9 -verbatim.  Markup <elem att="val" ...>...body...</elem> is encoded as:
    1.10 +Efficient text representation of XML trees using extra characters X
    1.11 +and Y -- no escaping, may nest marked text verbatim.
    1.12  
    1.13 -  ETX EOT name EOT att=val ... ETX
    1.14 +Markup <elem att="val" ...>...body...</elem> is encoded as:
    1.15 +
    1.16 +  X Y name Y att=val ... X
    1.17    ...
    1.18    body
    1.19    ...
    1.20 -  ETX EOT ETX
    1.21 +  X Y X
    1.22  *)
    1.23  
    1.24  signature YXML =
    1.25  sig
    1.26    val detect: string -> bool
    1.27 +  val output_markup: Markup.T -> string * string
    1.28    val element: string -> XML.attributes -> string list -> string
    1.29    val string_of: XML.tree -> string
    1.30    val parse_body: string -> XML.tree list
    1.31 +  val parse_element: string -> string * XML.attributes * XML.tree list
    1.32    val parse: string -> XML.tree
    1.33  end;
    1.34  
    1.35  structure YXML: YXML =
    1.36  struct
    1.37  
    1.38 -(* string representation *)
    1.39 +(** string representation **)
    1.40  
    1.41 -val ETX = Symbol.ETX;
    1.42 -val EOT = Symbol.EOT;
    1.43 +val X = Symbol.ENQ;
    1.44 +val Y = Symbol.ACK;
    1.45 +val XY = X ^ Y;
    1.46 +val XYX = XY ^ X;
    1.47  
    1.48 -fun detect s = ord s = ord EOT;
    1.49 +val detect = String.isPrefix XY;
    1.50  
    1.51  
    1.52 -(*naive pasting of strings*)
    1.53 +(* naive pasting of strings *)
    1.54 +
    1.55 +fun output_markup (name, atts) =
    1.56 +  (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX);
    1.57 +
    1.58  fun element name atts body =
    1.59 -  ETX ^ EOT ^ name ^ implode (map (fn (a, x) => EOT ^ a ^ "=" ^ x) atts) ^ ETX ^
    1.60 -  implode body ^
    1.61 -  ETX ^ EOT ^ ETX;
    1.62 +  let val (pre, post) = output_markup (name, atts)
    1.63 +  in pre ^ implode body ^ post end;
    1.64  
    1.65 -(*scalable buffer output*)
    1.66 +
    1.67 +(* scalable buffer output *)
    1.68 +
    1.69  fun string_of t =
    1.70    let
    1.71      fun attrib (a, x) =
    1.72 -      Buffer.add EOT #>
    1.73 +      Buffer.add Y #>
    1.74        Buffer.add a #> Buffer.add "=" #> Buffer.add x;
    1.75      fun tree (XML.Elem (name, atts, ts)) =
    1.76 -          Buffer.add ETX #>
    1.77 -          Buffer.add EOT #> Buffer.add name #>
    1.78 -          fold attrib atts #>
    1.79 -          Buffer.add ETX #>
    1.80 +          Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #>
    1.81            fold tree ts #>
    1.82 -          Buffer.add ETX #>
    1.83 -          Buffer.add EOT #>
    1.84 -          Buffer.add ETX
    1.85 +          Buffer.add XYX
    1.86        | tree (XML.Text s) = Buffer.add s
    1.87        | tree (XML.Output s) = Buffer.add s;
    1.88    in Buffer.empty |> tree t |> Buffer.content end;
    1.89  
    1.90  
    1.91 -(* efficient YXML parsing *)
    1.92 +
    1.93 +(** efficient YXML parsing **)
    1.94  
    1.95  local
    1.96  
    1.97 -(* errors *)
    1.98 +(* splitting *)
    1.99 +
   1.100 +fun is_char s c = ord s = Char.ord c;
   1.101 +
   1.102 +val split_string =
   1.103 +  Substring.full #>
   1.104 +  Substring.tokens (is_char X) #>
   1.105 +  map (Substring.fields (is_char Y) #> map Substring.string);
   1.106 +
   1.107 +
   1.108 +(* structural errors *)
   1.109  
   1.110  fun err msg = raise Fail ("Malformed YXML encoding: " ^ msg);
   1.111  fun err_attribute () = err "bad attribute";
   1.112 @@ -83,9 +99,7 @@
   1.113    | pop (((name, atts), body) :: pending) = add (XML.Elem (name, atts, rev body)) pending;
   1.114  
   1.115  
   1.116 -(* parsers *)
   1.117 -
   1.118 -fun is_char s c = ord s = Char.ord c;
   1.119 +(* parsing *)
   1.120  
   1.121  fun parse_attrib s =
   1.122    (case String.fields (is_char "=") s of
   1.123 @@ -93,17 +107,14 @@
   1.124    | "" :: _ => err_attribute ()
   1.125    | a :: xs => (a, space_implode "=" xs));
   1.126  
   1.127 -fun parse_chunk chunk =
   1.128 -  (case String.fields (is_char EOT) chunk of
   1.129 -    ["", ""] => pop
   1.130 -  | "" :: name :: atts => push name (map parse_attrib atts)
   1.131 -  | [_] => add (XML.Text chunk)
   1.132 -  | _ => err "bad text");
   1.133 +fun parse_chunk ["", ""] = pop
   1.134 +  | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts)
   1.135 +  | parse_chunk txts = fold (add o XML.Text) txts;
   1.136  
   1.137  in
   1.138  
   1.139  fun parse_body source =
   1.140 -  (case fold parse_chunk (String.tokens (is_char ETX) source) [(("", []), [])] of
   1.141 +  (case fold parse_chunk (split_string source) [(("", []), [])] of
   1.142      [(("", _), result)] => rev result
   1.143    | ((name, _), _) :: _ => err_unbalanced name);
   1.144  
   1.145 @@ -112,6 +123,8 @@
   1.146      [result as XML.Elem _] => result
   1.147    | _ => err "no root element");
   1.148  
   1.149 +val parse_element = parse #> (fn XML.Elem elem => elem);
   1.150 +
   1.151  end;
   1.152  
   1.153  end;