src/Pure/General/yxml.ML
author wenzelm
Tue Aug 10 22:26:23 2010 +0200 (2010-08-10)
changeset 38266 492d377ecfe2
parent 38265 cc9fde54311f
child 38474 e498dc2eb576
permissions -rw-r--r--
type XML.body as basic data representation language;
tuned;
     1 (*  Title:      Pure/General/yxml.ML
     2     Author:     Makarius
     3 
     4 Efficient text representation of XML trees using extra characters X
     5 and Y -- no escaping, may nest marked text verbatim.
     6 
     7 Markup <elem att="val" ...>...body...</elem> is encoded as:
     8 
     9   X Y name Y att=val ... X
    10   ...
    11   body
    12   ...
    13   X Y X
    14 *)
    15 
    16 signature YXML =
    17 sig
    18   val escape_controls: string -> string
    19   val output_markup: Markup.T -> string * string
    20   val element: string -> XML.attributes -> string list -> string
    21   val string_of: XML.tree -> string
    22   val parse_body: string -> XML.body
    23   val parse: string -> XML.tree
    24 end;
    25 
    26 structure YXML: YXML =
    27 struct
    28 
    29 (** string representation **)
    30 
    31 (* idempotent recoding of certain low ASCII control characters *)
    32 
    33 fun pseudo_utf8 c =
    34   if Symbol.is_ascii_control c
    35   then chr 192 ^ chr (128 + ord c)
    36   else c;
    37 
    38 fun escape_controls str =
    39   if exists_string Symbol.is_ascii_control str
    40   then translate_string pseudo_utf8 str
    41   else str;
    42 
    43 
    44 (* markers *)
    45 
    46 val X = Symbol.ENQ;
    47 val Y = Symbol.ACK;
    48 val XY = X ^ Y;
    49 val XYX = XY ^ X;
    50 
    51 
    52 (* output *)
    53 
    54 fun output_markup (markup as (name, atts)) =
    55   if Markup.is_none markup then Markup.no_output
    56   else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX);
    57 
    58 fun element name atts body =
    59   let val (pre, post) = output_markup (name, atts)
    60   in pre ^ implode body ^ post end;
    61 
    62 fun string_of t =
    63   let
    64     fun attrib (a, x) =
    65       Buffer.add Y #>
    66       Buffer.add a #> Buffer.add "=" #> Buffer.add x;
    67     fun tree (XML.Elem ((name, atts), ts)) =
    68           Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #>
    69           fold tree ts #>
    70           Buffer.add XYX
    71       | tree (XML.Text s) = Buffer.add s;
    72   in Buffer.empty |> tree t |> Buffer.content end;
    73 
    74 
    75 
    76 (** efficient YXML parsing **)
    77 
    78 local
    79 
    80 (* splitting *)
    81 
    82 fun is_char s c = ord s = Char.ord c;
    83 
    84 val split_string =
    85   Substring.full #>
    86   Substring.tokens (is_char X) #>
    87   map (Substring.fields (is_char Y) #> map Substring.string);
    88 
    89 
    90 (* structural errors *)
    91 
    92 fun err msg = raise Fail ("Malformed YXML encoding: " ^ msg);
    93 fun err_attribute () = err "bad attribute";
    94 fun err_element () = err "bad element";
    95 fun err_unbalanced "" = err "unbalanced element"
    96   | err_unbalanced name = err ("unbalanced element " ^ quote name);
    97 
    98 
    99 (* stack operations *)
   100 
   101 fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending;
   102 
   103 fun push "" _ _ = err_element ()
   104   | push name atts pending = ((name, atts), []) :: pending;
   105 
   106 fun pop ((("", _), _) :: _) = err_unbalanced ""
   107   | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending;
   108 
   109 
   110 (* parsing *)
   111 
   112 fun parse_attrib s =
   113   (case first_field "=" s of
   114     NONE => err_attribute ()
   115   | SOME ("", _) => err_attribute ()
   116   | SOME att => att);
   117 
   118 fun parse_chunk ["", ""] = pop
   119   | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts)
   120   | parse_chunk txts = fold (add o XML.Text) txts;
   121 
   122 in
   123 
   124 fun parse_body source =
   125   (case fold parse_chunk (split_string source) [(("", []), [])] of
   126     [(("", _), result)] => rev result
   127   | ((name, _), _) :: _ => err_unbalanced name);
   128 
   129 fun parse source =
   130   (case parse_body source of
   131     [result] => result
   132   | [] => XML.Text ""
   133   | _ => err "multiple results");
   134 
   135 end;
   136 
   137 end;
   138