src/Pure/PIDE/yxml.ML
author wenzelm
Wed Mar 07 23:21:24 2012 +0100 (2012-03-07 ago)
changeset 46832 050e344825c8
parent 46831 4a6085849a37
child 49656 7ff712de5747
permissions -rw-r--r--
tuned message (cf. ML version);
     1 (*  Title:      Pure/PIDE/yxml.ML
     2     Author:     Makarius
     3 
     4 Efficient text representation of XML trees using extra characters X
     5 and Y -- no escaping, may nest marked text verbatim.  Suitable for
     6 direct inlining into plain text.
     7 
     8 Markup <elem att="val" ...>...body...</elem> is encoded as:
     9 
    10   X Y name Y att=val ... X
    11   ...
    12   body
    13   ...
    14   X Y X
    15 *)
    16 
    17 signature YXML =
    18 sig
    19   val X: Symbol.symbol
    20   val Y: Symbol.symbol
    21   val embed_controls: string -> string
    22   val detect: string -> bool
    23   val output_markup: Markup.T -> string * string
    24   val string_of_body: XML.body -> string
    25   val string_of: XML.tree -> string
    26   val parse_body: string -> XML.body
    27   val parse: string -> XML.tree
    28 end;
    29 
    30 structure YXML: YXML =
    31 struct
    32 
    33 (** string representation **)
    34 
    35 (* idempotent recoding of certain low ASCII control characters *)
    36 
    37 fun pseudo_utf8 c =
    38   if Symbol.is_ascii_control c
    39   then chr 192 ^ chr (128 + ord c)
    40   else c;
    41 
    42 fun embed_controls str =
    43   if exists_string Symbol.is_ascii_control str
    44   then translate_string pseudo_utf8 str
    45   else str;
    46 
    47 
    48 (* markers *)
    49 
    50 val X = chr 5;
    51 val Y = chr 6;
    52 val XY = X ^ Y;
    53 val XYX = XY ^ X;
    54 
    55 val detect = exists_string (fn s => s = X orelse s = Y);
    56 
    57 
    58 (* output *)
    59 
    60 fun output_markup (markup as (name, atts)) =
    61   if Markup.is_empty markup then Markup.no_output
    62   else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX);
    63 
    64 fun string_of_body body =
    65   let
    66     fun attrib (a, x) = Buffer.add Y #> Buffer.add a #> Buffer.add "=" #> Buffer.add x;
    67     fun tree (XML.Elem ((name, atts), ts)) =
    68           Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #>
    69           trees ts #>
    70           Buffer.add XYX
    71       | tree (XML.Text s) = Buffer.add s
    72     and trees ts = fold tree ts;
    73   in Buffer.empty |> trees body |> Buffer.content end;
    74 
    75 val string_of = string_of_body o single;
    76 
    77 
    78 
    79 (** efficient YXML parsing **)
    80 
    81 local
    82 
    83 (* splitting *)
    84 
    85 fun is_char s c = ord s = Char.ord c;
    86 
    87 val split_string =
    88   Substring.full #>
    89   Substring.tokens (is_char X) #>
    90   map (Substring.fields (is_char Y) #> map Substring.string);
    91 
    92 
    93 (* structural errors *)
    94 
    95 fun err msg = raise Fail ("Malformed YXML: " ^ msg);
    96 fun err_attribute () = err "bad attribute";
    97 fun err_element () = err "bad element";
    98 fun err_unbalanced "" = err "unbalanced element"
    99   | err_unbalanced name = err ("unbalanced element " ^ quote name);
   100 
   101 
   102 (* stack operations *)
   103 
   104 fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending;
   105 
   106 fun push "" _ _ = err_element ()
   107   | push name atts pending = ((name, atts), []) :: pending;
   108 
   109 fun pop ((("", _), _) :: _) = err_unbalanced ""
   110   | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending;
   111 
   112 
   113 (* parsing *)
   114 
   115 fun parse_attrib s =
   116   (case first_field "=" s of
   117     NONE => err_attribute ()
   118   | SOME ("", _) => err_attribute ()
   119   | SOME att => att);
   120 
   121 fun parse_chunk ["", ""] = pop
   122   | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts)
   123   | parse_chunk txts = fold (add o XML.Text) txts;
   124 
   125 in
   126 
   127 fun parse_body source =
   128   (case fold parse_chunk (split_string source) [(("", []), [])] of
   129     [(("", _), result)] => rev result
   130   | ((name, _), _) :: _ => err_unbalanced name);
   131 
   132 fun parse source =
   133   (case parse_body source of
   134     [result] => result
   135   | [] => XML.Text ""
   136   | _ => err "multiple results");
   137 
   138 end;
   139 
   140 end;
   141