src/Pure/PIDE/yxml.ML
changeset 44698 0385292321a0
parent 43782 834de29572d5
child 46831 4a6085849a37
equal deleted inserted replaced
44697:b99dfee76538 44698:0385292321a0
       
     1 (*  Title:      Pure/PIDE/yxml.ML
       
     2     Author:     Makarius
       
     3 
       
     4 Efficient text representation of XML trees using extra characters X
       
     5 and Y -- no escaping, may nest marked text verbatim.  Suitable for
       
     6 direct inlining into plain text.
       
     7 
       
     8 Markup <elem att="val" ...>...body...</elem> is encoded as:
       
     9 
       
    10   X Y name Y att=val ... X
       
    11   ...
       
    12   body
       
    13   ...
       
    14   X Y X
       
    15 *)
       
    16 
       
    17 signature YXML =
       
    18 sig
       
    19   val X: Symbol.symbol
       
    20   val Y: Symbol.symbol
       
    21   val embed_controls: string -> string
       
    22   val detect: string -> bool
       
    23   val output_markup: Markup.T -> string * string
       
    24   val element: string -> XML.attributes -> string list -> string
       
    25   val string_of_body: XML.body -> string
       
    26   val string_of: XML.tree -> string
       
    27   val parse_body: string -> XML.body
       
    28   val parse: string -> XML.tree
       
    29 end;
       
    30 
       
    31 structure YXML: YXML =
       
    32 struct
       
    33 
       
    34 (** string representation **)
       
    35 
       
    36 (* idempotent recoding of certain low ASCII control characters *)
       
    37 
       
    38 fun pseudo_utf8 c =
       
    39   if Symbol.is_ascii_control c
       
    40   then chr 192 ^ chr (128 + ord c)
       
    41   else c;
       
    42 
       
    43 fun embed_controls str =
       
    44   if exists_string Symbol.is_ascii_control str
       
    45   then translate_string pseudo_utf8 str
       
    46   else str;
       
    47 
       
    48 
       
    49 (* markers *)
       
    50 
       
    51 val X = chr 5;
       
    52 val Y = chr 6;
       
    53 val XY = X ^ Y;
       
    54 val XYX = XY ^ X;
       
    55 
       
    56 val detect = exists_string (fn s => s = X orelse s = Y);
       
    57 
       
    58 
       
    59 (* output *)
       
    60 
       
    61 fun output_markup (markup as (name, atts)) =
       
    62   if Markup.is_empty markup then Markup.no_output
       
    63   else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX);
       
    64 
       
    65 fun element name atts body =
       
    66   let val (pre, post) = output_markup (name, atts)
       
    67   in pre ^ implode body ^ post end;
       
    68 
       
    69 fun string_of_body body =
       
    70   let
       
    71     fun attrib (a, x) =
       
    72       Buffer.add Y #>
       
    73       Buffer.add a #> Buffer.add "=" #> Buffer.add x;
       
    74     fun tree (XML.Elem ((name, atts), ts)) =
       
    75           Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #>
       
    76           trees ts #>
       
    77           Buffer.add XYX
       
    78       | tree (XML.Text s) = Buffer.add s
       
    79     and trees ts = fold tree ts;
       
    80   in Buffer.empty |> trees body |> Buffer.content end;
       
    81 
       
    82 val string_of = string_of_body o single;
       
    83 
       
    84 
       
    85 
       
    86 (** efficient YXML parsing **)
       
    87 
       
    88 local
       
    89 
       
    90 (* splitting *)
       
    91 
       
    92 fun is_char s c = ord s = Char.ord c;
       
    93 
       
    94 val split_string =
       
    95   Substring.full #>
       
    96   Substring.tokens (is_char X) #>
       
    97   map (Substring.fields (is_char Y) #> map Substring.string);
       
    98 
       
    99 
       
   100 (* structural errors *)
       
   101 
       
   102 fun err msg = raise Fail ("Malformed YXML encoding: " ^ msg);
       
   103 fun err_attribute () = err "bad attribute";
       
   104 fun err_element () = err "bad element";
       
   105 fun err_unbalanced "" = err "unbalanced element"
       
   106   | err_unbalanced name = err ("unbalanced element " ^ quote name);
       
   107 
       
   108 
       
   109 (* stack operations *)
       
   110 
       
   111 fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending;
       
   112 
       
   113 fun push "" _ _ = err_element ()
       
   114   | push name atts pending = ((name, atts), []) :: pending;
       
   115 
       
   116 fun pop ((("", _), _) :: _) = err_unbalanced ""
       
   117   | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending;
       
   118 
       
   119 
       
   120 (* parsing *)
       
   121 
       
   122 fun parse_attrib s =
       
   123   (case first_field "=" s of
       
   124     NONE => err_attribute ()
       
   125   | SOME ("", _) => err_attribute ()
       
   126   | SOME att => att);
       
   127 
       
   128 fun parse_chunk ["", ""] = pop
       
   129   | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts)
       
   130   | parse_chunk txts = fold (add o XML.Text) txts;
       
   131 
       
   132 in
       
   133 
       
   134 fun parse_body source =
       
   135   (case fold parse_chunk (split_string source) [(("", []), [])] of
       
   136     [(("", _), result)] => rev result
       
   137   | ((name, _), _) :: _ => err_unbalanced name);
       
   138 
       
   139 fun parse source =
       
   140   (case parse_body source of
       
   141     [result] => result
       
   142   | [] => XML.Text ""
       
   143   | _ => err "multiple results");
       
   144 
       
   145 end;
       
   146 
       
   147 end;
       
   148