src/Pure/PIDE/yxml.ML
changeset 44698 0385292321a0
parent 43782 834de29572d5
child 46831 4a6085849a37
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/Pure/PIDE/yxml.ML	Sun Sep 04 15:21:50 2011 +0200
     1.3 @@ -0,0 +1,148 @@
     1.4 +(*  Title:      Pure/PIDE/yxml.ML
     1.5 +    Author:     Makarius
     1.6 +
     1.7 +Efficient text representation of XML trees using extra characters X
     1.8 +and Y -- no escaping, may nest marked text verbatim.  Suitable for
     1.9 +direct inlining into plain text.
    1.10 +
    1.11 +Markup <elem att="val" ...>...body...</elem> is encoded as:
    1.12 +
    1.13 +  X Y name Y att=val ... X
    1.14 +  ...
    1.15 +  body
    1.16 +  ...
    1.17 +  X Y X
    1.18 +*)
    1.19 +
    1.20 +signature YXML =
    1.21 +sig
    1.22 +  val X: Symbol.symbol
    1.23 +  val Y: Symbol.symbol
    1.24 +  val embed_controls: string -> string
    1.25 +  val detect: string -> bool
    1.26 +  val output_markup: Markup.T -> string * string
    1.27 +  val element: string -> XML.attributes -> string list -> string
    1.28 +  val string_of_body: XML.body -> string
    1.29 +  val string_of: XML.tree -> string
    1.30 +  val parse_body: string -> XML.body
    1.31 +  val parse: string -> XML.tree
    1.32 +end;
    1.33 +
    1.34 +structure YXML: YXML =
    1.35 +struct
    1.36 +
    1.37 +(** string representation **)
    1.38 +
    1.39 +(* idempotent recoding of certain low ASCII control characters *)
    1.40 +
    1.41 +fun pseudo_utf8 c =
    1.42 +  if Symbol.is_ascii_control c
    1.43 +  then chr 192 ^ chr (128 + ord c)
    1.44 +  else c;
    1.45 +
    1.46 +fun embed_controls str =
    1.47 +  if exists_string Symbol.is_ascii_control str
    1.48 +  then translate_string pseudo_utf8 str
    1.49 +  else str;
    1.50 +
    1.51 +
    1.52 +(* markers *)
    1.53 +
    1.54 +val X = chr 5;
    1.55 +val Y = chr 6;
    1.56 +val XY = X ^ Y;
    1.57 +val XYX = XY ^ X;
    1.58 +
    1.59 +val detect = exists_string (fn s => s = X orelse s = Y);
    1.60 +
    1.61 +
    1.62 +(* output *)
    1.63 +
    1.64 +fun output_markup (markup as (name, atts)) =
    1.65 +  if Markup.is_empty markup then Markup.no_output
    1.66 +  else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX);
    1.67 +
    1.68 +fun element name atts body =
    1.69 +  let val (pre, post) = output_markup (name, atts)
    1.70 +  in pre ^ implode body ^ post end;
    1.71 +
    1.72 +fun string_of_body body =
    1.73 +  let
    1.74 +    fun attrib (a, x) =
    1.75 +      Buffer.add Y #>
    1.76 +      Buffer.add a #> Buffer.add "=" #> Buffer.add x;
    1.77 +    fun tree (XML.Elem ((name, atts), ts)) =
    1.78 +          Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #>
    1.79 +          trees ts #>
    1.80 +          Buffer.add XYX
    1.81 +      | tree (XML.Text s) = Buffer.add s
    1.82 +    and trees ts = fold tree ts;
    1.83 +  in Buffer.empty |> trees body |> Buffer.content end;
    1.84 +
    1.85 +val string_of = string_of_body o single;
    1.86 +
    1.87 +
    1.88 +
    1.89 +(** efficient YXML parsing **)
    1.90 +
    1.91 +local
    1.92 +
    1.93 +(* splitting *)
    1.94 +
    1.95 +fun is_char s c = ord s = Char.ord c;
    1.96 +
    1.97 +val split_string =
    1.98 +  Substring.full #>
    1.99 +  Substring.tokens (is_char X) #>
   1.100 +  map (Substring.fields (is_char Y) #> map Substring.string);
   1.101 +
   1.102 +
   1.103 +(* structural errors *)
   1.104 +
   1.105 +fun err msg = raise Fail ("Malformed YXML encoding: " ^ msg);
   1.106 +fun err_attribute () = err "bad attribute";
   1.107 +fun err_element () = err "bad element";
   1.108 +fun err_unbalanced "" = err "unbalanced element"
   1.109 +  | err_unbalanced name = err ("unbalanced element " ^ quote name);
   1.110 +
   1.111 +
   1.112 +(* stack operations *)
   1.113 +
   1.114 +fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending;
   1.115 +
   1.116 +fun push "" _ _ = err_element ()
   1.117 +  | push name atts pending = ((name, atts), []) :: pending;
   1.118 +
   1.119 +fun pop ((("", _), _) :: _) = err_unbalanced ""
   1.120 +  | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending;
   1.121 +
   1.122 +
   1.123 +(* parsing *)
   1.124 +
   1.125 +fun parse_attrib s =
   1.126 +  (case first_field "=" s of
   1.127 +    NONE => err_attribute ()
   1.128 +  | SOME ("", _) => err_attribute ()
   1.129 +  | SOME att => att);
   1.130 +
   1.131 +fun parse_chunk ["", ""] = pop
   1.132 +  | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts)
   1.133 +  | parse_chunk txts = fold (add o XML.Text) txts;
   1.134 +
   1.135 +in
   1.136 +
   1.137 +fun parse_body source =
   1.138 +  (case fold parse_chunk (split_string source) [(("", []), [])] of
   1.139 +    [(("", _), result)] => rev result
   1.140 +  | ((name, _), _) :: _ => err_unbalanced name);
   1.141 +
   1.142 +fun parse source =
   1.143 +  (case parse_body source of
   1.144 +    [result] => result
   1.145 +  | [] => XML.Text ""
   1.146 +  | _ => err "multiple results");
   1.147 +
   1.148 +end;
   1.149 +
   1.150 +end;
   1.151 +