src/Pure/PIDE/yxml.ML
author wenzelm
Sun Mar 10 14:19:30 2019 +0100 (4 months ago ago)
changeset 70070 be04e9a053a7
parent 70026 d28e8199dcb9
permissions -rw-r--r--
markup and document markers for some meta data from "Dublin Core Metadata Element Set";
     1 (*  Title:      Pure/PIDE/yxml.ML
     2     Author:     Makarius
     3 
     4 Efficient text representation of XML trees using extra characters X
     5 and Y -- no escaping, may nest marked text verbatim.  Suitable for
     6 direct inlining into plain text.
     7 
     8 Markup <elem att="val" ...>...body...</elem> is encoded as:
     9 
    10   X Y name Y att=val ... X
    11   ...
    12   body
    13   ...
    14   X Y X
    15 *)
    16 
    17 signature YXML =
    18 sig
    19   val X: Symbol.symbol
    20   val Y: Symbol.symbol
    21   val embed_controls: string -> string
    22   val detect: string -> bool
    23   val output_markup: Markup.T -> string * string
    24   val buffer_body: XML.body -> Buffer.T -> Buffer.T
    25   val buffer: XML.tree -> Buffer.T -> Buffer.T
    26   val chunks_of_body: XML.body -> string list
    27   val string_of_body: XML.body -> string
    28   val string_of: XML.tree -> string
    29   val output_markup_elem: Markup.T -> (string * string) * string
    30   val parse_body: string -> XML.body
    31   val parse: string -> XML.tree
    32   val content_of: string -> string
    33 end;
    34 
    35 structure YXML: YXML =
    36 struct
    37 
    38 (** string representation **)
    39 
    40 (* idempotent recoding of certain low ASCII control characters *)
    41 
    42 fun pseudo_utf8 c =
    43   if Symbol.is_ascii_control c
    44   then chr 192 ^ chr (128 + ord c)
    45   else c;
    46 
    47 fun embed_controls str =
    48   if exists_string Symbol.is_ascii_control str
    49   then translate_string pseudo_utf8 str
    50   else str;
    51 
    52 
    53 (* markers *)
    54 
    55 val X_char = Char.chr 5;
    56 val Y_char = Char.chr 6;
    57 
    58 val X = String.str X_char;
    59 val Y = String.str Y_char;
    60 val XY = X ^ Y;
    61 val XYX = XY ^ X;
    62 
    63 fun detect s = Char.contains s X_char orelse Char.contains s Y_char;
    64 
    65 
    66 (* output *)
    67 
    68 fun output_markup (markup as (name, atts)) =
    69   if Markup.is_empty markup then Markup.no_output
    70   else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX);
    71 
    72 fun buffer_attrib (a, x) =
    73   Buffer.add Y #> Buffer.add a #> Buffer.add "=" #> Buffer.add x;
    74 
    75 fun buffer_body ts = fold buffer ts
    76 and buffer (XML.Elem ((name, atts), ts)) =
    77       Buffer.add XY #> Buffer.add name #> fold buffer_attrib atts #> Buffer.add X #>
    78       buffer_body ts #>
    79       Buffer.add XYX
    80   | buffer (XML.Text s) = Buffer.add s
    81 
    82 fun chunks_of_body body = Buffer.empty |> buffer_body body |> Buffer.chunks;
    83 fun string_of_body body = Buffer.empty |> buffer_body body |> Buffer.content;
    84 val string_of = string_of_body o single;
    85 
    86 
    87 (* wrapped elements *)
    88 
    89 val Z = chr 0;
    90 val Z_text = [XML.Text Z];
    91 
    92 fun output_markup_elem markup =
    93   let val [bg1, bg2, en] = space_explode Z (string_of (XML.wrap_elem ((markup, Z_text), Z_text)))
    94   in ((bg1, bg2), en) end;
    95 
    96 
    97 
    98 (** efficient YXML parsing **)
    99 
   100 local
   101 
   102 (* splitting *)
   103 
   104 val split_string =
   105   Substring.full #>
   106   Substring.tokens (fn c => c = X_char) #>
   107   map (Substring.fields (fn c => c = Y_char) #> map Substring.string);
   108 
   109 
   110 (* structural errors *)
   111 
   112 fun err msg = raise Fail ("Malformed YXML: " ^ msg);
   113 fun err_attribute () = err "bad attribute";
   114 fun err_element () = err "bad element";
   115 fun err_unbalanced "" = err "unbalanced element"
   116   | err_unbalanced name = err ("unbalanced element " ^ quote name);
   117 
   118 
   119 (* stack operations *)
   120 
   121 fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending;
   122 
   123 fun push "" _ _ = err_element ()
   124   | push name atts pending = ((name, atts), []) :: pending;
   125 
   126 fun pop ((("", _), _) :: _) = err_unbalanced ""
   127   | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending;
   128 
   129 
   130 (* parsing *)
   131 
   132 fun parse_attrib s =
   133   (case first_field "=" s of
   134     NONE => err_attribute ()
   135   | SOME ("", _) => err_attribute ()
   136   | SOME att => att);
   137 
   138 fun parse_chunk ["", ""] = pop
   139   | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts)
   140   | parse_chunk txts = fold (add o XML.Text) txts;
   141 
   142 in
   143 
   144 fun parse_body source =
   145   (case fold parse_chunk (split_string source) [(("", []), [])] of
   146     [(("", _), result)] => rev result
   147   | ((name, _), _) :: _ => err_unbalanced name);
   148 
   149 fun parse source =
   150   (case parse_body source of
   151     [result] => result
   152   | [] => XML.Text ""
   153   | _ => err "multiple results");
   154 
   155 end;
   156 
   157 val content_of = parse_body #> XML.content_of;
   158 
   159 end;
   160