src/Pure/PIDE/yxml.ML
author wenzelm
Sat Mar 01 22:46:31 2014 +0100 (2014-03-01)
changeset 55828 42ac3cfb89f6
parent 49656 7ff712de5747
child 59433 9da5b2c61049
permissions -rw-r--r--
clarified language markup: added "delimited" property;
type Symbol_Pos.source preserves information about delimited outer tokens (e.g string, cartouche);
observe Completion.Language_Context only for delimited languages, which is important to complete keywords after undelimited inner tokens, e.g. "lemma A pro";
     1 (*  Title:      Pure/PIDE/yxml.ML
     2     Author:     Makarius
     3 
     4 Efficient text representation of XML trees using extra characters X
     5 and Y -- no escaping, may nest marked text verbatim.  Suitable for
     6 direct inlining into plain text.
     7 
     8 Markup <elem att="val" ...>...body...</elem> is encoded as:
     9 
    10   X Y name Y att=val ... X
    11   ...
    12   body
    13   ...
    14   X Y X
    15 *)
    16 
    17 signature YXML =
    18 sig
    19   val X: Symbol.symbol
    20   val Y: Symbol.symbol
    21   val embed_controls: string -> string
    22   val detect: string -> bool
    23   val output_markup: Markup.T -> string * string
    24   val string_of_body: XML.body -> string
    25   val string_of: XML.tree -> string
    26   val output_markup_elem: Markup.T -> (string * string) * string
    27   val parse_body: string -> XML.body
    28   val parse: string -> XML.tree
    29 end;
    30 
    31 structure YXML: YXML =
    32 struct
    33 
    34 (** string representation **)
    35 
    36 (* idempotent recoding of certain low ASCII control characters *)
    37 
    38 fun pseudo_utf8 c =
    39   if Symbol.is_ascii_control c
    40   then chr 192 ^ chr (128 + ord c)
    41   else c;
    42 
    43 fun embed_controls str =
    44   if exists_string Symbol.is_ascii_control str
    45   then translate_string pseudo_utf8 str
    46   else str;
    47 
    48 
    49 (* markers *)
    50 
    51 val X = chr 5;
    52 val Y = chr 6;
    53 val XY = X ^ Y;
    54 val XYX = XY ^ X;
    55 
    56 val detect = exists_string (fn s => s = X orelse s = Y);
    57 
    58 
    59 (* output *)
    60 
    61 fun output_markup (markup as (name, atts)) =
    62   if Markup.is_empty markup then Markup.no_output
    63   else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX);
    64 
    65 fun string_of_body body =
    66   let
    67     fun attrib (a, x) = Buffer.add Y #> Buffer.add a #> Buffer.add "=" #> Buffer.add x;
    68     fun tree (XML.Elem ((name, atts), ts)) =
    69           Buffer.add XY #> Buffer.add name #> fold attrib atts #> Buffer.add X #>
    70           trees ts #>
    71           Buffer.add XYX
    72       | tree (XML.Text s) = Buffer.add s
    73     and trees ts = fold tree ts;
    74   in Buffer.empty |> trees body |> Buffer.content end;
    75 
    76 val string_of = string_of_body o single;
    77 
    78 
    79 (* wrapped elements *)
    80 
    81 val Z = chr 0;
    82 val Z_text = [XML.Text Z];
    83 
    84 fun output_markup_elem markup =
    85   let val [bg1, bg2, en] = space_explode Z (string_of (XML.wrap_elem ((markup, Z_text), Z_text)))
    86   in ((bg1, bg2), en) end;
    87 
    88 
    89 
    90 (** efficient YXML parsing **)
    91 
    92 local
    93 
    94 (* splitting *)
    95 
    96 fun is_char s c = ord s = Char.ord c;
    97 
    98 val split_string =
    99   Substring.full #>
   100   Substring.tokens (is_char X) #>
   101   map (Substring.fields (is_char Y) #> map Substring.string);
   102 
   103 
   104 (* structural errors *)
   105 
   106 fun err msg = raise Fail ("Malformed YXML: " ^ msg);
   107 fun err_attribute () = err "bad attribute";
   108 fun err_element () = err "bad element";
   109 fun err_unbalanced "" = err "unbalanced element"
   110   | err_unbalanced name = err ("unbalanced element " ^ quote name);
   111 
   112 
   113 (* stack operations *)
   114 
   115 fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending;
   116 
   117 fun push "" _ _ = err_element ()
   118   | push name atts pending = ((name, atts), []) :: pending;
   119 
   120 fun pop ((("", _), _) :: _) = err_unbalanced ""
   121   | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending;
   122 
   123 
   124 (* parsing *)
   125 
   126 fun parse_attrib s =
   127   (case first_field "=" s of
   128     NONE => err_attribute ()
   129   | SOME ("", _) => err_attribute ()
   130   | SOME att => att);
   131 
   132 fun parse_chunk ["", ""] = pop
   133   | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts)
   134   | parse_chunk txts = fold (add o XML.Text) txts;
   135 
   136 in
   137 
   138 fun parse_body source =
   139   (case fold parse_chunk (split_string source) [(("", []), [])] of
   140     [(("", _), result)] => rev result
   141   | ((name, _), _) :: _ => err_unbalanced name);
   142 
   143 fun parse source =
   144   (case parse_body source of
   145     [result] => result
   146   | [] => XML.Text ""
   147   | _ => err "multiple results");
   148 
   149 end;
   150 
   151 end;
   152