src/Pure/General/yxml.ML
author wenzelm
Thu Apr 03 18:42:36 2008 +0200 (2008-04-03)
changeset 26538 d65504ffb47d
parent 26528 944f9bf26d2d
child 26540 173d548ce9d2
permissions -rw-r--r--
replaced ETX/EOT by ENQ/ACK, which are less likely to be interpreted by tty etc.;
wenzelm@26528
     1
(*  Title:      Pure/General/yxml.ML
wenzelm@26528
     2
    ID:         $Id$
wenzelm@26528
     3
    Author:     Makarius
wenzelm@26528
     4
wenzelm@26528
     5
Why XML notation?  Efficient text representation of XML trees, using
wenzelm@26528
     6
extra characters ETX and EOT -- no escaping, may nest marked text
wenzelm@26528
     7
verbatim.  Markup <elem att="val" ...>...body...</elem> is encoded as:
wenzelm@26528
     8
wenzelm@26528
     9
  ETX EOT name EOT att=val ... ETX
wenzelm@26528
    10
  ...
wenzelm@26528
    11
  body
wenzelm@26528
    12
  ...
wenzelm@26528
    13
  ETX EOT ETX
wenzelm@26528
    14
*)
wenzelm@26528
    15
wenzelm@26528
    16
signature YXML =
wenzelm@26528
    17
sig
wenzelm@26528
    18
  val detect: string -> bool
wenzelm@26528
    19
  val element: string -> XML.attributes -> string list -> string
wenzelm@26528
    20
  val string_of: XML.tree -> string
wenzelm@26528
    21
  val parse_body: string -> XML.tree list
wenzelm@26528
    22
  val parse: string -> XML.tree
wenzelm@26528
    23
end;
wenzelm@26528
    24
wenzelm@26528
    25
structure YXML: YXML =
wenzelm@26528
    26
struct
wenzelm@26528
    27
wenzelm@26528
    28
(* string representation *)
wenzelm@26528
    29
wenzelm@26528
    30
val ETX = Symbol.ETX;
wenzelm@26528
    31
val EOT = Symbol.EOT;
wenzelm@26528
    32
wenzelm@26528
    33
fun detect s = ord s = ord EOT;
wenzelm@26528
    34
wenzelm@26528
    35
wenzelm@26528
    36
(*naive pasting of strings*)
wenzelm@26528
    37
fun element name atts body =
wenzelm@26528
    38
  ETX ^ EOT ^ name ^ implode (map (fn (a, x) => EOT ^ a ^ "=" ^ x) atts) ^ ETX ^
wenzelm@26528
    39
  implode body ^
wenzelm@26528
    40
  ETX ^ EOT ^ ETX;
wenzelm@26528
    41
wenzelm@26528
    42
(*scalable buffer output*)
wenzelm@26528
    43
fun string_of t =
wenzelm@26528
    44
  let
wenzelm@26528
    45
    fun attrib (a, x) =
wenzelm@26528
    46
      Buffer.add EOT #>
wenzelm@26528
    47
      Buffer.add a #> Buffer.add "=" #> Buffer.add x;
wenzelm@26528
    48
    fun tree (XML.Elem (name, atts, ts)) =
wenzelm@26528
    49
          Buffer.add ETX #>
wenzelm@26528
    50
          Buffer.add EOT #> Buffer.add name #>
wenzelm@26528
    51
          fold attrib atts #>
wenzelm@26528
    52
          Buffer.add ETX #>
wenzelm@26528
    53
          fold tree ts #>
wenzelm@26528
    54
          Buffer.add ETX #>
wenzelm@26528
    55
          Buffer.add EOT #>
wenzelm@26528
    56
          Buffer.add ETX
wenzelm@26528
    57
      | tree (XML.Text s) = Buffer.add s
wenzelm@26528
    58
      | tree (XML.Output s) = Buffer.add s;
wenzelm@26528
    59
  in Buffer.empty |> tree t |> Buffer.content end;
wenzelm@26528
    60
wenzelm@26528
    61
wenzelm@26528
    62
(* efficient YXML parsing *)
wenzelm@26528
    63
wenzelm@26528
    64
local
wenzelm@26528
    65
wenzelm@26528
    66
(* errors *)
wenzelm@26528
    67
wenzelm@26528
    68
fun err msg = raise Fail ("Malformed YXML encoding: " ^ msg);
wenzelm@26528
    69
fun err_attribute () = err "bad attribute";
wenzelm@26528
    70
fun err_element () = err "bad element";
wenzelm@26528
    71
fun err_unbalanced "" = err "unbalanced element"
wenzelm@26528
    72
  | err_unbalanced name = err ("unbalanced element " ^ quote name);
wenzelm@26528
    73
wenzelm@26528
    74
wenzelm@26528
    75
(* stack operations *)
wenzelm@26528
    76
wenzelm@26528
    77
fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending;
wenzelm@26528
    78
wenzelm@26528
    79
fun push "" _ _ = err_element ()
wenzelm@26528
    80
  | push name atts pending = ((name, atts), []) :: pending;
wenzelm@26528
    81
wenzelm@26528
    82
fun pop ((("", _), _) :: _) = err_unbalanced ""
wenzelm@26528
    83
  | pop (((name, atts), body) :: pending) = add (XML.Elem (name, atts, rev body)) pending;
wenzelm@26528
    84
wenzelm@26528
    85
wenzelm@26528
    86
(* parsers *)
wenzelm@26528
    87
wenzelm@26528
    88
fun is_char s c = ord s = Char.ord c;
wenzelm@26528
    89
wenzelm@26528
    90
fun parse_attrib s =
wenzelm@26528
    91
  (case String.fields (is_char "=") s of
wenzelm@26528
    92
    [] => err_attribute ()
wenzelm@26528
    93
  | "" :: _ => err_attribute ()
wenzelm@26528
    94
  | a :: xs => (a, space_implode "=" xs));
wenzelm@26528
    95
wenzelm@26528
    96
fun parse_chunk chunk =
wenzelm@26528
    97
  (case String.fields (is_char EOT) chunk of
wenzelm@26528
    98
    ["", ""] => pop
wenzelm@26528
    99
  | "" :: name :: atts => push name (map parse_attrib atts)
wenzelm@26528
   100
  | [_] => add (XML.Text chunk)
wenzelm@26528
   101
  | _ => err "bad text");
wenzelm@26528
   102
wenzelm@26528
   103
in
wenzelm@26528
   104
wenzelm@26528
   105
fun parse_body source =
wenzelm@26528
   106
  (case fold parse_chunk (String.tokens (is_char ETX) source) [(("", []), [])] of
wenzelm@26528
   107
    [(("", _), result)] => rev result
wenzelm@26528
   108
  | ((name, _), _) :: _ => err_unbalanced name);
wenzelm@26528
   109
wenzelm@26528
   110
fun parse source =
wenzelm@26528
   111
  (case parse_body source of
wenzelm@26528
   112
    [result as XML.Elem _] => result
wenzelm@26528
   113
  | _ => err "no root element");
wenzelm@26528
   114
wenzelm@26528
   115
end;
wenzelm@26528
   116
wenzelm@26528
   117
end;
wenzelm@26528
   118