src/Pure/PIDE/xml0.ML
author nipkow
Tue, 17 Jun 2025 06:29:55 +0200
changeset 82732 71574900b6ba
parent 80860 64dc09f7f189
permissions -rw-r--r--
merged
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
80850
885343964b7f clarified files;
wenzelm
parents: 80838
diff changeset
     1
(*  Title:      Pure/PIDE/xml0.ML
80801
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
     2
    Author:     Makarius
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
     3
80860
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
     4
Untyped XML trees and YXML notation: minimal bootstrap version.
80801
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
     5
*)
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
     6
80860
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
     7
(** XML **)
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
     8
80801
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
     9
signature XML =
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    10
sig
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    11
  type attributes = (string * string) list
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    12
  datatype tree =
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    13
      Elem of (string * attributes) * tree list
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    14
    | Text of string
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    15
  type body = tree list
80803
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    16
  val xml_elemN: string
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    17
  val xml_nameN: string
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    18
  val xml_bodyN: string
80836
7f989a84284a tuned signature;
wenzelm
parents: 80803
diff changeset
    19
  val wrap_elem: ((string * attributes) * body) * body -> tree
7f989a84284a tuned signature;
wenzelm
parents: 80803
diff changeset
    20
  val unwrap_elem: tree -> (((string * attributes) * body) * body) option
80838
aaf9e8a392cc minor performance tuning, following Isabelle/Scala;
wenzelm
parents: 80837
diff changeset
    21
  val unwrap_elem_body: tree -> body option
80851
b1ed84a5215b clarified signature and modules;
wenzelm
parents: 80850
diff changeset
    22
  val traverse_text: (string -> 'a -> 'a) -> tree -> 'a -> 'a
b1ed84a5215b clarified signature and modules;
wenzelm
parents: 80850
diff changeset
    23
  val traverse_texts: (string -> 'a -> 'a) -> body -> 'a -> 'a
80803
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    24
  val content_of: body -> string
80801
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    25
end
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    26
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    27
structure XML: XML =
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    28
struct
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    29
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    30
type attributes = (string * string) list;
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    31
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    32
datatype tree =
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    33
    Elem of (string * attributes) * tree list
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    34
  | Text of string;
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    35
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    36
type body = tree list;
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    37
80803
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    38
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    39
(* wrapped elements *)
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    40
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    41
val xml_elemN = "xml_elem";
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    42
val xml_nameN = "xml_name";
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    43
val xml_bodyN = "xml_body";
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    44
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    45
fun wrap_elem (((a, atts), body1), body2) =
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    46
  Elem ((xml_elemN, (xml_nameN, a) :: atts), Elem ((xml_bodyN, []), body1) :: body2);
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    47
80837
wenzelm
parents: 80836
diff changeset
    48
fun unwrap_elem (Elem ((name, (n, a) :: atts), Elem ((name', []), body1) :: body2)) =
wenzelm
parents: 80836
diff changeset
    49
      if name = xml_elemN andalso n = xml_nameN andalso name' = xml_bodyN
80803
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    50
      then SOME (((a, atts), body1), body2) else NONE
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    51
  | unwrap_elem _ = NONE;
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    52
80838
aaf9e8a392cc minor performance tuning, following Isabelle/Scala;
wenzelm
parents: 80837
diff changeset
    53
fun unwrap_elem_body (Elem ((name, (n, _) :: _), Elem ((name', []), _) :: body)) =
aaf9e8a392cc minor performance tuning, following Isabelle/Scala;
wenzelm
parents: 80837
diff changeset
    54
      if name = xml_elemN andalso n = xml_nameN andalso name' = xml_bodyN
aaf9e8a392cc minor performance tuning, following Isabelle/Scala;
wenzelm
parents: 80837
diff changeset
    55
      then SOME body else NONE
aaf9e8a392cc minor performance tuning, following Isabelle/Scala;
wenzelm
parents: 80837
diff changeset
    56
  | unwrap_elem_body _ = NONE;
aaf9e8a392cc minor performance tuning, following Isabelle/Scala;
wenzelm
parents: 80837
diff changeset
    57
80803
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    58
80851
b1ed84a5215b clarified signature and modules;
wenzelm
parents: 80850
diff changeset
    59
(* traverse text content *)
80803
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    60
80851
b1ed84a5215b clarified signature and modules;
wenzelm
parents: 80850
diff changeset
    61
fun traverse_text f tree =
80838
aaf9e8a392cc minor performance tuning, following Isabelle/Scala;
wenzelm
parents: 80837
diff changeset
    62
  (case unwrap_elem_body tree of
80851
b1ed84a5215b clarified signature and modules;
wenzelm
parents: 80850
diff changeset
    63
    SOME ts => traverse_texts f ts
80803
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    64
  | NONE =>
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    65
      (case tree of
80851
b1ed84a5215b clarified signature and modules;
wenzelm
parents: 80850
diff changeset
    66
        Elem (_, ts) => traverse_texts f ts
b1ed84a5215b clarified signature and modules;
wenzelm
parents: 80850
diff changeset
    67
      | Text s => f s))
b1ed84a5215b clarified signature and modules;
wenzelm
parents: 80850
diff changeset
    68
and traverse_texts _ [] res = res
b1ed84a5215b clarified signature and modules;
wenzelm
parents: 80850
diff changeset
    69
  | traverse_texts f (t :: ts) res = traverse_texts f ts (traverse_text f t res);
80803
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    70
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    71
fun content_of body =
80851
b1ed84a5215b clarified signature and modules;
wenzelm
parents: 80850
diff changeset
    72
  String.concat (rev (traverse_texts (fn x => fn xs => x :: xs) body []));
80803
7e39c785ca5d clarified modules: enable pretty.ML to use XML/YXML more directly;
wenzelm
parents: 80801
diff changeset
    73
80801
090adcdceaae clarified modules;
wenzelm
parents:
diff changeset
    74
end;
80860
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    75
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    76
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    77
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    78
(** YXML **)
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    79
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    80
signature YXML =
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    81
sig
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    82
  val X_char: char
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    83
  val Y_char: char
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    84
  val X: string
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    85
  val Y: string
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    86
  val XY: string
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    87
  val XYX: string
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    88
  val detect: string -> bool
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    89
  val output_markup: string * XML.attributes -> string * string
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    90
end;
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    91
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    92
structure YXML: YXML =
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    93
struct
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    94
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    95
(* markers *)
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    96
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    97
val X_char = Char.chr 5;
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    98
val Y_char = Char.chr 6;
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
    99
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   100
val X = String.str X_char;
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   101
val Y = String.str Y_char;
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   102
val XY = X ^ Y;
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   103
val XYX = XY ^ X;
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   104
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   105
fun detect s = Char.contains s X_char orelse Char.contains s Y_char;
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   106
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   107
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   108
(* output markup *)
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   109
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   110
fun output_markup ("", _) = ("", "")
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   111
  | output_markup (name, atts) =
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   112
      let
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   113
        val bgs = XY :: name :: List.foldr (fn ((a, b), ps) => Y :: a :: "=" :: b :: ps) [X] atts;
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   114
        val en = XYX;
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   115
      in (String.concat bgs, en) end;
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   116
64dc09f7f189 clarified YXML bootstrap;
wenzelm
parents: 80851
diff changeset
   117
end;