src/Pure/PIDE/xml.ML
changeset 44698 0385292321a0
parent 43949 94033767ef9b
child 44808 05b8997899a2
equal deleted inserted replaced
44697:b99dfee76538 44698:0385292321a0
       
     1 (*  Title:      Pure/PIDE/xml.ML
       
     2     Author:     David Aspinall
       
     3     Author:     Stefan Berghofer
       
     4     Author:     Makarius
       
     5 
       
     6 Untyped XML trees and basic data representation.
       
     7 *)
       
     8 
       
(*Common interface of the XML.Encode and XML.Decode structures, which instantiate the
  abstract types 'a A, 'a T and 'a V in opposite directions.*)
signature XML_DATA_OPS =
sig
  (*conversion of an atomic value (to or from a string)*)
  type 'a A
  (*conversion of a value (to or from an XML body)*)
  type 'a T
  (*conversion of one variant case (to or from a string list paired with an XML body)*)
  type 'a V
  (*atomic values*)
  val int_atom: int A
  val bool_atom: bool A
  val unit_atom: unit A
  (*standard types*)
  val properties: Properties.T T
  val string: string T
  val int: int T
  val bool: bool T
  val unit: unit T
  (*compound types, built from the conversions of their components*)
  val pair: 'a T -> 'b T -> ('a * 'b) T
  val triple: 'a T -> 'b T -> 'c T -> ('a * 'b * 'c) T
  val list: 'a T -> 'a list T
  val option: 'a T -> 'a option T
  (*alternatives given as a list of variant cases*)
  val variant: 'a V list -> 'a T
end;
       
    28 
       
(*Interface of structure XML: tree datatype, string output, parsing, and
  encoding/decoding of ML values as XML bodies.*)
signature XML =
sig
  (*tree representation*)
  type attributes = Properties.T
  datatype tree =
      Elem of Markup.T * tree list
    | Text of string
  type body = tree list
  (*plain text content*)
  val add_content: tree -> Buffer.T -> Buffer.T
  val content_of: body -> string
  (*string representation*)
  val header: string
  val text: string -> string
  val element: string -> attributes -> string list -> string
  val output_markup: Markup.T -> Output.output * Output.output
  val string_of: tree -> string
  val pretty: int -> tree -> Pretty.T
  val output: tree -> TextIO.outstream -> unit
  (*parsing*)
  val parse_comments: string list -> unit * string list
  val parse_string : string -> string option
  val parse_element: string list -> tree * string list
  val parse_document: string list -> tree * string list
  val parse: string -> tree
  (*XML as data representation language: decoding failures and the two conversion directions*)
  exception XML_ATOM of string
  exception XML_BODY of body
  structure Encode: XML_DATA_OPS
  structure Decode: XML_DATA_OPS
end;
       
    55 
       
    56 structure XML: XML =
       
    57 struct
       
    58 
       
    59 (** XML trees **)
       
    60 
       
(*attributes of an element, represented as Properties.T*)
type attributes = Properties.T;

(*XML tree: an element consisting of markup (name and attributes) with its child trees,
  or a piece of text*)
datatype tree =
    Elem of Markup.T * tree list
  | Text of string;

(*sequence of trees, e.g. the children of an element*)
type body = tree list;
       
    68 
       
    69 fun add_content (Elem (_, ts)) = fold add_content ts
       
    70   | add_content (Text s) = Buffer.add s;
       
    71 
       
    72 fun content_of body = Buffer.empty |> fold add_content body |> Buffer.content;
       
    73 
       
    74 
       
    75 
       
    76 (** string representation **)
       
    77 
       
(*XML declaration for version 1.0, terminated by a newline*)
val header = "<?xml version=\"1.0\"?>\n";
       
    79 
       
    80 
       
    81 (* escaped text *)
       
    82 
       
    83 fun decode "&lt;" = "<"
       
    84   | decode "&gt;" = ">"
       
    85   | decode "&amp;" = "&"
       
    86   | decode "&apos;" = "'"
       
    87   | decode "&quot;" = "\""
       
    88   | decode c = c;
       
    89 
       
    90 fun encode "<" = "&lt;"
       
    91   | encode ">" = "&gt;"
       
    92   | encode "&" = "&amp;"
       
    93   | encode "'" = "&apos;"
       
    94   | encode "\"" = "&quot;"
       
    95   | encode c = c;
       
    96 
       
    97 val text = translate_string encode;
       
    98 
       
    99 
       
   100 (* elements *)
       
   101 
       
   102 fun elem name atts =
       
   103   space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);
       
   104 
       
   105 fun element name atts body =
       
   106   let val b = implode body in
       
   107     if b = "" then enclose "<" "/>" (elem name atts)
       
   108     else enclose "<" ">" (elem name atts) ^ b ^ enclose "</" ">" name
       
   109   end;
       
   110 
       
   111 fun output_markup (markup as (name, atts)) =
       
   112   if Markup.is_empty markup then Markup.no_output
       
   113   else (enclose "<" ">" (elem name atts), enclose "</" ">" name);
       
   114 
       
   115 
       
   116 (* output *)
       
   117 
       
   118 fun buffer_of depth tree =
       
   119   let
       
   120     fun traverse _ (Elem ((name, atts), [])) =
       
   121           Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"
       
   122       | traverse d (Elem ((name, atts), ts)) =
       
   123           Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>
       
   124           traverse_body d ts #>
       
   125           Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
       
   126       | traverse _ (Text s) = Buffer.add (text s)
       
   127     and traverse_body 0 _ = Buffer.add "..."
       
   128       | traverse_body d ts = fold (traverse (d - 1)) ts;
       
   129   in Buffer.empty |> traverse depth tree end;
       
   130 
       
   131 val string_of = Buffer.content o buffer_of ~1;
       
   132 val output = Buffer.output o buffer_of ~1;
       
   133 
       
   134 fun pretty depth tree =
       
   135   Pretty.str (Buffer.content (buffer_of (Int.max (0, depth)) tree));
       
   136 
       
   137 
       
   138 
       
   139 (** XML parsing **)
       
   140 
       
   141 local
       
   142 
       
   143 fun err msg (xs, _) =
       
   144   fn () => "XML parsing error: " ^ msg () ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
       
   145 
       
(*replace any scanner result by the empty list, i.e. contribute no trees*)
fun ignored _ = [];

(*blank symbols, scanned via Scan.many*)
val blanks = Scan.many Symbol.is_blank;
(*entity reference of the form "&" identifier ";", mapped through decode: the predefined
  entities become their character, any other reference is kept as written*)
val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
(*one regular symbol*)
val regular = Scan.one Symbol.is_regular;
(*one regular symbol different from x*)
fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);

(*character data: entity references and regular symbols other than "<", joined into one
  string; Scan.repeat1 presumably demands at least one item -- confirm against Scan*)
val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;
       
   154 
       
(*CDATA section: the regular symbols between "<![CDATA[" and the first "]]>", joined into
  one string; no entity decoding is applied to the contents*)
val parse_cdata =
  Scan.this_string "<![CDATA[" |--
  (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|
  Scan.this_string "]]>";
       
   159 
       
(*attribute: identifier, "=" with blanks around it, and a value delimited by double or
  single quotes; the opening quote s is passed on so that the value ends at the same
  kind of quote, and entity references within the value are decoded*)
val parse_att =
  (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) --
  (($$ "\"" || $$ "'") :|-- (fn s =>
    (Scan.repeat (special || regular_except s) >> implode) --| $$ s));
       
   164 
       
(*comment "<!--" ... "-->": scanned and discarded (result [])*)
val parse_comment =
  Scan.this_string "<!--" --
  Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
  Scan.this_string "-->" >> ignored;

(*processing instruction "<?" ... "?>": scanned and discarded (result [])*)
val parse_processing_instruction =
  Scan.this_string "<?" --
  Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
  Scan.this_string "?>" >> ignored;

(*document type declaration "<!DOCTYPE" ... ">": scanned up to the first ">" and
  discarded (result [])*)
val parse_doctype =
  Scan.this_string "<!DOCTYPE" --
  Scan.repeat (Scan.unless ($$ ">") regular) --
  $$ ">" >> ignored;
       
   179 
       
(*one miscellaneous item outside the root element: a blank symbol, a processing
  instruction, or a comment; each alternative yields []*)
val parse_misc =
  Scan.one Symbol.is_blank >> ignored ||
  parse_processing_instruction ||
  parse_comment;
       
   184 
       
(*character data as a singleton Text list, or [] as the default given to Scan.optional*)
val parse_optional_text =
  Scan.optional (parse_chars >> (single o Text)) [];
       
   187 
       
   188 fun name_start_char c = Symbol.is_ascii_letter c orelse c = ":" orelse c = "_";
       
   189 fun name_char c = name_start_char c orelse Symbol.is_ascii_digit c orelse c = "-" orelse c = ".";
       
   190 val parse_name = Scan.one name_start_char ::: Scan.many name_char;
       
   191 
       
   192 in
       
   193 
       
(*skip blanks and any sequence of comments, each followed by blanks; the result is ()*)
val parse_comments =
  blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
       
   196 
       
(*apply parse_chars (character data with entity references decoded) to the exploded
  string via Scan.read; the result is an option -- presumably NONE when the string is
  not accepted as a whole, confirm against Scan.read*)
val parse_string = Scan.read Symbol.stopper parse_chars o raw_explode;
       
   198 
       
(*element content: optional text, then repeatedly one item -- nested element, CDATA
  section (as Text node), processing instruction or comment (both contribute no tree)
  -- followed by optional text; all partial results are appended into a single body*)
fun parse_content xs =
  (parse_optional_text @@@
    (Scan.repeat
      ((parse_element >> single ||
        parse_cdata >> (single o Text) ||
        parse_processing_instruction ||
        parse_comment)
      @@@ parse_optional_text) >> flat)) xs

(*element: "<" name, attributes separated by blanks, then either "/>" (empty body) or
  ">" followed by content and the closing tag "</" name ">" for the same name.  Both
  continuations are wrapped in !! with err, supplying the messages "Expected > or />"
  and "Expected </name>".  The name is scanned as a symbol list and imploded for the
  resulting Elem.*)
and parse_element xs =
  ($$ "<" |-- parse_name -- Scan.repeat (blanks |-- parse_att) --| blanks :--
    (fn (name, _) =>
      !! (err (fn () => "Expected > or />"))
       ($$ "/" -- $$ ">" >> ignored ||
        $$ ">" |-- parse_content --|
          !! (err (fn () => "Expected </" ^ implode name ^ ">"))
              ($$ "<" -- $$ "/" -- Scan.this name -- blanks -- $$ ">")))
    >> (fn ((name, atts), body) => Elem ((implode name, atts), body))) xs;
       
   217 
       
(*document: miscellaneous items (blanks, processing instructions, comments), an optional
  DOCTYPE declaration, further miscellaneous items, then the root element; only the
  root element is returned*)
val parse_document =
  (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)
  |-- parse_element;
       
   221 
       
   222 fun parse s =
       
   223   (case Scan.finite Symbol.stopper (Scan.error (!! (err (fn () => "Malformed element"))
       
   224       (blanks |-- parse_document --| blanks))) (raw_explode s) of
       
   225     (x, []) => x
       
   226   | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
       
   227 
       
   228 end;
       
   229 
       
   230 
       
   231 
       
   232 (** XML as data representation language **)
       
   233 
       
(*raised by Decode for a string that is not a valid atom; carries the offending string*)
exception XML_ATOM of string;
(*raised by Decode for trees of unexpected shape; carries the offending trees*)
exception XML_BODY of tree list;
       
   236 
       
   237 
       
   238 structure Encode =
       
   239 struct
       
   240 
       
   241 type 'a A = 'a -> string;
       
   242 type 'a T = 'a -> body;
       
   243 type 'a V = 'a -> string list * body;
       
   244 
       
   245 
       
   246 (* atomic values *)
       
   247 
       
   248 fun int_atom i = signed_string_of_int i;
       
   249 
       
   250 fun bool_atom false = "0"
       
   251   | bool_atom true = "1";
       
   252 
       
   253 fun unit_atom () = "";
       
   254 
       
   255 
       
   256 (* structural nodes *)
       
   257 
       
   258 fun node ts = Elem ((":", []), ts);
       
   259 
       
   260 fun vector xs = map_index (fn (i, x) => (int_atom i, x)) xs;
       
   261 
       
   262 fun tagged (tag, (xs, ts)) = Elem ((int_atom tag, vector xs), ts);
       
   263 
       
   264 
       
   265 (* representation of standard types *)
       
   266 
       
   267 fun properties props = [Elem ((":", props), [])];
       
   268 
       
   269 fun string "" = []
       
   270   | string s = [Text s];
       
   271 
       
   272 val int = string o int_atom;
       
   273 
       
   274 val bool = string o bool_atom;
       
   275 
       
   276 val unit = string o unit_atom;
       
   277 
       
   278 fun pair f g (x, y) = [node (f x), node (g y)];
       
   279 
       
   280 fun triple f g h (x, y, z) = [node (f x), node (g y), node (h z)];
       
   281 
       
   282 fun list f xs = map (node o f) xs;
       
   283 
       
   284 fun option _ NONE = []
       
   285   | option f (SOME x) = [node (f x)];
       
   286 
       
   287 fun variant fs x = [tagged (the (get_index (fn f => try f x) fs))];
       
   288 
       
   289 end;
       
   290 
       
   291 
       
   292 structure Decode =
       
   293 struct
       
   294 
       
   295 type 'a A = string -> 'a;
       
   296 type 'a T = body -> 'a;
       
   297 type 'a V = string list * body -> 'a;
       
   298 
       
   299 
       
   300 (* atomic values *)
       
   301 
       
   302 fun int_atom s =
       
   303   Markup.parse_int s
       
   304     handle Fail _ => raise XML_ATOM s;
       
   305 
       
   306 fun bool_atom "0" = false
       
   307   | bool_atom "1" = true
       
   308   | bool_atom s = raise XML_ATOM s;
       
   309 
       
   310 fun unit_atom "" = ()
       
   311   | unit_atom s = raise XML_ATOM s;
       
   312 
       
   313 
       
   314 (* structural nodes *)
       
   315 
       
   316 fun node (Elem ((":", []), ts)) = ts
       
   317   | node t = raise XML_BODY [t];
       
   318 
       
   319 fun vector atts =
       
   320   #1 (fold_map (fn (a, x) =>
       
   321         fn i => if int_atom a = i then (x, i + 1) else raise XML_ATOM a) atts 0);
       
   322 
       
   323 fun tagged (Elem ((name, atts), ts)) = (int_atom name, (vector atts, ts))
       
   324   | tagged t = raise XML_BODY [t];
       
   325 
       
   326 
       
   327 (* representation of standard types *)
       
   328 
       
   329 fun properties [Elem ((":", props), [])] = props
       
   330   | properties ts = raise XML_BODY ts;
       
   331 
       
   332 fun string [] = ""
       
   333   | string [Text s] = s
       
   334   | string ts = raise XML_BODY ts;
       
   335 
       
   336 val int = int_atom o string;
       
   337 
       
   338 val bool = bool_atom o string;
       
   339 
       
   340 val unit = unit_atom o string;
       
   341 
       
   342 fun pair f g [t1, t2] = (f (node t1), g (node t2))
       
   343   | pair _ _ ts = raise XML_BODY ts;
       
   344 
       
   345 fun triple f g h [t1, t2, t3] = (f (node t1), g (node t2), h (node t3))
       
   346   | triple _ _ _ ts = raise XML_BODY ts;
       
   347 
       
   348 fun list f ts = map (f o node) ts;
       
   349 
       
   350 fun option _ [] = NONE
       
   351   | option f [t] = SOME (f (node t))
       
   352   | option _ ts = raise XML_BODY ts;
       
   353 
       
   354 fun variant fs [t] =
       
   355       let
       
   356         val (tag, (xs, ts)) = tagged t;
       
   357         val f = nth fs tag handle General.Subscript => raise XML_BODY [t];
       
   358       in f (xs, ts) end
       
   359   | variant _ ts = raise XML_BODY ts;
       
   360 
       
   361 end;
       
   362 
       
   363 end;