further cleanup of XML signature;
authorwenzelm
Thu Apr 03 21:23:37 2008 +0200 (2008-04-03)
changeset 26546ba4cdf92c7c4
parent 26545 6e1aef001b3b
child 26547 1112375f6a69
further cleanup of XML signature;
replaced plain_content by incremental add_content;
added stream output;
src/Pure/General/xml.ML
     1.1 --- a/src/Pure/General/xml.ML	Thu Apr 03 21:23:36 2008 +0200
     1.2 +++ b/src/Pure/General/xml.ML	Thu Apr 03 21:23:37 2008 +0200
     1.3 @@ -7,33 +7,42 @@
     1.4  
     1.5  signature XML =
     1.6  sig
     1.7 -  (*string functions*)
     1.8 -  val detect: string -> bool
     1.9 -  val header: string
    1.10 -  val text: string -> string
    1.11    type attributes = Markup.property list
    1.12 -  val element: string -> attributes -> string list -> string
    1.13 -  val output_markup: Markup.T -> output * output
    1.14 -  (*tree functions*)
    1.15    datatype tree =
    1.16        Elem of string * attributes * tree list
    1.17      | Text of string
    1.18      | Output of output
    1.19 -  type content = tree list
    1.20 -  type element = string * attributes * content
    1.21 +  val add_content: tree -> Buffer.T -> Buffer.T
    1.22 +  val detect: string -> bool
    1.23 +  val header: string
    1.24 +  val text: string -> string
    1.25 +  val element: string -> attributes -> string list -> string
    1.26 +  val output_markup: Markup.T -> output * output
    1.27    val string_of: tree -> string
    1.28 -  val plain_content: tree -> string
    1.29 +  val output: tree -> TextIO.outstream -> unit
    1.30    val parse_string : string -> string option
    1.31    val parse_comment_whspc: string list -> unit * string list
    1.32 -  val parse_content: string list -> tree list * string list
    1.33 -  val parse_elem: string list -> tree * string list
    1.34 -  val parse_document: string list -> (string option * tree) * string list
    1.35 +  val parse_element: string list -> tree * string list
    1.36    val parse: string -> tree
    1.37  end;
    1.38  
    1.39  structure XML: XML =
    1.40  struct
    1.41  
    1.42 +(** XML trees **)
    1.43 +
    1.44 +type attributes = Markup.property list;
    1.45 +
    1.46 +datatype tree =
    1.47 +    Elem of string * attributes * tree list
    1.48 +  | Text of string
    1.49 +  | Output of output;   (* FIXME !? *)
    1.50 +
    1.51 +fun add_content (Elem (_, _, ts)) = fold add_content ts
    1.52 +  | add_content (Text s) = Buffer.add s
    1.53 +  | add_content (Output _) = I;    (* FIXME !? *)
    1.54 +
    1.55 +
    1.56  
    1.57  (** string representation **)
    1.58  
    1.59 @@ -41,7 +50,7 @@
    1.60  val header = "<?xml version=\"1.0\"?>\n";
    1.61  
    1.62  
    1.63 -(* text and character data *)
    1.64 +(* escaped text *)
    1.65  
    1.66  fun decode "&lt;" = "<"
    1.67    | decode "&gt;" = ">"
    1.68 @@ -62,8 +71,6 @@
    1.69  
    1.70  (* elements *)
    1.71  
    1.72 -type attributes = Markup.property list;
    1.73 -
    1.74  fun elem name atts =
    1.75    space_implode " " (name :: map (fn (a, x) => a ^ " = \"" ^ text x ^ "\"") atts);
    1.76  
    1.77 @@ -78,39 +85,28 @@
    1.78    enclose "</" ">" name);
    1.79  
    1.80  
    1.81 -
    1.82 -(** explicit XML trees **)
    1.83 +(* output *)
    1.84  
    1.85 -datatype tree =
    1.86 -    Elem of string * attributes * tree list
    1.87 -  | Text of string
    1.88 -  | Output of output;
    1.89 -
    1.90 -type content = tree list;
    1.91 -type element = string * attributes * content;
    1.92 -
    1.93 -fun string_of t =
    1.94 +fun buffer_of tree =
    1.95    let
    1.96 -    fun tree (Elem (name, atts, [])) =
    1.97 +    fun traverse (Elem (name, atts, [])) =
    1.98            Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add "/>"
    1.99 -      | tree (Elem (name, atts, ts)) =
   1.100 +      | traverse (Elem (name, atts, ts)) =
   1.101            Buffer.add "<" #> Buffer.add (elem name atts) #> Buffer.add ">" #>
   1.102 -          fold tree ts #>
   1.103 +          fold traverse ts #>
   1.104            Buffer.add "</" #> Buffer.add name #> Buffer.add ">"
   1.105 -      | tree (Text s) = Buffer.add (text s)
   1.106 -      | tree (Output s) = Buffer.add s;
   1.107 -  in Buffer.empty |> tree t |> Buffer.content end;
   1.108 +      | traverse (Text s) = Buffer.add (text s)
   1.109 +      | traverse (Output s) = Buffer.add s;
   1.110 +  in Buffer.empty |> traverse tree end;
   1.111  
   1.112 -fun plain_content tree =
   1.113 -  let
   1.114 -    fun content (Elem (_, _, ts)) = fold content ts
   1.115 -      | content (Text s) = Buffer.add s
   1.116 -      | content (Output _) = I;    (* FIXME !? *)
   1.117 -  in Buffer.empty |> content tree |> Buffer.content end;
   1.118 +val string_of = Buffer.content o buffer_of;
   1.119 +val output = Buffer.output o buffer_of;
   1.120  
   1.121  
   1.122  
   1.123 -(** XML parsing **)
   1.124 +(** XML parsing (slow) **)
   1.125 +
   1.126 +local
   1.127  
   1.128  fun err s (xs, _) =
   1.129    "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
   1.130 @@ -122,8 +118,6 @@
   1.131  val parse_chars = Scan.repeat1 (Scan.unless ((* scan_whspc -- *)$$ "<")
   1.132    (scan_special || Scan.one Symbol.is_regular)) >> implode;
   1.133  
   1.134 -val parse_string = Scan.read Symbol.stopper parse_chars o explode;
   1.135 -
   1.136  val parse_cdata = Scan.this_string "<![CDATA[" |--
   1.137    (Scan.repeat (Scan.unless (Scan.this_string "]]>") (Scan.one Symbol.is_regular)) >>
   1.138      implode) --| Scan.this_string "]]>";
   1.139 @@ -137,24 +131,25 @@
   1.140    Scan.repeat (Scan.unless (Scan.this_string "-->") (Scan.one Symbol.is_regular)) --
   1.141    Scan.this_string "-->";
   1.142  
   1.143 -val parse_comment_whspc =
   1.144 -  (scan_whspc >> K()) --| (Scan.repeat (parse_comment |-- (scan_whspc >> K())));
   1.145 -
   1.146  val parse_pi = Scan.this_string "<?" |--
   1.147    Scan.repeat (Scan.unless (Scan.this_string "?>") (Scan.one Symbol.is_regular)) --|
   1.148    Scan.this_string "?>";
   1.149  
   1.150 +in
   1.151 +
   1.152 +val parse_string = Scan.read Symbol.stopper parse_chars o explode;
   1.153 +
   1.154  fun parse_content xs =
   1.155    ((Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) [] --
   1.156      (Scan.repeat ((* scan_whspc |-- *)
   1.157 -       (   parse_elem >> single
   1.158 +       (   parse_element >> single
   1.159          || parse_cdata >> (single o Text)
   1.160          || parse_pi >> K []
   1.161          || parse_comment >> K []) --
   1.162         Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) []
   1.163           >> op @) >> flat) >> op @)(* --| scan_whspc*)) xs
   1.164  
   1.165 -and parse_elem xs =
   1.166 +and parse_element xs =
   1.167    ($$ "<" |-- Symbol.scan_id --
   1.168      Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) =>
   1.169        !! (err "Expected > or />")
   1.170 @@ -164,16 +159,15 @@
   1.171                (Scan.this_string ("</" ^ s) --| scan_whspc --| $$ ">"))) >>
   1.172      (fn ((s, atts), ts) => Elem (s, atts, ts))) xs;
   1.173  
   1.174 -val parse_document =
   1.175 -  Scan.option (Scan.this_string "<!DOCTYPE" -- scan_whspc |--
   1.176 -    (Scan.repeat (Scan.unless ($$ ">")
   1.177 -      (Scan.one Symbol.is_regular)) >> implode) --| $$ ">" --| scan_whspc) --
   1.178 -  parse_elem;
   1.179 +val parse_comment_whspc =
   1.180 +  (scan_whspc >> K()) --| (Scan.repeat (parse_comment |-- (scan_whspc >> K())));
   1.181  
   1.182  fun parse s =
   1.183    (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element")
   1.184 -      (scan_whspc |-- parse_elem --| scan_whspc))) (Symbol.explode s) of
   1.185 +      (scan_whspc |-- parse_element --| scan_whspc))) (Symbol.explode s) of
   1.186      (x, []) => x
   1.187    | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
   1.188  
   1.189  end;
   1.190 +
   1.191 +end;