added parse_document (optional unchecked header material);
authorwenzelm
Sat, 24 May 2008 20:12:18 +0200
changeset 26984 d0e098e206f3
parent 26983 e40f28cdd19b
child 26985 51c5acd57b75
added parse_document (optional unchecked header material); parse: parse_document instead of parse_element;
src/Pure/General/xml.ML
--- a/src/Pure/General/xml.ML	Sat May 24 20:12:17 2008 +0200
+++ b/src/Pure/General/xml.ML	Sat May 24 20:12:18 2008 +0200
@@ -20,9 +20,10 @@
   val output_markup: Markup.T -> output * output
   val string_of: tree -> string
   val output: tree -> TextIO.outstream -> unit
+  val parse_comments: string list -> unit * string list
   val parse_string : string -> string option
-  val parse_comments: string list -> unit * string list
   val parse_element: string list -> tree * string list
+  val parse_document: string list -> tree * string list
   val parse: string -> tree
 end;
 
@@ -111,6 +112,8 @@
 fun err s (xs, _) =
   "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
 
+fun ignored _ = [];
+
 val blanks = Scan.many Symbol.is_blank;
 val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
 val regular = Scan.one Symbol.is_regular;
@@ -131,18 +134,31 @@
 val parse_comment =
   Scan.this_string "<!--" --
   Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
-  Scan.this_string "-->" >> K [];
+  Scan.this_string "-->" >> ignored;
 
 val parse_processing_instruction =
   Scan.this_string "<?" --
   Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
-  Scan.this_string "?>" >> K [];
+  Scan.this_string "?>" >> ignored;
+
+val parse_doctype =
+  Scan.this_string "<!DOCTYPE" --
+  Scan.repeat (Scan.unless ($$ ">") regular) --
+  $$ ">" >> ignored;
+
+val parse_misc =
+  Scan.one Symbol.is_blank >> ignored ||
+  parse_processing_instruction ||
+  parse_comment;
 
 val parse_optional_text =
   Scan.optional (parse_chars >> (single o Text)) [];
 
 in
 
+val parse_comments =
+  blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
+
 val parse_string = Scan.read Symbol.stopper parse_chars o explode;
 
 fun parse_content xs =
@@ -150,26 +166,27 @@
     (Scan.repeat
       ((parse_element >> single ||
         parse_cdata >> (single o Text) ||
-        parse_processing_instruction >> K [] ||
-        parse_comment >> K [])
+        parse_processing_instruction ||
+        parse_comment)
       @@@ parse_optional_text) >> flat)) xs
 
 and parse_element xs =
   ($$ "<" |-- Symbol.scan_id --
     Scan.repeat (blanks |-- parse_att) --| blanks :-- (fn (s, _) =>
       !! (err "Expected > or />")
-        (Scan.this_string "/>" >> K []
+        (Scan.this_string "/>" >> ignored
          || $$ ">" |-- parse_content --|
             !! (err ("Expected </" ^ s ^ ">"))
               (Scan.this_string ("</" ^ s) --| blanks --| $$ ">"))) >>
     (fn ((s, atts), ts) => Elem (s, atts, ts))) xs;
 
-val parse_comments =
-  blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
+val parse_document =
+  (Scan.repeat parse_misc -- Scan.option parse_doctype -- Scan.repeat parse_misc)
+  |-- parse_element;
 
 fun parse s =
   (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element")
-      (blanks |-- parse_element --| blanks))) (explode s) of
+      (blanks |-- parse_document --| blanks))) (explode s) of
     (x, []) => x
   | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));