renamed parse_comment_whspc to parse_comments;
authorwenzelm
Thu Apr 03 22:21:26 2008 +0200 (2008-04-03)
changeset 26551da1cd11d8a25
parent 26550 a8740ad422d2
child 26552 5677b4faf295
renamed parse_comment_whspc to parse_comments;
major parser cleanup -- removed junk comments;
src/Pure/General/xml.ML
     1.1 --- a/src/Pure/General/xml.ML	Thu Apr 03 21:23:42 2008 +0200
     1.2 +++ b/src/Pure/General/xml.ML	Thu Apr 03 22:21:26 2008 +0200
     1.3 @@ -21,7 +21,7 @@
     1.4    val string_of: tree -> string
     1.5    val output: tree -> TextIO.outstream -> unit
     1.6    val parse_string : string -> string option
     1.7 -  val parse_comment_whspc: string list -> unit * string list
     1.8 +  val parse_comments: string list -> unit * string list
     1.9    val parse_element: string list -> tree * string list
    1.10    val parse: string -> tree
    1.11  end;
    1.12 @@ -72,7 +72,7 @@
    1.13  (* elements *)
    1.14  
    1.15  fun elem name atts =
    1.16 -  space_implode " " (name :: map (fn (a, x) => a ^ " = \"" ^ text x ^ "\"") atts);
    1.17 +  space_implode " " (name :: map (fn (a, x) => a ^ "=\"" ^ text x ^ "\"") atts);
    1.18  
    1.19  fun element name atts body =
    1.20    let val b = implode body in
    1.21 @@ -111,60 +111,65 @@
    1.22  fun err s (xs, _) =
    1.23    "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs);
    1.24  
    1.25 -val scan_whspc = Scan.many Symbol.is_blank;
    1.26 -
    1.27 -val scan_special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
    1.28 +val blanks = Scan.many Symbol.is_blank;
    1.29 +val special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode;
    1.30 +val regular = Scan.one Symbol.is_regular;
    1.31 +fun regular_except x = Scan.one (fn c => Symbol.is_regular c andalso c <> x);
    1.32  
    1.33 -val parse_chars = Scan.repeat1 (Scan.unless ((* scan_whspc -- *)$$ "<")
    1.34 -  (scan_special || Scan.one Symbol.is_regular)) >> implode;
    1.35 +val parse_chars = Scan.repeat1 (special || regular_except "<") >> implode;
    1.36  
    1.37 -val parse_cdata = Scan.this_string "<![CDATA[" |--
    1.38 -  (Scan.repeat (Scan.unless (Scan.this_string "]]>") (Scan.one Symbol.is_regular)) >>
    1.39 -    implode) --| Scan.this_string "]]>";
    1.40 +val parse_cdata =
    1.41 +  Scan.this_string "<![CDATA[" |--
    1.42 +  (Scan.repeat (Scan.unless (Scan.this_string "]]>") regular) >> implode) --|
    1.43 +  Scan.this_string "]]>";
    1.44  
    1.45  val parse_att =
    1.46 -  Symbol.scan_id --| scan_whspc --| $$ "=" --| scan_whspc --
    1.47 -  (($$ "\"" || $$ "'") :|-- (fn s => (Scan.repeat (Scan.unless ($$ s)
    1.48 -    (scan_special || Scan.one Symbol.is_regular)) >> implode) --| $$ s));
    1.49 +  (Symbol.scan_id --| (blanks -- $$ "=" -- blanks)) --
    1.50 +  (($$ "\"" || $$ "'") :|-- (fn s =>
    1.51 +    (Scan.repeat (special || regular_except s) >> implode) --| $$ s));
    1.52  
    1.53 -val parse_comment = Scan.this_string "<!--" --
    1.54 -  Scan.repeat (Scan.unless (Scan.this_string "-->") (Scan.one Symbol.is_regular)) --
    1.55 -  Scan.this_string "-->";
    1.56 +val parse_comment =
    1.57 +  Scan.this_string "<!--" --
    1.58 +  Scan.repeat (Scan.unless (Scan.this_string "-->") regular) --
    1.59 +  Scan.this_string "-->" >> K [];
    1.60  
    1.61 -val parse_pi = Scan.this_string "<?" |--
    1.62 -  Scan.repeat (Scan.unless (Scan.this_string "?>") (Scan.one Symbol.is_regular)) --|
    1.63 -  Scan.this_string "?>";
    1.64 +val parse_processing_instruction =
    1.65 +  Scan.this_string "<?" --
    1.66 +  Scan.repeat (Scan.unless (Scan.this_string "?>") regular) --
    1.67 +  Scan.this_string "?>" >> K [];
    1.68 +
    1.69 +val parse_optional_text =
    1.70 +  Scan.optional (parse_chars >> (single o Text)) [];
    1.71  
    1.72  in
    1.73  
    1.74  val parse_string = Scan.read Symbol.stopper parse_chars o explode;
    1.75  
    1.76  fun parse_content xs =
    1.77 -  ((Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) [] --
    1.78 -    (Scan.repeat ((* scan_whspc |-- *)
    1.79 -       (   parse_element >> single
    1.80 -        || parse_cdata >> (single o Text)
    1.81 -        || parse_pi >> K []
    1.82 -        || parse_comment >> K []) --
    1.83 -       Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) []
    1.84 -         >> op @) >> flat) >> op @)(* --| scan_whspc*)) xs
    1.85 +  (parse_optional_text @@@
    1.86 +    (Scan.repeat
    1.87 +      ((parse_element >> single ||
    1.88 +        parse_cdata >> (single o Text) ||
    1.89 +        parse_processing_instruction >> K [] ||
    1.90 +        parse_comment >> K [])
    1.91 +      @@@ parse_optional_text) >> flat)) xs
    1.92  
    1.93  and parse_element xs =
    1.94    ($$ "<" |-- Symbol.scan_id --
    1.95 -    Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) =>
    1.96 +    Scan.repeat (blanks |-- parse_att) --| blanks :-- (fn (s, _) =>
    1.97        !! (err "Expected > or />")
    1.98          (Scan.this_string "/>" >> K []
    1.99           || $$ ">" |-- parse_content --|
   1.100              !! (err ("Expected </" ^ s ^ ">"))
   1.101 -              (Scan.this_string ("</" ^ s) --| scan_whspc --| $$ ">"))) >>
   1.102 +              (Scan.this_string ("</" ^ s) --| blanks --| $$ ">"))) >>
   1.103      (fn ((s, atts), ts) => Elem (s, atts, ts))) xs;
   1.104  
   1.105 -val parse_comment_whspc =
   1.106 -  (scan_whspc >> K()) --| (Scan.repeat (parse_comment |-- (scan_whspc >> K())));
   1.107 +val parse_comments =
   1.108 +  blanks -- Scan.repeat (parse_comment -- blanks >> K ()) >> K ();
   1.109  
   1.110  fun parse s =
   1.111    (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element")
   1.112 -      (scan_whspc |-- parse_element --| scan_whspc))) (Symbol.explode s) of
   1.113 +      (blanks |-- parse_element --| blanks))) (Symbol.explode s) of
   1.114      (x, []) => x
   1.115    | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys));
   1.116