75 fun err s (xs, _) = |
75 fun err s (xs, _) = |
76 "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs); |
76 "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs); |
77 |
77 |
78 val scan_whspc = Scan.any Symbol.is_blank; |
78 val scan_whspc = Scan.any Symbol.is_blank; |
79 |
79 |
80 fun scan_string s = Scan.list (Symbol.explode s) >> K s; |
80 fun scan_lit s = Scan.this (Symbol.explode s) >> K s; |
81 |
81 |
82 val scan_special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode; |
82 val scan_special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode; |
83 |
83 |
84 val parse_chars = Scan.repeat1 (Scan.unless (scan_whspc -- $$ "<") |
84 val parse_chars = Scan.repeat1 (Scan.unless (scan_whspc -- $$ "<") |
85 (scan_special || Scan.one Symbol.not_eof)) >> implode; |
85 (scan_special || Scan.one Symbol.not_eof)) >> implode; |
86 |
86 |
87 val parse_cdata = scan_string "<![CDATA[" |-- |
87 val parse_cdata = scan_lit "<![CDATA[" |-- |
88 (Scan.repeat (Scan.unless (scan_string "]]>") (Scan.one Symbol.not_eof)) >> |
88 (Scan.repeat (Scan.unless (scan_lit "]]>") (Scan.one Symbol.not_eof)) >> |
89 implode) --| scan_string "]]>"; |
89 implode) --| scan_lit "]]>"; |
90 |
90 |
91 val parse_att = |
91 val parse_att = |
92 Symbol.scan_id --| scan_whspc --| $$ "=" --| scan_whspc --| $$ "\"" -- |
92 Symbol.scan_id --| scan_whspc --| $$ "=" --| scan_whspc --| $$ "\"" -- |
93 (Scan.repeat (Scan.unless ($$ "\"") |
93 (Scan.repeat (Scan.unless ($$ "\"") |
94 (scan_special || Scan.one Symbol.not_eof)) >> implode) --| $$ "\""; |
94 (scan_special || Scan.one Symbol.not_eof)) >> implode) --| $$ "\""; |
95 |
95 |
96 val parse_comment = scan_string "<!--" -- |
96 val parse_comment = scan_lit "<!--" -- |
97 Scan.repeat (Scan.unless (scan_string "-->") (Scan.one Symbol.not_eof)) -- |
97 Scan.repeat (Scan.unless (scan_lit "-->") (Scan.one Symbol.not_eof)) -- |
98 scan_string "-->"; |
98 scan_lit "-->"; |
99 |
99 |
100 val parse_pi = scan_string "<?" |-- |
100 val parse_pi = scan_lit "<?" |-- |
101 Scan.repeat (Scan.unless (scan_string "?>") (Scan.one Symbol.not_eof)) --| |
101 Scan.repeat (Scan.unless (scan_lit "?>") (Scan.one Symbol.not_eof)) --| |
102 scan_string "?>"; |
102 scan_lit "?>"; |
103 |
103 |
104 fun parse_content xs = |
104 fun parse_content xs = |
105 ((Scan.optional (scan_whspc |-- parse_chars >> (single o Text)) [] -- |
105 ((Scan.optional (scan_whspc |-- parse_chars >> (single o Text)) [] -- |
106 (Scan.repeat (scan_whspc |-- |
106 (Scan.repeat (scan_whspc |-- |
107 ( parse_elem >> single |
107 ( parse_elem >> single |
113 |
113 |
114 and parse_elem xs = |
114 and parse_elem xs = |
115 ($$ "<" |-- Symbol.scan_id -- |
115 ($$ "<" |-- Symbol.scan_id -- |
116 Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) => |
116 Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) => |
117 !! (err "Expected > or />") |
117 !! (err "Expected > or />") |
118 (scan_string "/>" >> K [] |
118 (scan_lit "/>" >> K [] |
119 || $$ ">" |-- parse_content --| |
119 || $$ ">" |-- parse_content --| |
120 !! (err ("Expected </" ^ s ^ ">")) |
120 !! (err ("Expected </" ^ s ^ ">")) |
121 (scan_string ("</" ^ s) --| scan_whspc --| $$ ">"))) >> |
121 (scan_lit ("</" ^ s) --| scan_whspc --| $$ ">"))) >> |
122 (fn ((s, atts), ts) => Elem (s, atts, ts))) xs; |
122 (fn ((s, atts), ts) => Elem (s, atts, ts))) xs; |
123 |
123 |
124 val parse_document = |
124 val parse_document = |
125 Scan.option (scan_string "<!DOCTYPE" -- scan_whspc |-- |
125 Scan.option (scan_lit "<!DOCTYPE" -- scan_whspc |-- |
126 (Scan.repeat (Scan.unless ($$ ">") |
126 (Scan.repeat (Scan.unless ($$ ">") |
127 (Scan.one Symbol.not_eof)) >> implode) --| $$ ">" --| scan_whspc) -- |
127 (Scan.one Symbol.not_eof)) >> implode) --| $$ ">" --| scan_whspc) -- |
128 parse_elem; |
128 parse_elem; |
129 |
129 |
130 fun tree_of_string s = |
130 fun tree_of_string s = |