49 val cdata_open = "<![CDATA[" |
46 val cdata_open = "<![CDATA[" |
50 val cdata_close = "]]>" |
47 val cdata_close = "]]>" |
51 |
48 |
52 fun cdata s = cdata_open ^ s ^ cdata_close; |
49 fun cdata s = cdata_open ^ s ^ cdata_close; |
53 |
50 |
|
51 |
54 (* elements *) |
52 (* elements *) |
55 |
53 |
56 datatype tree = |
54 datatype tree = |
57 Elem of string * (string * string) list * tree list |
55 Elem of string * (string * string) list * tree list |
58 | Text of string; |
56 | Text of string; |
59 |
57 |
60 fun attribute (a, x) = a ^ " = \"" ^ (text x) ^ "\""; |
58 fun attribute (a, x) = a ^ " = \"" ^ text x ^ "\""; |
61 |
59 |
62 fun element name atts cs = |
60 fun element name atts cs = |
63 let val elem = space_implode " " (name :: map attribute atts) in |
61 let val elem = space_implode " " (name :: map attribute atts) in |
64 if null cs then enclose "<" "/>" elem |
62 if null cs then enclose "<" "/>" elem |
65 else enclose "<" ">" elem ^ implode cs ^ enclose "</" ">" name |
63 else enclose "<" ">" elem ^ implode cs ^ enclose "</" ">" name |
66 end; |
64 end; |
67 |
65 |
68 fun string_of_tree (Elem (name, atts, ts)) = |
66 fun string_of_tree (Elem (name, atts, ts)) = |
69 element name atts (map string_of_tree ts) |
67 element name atts (map string_of_tree ts) |
70 | string_of_tree (Text s) = s |
68 | string_of_tree (Text s) = s; |
71 |
69 |
72 val header = "<?xml version=\"1.0\"?>\n"; |
70 val header = "<?xml version=\"1.0\"?>\n"; |
73 |
71 |
74 |
72 |
75 (* parser *) |
73 (* parser *) |
76 |
74 |
77 fun err s (xs, _) = "XML parsing error: " ^ s ^ "\nfound:\n" ^ |
75 fun err s (xs, _) = |
78 implode (take (100, xs)); |
76 "XML parsing error: " ^ s ^ "\nfound: " ^ quote (Symbol.beginning 100 xs); |
79 |
77 |
80 val scan_whspc = Scan.repeat ($$ " " || $$ "\n" || $$ "\t"); |
78 val scan_whspc = Scan.any Symbol.is_blank; |
81 |
79 |
82 val literal = Scan.literal o Scan.make_lexicon o single o explode; |
80 fun scan_string s = Scan.list (Symbol.explode s) >> K s; |
83 |
81 |
84 val scan_special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode; |
82 val scan_special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode; |
85 |
83 |
86 val parse_chars = Scan.repeat1 (Scan.unless (scan_whspc -- $$ "<") |
84 val parse_chars = Scan.repeat1 (Scan.unless (scan_whspc -- $$ "<") |
87 (scan_special || Scan.one Symbol.not_eof)) >> implode; |
85 (scan_special || Scan.one Symbol.not_eof)) >> implode; |
88 |
86 |
89 val parse_cdata = literal "<![CDATA[" |-- |
87 val parse_cdata = scan_string "<![CDATA[" |-- |
90 (Scan.repeat (Scan.unless (literal "]]>") (Scan.one Symbol.not_eof)) >> |
88 (Scan.repeat (Scan.unless (scan_string "]]>") (Scan.one Symbol.not_eof)) >> |
91 implode) --| literal "]]>"; |
89 implode) --| scan_string "]]>"; |
92 |
90 |
93 val parse_att = |
91 val parse_att = |
94 Symbol.scan_id --| scan_whspc --| $$ "=" --| scan_whspc --| $$ "\"" -- |
92 Symbol.scan_id --| scan_whspc --| $$ "=" --| scan_whspc --| $$ "\"" -- |
95 (Scan.repeat (Scan.unless ($$ "\"") |
93 (Scan.repeat (Scan.unless ($$ "\"") |
96 (scan_special || Scan.one Symbol.not_eof)) >> implode) --| $$ "\""; |
94 (scan_special || Scan.one Symbol.not_eof)) >> implode) --| $$ "\""; |
97 |
95 |
98 val parse_comment = literal "<!--" -- |
96 val parse_comment = scan_string "<!--" -- |
99 Scan.repeat (Scan.unless (literal "-->") (Scan.one Symbol.not_eof)) -- |
97 Scan.repeat (Scan.unless (scan_string "-->") (Scan.one Symbol.not_eof)) -- |
100 literal "-->"; |
98 scan_string "-->"; |
101 |
99 |
102 val parse_pi = literal "<?" |-- |
100 val parse_pi = scan_string "<?" |-- |
103 Scan.repeat (Scan.unless (literal "?>") (Scan.one Symbol.not_eof)) --| |
101 Scan.repeat (Scan.unless (scan_string "?>") (Scan.one Symbol.not_eof)) --| |
104 literal "?>"; |
102 scan_string "?>"; |
105 |
103 |
106 fun parse_content xs = |
104 fun parse_content xs = |
107 ((Scan.optional (scan_whspc |-- parse_chars >> (single o Text)) [] -- |
105 ((Scan.optional (scan_whspc |-- parse_chars >> (single o Text)) [] -- |
108 (Scan.repeat (scan_whspc |-- |
106 (Scan.repeat (scan_whspc |-- |
109 ( parse_elem >> single |
107 ( parse_elem >> single |
115 |
113 |
116 and parse_elem xs = |
114 and parse_elem xs = |
117 ($$ "<" |-- Symbol.scan_id -- |
115 ($$ "<" |-- Symbol.scan_id -- |
118 Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) => |
116 Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) => |
119 !! (err "Expected > or />") |
117 !! (err "Expected > or />") |
120 ( literal "/>" >> K [] |
118 (scan_string "/>" >> K [] |
121 || $$ ">" |-- parse_content --| |
119 || $$ ">" |-- parse_content --| |
122 !! (err ("Expected </" ^ s ^ ">")) |
120 !! (err ("Expected </" ^ s ^ ">")) |
123 (literal ("</" ^ s) --| scan_whspc --| $$ ">"))) >> |
121 (scan_string ("</" ^ s) --| scan_whspc --| $$ ">"))) >> |
124 (fn ((s, atts), ts) => Elem (s, atts, ts))) xs; |
122 (fn ((s, atts), ts) => Elem (s, atts, ts))) xs; |
125 |
123 |
126 val parse_document = |
124 val parse_document = |
127 Scan.option (literal "<!DOCTYPE" -- scan_whspc |-- |
125 Scan.option (scan_string "<!DOCTYPE" -- scan_whspc |-- |
128 (Scan.repeat (Scan.unless ($$ ">") |
126 (Scan.repeat (Scan.unless ($$ ">") |
129 (Scan.one Symbol.not_eof)) >> implode) --| $$ ">" --| scan_whspc) -- |
127 (Scan.one Symbol.not_eof)) >> implode) --| $$ ">" --| scan_whspc) -- |
130 parse_elem; |
128 parse_elem; |
131 |
129 |
132 fun tree_of_string s = |
130 fun tree_of_string s = |
133 (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element") |
131 (case Scan.finite Symbol.stopper (Scan.error (!! (err "Malformed element") |
134 (scan_whspc |-- parse_elem --| scan_whspc))) (Symbol.explode s) of |
132 (scan_whspc |-- parse_elem --| scan_whspc))) (Symbol.explode s) of |
135 (x, []) => x |
133 (x, []) => x |
136 | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ |
134 | (_, ys) => error ("XML parsing error: Unprocessed input\n" ^ Symbol.beginning 100 ys)); |
137 implode (take (100, ys)))); |
|
138 |
135 |
139 end; |
136 end; |