author | berghofe |
Wed, 27 Nov 2002 17:17:53 +0100 | |
changeset 13729 | 1a8dda49fd86 |
parent 12416 | 9b3e7a35da30 |
child 14185 | 9b3841638c06 |
permissions | -rw-r--r-- |
12416 | 1 |
(* Title: Pure/General/xml.ML |
2 |
ID: $Id$ |
|
13729
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
3 |
Author: Markus Wenzel, LMU Muenchen |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
4 |
Stefan Berghofer, TU Muenchen |
12416 | 5 |
License: GPL (GNU GENERAL PUBLIC LICENSE) |
6 |
||
13729
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
7 |
Basic support for XML input and output. |
12416 | 8 |
*) |
9 |
||
10 |
signature XML = |
|
11 |
sig |
|
13729
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
12 |
datatype tree = |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
13 |
Elem of string * (string * string) list * tree list |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
14 |
| Text of string |
12416 | 15 |
val element: string -> (string * string) list -> string list -> string |
16 |
val text: string -> string |
|
17 |
val header: string |
|
13729
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
18 |
val string_of_tree: tree -> string |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
19 |
val tree_of_string: string -> tree |
12416 | 20 |
end; |
21 |
||
22 |
structure XML: XML = |
|
23 |
struct |
|
24 |
||
25 |
(* character data *) |
|
26 |
||
27 |
fun encode "<" = "<" |
|
28 |
| encode ">" = ">" |
|
29 |
| encode "&" = "&" |
|
30 |
| encode "'" = "'" |
|
31 |
| encode "\"" = """ |
|
32 |
| encode c = c; |
|
33 |
||
13729
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
34 |
fun decode "<" = "<" |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
35 |
| decode ">" = ">" |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
36 |
| decode "&" = "&" |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
37 |
| decode "'" = "'" |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
38 |
| decode """ = "\"" |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
39 |
| decode c = c; |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
40 |
|
12416 | 41 |
val text = implode o map encode o explode; |
42 |
||
43 |
||
44 |
(* elements *) |
|
45 |
||
13729
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
46 |
datatype tree = |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
47 |
Elem of string * (string * string) list * tree list |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
48 |
| Text of string; |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
49 |
|
12416 | 50 |
fun attribute (a, x) = a ^ " = " ^ quote (text x); |
51 |
||
52 |
fun element name atts cs = |
|
53 |
let val elem = space_implode " " (name :: map attribute atts) in |
|
54 |
if null cs then enclose "<" "/>" elem |
|
55 |
else enclose "<" ">" elem ^ implode cs ^ enclose "</" ">" name |
|
56 |
end; |
|
57 |
||
13729
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
58 |
fun string_of_tree (Elem (name, atts, ts)) = |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
59 |
element name atts (map string_of_tree ts) |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
60 |
| string_of_tree (Text s) = s |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
61 |
|
12416 | 62 |
val header = "<?xml version=\"1.0\"?>\n"; |
63 |
||
13729
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
64 |
|
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
65 |
(* parser *) |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
66 |
|
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
67 |
datatype token_kind = Idt | Sym | Str | Chr; |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
68 |
|
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
69 |
fun is_kind k (k', _) = k = k'; |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
70 |
|
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
71 |
val stopper = ((Sym, ""), fn (Sym, "") => true | _ => false); |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
72 |
|
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
73 |
val scan_whspc = $$ " " || $$ "\n"; |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
74 |
|
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
75 |
val scan_symb = Scan.literal (Scan.make_lexicon |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
76 |
(map explode ["<", "</", ">", "/>", "\"", "=", "&"])) >> implode; |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
77 |
|
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
78 |
val scan_special = $$ "&" ^^ Symbol.scan_id ^^ $$ ";" >> decode; |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
79 |
|
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
80 |
val lexer = Scan.repeat (Scan.repeat scan_whspc |-- |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
81 |
( Scan.max (op <= o pairself (size o snd)) |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
82 |
(Symbol.scan_id >> pair Idt) |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
83 |
(Scan.repeat1 (scan_special || Scan.unless (scan_whspc || scan_symb) |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
84 |
(Scan.one Symbol.not_eof)) >> (pair Chr o implode)) |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
85 |
|| $$ "\"" |-- Scan.repeat |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
86 |
( scan_special |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
87 |
|| Scan.unless ($$ "\"") (Scan.one Symbol.not_eof)) --| |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
88 |
$$ "\"" >> (pair Str o implode) |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
89 |
|| scan_symb >> pair Sym) --| Scan.repeat scan_whspc); |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
90 |
|
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
91 |
val parse_att = (Scan.one (is_kind Idt) >> snd) --| $$ (Sym, "=") -- |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
92 |
(Scan.one (is_kind Str) >> snd); |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
93 |
|
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
94 |
fun parse_tree xs = |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
95 |
($$ (Sym, "<") |-- (Scan.one (is_kind Idt) >> snd) -- |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
96 |
Scan.repeat parse_att :-- (fn (s, _) => |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
97 |
$$ (Sym, "/>") >> K [] |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
98 |
|| $$ (Sym, ">") |-- Scan.repeat |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
99 |
( Scan.one (not o is_kind Sym) >> (Text o snd) |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
100 |
|| parse_tree) --| |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
101 |
$$ (Sym, "</") --| Scan.one (equal (Idt, s)) --| $$ (Sym, ">")) >> |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
102 |
(fn ((s, atts), ts) => Elem (s, atts, ts))) xs; |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
103 |
|
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
104 |
fun tree_of_string s = |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
105 |
let val msg = "Malformed XML expression:\n" ^ s |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
106 |
in case Scan.finite Symbol.stopper lexer (explode s) of |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
107 |
(toks, []) => (case Scan.finite stopper |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
108 |
(Scan.error (!! (K msg) parse_tree)) toks of |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
109 |
(t, []) => t | _ => error msg) |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
110 |
| _ => error msg |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
111 |
end; |
1a8dda49fd86
Added XML parser (useful for parsing PGIP / PGML).
berghofe
parents:
12416
diff
changeset
|
112 |
|
12416 | 113 |
end; |