author | aspinall |
Tue, 05 Dec 2006 22:04:24 +0100 | |
changeset 21655 | 01b2d13153c8 |
parent 21637 | a7b156c404e2 |
child 21867 | 8750fbc28d5c |
permissions | -rw-r--r-- |
21637 | 1 |
(* Title: Pure/ProofGeneral/parsing.ML |
2 |
ID: $Id$ |
|
3 |
Author: David Aspinall |
|
4 |
||
5 |
Parsing theory files to add PGIP markup. |
|
6 |
*) |
|
7 |
||
8 |
signature PGIP_PARSER = |
|
9 |
sig |
|
21655
01b2d13153c8
Document structure in pgip_markup.ML. Minor fixes.
aspinall
parents:
21637
diff
changeset
|
10 |
val xmls_of_str: string -> XML.content |
21637 | 11 |
end |
12 |
||
13 |
structure PgipParser : PGIP_PARSER = |
|
14 |
struct |
|
15 |
||
16 |
||
17 |
(** Parsing proof scripts without execution **) |
|
18 |
||
19 |
structure P = OuterParse; |
|
20 |
||
21 |
(* Notes. |
|
22 |
This is quite tricky, because 1) we need to put back whitespace which |
|
23 |
was removed during parsing so we can provide markup around commands; |
|
24 |
2) parsing is intertwined with execution in Isar so we have to repeat |
|
25 |
the parsing to extract interesting parts of commands. The trace of |
|
26 |
tokens parsed which is now recorded in each transition (added by |
|
27 |
Markus June '04) helps do this, but the mechanism is still a bad mess. |
|
28 |
||
29 |
If we had proper parse trees it would be easy, although having a |
|
30 |
fixed type of trees might be tricky with Isar's extensible parser. |
|
31 |
||
32 |
Probably the best solution is to produce the meta-information at |
|
33 |
the same time as the parse, for each command, e.g. by recording a |
|
34 |
list of (name,objtype) pairs which record the bindings created by |
|
35 |
a command. This would require changing the interfaces in |
|
36 |
outer_syntax.ML (or providing new ones), |
|
37 |
||
38 |
datatype metainfo = Binding of string * string (* name, objtype *) |
|
39 |
||
40 |
val command_withmetainfo: string -> string -> string -> |
|
41 |
(token list -> |
|
42 |
((Toplevel.transition -> Toplevel.transition) * metainfo list) * |
|
43 |
token list) -> parser |
|
44 |
||
45 |
We'd also like to render terms as they appear in output, but this |
|
46 |
will be difficult because inner syntax is extensible and we don't |
|
47 |
have the correct syntax to hand when just doing outer parsing |
|
48 |
without execution. A reasonable approximation could |
|
49 |
perhaps be obtained by using the syntax of the current context. |
|
50 |
However, this would mean more mess trying to pick out terms, |
|
51 |
so at this stage a more serious change to the Isar functions |
|
52 |
seems necessary. |
|
53 |
*) |
|
54 |
||
55 |
local |
|
56 |
fun markup_text str elt = [XML.Elem(elt, [], [XML.Text str])] |
|
57 |
fun markup_text_attrs str elt attrs = [XML.Elem(elt, attrs, [XML.Text str])] |
|
58 |
fun empty elt = [XML.Elem(elt, [], [])] |
|
59 |
||
60 |
fun whitespace str = XML.Elem("whitespace", [], [XML.Text str]) |
|
61 |
||
62 |
(* an extra token is needed to terminate the command *) |
|
63 |
val sync = OuterSyntax.scan "\\<^sync>"; |
|
64 |
||
65 |
fun named_item_elt_with nameattr toks str elt nameP objtyp = |
|
66 |
let |
|
67 |
val name = (fst (nameP (toks@sync))) |
|
68 |
handle _ => (error ("Error occurred in name parser for " ^ elt ^ |
|
69 |
"(objtype: " ^ objtyp ^ ")"); |
|
70 |
"") |
|
71 |
||
72 |
in |
|
73 |
[XML.Elem(elt, |
|
74 |
((if name="" then [] else [(nameattr, name)])@ |
|
75 |
(if objtyp="" then [] else [("objtype", objtyp)])), |
|
76 |
[XML.Text str])] |
|
77 |
end |
|
78 |
||
79 |
val named_item_elt = named_item_elt_with "name" |
|
80 |
val thmnamed_item_elt = named_item_elt_with "thmname" |
|
81 |
||
82 |
fun parse_warning msg = warning ("script->PGIP markup parser: " ^ msg) |
|
83 |
||
84 |
(* FIXME: allow dynamic extensions to language here: we need a hook in |
|
85 |
outer_syntax.ML to reset this table. *) |
|
86 |
||
87 |
val keywords_classification_table = ref (NONE: string Symtab.table option) |
|
88 |
||
89 |
fun get_keywords_classification_table () = |
|
90 |
case (!keywords_classification_table) of |
|
91 |
SOME t => t |
|
92 |
| NONE => (let |
|
93 |
val tab = fold (fn (c, _, k, _) => Symtab.update (c, k)) |
|
94 |
(OuterSyntax.dest_parsers ()) Symtab.empty; |
|
95 |
in (keywords_classification_table := SOME tab; tab) end) |
|
96 |
||
97 |
||
98 |
||
99 |
fun xmls_of_thy_begin (name,toks,str) = (* see isar_syn.ML/theoryP *) |
|
100 |
let |
|
101 |
val ((thyname,imports,files),_) = ThyHeader.args (toks@sync) |
|
102 |
in |
|
103 |
markup_text_attrs str "opentheory" |
|
104 |
([("thyname",thyname)] @ |
|
105 |
(if imports=[] then [] else |
|
106 |
[("parentnames", (space_implode ";" imports) ^ ";")])) |
|
107 |
end |
|
108 |
||
109 |
fun xmls_of_thy_decl (name,toks,str) = (* see isar_syn.ML/thy_decl cases *) |
|
110 |
let |
|
111 |
(* NB: PGIP only deals with single name bindings at the moment; |
|
112 |
potentially we could markup grouped definitions several times but |
|
113 |
that might suggest the wrong structure to the editor? |
|
114 |
Better alternative would be to put naming closer around text, |
|
115 |
but to do that we'd be much better off modifying the real parser |
|
116 |
instead of reconstructing it here. *) |
|
117 |
||
118 |
val plain_items = (* no names, unimportant names, or too difficult *) |
|
119 |
["defaultsort","arities","judgement","finalconsts", |
|
120 |
"syntax", "nonterminals", "translations", |
|
121 |
"global", "local", "hide", |
|
122 |
"ML_setup", "setup", "method_setup", |
|
123 |
"parse_ast_translation", "parse_translation", "print_translation", |
|
124 |
"typed_print_translation", "print_ast_translation", "token_translation", |
|
125 |
"oracle", |
|
126 |
"declare"] |
|
127 |
||
128 |
val plain_item = markup_text str "theoryitem" |
|
129 |
val comment_item = markup_text str "doccomment" |
|
130 |
val named_item = named_item_elt toks str "theoryitem" |
|
131 |
||
132 |
val opt_overloaded = P.opt_keyword "overloaded"; |
|
133 |
||
134 |
val thmnameP = (* see isar_syn.ML/theoremsP *) |
|
135 |
let |
|
136 |
val name_facts = P.and_list1 ((P.opt_thm_name "=" --| P.xthms1) >> #1) |
|
137 |
val theoremsP = P.opt_locale_target |-- name_facts >> (fn [] => "" | x::_ => x) |
|
138 |
in |
|
139 |
theoremsP |
|
140 |
end |
|
141 |
in |
|
142 |
(* TODO: ideally we would like to render terms properly, just as they are |
|
143 |
in output. This implies using PGML for symbols and variables/atoms. |
|
144 |
BUT it's rather tricky without having the correct concrete syntax to hand, |
|
145 |
and even if we did, we'd have to mess around here a whole lot more first |
|
146 |
to pick out the terms from the syntax. *) |
|
147 |
||
148 |
if member (op =) plain_items name then plain_item |
|
149 |
else case name of |
|
150 |
"text" => comment_item |
|
151 |
| "text_raw" => comment_item |
|
152 |
| "typedecl" => named_item (P.type_args |-- P.name) "type" |
|
153 |
| "types" => named_item (P.type_args |-- P.name) "type" |
|
154 |
| "classes" => named_item P.name "class" |
|
155 |
| "classrel" => named_item P.name "class" |
|
156 |
| "consts" => named_item (P.const >> #1) "term" |
|
157 |
| "axioms" => named_item (P.spec_name >> (#1 o #1)) "theorem" |
|
158 |
| "defs" => named_item (opt_overloaded |-- P.spec_name >> (#1 o #1)) "theorem" |
|
159 |
| "constdefs" => named_item P.name (* FIXME: heavily simplified/wrong *) "term" |
|
160 |
| "theorems" => named_item thmnameP "thmset" |
|
161 |
| "lemmas" => named_item thmnameP "thmset" |
|
162 |
| "oracle" => named_item P.name "oracle" |
|
163 |
| "locale" => named_item ((P.opt_keyword "open" >> not) |-- P.name) "locale" |
|
164 |
| _ => (parse_warning ("Unrecognized thy-decl command: " ^ name); plain_item) |
|
165 |
end |
|
166 |
||
167 |
fun xmls_of_thy_goal (name,toks,str) = |
|
168 |
let |
|
169 |
(* see isar_syn.ML/gen_theorem *) |
|
170 |
val statement = P.and_list1 (P.opt_thm_name ":" -- Scan.repeat1 P.propp); |
|
171 |
val general_statement = |
|
172 |
statement >> pair [] || Scan.repeat P.locale_element -- (P.$$$ "shows" |-- statement); |
|
173 |
||
174 |
val gen_theoremP = |
|
175 |
P.opt_locale_target -- Scan.optional (P.opt_thm_name ":" --| |
|
176 |
Scan.ahead (P.locale_keyword || P.$$$ "shows")) ("", []) -- |
|
177 |
general_statement >> (fn ((locale, a), (elems, concl)) => |
|
178 |
fst a) (* a : (bstring * Args.src list) *) |
|
179 |
||
180 |
val nameP = P.opt_locale_target |-- (Scan.optional ((P.opt_thm_name ":") >> #1) "") |
|
181 |
(* TODO: add preference values for attributes |
|
182 |
val prefval = XML.element "prefval" [("name","thm-kind"),("value",name)] |
|
183 |
*) |
|
184 |
in |
|
185 |
(thmnamed_item_elt toks str "opengoal" nameP "") @ |
|
186 |
(empty "openblock") |
|
187 |
end |
|
188 |
||
189 |
fun xmls_of_qed (name,markup) = |
|
190 |
let val qedmarkup = |
|
191 |
(case name of |
|
192 |
"sorry" => markup "postponegoal" |
|
193 |
| "oops" => markup "giveupgoal" |
|
194 |
| "done" => markup "closegoal" |
|
195 |
| "by" => markup "closegoal" (* nested or toplevel *) |
|
196 |
| "qed" => markup "closegoal" (* nested or toplevel *) |
|
197 |
| "." => markup "closegoal" (* nested or toplevel *) |
|
198 |
| ".." => markup "closegoal" (* nested or toplevel *) |
|
199 |
| other => (* default to closegoal: may be wrong for extns *) |
|
200 |
(parse_warning |
|
201 |
("Unrecognized qed command: " ^ quote other); |
|
202 |
markup "closegoal")) |
|
203 |
in qedmarkup @ (empty "closeblock") end |
|
204 |
||
205 |
fun xmls_of_kind kind (name,toks,str) = |
|
206 |
let val markup = markup_text str in |
|
207 |
case kind of |
|
208 |
"control" => markup "badcmd" |
|
209 |
| "diag" => markup "spuriouscmd" |
|
210 |
(* theory/files *) |
|
211 |
| "theory-begin" => xmls_of_thy_begin (name,toks,str) |
|
212 |
| "theory-decl" => xmls_of_thy_decl (name,toks,str) |
|
213 |
| "theory-heading" => markup "doccomment" |
|
214 |
| "theory-script" => markup "theorystep" |
|
215 |
| "theory-end" => markup "closetheory" |
|
216 |
(* proof control *) |
|
217 |
| "theory-goal" => xmls_of_thy_goal (name,toks,str) |
|
218 |
| "proof-heading" => markup "doccomment" |
|
219 |
| "proof-goal" => (markup "opengoal") @ (empty "openblock") (* nested proof: have, show, ... *) |
|
220 |
| "proof-block" => markup "proofstep" (* context-shift: proof, next; really "newgoal" *) |
|
221 |
| "proof-open" => (empty "openblock") @ (markup "proofstep") |
|
222 |
| "proof-close" => (markup "proofstep") @ (empty "closeblock") |
|
223 |
| "proof-script" => markup "proofstep" |
|
224 |
| "proof-chain" => markup "proofstep" |
|
225 |
| "proof-decl" => markup "proofstep" |
|
226 |
| "proof-asm" => markup "proofstep" |
|
227 |
| "proof-asm-goal" => (markup "opengoal") @ (empty "openblock") (* nested proof: obtain *) |
|
228 |
| "qed" => xmls_of_qed (name,markup) |
|
229 |
| "qed-block" => xmls_of_qed (name,markup) |
|
230 |
| "qed-global" => xmls_of_qed (name,markup) |
|
21655
01b2d13153c8
Document structure in pgip_markup.ML. Minor fixes.
aspinall
parents:
21637
diff
changeset
|
231 |
| other => (* default for anything else is "spuriouscmd", ignored for undo *) |
21637 | 232 |
(parse_warning ("Internal inconsistency, unrecognized keyword kind: " ^ quote other); |
233 |
markup "spuriouscmd") |
|
234 |
end |
|
235 |
in |
|
236 |
||
237 |
fun xmls_of_transition (name,str,toks) = |
|
238 |
let |
|
239 |
val classification = Symtab.lookup (get_keywords_classification_table ()) name |
|
240 |
in case classification of |
|
241 |
SOME k => (xmls_of_kind k (name,toks,str)) |
|
242 |
| NONE => (* an uncategorized keyword ignored for undo (maybe wrong) *) |
|
243 |
(parse_warning ("Uncategorized command found: " ^ name ^ " -- using spuriouscmd"); |
|
244 |
markup_text str "spuriouscmd") |
|
245 |
end |
|
246 |
||
247 |
fun markup_toks [] _ = [] |
|
248 |
| markup_toks toks x = markup_text (implode (map OuterLex.unparse toks)) x |
|
249 |
||
250 |
fun markup_comment_whs [] = [] |
|
251 |
| markup_comment_whs (toks as t::ts) = (* this would be a lot neater if we could match on toks! *) |
|
252 |
let |
|
253 |
val (prewhs,rest) = take_prefix (OuterLex.is_kind OuterLex.Space) toks |
|
254 |
in |
|
255 |
if (prewhs <> []) then |
|
256 |
whitespace (implode (map OuterLex.unparse prewhs)) |
|
257 |
:: (markup_comment_whs rest) |
|
258 |
else |
|
259 |
(markup_text (OuterLex.unparse t) "comment") @ |
|
260 |
(markup_comment_whs ts) |
|
261 |
end |
|
262 |
||
263 |
fun xmls_pre_cmd [] = ([],[]) |
|
264 |
| xmls_pre_cmd toks = |
|
265 |
let |
|
266 |
(* an extra token is needed to terminate the command *) |
|
267 |
val sync = OuterSyntax.scan "\\<^sync>"; |
|
268 |
val spaceP = Scan.bulk (Scan.one (fn t =>not (OuterLex.is_proper t)) >> OuterLex.val_of) >> implode |
|
269 |
val text_with_whs = |
|
270 |
((spaceP || Scan.succeed "") -- |
|
271 |
(P.short_ident || P.long_ident || P.sym_ident || P.string || P.number || P.verbatim) >> op^) |
|
272 |
-- (spaceP || Scan.succeed "") >> op^ |
|
273 |
val (prewhs,rest1) = take_prefix (not o OuterLex.is_proper) (toks@sync) |
|
274 |
(* NB: this collapses litcomment,(litcomment|whitespace)* to a single litcomment *) |
|
275 |
val (_,rest2) = (Scan.bulk (P.$$$ "--" |-- text_with_whs) >> implode || Scan.succeed "") rest1 |
|
276 |
val (postwhs,rest3) = take_prefix (not o OuterLex.is_proper) rest2 |
|
277 |
in |
|
278 |
((markup_comment_whs prewhs) @ |
|
279 |
(if (length rest2 = length rest1) then [] |
|
280 |
else markup_text (implode |
|
281 |
(map OuterLex.unparse (Library.take (length rest1 - length rest2, rest1)))) |
|
282 |
"doccomment") @ |
|
283 |
(markup_comment_whs postwhs), |
|
284 |
Library.take (length rest3 - 1,rest3)) |
|
285 |
end |
|
286 |
||
287 |
fun xmls_of_impropers toks = |
|
288 |
let |
|
289 |
val (xmls,rest) = xmls_pre_cmd toks |
|
290 |
in |
|
291 |
xmls @ (markup_toks rest "unparseable") |
|
292 |
end |
|
293 |
||
294 |
fun markup_unparseable str = markup_text str "unparseable" |
|
295 |
||
296 |
end |
|
297 |
||
298 |
||
299 |
local |
|
300 |
(* we have to weave together the whitespace/comments with proper tokens |
|
301 |
to reconstruct the input. *) |
|
302 |
(* TODO: see if duplicating isar_output/parse_thy can help here *) |
|
303 |
||
304 |
fun match_tokens ([],us,vs) = (rev vs,us) (* used, unused *) |
|
305 |
| match_tokens (t::ts,w::ws,vs) = |
|
306 |
if (t = w) then match_tokens (ts,ws,w::vs) |
|
307 |
else match_tokens (t::ts,ws,w::vs) |
|
308 |
| match_tokens _ = error ("match_tokens: mismatched input parse") |
|
309 |
in |
|
310 |
fun xmls_of_str str = |
|
311 |
let |
|
312 |
(* parsing: See outer_syntax.ML/toplevel_source *) |
|
313 |
fun parse_loop ([],[],xmls) = xmls |
|
314 |
| parse_loop ([],itoks,xmls) = xmls @ (xmls_of_impropers itoks) |
|
315 |
| parse_loop ((nm,toks,_)::trs,itoks,xmls) = |
|
316 |
let |
|
317 |
(* first proper token after whitespace/litcomment/whitespace is command *) |
|
318 |
val (xmls_pre_cmd,cmditoks') = xmls_pre_cmd itoks |
|
319 |
val (cmdtok,itoks') = case cmditoks' of x::xs => (x,xs) |
|
320 |
| _ => error("proof_general/parse_loop impossible") |
|
321 |
val (utoks,itoks'') = match_tokens (toks,itoks',[]) |
|
322 |
(* FIXME: should take trailing [w/s+] semicolon too in utoks *) |
|
323 |
||
324 |
val str = implode (map OuterLex.unparse (cmdtok::utoks)) |
|
325 |
||
326 |
val xmls_tr = xmls_of_transition (nm,str,toks) |
|
327 |
in |
|
328 |
parse_loop(trs,itoks'',xmls @ xmls_pre_cmd @ xmls_tr) |
|
329 |
end |
|
330 |
in |
|
331 |
(let val toks = OuterSyntax.scan str |
|
332 |
in |
|
333 |
parse_loop (OuterSyntax.read toks, toks, []) |
|
334 |
end) |
|
335 |
handle _ => markup_unparseable str |
|
336 |
end |
|
337 |
end |
|
338 |
||
339 |
end |