|
21637
|
1 |
(* Title: Pure/ProofGeneral/parsing.ML
|
|
|
2 |
ID: $Id$
|
|
|
3 |
Author: David Aspinall
|
|
|
4 |
|
|
21867
|
5 |
Parsing Isabelle theory files to add PGIP markup.
|
|
21637
|
6 |
*)
|
|
|
7 |
|
|
|
8 |
signature PGIP_PARSER =
|
|
|
9 |
sig
|
|
21867
|
10 |
val pgip_parser: PgipMarkup.pgip_parser
|
|
21637
|
11 |
end
|
|
|
12 |
|
|
|
13 |
structure PgipParser : PGIP_PARSER =
|
|
|
14 |
struct
|
|
|
15 |
|
|
|
16 |
|
|
|
17 |
(** Parsing proof scripts without execution **)
|
|
|
18 |
|
|
|
19 |
structure P = OuterParse;
|
|
|
20 |
|
|
21867
|
21 |
structure D = PgipMarkup;
|
|
|
22 |
|
|
21637
|
23 |
(* Notes.
|
|
|
24 |
This is quite tricky, because 1) we need to put back whitespace which
|
|
|
25 |
was removed during parsing so we can provide markup around commands;
|
|
|
26 |
2) parsing is intertwined with execution in Isar so we have to repeat
|
|
|
27 |
the parsing to extract interesting parts of commands. The trace of
|
|
|
28 |
tokens parsed which is now recorded in each transition (added by
|
|
|
29 |
Markus June '04) helps do this, but the mechanism is still a bad mess.
|
|
|
30 |
|
|
|
31 |
If we had proper parse trees it would be easy, although having a
|
|
21867
|
32 |
fixed type of trees might be hard with Isar's extensible parser.
|
|
21637
|
33 |
|
|
|
34 |
Probably the best solution is to produce the meta-information at
|
|
|
35 |
the same time as the parse, for each command, e.g. by recording a
|
|
|
36 |
list of (name,objtype) pairs which record the bindings created by
|
|
|
37 |
a command. This would require changing the interfaces in
|
|
|
38 |
outer_syntax.ML (or providing new ones),
|
|
|
39 |
|
|
|
40 |
datatype metainfo = Binding of string * string (* name, objtype *)
|
|
|
41 |
|
|
|
42 |
val command_withmetainfo: string -> string -> string ->
|
|
|
43 |
(token list ->
|
|
|
44 |
((Toplevel.transition -> Toplevel.transition) * metainfo list) *
|
|
|
45 |
token list) -> parser
|
|
|
46 |
|
|
|
47 |
We'd also like to render terms as they appear in output, but this
|
|
|
48 |
will be difficult because inner syntax is extensible and we don't
|
|
|
49 |
have the correct syntax to hand when just doing outer parsing
|
|
|
50 |
without execution. A reasonable approximation could
|
|
|
51 |
perhaps be obtained by using the syntax of the current context.
|
|
|
52 |
However, this would mean more mess trying to pick out terms,
|
|
|
53 |
so at this stage a more serious change to the Isar functions
|
|
|
54 |
seems necessary.
|
|
|
55 |
*)
|
|
|
56 |
|
|
|
57 |
local
|
|
|
58 |
|
|
21867
|
59 |
fun whitespace str = D.Whitespace { text=str }
|
|
21637
|
60 |
|
|
|
61 |
(* an extra token is needed to terminate the command *)
|
|
|
62 |
val sync = OuterSyntax.scan "\\<^sync>";
|
|
|
63 |
|
|
21867
|
64 |
fun nameparse elt objtyp nameP toks =
|
|
|
65 |
(fst (nameP (toks@sync)))
|
|
|
66 |
handle _ => (error ("Error occurred in name parser for " ^ elt ^
|
|
|
67 |
"(objtype: " ^ objtyp ^ ")");
|
|
|
68 |
"")
|
|
21637
|
69 |
|
|
|
70 |
fun parse_warning msg = warning ("script->PGIP markup parser: " ^ msg)
|
|
|
71 |
|
|
|
72 |
(* FIXME: allow dynamic extensions to language here: we need a hook in
|
|
|
73 |
outer_syntax.ML to reset this table. *)
|
|
|
74 |
|
|
|
75 |
val keywords_classification_table = ref (NONE: string Symtab.table option)
|
|
|
76 |
|
|
|
77 |
fun get_keywords_classification_table () =
|
|
|
78 |
case (!keywords_classification_table) of
|
|
|
79 |
SOME t => t
|
|
|
80 |
| NONE => (let
|
|
|
81 |
val tab = fold (fn (c, _, k, _) => Symtab.update (c, k))
|
|
|
82 |
(OuterSyntax.dest_parsers ()) Symtab.empty;
|
|
|
83 |
in (keywords_classification_table := SOME tab; tab) end)
|
|
|
84 |
|
|
|
85 |
|
|
|
86 |
|
|
|
87 |
fun xmls_of_thy_begin (name,toks,str) = (* see isar_syn.ML/theoryP *)
|
|
|
88 |
let
|
|
|
89 |
val ((thyname,imports,files),_) = ThyHeader.args (toks@sync)
|
|
|
90 |
in
|
|
21867
|
91 |
[D.Opentheory { thyname=thyname,
|
|
|
92 |
parentnames = imports,
|
|
|
93 |
text = str }]
|
|
21637
|
94 |
end
|
|
|
95 |
|
|
|
96 |
fun xmls_of_thy_decl (name,toks,str) = (* see isar_syn.ML/thy_decl cases *)
|
|
|
97 |
let
|
|
|
98 |
(* NB: PGIP only deals with single name bindings at the moment;
|
|
|
99 |
potentially we could markup grouped definitions several times but
|
|
|
100 |
that might suggest the wrong structure to the editor?
|
|
|
101 |
Better alternative would be to put naming closer around text,
|
|
|
102 |
but to do that we'd be much better off modifying the real parser
|
|
|
103 |
instead of reconstructing it here. *)
|
|
|
104 |
|
|
|
105 |
val plain_items = (* no names, unimportant names, or too difficult *)
|
|
|
106 |
["defaultsort","arities","judgement","finalconsts",
|
|
|
107 |
"syntax", "nonterminals", "translations",
|
|
|
108 |
"global", "local", "hide",
|
|
|
109 |
"ML_setup", "setup", "method_setup",
|
|
|
110 |
"parse_ast_translation", "parse_translation", "print_translation",
|
|
|
111 |
"typed_print_translation", "print_ast_translation", "token_translation",
|
|
|
112 |
"oracle",
|
|
|
113 |
"declare"]
|
|
|
114 |
|
|
21867
|
115 |
fun named_item nameP ty =
|
|
|
116 |
[D.Theoryitem {text=str,name=SOME (nameparse "theoryitem" ty nameP toks),objtype=SOME ty}]
|
|
|
117 |
val plain_item =
|
|
|
118 |
[D.Theoryitem {text=str,name=NONE,objtype=NONE}]
|
|
|
119 |
val doccomment =
|
|
|
120 |
[D.Doccomment {text=str}]
|
|
21637
|
121 |
|
|
|
122 |
val opt_overloaded = P.opt_keyword "overloaded";
|
|
|
123 |
|
|
|
124 |
val thmnameP = (* see isar_syn.ML/theoremsP *)
|
|
|
125 |
let
|
|
|
126 |
val name_facts = P.and_list1 ((P.opt_thm_name "=" --| P.xthms1) >> #1)
|
|
|
127 |
val theoremsP = P.opt_locale_target |-- name_facts >> (fn [] => "" | x::_ => x)
|
|
|
128 |
in
|
|
|
129 |
theoremsP
|
|
|
130 |
end
|
|
|
131 |
in
|
|
|
132 |
(* TODO: ideally we would like to render terms properly, just as they are
|
|
|
133 |
in output. This implies using PGML for symbols and variables/atoms.
|
|
|
134 |
BUT it's rather tricky without having the correct concrete syntax to hand,
|
|
|
135 |
and even if we did, we'd have to mess around here a whole lot more first
|
|
|
136 |
to pick out the terms from the syntax. *)
|
|
|
137 |
|
|
|
138 |
if member (op =) plain_items name then plain_item
|
|
|
139 |
else case name of
|
|
21867
|
140 |
"text" => doccomment
|
|
|
141 |
| "text_raw" => doccomment
|
|
21637
|
142 |
| "typedecl" => named_item (P.type_args |-- P.name) "type"
|
|
|
143 |
| "types" => named_item (P.type_args |-- P.name) "type"
|
|
|
144 |
| "classes" => named_item P.name "class"
|
|
|
145 |
| "classrel" => named_item P.name "class"
|
|
|
146 |
| "consts" => named_item (P.const >> #1) "term"
|
|
|
147 |
| "axioms" => named_item (P.spec_name >> (#1 o #1)) "theorem"
|
|
|
148 |
| "defs" => named_item (opt_overloaded |-- P.spec_name >> (#1 o #1)) "theorem"
|
|
|
149 |
| "constdefs" => named_item P.name (* FIXME: heavily simplified/wrong *) "term"
|
|
|
150 |
| "theorems" => named_item thmnameP "thmset"
|
|
|
151 |
| "lemmas" => named_item thmnameP "thmset"
|
|
|
152 |
| "oracle" => named_item P.name "oracle"
|
|
|
153 |
| "locale" => named_item ((P.opt_keyword "open" >> not) |-- P.name) "locale"
|
|
21867
|
154 |
| _ => (parse_warning ("Unrecognized thy-decl command: " ^ name);
|
|
|
155 |
plain_item)
|
|
21637
|
156 |
end
|
|
|
157 |
|
|
|
158 |
fun xmls_of_thy_goal (name,toks,str) =
|
|
|
159 |
let
|
|
|
160 |
(* see isar_syn.ML/gen_theorem *)
|
|
|
161 |
val statement = P.and_list1 (P.opt_thm_name ":" -- Scan.repeat1 P.propp);
|
|
|
162 |
val general_statement =
|
|
|
163 |
statement >> pair [] || Scan.repeat P.locale_element -- (P.$$$ "shows" |-- statement);
|
|
|
164 |
|
|
|
165 |
val gen_theoremP =
|
|
|
166 |
P.opt_locale_target -- Scan.optional (P.opt_thm_name ":" --|
|
|
|
167 |
Scan.ahead (P.locale_keyword || P.$$$ "shows")) ("", []) --
|
|
|
168 |
general_statement >> (fn ((locale, a), (elems, concl)) =>
|
|
|
169 |
fst a) (* a : (bstring * Args.src list) *)
|
|
|
170 |
|
|
|
171 |
val nameP = P.opt_locale_target |-- (Scan.optional ((P.opt_thm_name ":") >> #1) "")
|
|
21867
|
172 |
val thmname = nameparse "opengoal" "theorem" nameP toks
|
|
21637
|
173 |
in
|
|
21867
|
174 |
[D.Opengoal {thmname=SOME thmname, text=str},
|
|
|
175 |
D.Openblock {metavarid=NONE,name=NONE,objtype=SOME "theorem-proof"}]
|
|
21637
|
176 |
end
|
|
|
177 |
|
|
21867
|
178 |
fun xmls_of_qed (name,str) =
|
|
21637
|
179 |
let val qedmarkup =
|
|
|
180 |
(case name of
|
|
21867
|
181 |
"sorry" => D.Postponegoal {text=str}
|
|
|
182 |
| "oops" => D.Giveupgoal {text=str}
|
|
|
183 |
| "done" => D.Closegoal {text=str}
|
|
|
184 |
| "by" => D.Closegoal {text=str} (* nested or toplevel *)
|
|
|
185 |
| "qed" => D.Closegoal {text=str} (* nested or toplevel *)
|
|
|
186 |
| "." => D.Closegoal {text=str} (* nested or toplevel *)
|
|
|
187 |
| ".." => D.Closegoal {text=str} (* nested or toplevel *)
|
|
21637
|
188 |
| other => (* default to closegoal: may be wrong for extns *)
|
|
|
189 |
(parse_warning
|
|
|
190 |
("Unrecognized qed command: " ^ quote other);
|
|
21867
|
191 |
D.Closegoal {text=str}))
|
|
|
192 |
in qedmarkup :: [D.Closeblock {}] end
|
|
21637
|
193 |
|
|
|
194 |
fun xmls_of_kind kind (name,toks,str) =
|
|
|
195 |
case kind of
|
|
21867
|
196 |
"control" => [D.Badcmd {text=str}]
|
|
|
197 |
| "diag" => [D.Theoryitem {name=NONE,objtype=NONE,
|
|
|
198 |
text=str}] (* FIXME: check NOT spuriouscmd *)
|
|
21637
|
199 |
(* theory/files *)
|
|
|
200 |
| "theory-begin" => xmls_of_thy_begin (name,toks,str)
|
|
|
201 |
| "theory-decl" => xmls_of_thy_decl (name,toks,str)
|
|
21867
|
202 |
| "theory-heading" => [D.Doccomment {text=str}]
|
|
|
203 |
| "theory-script" => [D.Theoryitem {name=NONE,objtype=NONE,text=str}]
|
|
|
204 |
| "theory-end" => [D.Closetheory {text=str}]
|
|
21637
|
205 |
(* proof control *)
|
|
|
206 |
| "theory-goal" => xmls_of_thy_goal (name,toks,str)
|
|
21867
|
207 |
| "proof-heading" => [D.Doccomment {text=str}]
|
|
|
208 |
|
|
|
209 |
| "proof-goal" => (* nested proof: have, show, ... *)
|
|
|
210 |
[D.Opengoal {text=str,thmname=NONE},
|
|
|
211 |
D.Openblock {metavarid=NONE,name=NONE,objtype=NONE}]
|
|
|
212 |
|
|
|
213 |
| "proof-block" => [D.Proofstep {text=str}]
|
|
|
214 |
| "proof-open" => [D.Openblock {metavarid=NONE,name=NONE,objtype=NONE},
|
|
|
215 |
D.Proofstep {text=str} ]
|
|
|
216 |
| "proof-close" => [D.Proofstep {text=str}, D.Closeblock {}]
|
|
|
217 |
| "proof-script" => [D.Proofstep {text=str}]
|
|
|
218 |
| "proof-chain" => [D.Proofstep {text=str}]
|
|
|
219 |
| "proof-decl" => [D.Proofstep {text=str}]
|
|
|
220 |
| "proof-asm" => [D.Proofstep {text=str}]
|
|
|
221 |
| "proof-asm-goal" => (* nested proof: obtain, ... *)
|
|
|
222 |
[D.Opengoal {text=str,thmname=NONE},
|
|
|
223 |
D.Openblock {metavarid=NONE,name=NONE,objtype=NONE}]
|
|
|
224 |
|
|
|
225 |
| "qed" => xmls_of_qed (name,str)
|
|
|
226 |
| "qed-block" => xmls_of_qed (name,str)
|
|
|
227 |
| "qed-global" => xmls_of_qed (name,str)
|
|
|
228 |
| other => (* default for anything else is "spuriouscmd", ***ignored for undo*** *)
|
|
21637
|
229 |
(parse_warning ("Internal inconsistency, unrecognized keyword kind: " ^ quote other);
|
|
21867
|
230 |
[D.Spuriouscmd {text=str}])
|
|
21637
|
231 |
in
|
|
|
232 |
|
|
|
233 |
fun xmls_of_transition (name,str,toks) =
|
|
|
234 |
let
|
|
|
235 |
val classification = Symtab.lookup (get_keywords_classification_table ()) name
|
|
|
236 |
in case classification of
|
|
|
237 |
SOME k => (xmls_of_kind k (name,toks,str))
|
|
|
238 |
| NONE => (* an uncategorized keyword ignored for undo (maybe wrong) *)
|
|
|
239 |
(parse_warning ("Uncategorized command found: " ^ name ^ " -- using spuriouscmd");
|
|
21867
|
240 |
[D.Spuriouscmd {text=str}])
|
|
21637
|
241 |
end
|
|
|
242 |
|
|
|
243 |
fun markup_comment_whs [] = []
|
|
|
244 |
| markup_comment_whs (toks as t::ts) = (* this would be a lot neater if we could match on toks! *)
|
|
|
245 |
let
|
|
|
246 |
val (prewhs,rest) = take_prefix (OuterLex.is_kind OuterLex.Space) toks
|
|
|
247 |
in
|
|
|
248 |
if (prewhs <> []) then
|
|
21867
|
249 |
D.Whitespace {text=implode (map OuterLex.unparse prewhs)}
|
|
21637
|
250 |
:: (markup_comment_whs rest)
|
|
|
251 |
else
|
|
21867
|
252 |
D.Comment {text=OuterLex.unparse t} ::
|
|
21637
|
253 |
(markup_comment_whs ts)
|
|
|
254 |
end
|
|
|
255 |
|
|
|
256 |
fun xmls_pre_cmd [] = ([],[])
|
|
|
257 |
| xmls_pre_cmd toks =
|
|
|
258 |
let
|
|
|
259 |
(* an extra token is needed to terminate the command *)
|
|
|
260 |
val sync = OuterSyntax.scan "\\<^sync>";
|
|
|
261 |
val spaceP = Scan.bulk (Scan.one (fn t =>not (OuterLex.is_proper t)) >> OuterLex.val_of) >> implode
|
|
|
262 |
val text_with_whs =
|
|
|
263 |
((spaceP || Scan.succeed "") --
|
|
|
264 |
(P.short_ident || P.long_ident || P.sym_ident || P.string || P.number || P.verbatim) >> op^)
|
|
|
265 |
-- (spaceP || Scan.succeed "") >> op^
|
|
|
266 |
val (prewhs,rest1) = take_prefix (not o OuterLex.is_proper) (toks@sync)
|
|
|
267 |
(* NB: this collapses litcomment,(litcomment|whitespace)* to a single litcomment *)
|
|
|
268 |
val (_,rest2) = (Scan.bulk (P.$$$ "--" |-- text_with_whs) >> implode || Scan.succeed "") rest1
|
|
|
269 |
val (postwhs,rest3) = take_prefix (not o OuterLex.is_proper) rest2
|
|
|
270 |
in
|
|
|
271 |
((markup_comment_whs prewhs) @
|
|
|
272 |
(if (length rest2 = length rest1) then []
|
|
21867
|
273 |
else
|
|
|
274 |
[D.Doccomment
|
|
|
275 |
{text= implode
|
|
|
276 |
(map OuterLex.unparse (Library.take (length rest1 - length rest2, rest1)))}]
|
|
|
277 |
@
|
|
|
278 |
(markup_comment_whs postwhs)),
|
|
|
279 |
Library.take (length rest3 - 1,rest3))
|
|
21637
|
280 |
end
|
|
|
281 |
|
|
|
282 |
fun xmls_of_impropers toks =
|
|
|
283 |
let
|
|
|
284 |
val (xmls,rest) = xmls_pre_cmd toks
|
|
21867
|
285 |
val unparsed =
|
|
|
286 |
case rest of [] => []
|
|
|
287 |
| _ => [D.Unparseable
|
|
|
288 |
{text= implode (map OuterLex.unparse rest)}]
|
|
21637
|
289 |
in
|
|
21867
|
290 |
xmls @ unparsed
|
|
21637
|
291 |
end
|
|
|
292 |
|
|
|
293 |
end
|
|
|
294 |
|
|
|
295 |
|
|
|
296 |
local
|
|
|
297 |
(* we have to weave together the whitespace/comments with proper tokens
|
|
|
298 |
to reconstruct the input. *)
|
|
|
299 |
(* TODO: see if duplicating isar_output/parse_thy can help here *)
|
|
|
300 |
|
|
|
301 |
fun match_tokens ([],us,vs) = (rev vs,us) (* used, unused *)
|
|
|
302 |
| match_tokens (t::ts,w::ws,vs) =
|
|
|
303 |
if (t = w) then match_tokens (ts,ws,w::vs)
|
|
|
304 |
else match_tokens (t::ts,ws,w::vs)
|
|
|
305 |
| match_tokens _ = error ("match_tokens: mismatched input parse")
|
|
|
306 |
in
|
|
21867
|
307 |
fun pgip_parser str =
|
|
21637
|
308 |
let
|
|
|
309 |
(* parsing: See outer_syntax.ML/toplevel_source *)
|
|
|
310 |
fun parse_loop ([],[],xmls) = xmls
|
|
|
311 |
| parse_loop ([],itoks,xmls) = xmls @ (xmls_of_impropers itoks)
|
|
|
312 |
| parse_loop ((nm,toks,_)::trs,itoks,xmls) =
|
|
|
313 |
let
|
|
|
314 |
(* first proper token after whitespace/litcomment/whitespace is command *)
|
|
|
315 |
val (xmls_pre_cmd,cmditoks') = xmls_pre_cmd itoks
|
|
|
316 |
val (cmdtok,itoks') = case cmditoks' of x::xs => (x,xs)
|
|
|
317 |
| _ => error("proof_general/parse_loop impossible")
|
|
|
318 |
val (utoks,itoks'') = match_tokens (toks,itoks',[])
|
|
|
319 |
(* FIXME: should take trailing [w/s+] semicolon too in utoks *)
|
|
|
320 |
|
|
|
321 |
val str = implode (map OuterLex.unparse (cmdtok::utoks))
|
|
|
322 |
|
|
|
323 |
val xmls_tr = xmls_of_transition (nm,str,toks)
|
|
|
324 |
in
|
|
|
325 |
parse_loop(trs,itoks'',xmls @ xmls_pre_cmd @ xmls_tr)
|
|
|
326 |
end
|
|
|
327 |
in
|
|
|
328 |
(let val toks = OuterSyntax.scan str
|
|
|
329 |
in
|
|
|
330 |
parse_loop (OuterSyntax.read toks, toks, [])
|
|
|
331 |
end)
|
|
21867
|
332 |
handle _ => [D.Unparseable {text=str}]
|
|
21637
|
333 |
end
|
|
|
334 |
end
|
|
|
335 |
|
|
|
336 |
end
|