author | Lars Hupel <lars.hupel@mytum.de> |
Thu, 30 Aug 2018 10:42:42 +0200 | |
changeset 68909 | 34e777447ed5 |
parent 68503 | 8d9239158d7a |
child 69065 | 440f7a575760 |
permissions | -rw-r--r-- |
61656 | 1 |
(*:maxLineLen=78:*) |
2 |
||
28762 | 3 |
theory Inner_Syntax |
63531 | 4 |
imports Main Base |
28762 | 5 |
begin |
6 |
||
58618 | 7 |
chapter \<open>Inner syntax --- the term language \label{ch:inner-syntax}\<close> |
28762 | 8 |
|
62106 | 9 |
text \<open> |
10 |
The inner syntax of Isabelle provides concrete notation for the main |
|
11 |
entities of the logical framework, notably \<open>\<lambda>\<close>-terms with types and type |
|
12 |
classes. Applications may either extend existing syntactic categories by |
|
13 |
additional notation, or define new sub-languages that are linked to the |
|
14 |
standard term language via some explicit markers. For example \<^verbatim>\<open>FOO\<close>~\<open>foo\<close> |
|
15 |
could embed the syntax corresponding to some user-defined nonterminal \<open>foo\<close> |
|
16 |
--- within the bounds of the given lexical syntax of Isabelle/Pure. |
|
46282 | 17 |
|
62106 | 18 |
The most basic way to specify concrete syntax for logical entities works via |
19 |
mixfix annotations (\secref{sec:mixfix}), which may usually be given as part |
|
20 |
of the original declaration or via explicit notation commands later on |
|
21 |
(\secref{sec:notation}). This already covers many needs of concrete syntax |
|
22 |
without having to understand the full complexity of inner syntax layers. |
|
46282 | 23 |
|
62106 | 24 |
Further details of the syntax engine involve the classical distinction of |
25 |
lexical language versus context-free grammar (see \secref{sec:pure-syntax}), |
|
26 |
and various mechanisms for \<^emph>\<open>syntax transformations\<close> (see |
|
27 |
\secref{sec:syntax-transformations}). |
|
58618 | 28 |
\<close> |
46282 | 29 |
|
30 |
||
58618 | 31 |
section \<open>Printing logical entities\<close> |
28762 | 32 |
|
58618 | 33 |
subsection \<open>Diagnostic commands \label{sec:print-diag}\<close> |
28762 | 34 |
|
58618 | 35 |
text \<open> |
28762 | 36 |
\begin{matharray}{rcl} |
61493 | 37 |
@{command_def "typ"}\<open>\<^sup>*\<close> & : & \<open>context \<rightarrow>\<close> \\ |
38 |
@{command_def "term"}\<open>\<^sup>*\<close> & : & \<open>context \<rightarrow>\<close> \\ |
|
39 |
@{command_def "prop"}\<open>\<^sup>*\<close> & : & \<open>context \<rightarrow>\<close> \\ |
|
40 |
@{command_def "thm"}\<open>\<^sup>*\<close> & : & \<open>context \<rightarrow>\<close> \\ |
|
41 |
@{command_def "prf"}\<open>\<^sup>*\<close> & : & \<open>context \<rightarrow>\<close> \\ |
|
42 |
@{command_def "full_prf"}\<open>\<^sup>*\<close> & : & \<open>context \<rightarrow>\<close> \\ |
|
43 |
@{command_def "print_state"}\<open>\<^sup>*\<close> & : & \<open>any \<rightarrow>\<close> \\ |
|
28762 | 44 |
\end{matharray} |
45 |
||
46 |
These diagnostic commands assist interactive development by printing |
|
47 |
internal logical entities in a human-readable fashion. |
|
48 |
||
55112
b1a5d603fd12
prefer rail cartouche -- avoid back-slashed quotes;
wenzelm
parents:
55108
diff
changeset
|
49 |
@{rail \<open> |
48792 | 50 |
@@{command typ} @{syntax modes}? @{syntax type} ('::' @{syntax sort})? |
28762 | 51 |
; |
42596
6c621a9d612a
modernized rail diagrams using @{rail} antiquotation;
wenzelm
parents:
42358
diff
changeset
|
52 |
@@{command term} @{syntax modes}? @{syntax term} |
28762 | 53 |
; |
42596
6c621a9d612a
modernized rail diagrams using @{rail} antiquotation;
wenzelm
parents:
42358
diff
changeset
|
54 |
@@{command prop} @{syntax modes}? @{syntax prop} |
28762 | 55 |
; |
62969 | 56 |
@@{command thm} @{syntax modes}? @{syntax thms} |
28762 | 57 |
; |
62969 | 58 |
( @@{command prf} | @@{command full_prf} ) @{syntax modes}? @{syntax thms}? |
28762 | 59 |
; |
52430 | 60 |
@@{command print_state} @{syntax modes}? |
28762 | 61 |
; |
42596
6c621a9d612a
modernized rail diagrams using @{rail} antiquotation;
wenzelm
parents:
42358
diff
changeset
|
62 |
@{syntax_def modes}: '(' (@{syntax name} + ) ')' |
55112
b1a5d603fd12
prefer rail cartouche -- avoid back-slashed quotes;
wenzelm
parents:
55108
diff
changeset
|
63 |
\<close>} |
28762 | 64 |
|
62106 | 65 |
\<^descr> @{command "typ"}~\<open>\<tau>\<close> reads and prints a type expression according to the |
66 |
current context. |
|
48792 | 67 |
|
62106 | 68 |
\<^descr> @{command "typ"}~\<open>\<tau> :: s\<close> uses type-inference to determine the most |
69 |
general way to make \<open>\<tau>\<close> conform to sort \<open>s\<close>. For concrete \<open>\<tau>\<close> this checks if |
|
70 |
the type belongs to that sort. Dummy type parameters ``\<open>_\<close>'' (underscore) |
|
71 |
are assigned to fresh type variables with most general sorts, according to |
|
72 |
the principles of type-inference. |
|
28766
accab7594b8e
misc tuning and rearrangement of section "Printing logical entities";
wenzelm
parents:
28765
diff
changeset
|
73 |
|
62106 | 74 |
\<^descr> @{command "term"}~\<open>t\<close> and @{command "prop"}~\<open>\<phi>\<close> read, type-check and |
75 |
print terms or propositions according to the current theory or proof |
|
76 |
context; the inferred type of \<open>t\<close> is output as well. Note that these |
|
77 |
commands are also useful in inspecting the current environment of term |
|
78 |
abbreviations. |
|
28762 | 79 |
|
62106 | 80 |
\<^descr> @{command "thm"}~\<open>a\<^sub>1 \<dots> a\<^sub>n\<close> retrieves theorems from the current theory |
81 |
or proof context. Note that any attributes included in the theorem |
|
82 |
specifications are applied to a temporary context derived from the current |
|
83 |
theory or proof; the result is discarded, i.e.\ attributes involved in |
|
84 |
\<open>a\<^sub>1, \<dots>, a\<^sub>n\<close> do not have any permanent effect. |
|
28762 | 85 |
|
62106 | 86 |
\<^descr> @{command "prf"} displays the (compact) proof term of the current proof |
63624
994d1a1105ef
more informative 'prf' and 'full_prf', based on HOL/Proofs/ex/XML_Data.thy;
wenzelm
parents:
63531
diff
changeset
|
87 |
state (if present), or of the given theorems. Note that this requires an |
994d1a1105ef
more informative 'prf' and 'full_prf', based on HOL/Proofs/ex/XML_Data.thy;
wenzelm
parents:
63531
diff
changeset
|
88 |
underlying logic image with proof terms enabled, e.g.\ \<open>HOL-Proofs\<close>. |
28762 | 89 |
|
62106 | 90 |
\<^descr> @{command "full_prf"} is like @{command "prf"}, but displays the full |
91 |
proof term, i.e.\ also displays information omitted in the compact proof |
|
92 |
term, which is denoted by ``\<open>_\<close>'' placeholders there. |
|
28762 | 93 |
|
62106 | 94 |
\<^descr> @{command "print_state"} prints the current proof state (if present), |
95 |
including current facts and goals. |
|
28762 | 96 |
|
61997 | 97 |
All of the diagnostic commands above admit a list of \<open>modes\<close> to be |
98 |
specified, which is appended to the current print mode; see also |
|
99 |
\secref{sec:print-modes}. Thus the output behavior may be modified according to |
|
100 |
particular print mode features. For example, @{command |
|
101 |
"print_state"}~\<open>(latex)\<close> prints the current proof state with mathematical |
|
102 |
symbols and special characters represented in {\LaTeX} source, according to |
|
103 |
the Isabelle style @{cite "isabelle-system"}. |
|
28762 | 104 |
|
62106 | 105 |
Note that antiquotations (cf.\ \secref{sec:antiq}) provide a more systematic |
106 |
way to include formal items into the printed text document. |
|
58618 | 107 |
\<close> |
28762 | 108 |
|
109 |
||
58618 | 110 |
subsection \<open>Details of printed content\<close> |
28763
b5e6122ff575
added pretty printing options (from old ref manual);
wenzelm
parents:
28762
diff
changeset
|
111 |
|
58618 | 112 |
text \<open> |
42655 | 113 |
\begin{tabular}{rcll} |
61493 | 114 |
@{attribute_def show_markup} & : & \<open>attribute\<close> \\ |
115 |
@{attribute_def show_types} & : & \<open>attribute\<close> & default \<open>false\<close> \\ |
|
116 |
@{attribute_def show_sorts} & : & \<open>attribute\<close> & default \<open>false\<close> \\ |
|
117 |
@{attribute_def show_consts} & : & \<open>attribute\<close> & default \<open>false\<close> \\ |
|
118 |
@{attribute_def show_abbrevs} & : & \<open>attribute\<close> & default \<open>true\<close> \\ |
|
119 |
@{attribute_def show_brackets} & : & \<open>attribute\<close> & default \<open>false\<close> \\ |
|
120 |
@{attribute_def names_long} & : & \<open>attribute\<close> & default \<open>false\<close> \\ |
|
121 |
@{attribute_def names_short} & : & \<open>attribute\<close> & default \<open>false\<close> \\ |
|
122 |
@{attribute_def names_unique} & : & \<open>attribute\<close> & default \<open>true\<close> \\ |
|
123 |
@{attribute_def eta_contract} & : & \<open>attribute\<close> & default \<open>true\<close> \\ |
|
124 |
@{attribute_def goals_limit} & : & \<open>attribute\<close> & default \<open>10\<close> \\ |
|
125 |
@{attribute_def show_main_goal} & : & \<open>attribute\<close> & default \<open>false\<close> \\ |
|
126 |
@{attribute_def show_hyps} & : & \<open>attribute\<close> & default \<open>false\<close> \\ |
|
127 |
@{attribute_def show_tags} & : & \<open>attribute\<close> & default \<open>false\<close> \\ |
|
128 |
@{attribute_def show_question_marks} & : & \<open>attribute\<close> & default \<open>true\<close> \\ |
|
42655 | 129 |
\end{tabular} |
61421 | 130 |
\<^medskip> |
28763
b5e6122ff575
added pretty printing options (from old ref manual);
wenzelm
parents:
28762
diff
changeset
|
131 |
|
62106 | 132 |
These configuration options control the detail of information that is |
133 |
displayed for types, terms, theorems, goals etc. See also |
|
42655 | 134 |
\secref{sec:config}. |
28765
da8f6f4a74be
misc tuning and rearrangement of section "Printing logical entities";
wenzelm
parents:
28763
diff
changeset
|
135 |
|
62106 | 136 |
\<^descr> @{attribute show_markup} controls direct inlining of markup into the |
137 |
printed representation of formal entities --- notably type and sort |
|
138 |
constraints. This enables Prover IDE users to retrieve that information via |
|
139 |
tooltips or popups while hovering with the mouse over the output window, for |
|
140 |
example. Consequently, this option is enabled by default for Isabelle/jEdit. |
|
49699 | 141 |
|
62106 | 142 |
\<^descr> @{attribute show_types} and @{attribute show_sorts} control printing of |
143 |
type constraints for term variables, and sort constraints for type |
|
144 |
variables. By default, neither of these is shown in output. If @{attribute |
|
145 |
show_sorts} is enabled, types are always shown as well. In Isabelle/jEdit, |
|
146 |
manual setting of these options is normally not required thanks to |
|
147 |
@{attribute show_markup} above. |
|
28763
b5e6122ff575
added pretty printing options (from old ref manual);
wenzelm
parents:
28762
diff
changeset
|
148 |
|
62106 | 149 |
Note that displaying types and sorts may explain why a polymorphic inference |
150 |
rule fails to resolve with some goal, or why a rewrite rule does not apply |
|
151 |
as expected. |
|
28765
da8f6f4a74be
misc tuning and rearrangement of section "Printing logical entities";
wenzelm
parents:
28763
diff
changeset
|
152 |
|
62106 | 153 |
\<^descr> @{attribute show_consts} controls printing of types of constants when |
154 |
displaying a goal state. |
|
28763
b5e6122ff575
added pretty printing options (from old ref manual);
wenzelm
parents:
28762
diff
changeset
|
155 |
|
62106 | 156 |
Note that the output can be enormous, because polymorphic constants often |
157 |
occur at several different type instances. |
|
158 |
||
159 |
\<^descr> @{attribute show_abbrevs} controls folding of constant abbreviations. |
|
40879
ca132ef44944
configuration option "show_abbrevs" supersedes print mode "no_abbrevs", with inverted meaning;
wenzelm
parents:
40255
diff
changeset
|
160 |
|
62106 | 161 |
\<^descr> @{attribute show_brackets} controls bracketing in pretty printed output. |
162 |
If enabled, all sub-expressions of the pretty printing tree will be |
|
163 |
parenthesized, even if this produces malformed term syntax! This crude way |
|
164 |
of showing the internal structure of pretty printed entities may |
|
165 |
occasionally help to diagnose problems with operator priorities, for |
|
166 |
example. |
|
28763
b5e6122ff575
added pretty printing options (from old ref manual);
wenzelm
parents:
28762
diff
changeset
|
167 |
|
62106 | 168 |
\<^descr> @{attribute names_long}, @{attribute names_short}, and @{attribute |
169 |
names_unique} control the way of printing fully qualified internal names in |
|
170 |
external form. See also \secref{sec:antiq} for the document antiquotation |
|
171 |
options of the same names. |
|
42358
b47d41d9f4b5
Name_Space: proper configuration options long_names, short_names, unique_names instead of former unsynchronized references;
wenzelm
parents:
42279
diff
changeset
|
172 |
|
62106 | 173 |
\<^descr> @{attribute eta_contract} controls \<open>\<eta>\<close>-contracted printing of terms. |
28763
b5e6122ff575
added pretty printing options (from old ref manual);
wenzelm
parents:
28762
diff
changeset
|
174 |
|
62106 | 175 |
The \<open>\<eta>\<close>-contraction law asserts @{prop "(\<lambda>x. f x) \<equiv> f"}, provided \<open>x\<close> is not |
176 |
free in \<open>f\<close>. It asserts \<^emph>\<open>extensionality\<close> of functions: @{prop "f \<equiv> g"} if |
|
177 |
@{prop "f x \<equiv> g x"} for all \<open>x\<close>. Higher-order unification frequently puts |
|
178 |
terms into a fully \<open>\<eta>\<close>-expanded form. For example, if \<open>F\<close> has type \<open>(\<tau> \<Rightarrow> \<tau>) |
|
179 |
\<Rightarrow> \<tau>\<close> then its expanded form is @{term "\<lambda>h. F (\<lambda>x. h x)"}. |
|
28763
b5e6122ff575
added pretty printing options (from old ref manual);
wenzelm
parents:
28762
diff
changeset
|
180 |
|
62106 | 181 |
Enabling @{attribute eta_contract} makes Isabelle perform \<open>\<eta>\<close>-contractions |
182 |
before printing, so that @{term "\<lambda>h. F (\<lambda>x. h x)"} appears simply as \<open>F\<close>. |
|
28763
b5e6122ff575
added pretty printing options (from old ref manual);
wenzelm
parents:
28762
diff
changeset
|
183 |
|
62106 | 184 |
Note that the distinction between a term and its \<open>\<eta>\<close>-expanded form |
185 |
occasionally matters. While higher-order resolution and rewriting operate |
|
186 |
modulo \<open>\<alpha>\<beta>\<eta>\<close>-conversion, some other tools might look at terms more |
|
187 |
discretely. |
|
28763
b5e6122ff575
added pretty printing options (from old ref manual);
wenzelm
parents:
28762
diff
changeset
|
188 |
|
62106 | 189 |
\<^descr> @{attribute goals_limit} controls the maximum number of subgoals to be |
190 |
printed. |
|
28763
b5e6122ff575
added pretty printing options (from old ref manual);
wenzelm
parents:
28762
diff
changeset
|
191 |
|
62106 | 192 |
\<^descr> @{attribute show_main_goal} controls whether the main result to be proven |
193 |
should be displayed. This information might be relevant for schematic goals, |
|
194 |
to inspect the current claim that has been synthesized so far. |
|
28763
b5e6122ff575
added pretty printing options (from old ref manual);
wenzelm
parents:
28762
diff
changeset
|
195 |
|
62106 | 196 |
\<^descr> @{attribute show_hyps} controls printing of implicit hypotheses of local |
197 |
facts. Normally, only those hypotheses are displayed that are \<^emph>\<open>not\<close> covered |
|
198 |
by the assumptions of the current context: this situation indicates a fault |
|
199 |
in some tool being used. |
|
28763
b5e6122ff575
added pretty printing options (from old ref manual);
wenzelm
parents:
28762
diff
changeset
|
200 |
|
62106 | 201 |
By enabling @{attribute show_hyps}, output of \<^emph>\<open>all\<close> hypotheses can be |
202 |
enforced, which is occasionally useful for diagnostic purposes. |
|
28763
b5e6122ff575
added pretty printing options (from old ref manual);
wenzelm
parents:
28762
diff
changeset
|
203 |
|
62106 | 204 |
\<^descr> @{attribute show_tags} controls printing of extra annotations within |
205 |
theorems, such as internal position information, or the case names being |
|
206 |
attached by the attribute @{attribute case_names}. |
|
28765
da8f6f4a74be
misc tuning and rearrangement of section "Printing logical entities";
wenzelm
parents:
28763
diff
changeset
|
207 |
|
62106 | 208 |
Note that the @{attribute tagged} and @{attribute untagged} attributes |
209 |
provide low-level access to the collection of tags associated with a |
|
210 |
theorem. |
|
28765
da8f6f4a74be
misc tuning and rearrangement of section "Printing logical entities";
wenzelm
parents:
28763
diff
changeset
|
211 |
|
62106 | 212 |
\<^descr> @{attribute show_question_marks} controls printing of question marks for |
213 |
schematic variables, such as \<open>?x\<close>. Only the leading question mark is |
|
214 |
affected, the remaining text is unchanged (including proper markup for |
|
215 |
schematic variables that might be relevant for user interfaces). |
|
58618 | 216 |
\<close> |
28765
da8f6f4a74be
misc tuning and rearrangement of section "Printing logical entities";
wenzelm
parents:
28763
diff
changeset
|
217 |
|
da8f6f4a74be
misc tuning and rearrangement of section "Printing logical entities";
wenzelm
parents:
28763
diff
changeset
|
218 |
|
58618 | 219 |
subsection \<open>Alternative print modes \label{sec:print-modes}\<close> |
46284 | 220 |
|
58618 | 221 |
text \<open> |
46284 | 222 |
\begin{mldecls} |
223 |
@{index_ML print_mode_value: "unit -> string list"} \\ |
|
224 |
@{index_ML Print_Mode.with_modes: "string list -> ('a -> 'b) -> 'a -> 'b"} \\ |
|
225 |
\end{mldecls} |
|
226 |
||
62106 | 227 |
The \<^emph>\<open>print mode\<close> facility allows modifying various operations for printing. |
228 |
Commands like @{command typ}, @{command term}, @{command thm} (see |
|
229 |
\secref{sec:print-diag}) take additional print modes as optional argument. |
|
230 |
The underlying ML operations are as follows. |
|
46284 | 231 |
|
62106 | 232 |
\<^descr> @{ML "print_mode_value ()"} yields the list of currently active print |
233 |
mode names. This should be understood as a symbolic representation of |
|
234 |
certain individual features for printing (with precedence from left to |
|
235 |
right). |
|
46284 | 236 |
|
62106 | 237 |
\<^descr> @{ML Print_Mode.with_modes}~\<open>modes f x\<close> evaluates \<open>f x\<close> in an execution |
238 |
context where the print mode is prepended by the given \<open>modes\<close>. This |
|
239 |
provides a thread-safe way to augment print modes. It is also monotonic in |
|
240 |
the set of mode names: it retains the default print mode that certain |
|
241 |
user-interfaces might have installed for their proper functioning! |
|
46284 | 242 |
|
61421 | 243 |
\<^medskip> |
62106 | 244 |
The pretty printer for inner syntax maintains alternative mixfix productions |
245 |
for any print mode name invented by the user, say in commands like @{command |
|
246 |
notation} or @{command abbreviation}. Mode names can be arbitrary, but the |
|
247 |
following ones have a specific meaning by convention: |
|
46284 | 248 |
|
62106 | 249 |
\<^item> \<^verbatim>\<open>""\<close> (the empty string): default mode; implicitly active as last |
250 |
element in the list of modes. |
|
46284 | 251 |
|
62106 | 252 |
\<^item> \<^verbatim>\<open>input\<close>: dummy print mode that is never active; may be used to specify |
253 |
notation that is only available for input. |
|
46284 | 254 |
|
62106 | 255 |
\<^item> \<^verbatim>\<open>internal\<close>: dummy print mode that is never active; used internally in |
256 |
Isabelle/Pure. |
|
46284 | 257 |
|
62106 | 258 |
\<^item> \<^verbatim>\<open>ASCII\<close>: prefer ASCII art over mathematical symbols. |
46284 | 259 |
|
62106 | 260 |
\<^item> \<^verbatim>\<open>latex\<close>: additional mode that is active in {\LaTeX} document |
261 |
preparation of Isabelle theory sources; allows providing alternative |
|
262 |
output notation. |
|
58618 | 263 |
\<close> |
46284 | 264 |
|
265 |
||
58618 | 266 |
section \<open>Mixfix annotations \label{sec:mixfix}\<close> |
28762 | 267 |
|
62106 | 268 |
text \<open> |
269 |
Mixfix annotations specify concrete \<^emph>\<open>inner syntax\<close> of Isabelle types and |
|
270 |
terms. Locally fixed parameters in toplevel theorem statements, locale and |
|
271 |
class specifications also admit mixfix annotations in a fairly uniform |
|
272 |
manner. A mixfix annotation describes the concrete syntax, the translation |
|
273 |
to abstract syntax, and the pretty printing. Special case annotations |
|
274 |
provide a simple means of specifying infix operators and binders. |
|
46290 | 275 |
|
62106 | 276 |
Isabelle mixfix syntax is inspired by {\OBJ} @{cite OBJ}. It allows one to |
277 |
specify any context-free priority grammar, which is more general than the |
|
278 |
fixity declarations of ML and Prolog. |
|
28762 | 279 |
|
55112
b1a5d603fd12
prefer rail cartouche -- avoid back-slashed quotes;
wenzelm
parents:
55108
diff
changeset
|
280 |
@{rail \<open> |
51654
8450b944e58a
just one syntax category "mixfix" -- check structure annotation semantically;
wenzelm
parents:
50636
diff
changeset
|
281 |
@{syntax_def mixfix}: '(' |
58761 | 282 |
(@{syntax template} prios? @{syntax nat}? | |
283 |
(@'infix' | @'infixl' | @'infixr') @{syntax template} @{syntax nat} | |
|
68272 | 284 |
@'binder' @{syntax template} prio? @{syntax nat} | |
58761 | 285 |
@'structure') ')' |
46290 | 286 |
; |
62807 | 287 |
@{syntax template}: string |
46289 | 288 |
; |
42596
6c621a9d612a
modernized rail diagrams using @{rail} antiquotation;
wenzelm
parents:
42358
diff
changeset
|
289 |
prios: '[' (@{syntax nat} + ',') ']' |
68272 | 290 |
; |
291 |
prio: '[' @{syntax nat} ']' |
|
55112
b1a5d603fd12
prefer rail cartouche -- avoid back-slashed quotes;
wenzelm
parents:
55108
diff
changeset
|
292 |
\<close>} |
28762 | 293 |
|
62106 | 294 |
The string given as \<open>template\<close> may include literal text, spacing, blocks, |
295 |
and arguments (denoted by ``\<open>_\<close>''); the special symbol ``\<^verbatim>\<open>\<index>\<close>'' (printed as |
|
296 |
``\<open>\<index>\<close>'') represents an index argument that specifies an implicit @{keyword |
|
297 |
"structure"} reference (see also \secref{sec:locale}). Only locally fixed |
|
298 |
variables may be declared as @{keyword "structure"}. |
|
51657
3db1bbc82d8d
more accurate documentation of "(structure)" mixfix;
wenzelm
parents:
51654
diff
changeset
|
299 |
|
62106 | 300 |
Infix and binder declarations provide common abbreviations for particular |
301 |
mixfix declarations. So in practice, mixfix templates mostly degenerate to |
|
302 |
literal text for concrete syntax, such as ``\<^verbatim>\<open>++\<close>'' for an infix symbol. |
|
61503 | 303 |
\<close> |
28762 | 304 |
|
46290 | 305 |
|
58618 | 306 |
subsection \<open>The general mixfix form\<close> |
46290 | 307 |
|
62106 | 308 |
text \<open> |
309 |
In full generality, mixfix declarations work as follows. Suppose a constant |
|
310 |
\<open>c :: \<tau>\<^sub>1 \<Rightarrow> \<dots> \<tau>\<^sub>n \<Rightarrow> \<tau>\<close> is annotated by \<open>(mixfix [p\<^sub>1, \<dots>, p\<^sub>n] p)\<close>, where |
|
311 |
\<open>mixfix\<close> is a string \<open>d\<^sub>0 _ d\<^sub>1 _ \<dots> _ d\<^sub>n\<close> consisting of delimiters that |
|
312 |
surround argument positions as indicated by underscores. |
|
28762 | 313 |
|
62106 | 314 |
Altogether this determines a production for a context-free priority grammar, |
315 |
where for each argument \<open>i\<close> the syntactic category is determined by \<open>\<tau>\<^sub>i\<close> |
|
316 |
(with priority \<open>p\<^sub>i\<close>), and the result category is determined from \<open>\<tau>\<close> (with |
|
317 |
priority \<open>p\<close>). Priority specifications are optional, with default 0 for |
|
318 |
arguments and 1000 for the result.\<^footnote>\<open>Omitting priorities is prone to |
|
319 |
syntactic ambiguities unless the delimiter tokens determine fully bracketed |
|
320 |
notation, as in \<open>if _ then _ else _ fi\<close>.\<close> |
|
28762 | 321 |
|
62106 | 322 |
Since \<open>\<tau>\<close> may be again a function type, the constant type scheme may have |
323 |
more argument positions than the mixfix pattern. Printing a nested |
|
324 |
application \<open>c t\<^sub>1 \<dots> t\<^sub>m\<close> for \<open>m > n\<close> works by attaching concrete notation |
|
325 |
only to the innermost part, essentially by printing \<open>(c t\<^sub>1 \<dots> t\<^sub>n) \<dots> t\<^sub>m\<close> |
|
326 |
instead. If a term has fewer arguments than specified in the mixfix |
|
28762 | 327 |
template, the concrete syntax is ignored. |
328 |
||
61421 | 329 |
\<^medskip> |
62106 | 330 |
A mixfix template may also contain additional directives for pretty |
331 |
printing, notably spaces, blocks, and breaks. The general template format is |
|
332 |
a sequence over any of the following entities. |
|
28762 | 333 |
|
63933 | 334 |
\<^descr> \<open>d\<close> is a delimiter, namely a non-empty sequence of delimiter items of the |
335 |
following form: |
|
336 |
\<^enum> a control symbol followed by a cartouche |
|
337 |
\<^enum> a single symbol, excluding the following special characters: |
|
338 |
\<^medskip> |
|
339 |
\begin{tabular}{ll} |
|
340 |
\<^verbatim>\<open>'\<close> & single quote \\ |
|
341 |
\<^verbatim>\<open>_\<close> & underscore \\ |
|
342 |
\<open>\<index>\<close> & index symbol \\ |
|
343 |
\<^verbatim>\<open>(\<close> & open parenthesis \\ |
|
344 |
\<^verbatim>\<open>)\<close> & close parenthesis \\ |
|
345 |
\<^verbatim>\<open>/\<close> & slash \\ |
|
346 |
\<open>\<open> \<close>\<close> & cartouche delimiters \\ |
|
347 |
\end{tabular} |
|
348 |
\<^medskip> |
|
28762 | 349 |
|
62106 | 350 |
\<^descr> \<^verbatim>\<open>'\<close> escapes the special meaning of these meta-characters, producing a |
351 |
literal version of the following character, unless that is a blank. |
|
28771
4510201c6aaf
mixfix annotations: verbatim for special symbols;
wenzelm
parents:
28770
diff
changeset
|
352 |
|
62106 | 353 |
A single quote followed by a blank separates delimiters, without affecting |
354 |
printing, but input tokens may have additional white space here. |
|
28771
4510201c6aaf
mixfix annotations: verbatim for special symbols;
wenzelm
parents:
28770
diff
changeset
|
355 |
|
62106 | 356 |
\<^descr> \<^verbatim>\<open>_\<close> is an argument position, which stands for a certain syntactic |
357 |
category in the underlying grammar. |
|
28762 | 358 |
|
62106 | 359 |
\<^descr> \<open>\<index>\<close> is an indexed argument position; this is the place where implicit |
360 |
structure arguments can be attached. |
|
28762 | 361 |
|
62106 | 362 |
\<^descr> \<open>s\<close> is a non-empty sequence of spaces for printing. This and the following |
363 |
specifications do not affect parsing at all. |
|
364 |
||
62807 | 365 |
\<^descr> \<^verbatim>\<open>(\<close>\<open>n\<close> opens a pretty printing block. The optional natural number |
366 |
specifies the block indentation, i.e.\ how many spaces to add when a line |
|
367 |
break occurs within the block. The default indentation is 0. |
|
368 |
||
369 |
\<^descr> \<^verbatim>\<open>(\<close>\<open>\<open>properties\<close>\<close> opens a pretty printing block, with properties |
|
370 |
specified within the given text cartouche. The syntax and semantics of |
|
371 |
the category @{syntax_ref mixfix_properties} is described below. |
|
28762 | 372 |
|
61503 | 373 |
\<^descr> \<^verbatim>\<open>)\<close> closes a pretty printing block. |
28762 | 374 |
|
61503 | 375 |
\<^descr> \<^verbatim>\<open>//\<close> forces a line break. |
28762 | 376 |
|
62106 | 377 |
\<^descr> \<^verbatim>\<open>/\<close>\<open>s\<close> allows a line break. Here \<open>s\<close> stands for the string of spaces |
378 |
(zero or more) right after the slash. These spaces are printed if the break |
|
379 |
is \<^emph>\<open>not\<close> taken. |
|
28762 | 380 |
|
381 |
||
62807 | 382 |
\<^medskip> |
383 |
Block properties allow more control over the details of pretty-printed |
|
384 |
output. The concrete syntax is defined as follows. |
|
385 |
||
386 |
@{rail \<open> |
|
387 |
@{syntax_def "mixfix_properties"}: (entry *) |
|
388 |
; |
|
389 |
entry: atom ('=' atom)? |
|
390 |
; |
|
63138
70f4d67235a0
clarified syntax category names according to Isabelle/ML/Scala;
wenzelm
parents:
62969
diff
changeset
|
391 |
atom: @{syntax short_ident} | @{syntax int} | @{syntax float} | @{syntax cartouche} |
62807 | 392 |
\<close>} |
393 |
||
68503 | 394 |
Each @{syntax entry} is a name-value pair: if the value is omitted, it |
62807 | 395 |
defaults to \<^verbatim>\<open>true\<close> (intended for Boolean properties). The following |
396 |
standard block properties are supported: |
|
397 |
||
398 |
\<^item> \<open>indent\<close> (natural number): the block indentation --- the same as for the |
|
399 |
simple syntax without block properties. |
|
400 |
||
401 |
\<^item> \<open>consistent\<close> (Boolean): this block has consistent breaks (if one break |
|
402 |
is taken, all breaks are taken). |
|
403 |
||
404 |
\<^item> \<open>unbreakable\<close> (Boolean): all possible breaks of the block are disabled |
|
405 |
(turned into spaces). |
|
406 |
||
407 |
\<^item> \<open>markup\<close> (string): the optional name of the markup node. If this is |
|
408 |
provided, all remaining properties are turned into its XML attributes. |
|
409 |
This allows specifying free-form PIDE markup, e.g.\ for specialized |
|
410 |
output. |
|
411 |
||
412 |
\<^medskip> |
|
413 |
Note that the general idea of pretty printing with blocks and breaks is |
|
414 |
described in @{cite "paulson-ml2"}; it goes back to @{cite "Oppen:1980"}. |
|
58618 | 415 |
\<close> |
28762 | 416 |
|
417 |
||
58618 | 418 |
subsection \<open>Infixes\<close> |
46290 | 419 |
|
62106 | 420 |
text \<open> |
421 |
Infix operators are specified by convenient short forms that abbreviate |
|
422 |
general mixfix annotations as follows: |
|
46290 | 423 |
|
424 |
\begin{center} |
|
425 |
\begin{tabular}{lll} |
|
426 |
||
61503 | 427 |
\<^verbatim>\<open>(\<close>@{keyword_def "infix"}~\<^verbatim>\<open>"\<close>\<open>sy\<close>\<^verbatim>\<open>"\<close> \<open>p\<close>\<^verbatim>\<open>)\<close> |
61493 | 428 |
& \<open>\<mapsto>\<close> & |
61503 | 429 |
\<^verbatim>\<open>("(_\<close>~\<open>sy\<close>\<^verbatim>\<open>/ _)" [\<close>\<open>p + 1\<close>\<^verbatim>\<open>,\<close>~\<open>p + 1\<close>\<^verbatim>\<open>]\<close>~\<open>p\<close>\<^verbatim>\<open>)\<close> \\ |
430 |
\<^verbatim>\<open>(\<close>@{keyword_def "infixl"}~\<^verbatim>\<open>"\<close>\<open>sy\<close>\<^verbatim>\<open>"\<close> \<open>p\<close>\<^verbatim>\<open>)\<close> |
|
61493 | 431 |
& \<open>\<mapsto>\<close> & |
61503 | 432 |
\<^verbatim>\<open>("(_\<close>~\<open>sy\<close>\<^verbatim>\<open>/ _)" [\<close>\<open>p\<close>\<^verbatim>\<open>,\<close>~\<open>p + 1\<close>\<^verbatim>\<open>]\<close>~\<open>p\<close>\<^verbatim>\<open>)\<close> \\ |
433 |
\<^verbatim>\<open>(\<close>@{keyword_def "infixr"}~\<^verbatim>\<open>"\<close>\<open>sy\<close>\<^verbatim>\<open>"\<close>~\<open>p\<close>\<^verbatim>\<open>)\<close> |
|
61493 | 434 |
& \<open>\<mapsto>\<close> & |
61503 | 435 |
\<^verbatim>\<open>("(_\<close>~\<open>sy\<close>\<^verbatim>\<open>/ _)" [\<close>\<open>p + 1\<close>\<^verbatim>\<open>,\<close>~\<open>p\<close>\<^verbatim>\<open>]\<close>~\<open>p\<close>\<^verbatim>\<open>)\<close> \\ |
46290 | 436 |
|
437 |
\end{tabular} |
|
438 |
\end{center} |
|
439 |
||
62106 | 440 |
The mixfix template \<^verbatim>\<open>"(_\<close>~\<open>sy\<close>\<^verbatim>\<open>/ _)"\<close> specifies two argument positions; |
441 |
the delimiter is preceded by a space and followed by a space or line break; |
|
442 |
the entire phrase is a pretty printing block. |
|
46290 | 443 |
|
67398 | 444 |
The alternative notation \<^verbatim>\<open>(\<close>\<open>sy\<close>\<^verbatim>\<open>)\<close> is introduced in addition. Thus any |
445 |
infix operator may be written in prefix form (as in Haskell), independently of |
|
446 |
the number of arguments in the term. To avoid conflict with the comment brackets |
|
447 |
\<^verbatim>\<open>(*\<close> and \<^verbatim>\<open>*)\<close>, infix operators that begin or end with a \<^verbatim>\<open>*\<close> require |
|
448 |
extra spaces, e.g.\ \<^verbatim>\<open>( * )\<close>. |
|
58618 | 449 |
\<close> |
46290 | 450 |
|
451 |
||
58618 | 452 |
subsection \<open>Binders\<close> |
46290 | 453 |
|
62106 | 454 |
text \<open> |
455 |
A \<^emph>\<open>binder\<close> is a variable-binding construct such as a quantifier. The idea |
|
456 |
to formalize \<open>\<forall>x. b\<close> as \<open>All (\<lambda>x. b)\<close> for \<open>All :: ('a \<Rightarrow> bool) \<Rightarrow> bool\<close> |
|
457 |
already goes back to @{cite church40}. Isabelle declarations of certain |
|
458 |
higher-order operators may be annotated with @{keyword_def "binder"} |
|
459 |
annotations as follows: |
|
46290 | 460 |
|
461 |
\begin{center} |
|
61503 | 462 |
\<open>c ::\<close>~\<^verbatim>\<open>"\<close>\<open>(\<tau>\<^sub>1 \<Rightarrow> \<tau>\<^sub>2) \<Rightarrow> \<tau>\<^sub>3\<close>\<^verbatim>\<open>" (\<close>@{keyword "binder"}~\<^verbatim>\<open>"\<close>\<open>sy\<close>\<^verbatim>\<open>" [\<close>\<open>p\<close>\<^verbatim>\<open>]\<close>~\<open>q\<close>\<^verbatim>\<open>)\<close> |
46290 | 463 |
\end{center} |
464 |
||
62106 | 465 |
This introduces concrete binder syntax \<open>sy x. b\<close>, where \<open>x\<close> is a bound |
466 |
variable of type \<open>\<tau>\<^sub>1\<close>, the body \<open>b\<close> has type \<open>\<tau>\<^sub>2\<close> and the whole term has |
|
467 |
type \<open>\<tau>\<^sub>3\<close>. The optional integer \<open>p\<close> specifies the syntactic priority of the |
|
468 |
body; the default is \<open>q\<close>, which is also the priority of the whole construct. |
|
46290 | 469 |
|
470 |
Internally, the binder syntax is expanded to something like this: |
|
471 |
\begin{center} |
|
61503 | 472 |
\<open>c_binder ::\<close>~\<^verbatim>\<open>"\<close>\<open>idts \<Rightarrow> \<tau>\<^sub>2 \<Rightarrow> \<tau>\<^sub>3\<close>\<^verbatim>\<open>" ("(3\<close>\<open>sy\<close>\<^verbatim>\<open>_./ _)" [0,\<close>~\<open>p\<close>\<^verbatim>\<open>]\<close>~\<open>q\<close>\<^verbatim>\<open>)\<close> |
46290 | 473 |
\end{center} |
474 |
||
475 |
Here @{syntax (inner) idts} is the nonterminal symbol for a list of |
|
476 |
identifiers with optional type constraints (see also |
|
62106 | 477 |
\secref{sec:pure-grammar}). The mixfix template \<^verbatim>\<open>"(3\<close>\<open>sy\<close>\<^verbatim>\<open>_./ _)"\<close> defines |
478 |
argument positions for the bound identifiers and the body, separated by a |
|
479 |
dot with optional line break; the entire phrase is a pretty printing block |
|
480 |
of indentation level 3. Note that there is no extra space after \<open>sy\<close>, so it |
|
481 |
needs to be included in the user specification if the binder syntax ends with a |
|
482 |
token that may be continued by an identifier token at the start of @{syntax |
|
483 |
(inner) idts}. |
|
46290 | 484 |
|
62106 | 485 |
Furthermore, a syntax translation transforms \<open>c_binder x\<^sub>1 \<dots> x\<^sub>n b\<close> into |
486 |
iterated application \<open>c (\<lambda>x\<^sub>1. \<dots> c (\<lambda>x\<^sub>n. b)\<dots>)\<close>. This works in both |
|
487 |
directions, for parsing and printing. |
|
488 |
\<close> |
|
46290 | 489 |
|
490 |
||
58618 | 491 |
section \<open>Explicit notation \label{sec:notation}\<close> |
28762 | 492 |
|
58618 | 493 |
text \<open> |
28762 | 494 |
\begin{matharray}{rcll} |
61493 | 495 |
@{command_def "type_notation"} & : & \<open>local_theory \<rightarrow> local_theory\<close> \\ |
496 |
@{command_def "no_type_notation"} & : & \<open>local_theory \<rightarrow> local_theory\<close> \\ |
|
497 |
@{command_def "notation"} & : & \<open>local_theory \<rightarrow> local_theory\<close> \\ |
|
498 |
@{command_def "no_notation"} & : & \<open>local_theory \<rightarrow> local_theory\<close> \\ |
|
499 |
@{command_def "write"} & : & \<open>proof(state) \<rightarrow> proof(state)\<close> \\ |
|
28762 | 500 |
\end{matharray} |
501 |
||
62106 | 502 |
Commands that introduce new logical entities (terms or types) usually allow |
503 |
to provide mixfix annotations on the spot, which is convenient for default |
|
504 |
notation. Nonetheless, the syntax may be modified later on by declarations |
|
505 |
for explicit notation. This allows to add or delete mixfix annotations for |
|
506 |
existing logical entities within the current context. |
|
46288 | 507 |
|
55112
b1a5d603fd12
prefer rail cartouche -- avoid back-slashed quotes;
wenzelm
parents:
55108
diff
changeset
|
508 |
@{rail \<open> |
59783
00b62aa9f430
tuned syntax diagrams -- no duplication of "target";
wenzelm
parents:
58842
diff
changeset
|
509 |
(@@{command type_notation} | @@{command no_type_notation}) @{syntax mode}? \<newline> |
62969 | 510 |
(@{syntax name} @{syntax mixfix} + @'and') |
35413 | 511 |
; |
59783
00b62aa9f430
tuned syntax diagrams -- no duplication of "target";
wenzelm
parents:
58842
diff
changeset
|
512 |
(@@{command notation} | @@{command no_notation}) @{syntax mode}? \<newline> |
62969 | 513 |
(@{syntax name} @{syntax mixfix} + @'and') |
28762 | 514 |
; |
62969 | 515 |
@@{command write} @{syntax mode}? (@{syntax name} @{syntax mixfix} + @'and') |
55112
b1a5d603fd12
prefer rail cartouche -- avoid back-slashed quotes;
wenzelm
parents:
55108
diff
changeset
|
516 |
\<close>} |
28762 | 517 |
|
62106 | 518 |
\<^descr> @{command "type_notation"}~\<open>c (mx)\<close> associates mixfix syntax with an |
519 |
existing type constructor. The arity of the constructor is retrieved from |
|
520 |
the context. |
|
46282 | 521 |
|
62106 | 522 |
\<^descr> @{command "no_type_notation"} is similar to @{command "type_notation"}, |
523 |
but removes the specified syntax annotation from the present context. |
|
35413 | 524 |
|
62106 | 525 |
\<^descr> @{command "notation"}~\<open>c (mx)\<close> associates mixfix syntax with an existing |
526 |
constant or fixed variable. The type declaration of the given entity is |
|
527 |
retrieved from the context. |
|
46282 | 528 |
|
62106 | 529 |
\<^descr> @{command "no_notation"} is similar to @{command "notation"}, but removes |
530 |
the specified syntax annotation from the present context. |
|
28762 | 531 |
|
62106 | 532 |
\<^descr> @{command "write"} is similar to @{command "notation"}, but works within |
533 |
an Isar proof body. |
|
58618 | 534 |
\<close> |
28762 | 535 |
|
28778 | 536 |
|
58618 | 537 |
section \<open>The Pure syntax \label{sec:pure-syntax}\<close> |
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
538 |
|
58618 | 539 |
subsection \<open>Lexical matters \label{sec:inner-lex}\<close> |
46282 | 540 |
|
62106 | 541 |
text \<open> |
542 |
The inner lexical syntax vaguely resembles the outer one |
|
543 |
(\secref{sec:outer-lex}), but some details are different. There are two main |
|
544 |
categories of inner syntax tokens: |
|
46282 | 545 |
|
62106 | 546 |
\<^enum> \<^emph>\<open>delimiters\<close> --- the literal tokens occurring in productions of the given |
547 |
priority grammar (cf.\ \secref{sec:priority-grammar}); |
|
46282 | 548 |
|
61477 | 549 |
\<^enum> \<^emph>\<open>named tokens\<close> --- various categories of identifiers etc. |
46282 | 550 |
|
551 |
||
62106 | 552 |
Delimiters override named tokens and may thus render certain identifiers |
553 |
inaccessible. Sometimes the logical context admits alternative ways to refer |
|
554 |
to the same entity, potentially via qualified names. |
|
46282 | 555 |
|
61421 | 556 |
\<^medskip> |
62106 | 557 |
The categories for named tokens are defined once and for all as follows, |
558 |
reusing some categories of the outer token syntax (\secref{sec:outer-lex}). |
|
46282 | 559 |
|
560 |
\begin{center} |
|
561 |
\begin{supertabular}{rcl} |
|
63138
70f4d67235a0
clarified syntax category names according to Isabelle/ML/Scala;
wenzelm
parents:
62969
diff
changeset
|
562 |
@{syntax_def (inner) id} & = & @{syntax_ref short_ident} \\ |
70f4d67235a0
clarified syntax category names according to Isabelle/ML/Scala;
wenzelm
parents:
62969
diff
changeset
|
563 |
@{syntax_def (inner) longid} & = & @{syntax_ref long_ident} \\ |
46282 | 564 |
@{syntax_def (inner) var} & = & @{syntax_ref var} \\ |
63138
70f4d67235a0
clarified syntax category names according to Isabelle/ML/Scala;
wenzelm
parents:
62969
diff
changeset
|
565 |
@{syntax_def (inner) tid} & = & @{syntax_ref type_ident} \\ |
70f4d67235a0
clarified syntax category names according to Isabelle/ML/Scala;
wenzelm
parents:
62969
diff
changeset
|
566 |
@{syntax_def (inner) tvar} & = & @{syntax_ref type_var} \\ |
58410
6d46ad54a2ab
explicit separation of signed and unsigned numerals using existing lexical categories num and xnum
haftmann
parents:
58409
diff
changeset
|
567 |
@{syntax_def (inner) num_token} & = & @{syntax_ref nat} \\ |
61503 | 568 |
@{syntax_def (inner) float_token} & = & @{syntax_ref nat}\<^verbatim>\<open>.\<close>@{syntax_ref nat} \\ |
569 |
@{syntax_def (inner) str_token} & = & \<^verbatim>\<open>''\<close> \<open>\<dots>\<close> \<^verbatim>\<open>''\<close> \\ |
|
570 |
@{syntax_def (inner) string_token} & = & \<^verbatim>\<open>"\<close> \<open>\<dots>\<close> \<^verbatim>\<open>"\<close> \\ |
|
61493 | 571 |
@{syntax_def (inner) cartouche} & = & @{verbatim "\<open>"} \<open>\<dots>\<close> @{verbatim "\<close>"} \\ |
46282 | 572 |
\end{supertabular} |
573 |
\end{center} |
|
574 |
||
575 |
The token categories @{syntax (inner) num_token}, @{syntax (inner) |
|
58421 | 576 |
float_token}, @{syntax (inner) str_token}, @{syntax (inner) string_token}, |
577 |
and @{syntax (inner) cartouche} are not used in Pure. Object-logics may |
|
578 |
implement numerals and string literals by adding appropriate syntax |
|
63680 | 579 |
declarations, together with some translation functions (e.g.\ see |
580 |
\<^file>\<open>~~/src/HOL/Tools/string_syntax.ML\<close>). |
|
46282 | 581 |
|
58421 | 582 |
The derived categories @{syntax_def (inner) num_const} and @{syntax_def |
583 |
(inner) float_const} provide robust access to the respective tokens: the |
|
584 |
syntax tree holds a syntactic constant instead of a free variable. |
|
67352
5f7f339f3d7e
inner syntax comments may be written as "\<comment> \<open>text\<close>";
wenzelm
parents:
67146
diff
changeset
|
585 |
|
67448 | 586 |
Formal document comments (\secref{sec:comments}) may be also used within the |
587 |
inner syntax. |
|
58618 | 588 |
\<close> |
46282 | 589 |
|
590 |
||
58618 | 591 |
subsection \<open>Priority grammars \label{sec:priority-grammar}\<close> |
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
592 |
|
62106 | 593 |
text \<open> |
594 |
A context-free grammar consists of a set of \<^emph>\<open>terminal symbols\<close>, a set of |
|
595 |
\<^emph>\<open>nonterminal symbols\<close> and a set of \<^emph>\<open>productions\<close>. Productions have the |
|
596 |
form \<open>A = \<gamma>\<close>, where \<open>A\<close> is a nonterminal and \<open>\<gamma>\<close> is a string of terminals |
|
597 |
and nonterminals. One designated nonterminal is called the \<^emph>\<open>root symbol\<close>. |
|
598 |
The language defined by the grammar consists of all strings of terminals |
|
599 |
that can be derived from the root symbol by applying productions as rewrite |
|
600 |
rules. |
|
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
601 |
|
62106 | 602 |
The standard Isabelle parser for inner syntax uses a \<^emph>\<open>priority grammar\<close>. |
603 |
Each nonterminal is decorated by an integer priority: \<open>A\<^sup>(\<^sup>p\<^sup>)\<close>. In a |
|
604 |
derivation, \<open>A\<^sup>(\<^sup>p\<^sup>)\<close> may be rewritten using a production \<open>A\<^sup>(\<^sup>q\<^sup>) = \<gamma>\<close> only |
|
605 |
if \<open>p \<le> q\<close>. Any priority grammar can be translated into a normal |
|
606 |
context-free grammar by introducing new nonterminals and productions. |
|
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
607 |
|
61421 | 608 |
\<^medskip> |
62106 | 609 |
Formally, a set of context-free productions \<open>G\<close> induces a derivation |
610 |
relation \<open>\<longrightarrow>\<^sub>G\<close> as follows. Let \<open>\<alpha>\<close> and \<open>\<beta>\<close> denote strings of terminal or |
|
611 |
nonterminal symbols. Then \<open>\<alpha> A\<^sup>(\<^sup>p\<^sup>) \<beta> \<longrightarrow>\<^sub>G \<alpha> \<gamma> \<beta>\<close> holds if and only if \<open>G\<close> |
|
61493 | 612 |
contains some production \<open>A\<^sup>(\<^sup>q\<^sup>) = \<gamma>\<close> for \<open>p \<le> q\<close>. |
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
613 |
|
61421 | 614 |
\<^medskip> |
62106 | 615 |
The following grammar for arithmetic expressions demonstrates how binding |
616 |
power and associativity of operators can be enforced by priorities. |
|
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
617 |
|
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
618 |
\begin{center} |
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
619 |
\begin{tabular}{rclr} |
61503 | 620 |
\<open>A\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)\<close> & \<open>=\<close> & \<^verbatim>\<open>(\<close> \<open>A\<^sup>(\<^sup>0\<^sup>)\<close> \<^verbatim>\<open>)\<close> \\ |
621 |
\<open>A\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)\<close> & \<open>=\<close> & \<^verbatim>\<open>0\<close> \\ |
|
622 |
\<open>A\<^sup>(\<^sup>0\<^sup>)\<close> & \<open>=\<close> & \<open>A\<^sup>(\<^sup>0\<^sup>)\<close> \<^verbatim>\<open>+\<close> \<open>A\<^sup>(\<^sup>1\<^sup>)\<close> \\ |
|
623 |
\<open>A\<^sup>(\<^sup>2\<^sup>)\<close> & \<open>=\<close> & \<open>A\<^sup>(\<^sup>3\<^sup>)\<close> \<^verbatim>\<open>*\<close> \<open>A\<^sup>(\<^sup>2\<^sup>)\<close> \\ |
|
624 |
\<open>A\<^sup>(\<^sup>3\<^sup>)\<close> & \<open>=\<close> & \<^verbatim>\<open>-\<close> \<open>A\<^sup>(\<^sup>3\<^sup>)\<close> \\ |
|
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
625 |
\end{tabular} |
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
626 |
\end{center} |
62106 | 627 |
The choice of priorities determines that \<^verbatim>\<open>-\<close> binds tighter than \<^verbatim>\<open>*\<close>, which |
628 |
binds tighter than \<^verbatim>\<open>+\<close>. Furthermore \<^verbatim>\<open>+\<close> associates to the left and \<^verbatim>\<open>*\<close> to |
|
629 |
the right. |
|
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
630 |
|
61421 | 631 |
\<^medskip> |
632 |
For clarity, grammars obey these conventions: |
|
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
633 |
|
62106 | 634 |
\<^item> All priorities must lie between 0 and 1000. |
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
635 |
|
62106 | 636 |
\<^item> Priority 0 on the right-hand side and priority 1000 on the left-hand |
637 |
side may be omitted. |
|
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
638 |
|
62106 | 639 |
\<^item> The production \<open>A\<^sup>(\<^sup>p\<^sup>) = \<alpha>\<close> is written as \<open>A = \<alpha> (p)\<close>, i.e.\ the |
640 |
priority of the left-hand side actually appears in a column on the far |
|
641 |
right. |
|
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
642 |
|
62106 | 643 |
\<^item> Alternatives are separated by \<open>|\<close>. |
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
644 |
|
62106 | 645 |
\<^item> Repetition is indicated by dots \<open>(\<dots>)\<close> in an informal but obvious way. |
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
646 |
|
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
647 |
Using these conventions, the example grammar specification above |
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
648 |
takes the form: |
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
649 |
\begin{center} |
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
650 |
\begin{tabular}{rclc} |
61503 | 651 |
\<open>A\<close> & \<open>=\<close> & \<^verbatim>\<open>(\<close> \<open>A\<close> \<^verbatim>\<open>)\<close> \\ |
652 |
& \<open>|\<close> & \<^verbatim>\<open>0\<close> & \qquad\qquad \\ |
|
653 |
& \<open>|\<close> & \<open>A\<close> \<^verbatim>\<open>+\<close> \<open>A\<^sup>(\<^sup>1\<^sup>)\<close> & \<open>(0)\<close> \\ |
|
654 |
& \<open>|\<close> & \<open>A\<^sup>(\<^sup>3\<^sup>)\<close> \<^verbatim>\<open>*\<close> \<open>A\<^sup>(\<^sup>2\<^sup>)\<close> & \<open>(2)\<close> \\ |
|
655 |
& \<open>|\<close> & \<^verbatim>\<open>-\<close> \<open>A\<^sup>(\<^sup>3\<^sup>)\<close> & \<open>(3)\<close> \\ |
|
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
656 |
\end{tabular} |
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
657 |
\end{center} |
58618 | 658 |
\<close> |
28769
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
659 |
|
8fc228f21861
added section "Priority grammars" (variant from old ref manual);
wenzelm
parents:
28767
diff
changeset
|
660 |
|
58618 | 661 |
subsection \<open>The Pure grammar \label{sec:pure-grammar}\<close> |
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
662 |
|
62106 | 663 |
text \<open> |
664 |
The priority grammar of the \<open>Pure\<close> theory is defined approximately like |
|
665 |
this: |
|
28774 | 666 |
|
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
667 |
\begin{center} |
28773 | 668 |
\begin{supertabular}{rclr} |
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
669 |
|
61493 | 670 |
@{syntax_def (inner) any} & = & \<open>prop | logic\<close> \\\\ |
28772 | 671 |
|
61503 | 672 |
@{syntax_def (inner) prop} & = & \<^verbatim>\<open>(\<close> \<open>prop\<close> \<^verbatim>\<open>)\<close> \\ |
673 |
& \<open>|\<close> & \<open>prop\<^sup>(\<^sup>4\<^sup>)\<close> \<^verbatim>\<open>::\<close> \<open>type\<close> & \<open>(3)\<close> \\ |
|
674 |
& \<open>|\<close> & \<open>any\<^sup>(\<^sup>3\<^sup>)\<close> \<^verbatim>\<open>==\<close> \<open>any\<^sup>(\<^sup>3\<^sup>)\<close> & \<open>(2)\<close> \\ |
|
61493 | 675 |
& \<open>|\<close> & \<open>any\<^sup>(\<^sup>3\<^sup>)\<close> \<open>\<equiv>\<close> \<open>any\<^sup>(\<^sup>3\<^sup>)\<close> & \<open>(2)\<close> \\ |
61503 | 676 |
& \<open>|\<close> & \<open>prop\<^sup>(\<^sup>3\<^sup>)\<close> \<^verbatim>\<open>&&&\<close> \<open>prop\<^sup>(\<^sup>2\<^sup>)\<close> & \<open>(2)\<close> \\ |
677 |
& \<open>|\<close> & \<open>prop\<^sup>(\<^sup>2\<^sup>)\<close> \<^verbatim>\<open>==>\<close> \<open>prop\<^sup>(\<^sup>1\<^sup>)\<close> & \<open>(1)\<close> \\ |
|
61493 | 678 |
& \<open>|\<close> & \<open>prop\<^sup>(\<^sup>2\<^sup>)\<close> \<open>\<Longrightarrow>\<close> \<open>prop\<^sup>(\<^sup>1\<^sup>)\<close> & \<open>(1)\<close> \\ |
61503 | 679 |
& \<open>|\<close> & \<^verbatim>\<open>[|\<close> \<open>prop\<close> \<^verbatim>\<open>;\<close> \<open>\<dots>\<close> \<^verbatim>\<open>;\<close> \<open>prop\<close> \<^verbatim>\<open>|]\<close> \<^verbatim>\<open>==>\<close> \<open>prop\<^sup>(\<^sup>1\<^sup>)\<close> & \<open>(1)\<close> \\ |
680 |
& \<open>|\<close> & \<open>\<lbrakk>\<close> \<open>prop\<close> \<^verbatim>\<open>;\<close> \<open>\<dots>\<close> \<^verbatim>\<open>;\<close> \<open>prop\<close> \<open>\<rbrakk>\<close> \<open>\<Longrightarrow>\<close> \<open>prop\<^sup>(\<^sup>1\<^sup>)\<close> & \<open>(1)\<close> \\ |
|
681 |
& \<open>|\<close> & \<^verbatim>\<open>!!\<close> \<open>idts\<close> \<^verbatim>\<open>.\<close> \<open>prop\<close> & \<open>(0)\<close> \\ |
|
682 |
& \<open>|\<close> & \<open>\<And>\<close> \<open>idts\<close> \<^verbatim>\<open>.\<close> \<open>prop\<close> & \<open>(0)\<close> \\ |
|
683 |
& \<open>|\<close> & \<^verbatim>\<open>OFCLASS\<close> \<^verbatim>\<open>(\<close> \<open>type\<close> \<^verbatim>\<open>,\<close> \<open>logic\<close> \<^verbatim>\<open>)\<close> \\ |
|
684 |
& \<open>|\<close> & \<^verbatim>\<open>SORT_CONSTRAINT\<close> \<^verbatim>\<open>(\<close> \<open>type\<close> \<^verbatim>\<open>)\<close> \\ |
|
685 |
& \<open>|\<close> & \<^verbatim>\<open>TERM\<close> \<open>logic\<close> \\ |
|
686 |
& \<open>|\<close> & \<^verbatim>\<open>PROP\<close> \<open>aprop\<close> \\\\ |
|
28772 | 687 |
|
61503 | 688 |
@{syntax_def (inner) aprop} & = & \<^verbatim>\<open>(\<close> \<open>aprop\<close> \<^verbatim>\<open>)\<close> \\ |
689 |
& \<open>|\<close> & \<open>id | longid | var |\<close>~~\<^verbatim>\<open>_\<close>~~\<open>|\<close>~~\<^verbatim>\<open>...\<close> \\ |
|
690 |
& \<open>|\<close> & \<^verbatim>\<open>CONST\<close> \<open>id |\<close>~~\<^verbatim>\<open>CONST\<close> \<open>longid\<close> \\ |
|
691 |
& \<open>|\<close> & \<^verbatim>\<open>XCONST\<close> \<open>id |\<close>~~\<^verbatim>\<open>XCONST\<close> \<open>longid\<close> \\ |
|
61493 | 692 |
& \<open>|\<close> & \<open>logic\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) \<dots> any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)\<close> & \<open>(999)\<close> \\\\ |
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
693 |
|
61503 | 694 |
@{syntax_def (inner) logic} & = & \<^verbatim>\<open>(\<close> \<open>logic\<close> \<^verbatim>\<open>)\<close> \\ |
695 |
& \<open>|\<close> & \<open>logic\<^sup>(\<^sup>4\<^sup>)\<close> \<^verbatim>\<open>::\<close> \<open>type\<close> & \<open>(3)\<close> \\ |
|
696 |
& \<open>|\<close> & \<open>id | longid | var |\<close>~~\<^verbatim>\<open>_\<close>~~\<open>|\<close>~~\<^verbatim>\<open>...\<close> \\ |
|
697 |
& \<open>|\<close> & \<^verbatim>\<open>CONST\<close> \<open>id |\<close>~~\<^verbatim>\<open>CONST\<close> \<open>longid\<close> \\ |
|
698 |
& \<open>|\<close> & \<^verbatim>\<open>XCONST\<close> \<open>id |\<close>~~\<^verbatim>\<open>XCONST\<close> \<open>longid\<close> \\ |
|
61493 | 699 |
& \<open>|\<close> & \<open>logic\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) \<dots> any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)\<close> & \<open>(999)\<close> \\ |
61503 | 700 |
& \<open>|\<close> & \<^verbatim>\<open>%\<close> \<open>pttrns\<close> \<^verbatim>\<open>.\<close> \<open>any\<^sup>(\<^sup>3\<^sup>)\<close> & \<open>(3)\<close> \\ |
701 |
& \<open>|\<close> & \<open>\<lambda>\<close> \<open>pttrns\<close> \<^verbatim>\<open>.\<close> \<open>any\<^sup>(\<^sup>3\<^sup>)\<close> & \<open>(3)\<close> \\ |
|
67398 | 702 |
& \<open>|\<close> & \<^verbatim>\<open>(==)\<close>~~\<open>|\<close>~~\<^verbatim>\<open>(\<close>\<open>\<equiv>\<close>\<^verbatim>\<open>)\<close>~~\<open>|\<close>~~\<^verbatim>\<open>(&&&)\<close> \\ |
703 |
& \<open>|\<close> & \<^verbatim>\<open>(==>)\<close>~~\<open>|\<close>~~\<^verbatim>\<open>(\<close>\<open>\<Longrightarrow>\<close>\<^verbatim>\<open>)\<close> \\ |
|
61503 | 704 |
& \<open>|\<close> & \<^verbatim>\<open>TYPE\<close> \<^verbatim>\<open>(\<close> \<open>type\<close> \<^verbatim>\<open>)\<close> \\\\ |
28772 | 705 |
|
61503 | 706 |
@{syntax_def (inner) idt} & = & \<^verbatim>\<open>(\<close> \<open>idt\<close> \<^verbatim>\<open>)\<close>~~\<open>| id |\<close>~~\<^verbatim>\<open>_\<close> \\ |
707 |
& \<open>|\<close> & \<open>id\<close> \<^verbatim>\<open>::\<close> \<open>type\<close> & \<open>(0)\<close> \\ |
|
708 |
& \<open>|\<close> & \<^verbatim>\<open>_\<close> \<^verbatim>\<open>::\<close> \<open>type\<close> & \<open>(0)\<close> \\\\ |
|
28772 | 709 |
|
61503 | 710 |
@{syntax_def (inner) index} & = & \<^verbatim>\<open>\<^bsub>\<close> \<open>logic\<^sup>(\<^sup>0\<^sup>)\<close> \<^verbatim>\<open>\<^esub>\<close>~~\<open>| | \<index>\<close> \\\\ |
46287 | 711 |
|
61493 | 712 |
@{syntax_def (inner) idts} & = & \<open>idt | idt\<^sup>(\<^sup>1\<^sup>) idts\<close> & \<open>(0)\<close> \\\\ |
28772 | 713 |
|
61493 | 714 |
@{syntax_def (inner) pttrn} & = & \<open>idt\<close> \\\\ |
28772 | 715 |
|
61493 | 716 |
@{syntax_def (inner) pttrns} & = & \<open>pttrn | pttrn\<^sup>(\<^sup>1\<^sup>) pttrns\<close> & \<open>(0)\<close> \\\\ |
28774 | 717 |
|
61503 | 718 |
@{syntax_def (inner) type} & = & \<^verbatim>\<open>(\<close> \<open>type\<close> \<^verbatim>\<open>)\<close> \\ |
719 |
& \<open>|\<close> & \<open>tid | tvar |\<close>~~\<^verbatim>\<open>_\<close> \\ |
|
720 |
& \<open>|\<close> & \<open>tid\<close> \<^verbatim>\<open>::\<close> \<open>sort | tvar\<close>~~\<^verbatim>\<open>::\<close> \<open>sort |\<close>~~\<^verbatim>\<open>_\<close> \<^verbatim>\<open>::\<close> \<open>sort\<close> \\ |
|
61493 | 721 |
& \<open>|\<close> & \<open>type_name | type\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) type_name\<close> \\ |
61503 | 722 |
& \<open>|\<close> & \<^verbatim>\<open>(\<close> \<open>type\<close> \<^verbatim>\<open>,\<close> \<open>\<dots>\<close> \<^verbatim>\<open>,\<close> \<open>type\<close> \<^verbatim>\<open>)\<close> \<open>type_name\<close> \\ |
723 |
& \<open>|\<close> & \<open>type\<^sup>(\<^sup>1\<^sup>)\<close> \<^verbatim>\<open>=>\<close> \<open>type\<close> & \<open>(0)\<close> \\ |
|
61493 | 724 |
& \<open>|\<close> & \<open>type\<^sup>(\<^sup>1\<^sup>)\<close> \<open>\<Rightarrow>\<close> \<open>type\<close> & \<open>(0)\<close> \\ |
61503 | 725 |
& \<open>|\<close> & \<^verbatim>\<open>[\<close> \<open>type\<close> \<^verbatim>\<open>,\<close> \<open>\<dots>\<close> \<^verbatim>\<open>,\<close> \<open>type\<close> \<^verbatim>\<open>]\<close> \<^verbatim>\<open>=>\<close> \<open>type\<close> & \<open>(0)\<close> \\ |
726 |
& \<open>|\<close> & \<^verbatim>\<open>[\<close> \<open>type\<close> \<^verbatim>\<open>,\<close> \<open>\<dots>\<close> \<^verbatim>\<open>,\<close> \<open>type\<close> \<^verbatim>\<open>]\<close> \<open>\<Rightarrow>\<close> \<open>type\<close> & \<open>(0)\<close> \\ |
|
61493 | 727 |
@{syntax_def (inner) type_name} & = & \<open>id | longid\<close> \\\\ |
28772 | 728 |
|
67718 | 729 |
@{syntax_def (inner) sort} & = & @{syntax class_name}~~\<open>|\<close>~~\<^verbatim>\<open>_\<close>~~\<open>|\<close>~~\<^verbatim>\<open>{}\<close> \\ |
61503 | 730 |
& \<open>|\<close> & \<^verbatim>\<open>{\<close> @{syntax class_name} \<^verbatim>\<open>,\<close> \<open>\<dots>\<close> \<^verbatim>\<open>,\<close> @{syntax class_name} \<^verbatim>\<open>}\<close> \\ |
61493 | 731 |
@{syntax_def (inner) class_name} & = & \<open>id | longid\<close> \\ |
28773 | 732 |
\end{supertabular} |
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
733 |
\end{center} |
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
734 |
|
61421 | 735 |
\<^medskip> |
62106 | 736 |
Here literal terminals are printed \<^verbatim>\<open>verbatim\<close>; see also |
737 |
\secref{sec:inner-lex} for further token categories of the inner syntax. The |
|
738 |
meaning of the nonterminals defined by the above grammar is as follows: |
|
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
739 |
|
61439 | 740 |
\<^descr> @{syntax_ref (inner) any} denotes any term. |
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
741 |
|
62106 | 742 |
\<^descr> @{syntax_ref (inner) prop} denotes meta-level propositions, which are |
743 |
terms of type @{typ prop}. The syntax of such formulae of the meta-logic is |
|
744 |
carefully distinguished from usual conventions for object-logics. In |
|
745 |
particular, plain \<open>\<lambda>\<close>-term notation is \<^emph>\<open>not\<close> recognized as @{syntax (inner) |
|
746 |
prop}. |
|
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
747 |
|
62106 | 748 |
\<^descr> @{syntax_ref (inner) aprop} denotes atomic propositions, which are |
749 |
embedded into regular @{syntax (inner) prop} by means of an explicit \<^verbatim>\<open>PROP\<close> |
|
750 |
token. |
|
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
751 |
|
62106 | 752 |
Terms of type @{typ prop} with non-constant head, e.g.\ a plain variable, |
753 |
are printed in this form. Constants that yield type @{typ prop} are expected |
|
754 |
to provide their own concrete syntax; otherwise the printed version will |
|
755 |
appear like @{syntax (inner) logic} and cannot be parsed again as @{syntax |
|
756 |
(inner) prop}. |
|
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
757 |
|
62106 | 758 |
\<^descr> @{syntax_ref (inner) logic} denotes arbitrary terms of a logical type, |
759 |
excluding type @{typ prop}. This is the main syntactic category of |
|
760 |
object-logic entities, covering plain \<open>\<lambda>\<close>-term notation (variables, |
|
761 |
abstraction, application), plus anything defined by the user. |
|
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
762 |
|
62106 | 763 |
When specifying notation for logical entities, all logical types (excluding |
764 |
@{typ prop}) are \<^emph>\<open>collapsed\<close> to this single category of @{syntax (inner) |
|
765 |
logic}. |
|
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
766 |
|
62106 | 767 |
\<^descr> @{syntax_ref (inner) index} denotes an optional index term for indexed |
768 |
syntax. If omitted, it refers to the first @{keyword_ref "structure"} |
|
769 |
variable in the context. The special dummy ``\<open>\<index>\<close>'' serves as pattern |
|
770 |
variable in mixfix annotations that introduce indexed notation. |
|
46287 | 771 |
|
62106 | 772 |
\<^descr> @{syntax_ref (inner) idt} denotes identifiers, possibly constrained by |
773 |
types. |
|
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
774 |
|
62106 | 775 |
\<^descr> @{syntax_ref (inner) idts} denotes a sequence of @{syntax_ref (inner) |
776 |
idt}. This is the most basic category for variables in iterated binders, |
|
777 |
such as \<open>\<lambda>\<close> or \<open>\<And>\<close>. |
|
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
778 |
|
62106 | 779 |
\<^descr> @{syntax_ref (inner) pttrn} and @{syntax_ref (inner) pttrns} denote |
780 |
patterns for abstraction, cases bindings etc. In Pure, these categories |
|
781 |
start as a mere copy of @{syntax (inner) idt} and @{syntax (inner) idts}, |
|
782 |
respectively. Object-logics may add additional productions for binding |
|
783 |
forms. |
|
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
784 |
|
61439 | 785 |
\<^descr> @{syntax_ref (inner) type} denotes types of the meta-logic. |
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
786 |
|
61439 | 787 |
\<^descr> @{syntax_ref (inner) sort} denotes meta-level sorts. |
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
788 |
|
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
789 |
|
28774 | 790 |
Here are some further explanations of certain syntax features. |
28773 | 791 |
|
62106 | 792 |
\<^item> In @{syntax (inner) idts}, note that \<open>x :: nat y\<close> is parsed as \<open>x :: (nat |
793 |
y)\<close>, treating \<open>y\<close> like a type constructor applied to \<open>nat\<close>. To avoid this |
|
794 |
interpretation, write \<open>(x :: nat) y\<close> with explicit parentheses. |
|
28773 | 795 |
|
62106 | 796 |
\<^item> Similarly, \<open>x :: nat y :: nat\<close> is parsed as \<open>x :: (nat y :: nat)\<close>. The |
797 |
correct form is \<open>(x :: nat) (y :: nat)\<close>, or \<open>(x :: nat) y :: nat\<close> if \<open>y\<close> is |
|
798 |
last in the sequence of identifiers. |
|
28773 | 799 |
|
62106 | 800 |
\<^item> Type constraints for terms bind very weakly. For example, \<open>x < y :: nat\<close> |
801 |
is normally parsed as \<open>(x < y) :: nat\<close>, unless \<open><\<close> has a very low priority, |
|
802 |
in which case the input is likely to be ambiguous. The correct form is \<open>x < |
|
803 |
(y :: nat)\<close>. |
|
28773 | 804 |
|
61421 | 805 |
\<^item> Dummy variables (written as underscore) may occur in different |
28774 | 806 |
roles. |
28773 | 807 |
|
67718 | 808 |
\<^descr> A sort ``\<open>_\<close>'' refers to a vacuous constraint for type variables, which |
809 |
is effectively ignored in type-inference. |
|
810 |
||
62106 | 811 |
\<^descr> A type ``\<open>_\<close>'' or ``\<open>_ :: sort\<close>'' acts like an anonymous inference |
812 |
parameter, which is filled-in according to the most general type produced |
|
813 |
by the type-checking phase. |
|
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
814 |
|
62106 | 815 |
\<^descr> A bound ``\<open>_\<close>'' refers to a vacuous abstraction, where the body does not |
816 |
refer to the binding introduced here. As in the term @{term "\<lambda>x _. x"}, |
|
817 |
which is \<open>\<alpha>\<close>-equivalent to \<open>\<lambda>x y. x\<close>. |
|
28773 | 818 |
|
62106 | 819 |
\<^descr> A free ``\<open>_\<close>'' refers to an implicit outer binding. Higher definitional |
820 |
packages usually allow forms like \<open>f x _ = x\<close>. |
|
28773 | 821 |
|
62106 | 822 |
\<^descr> A schematic ``\<open>_\<close>'' (within a term pattern, see \secref{sec:term-decls}) |
823 |
refers to an anonymous variable that is implicitly abstracted over its |
|
824 |
context of locally bound variables. For example, this allows pattern |
|
825 |
matching of \<open>{x. f x = g x}\<close> against \<open>{x. _ = _}\<close>, or even \<open>{_. _ = _}\<close> by |
|
61458 | 826 |
using both bound and schematic dummies. |
28773 | 827 |
|
62106 | 828 |
\<^descr> The three literal dots ``\<^verbatim>\<open>...\<close>'' may be also written as ellipsis symbol |
829 |
\<^verbatim>\<open>\<dots>\<close>. In both cases this refers to a special schematic variable, which is |
|
830 |
bound in the context. This special term abbreviation works nicely with |
|
28774 | 831 |
calculational reasoning (\secref{sec:calculation}). |
832 |
||
62106 | 833 |
\<^descr> \<^verbatim>\<open>CONST\<close> ensures that the given identifier is treated as constant term, |
834 |
and passed through the parse tree in fully internalized form. This is |
|
835 |
particularly relevant for translation rules (\secref{sec:syn-trans}), |
|
836 |
notably on the RHS. |
|
46287 | 837 |
|
62106 | 838 |
\<^descr> \<^verbatim>\<open>XCONST\<close> is similar to \<^verbatim>\<open>CONST\<close>, but retains the constant name as given. |
839 |
This is only relevant to translation rules (\secref{sec:syn-trans}), notably |
|
840 |
on the LHS. |
|
58618 | 841 |
\<close> |
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
842 |
|
28777 | 843 |
|
58618 | 844 |
subsection \<open>Inspecting the syntax\<close> |
28777 | 845 |
|
58618 | 846 |
text \<open> |
46282 | 847 |
\begin{matharray}{rcl} |
61493 | 848 |
@{command_def "print_syntax"}\<open>\<^sup>*\<close> & : & \<open>context \<rightarrow>\<close> \\ |
46282 | 849 |
\end{matharray} |
28777 | 850 |
|
62106 | 851 |
\<^descr> @{command "print_syntax"} prints the inner syntax of the current context. |
852 |
The output can be quite large; the most important sections are explained |
|
853 |
below. |
|
28777 | 854 |
|
62106 | 855 |
\<^descr> \<open>lexicon\<close> lists the delimiters of the inner token language; see |
856 |
\secref{sec:inner-lex}. |
|
28777 | 857 |
|
67513 | 858 |
\<^descr> \<open>productions\<close> lists the productions of the underlying priority grammar; |
859 |
see \secref{sec:priority-grammar}. |
|
28777 | 860 |
|
67513 | 861 |
Many productions have an extra \<open>\<dots> \<^bold>\<Rightarrow> name\<close>. These names later become the |
862 |
heads of parse trees; they also guide the pretty printer. |
|
62106 | 863 |
|
864 |
Productions without such parse tree names are called \<^emph>\<open>copy productions\<close>. |
|
865 |
Their right-hand side must have exactly one nonterminal symbol (or named |
|
866 |
token). The parser does not create a new parse tree node for copy |
|
867 |
productions, but simply returns the parse tree of the right-hand symbol. |
|
46282 | 868 |
|
61458 | 869 |
If the right-hand side of a copy production consists of a single |
61477 | 870 |
nonterminal without any delimiters, then it is called a \<^emph>\<open>chain |
62106 | 871 |
production\<close>. Chain productions act as abbreviations: conceptually, they |
872 |
are removed from the grammar by adding new productions. Priority |
|
67513 | 873 |
information attached to chain productions is ignored. |
46282 | 874 |
|
62106 | 875 |
\<^descr> \<open>print modes\<close> lists the alternative print modes provided by this |
876 |
grammar; see \secref{sec:print-modes}. |
|
28777 | 877 |
|
62106 | 878 |
\<^descr> \<open>parse_rules\<close> and \<open>print_rules\<close> relate to syntax translations (macros); |
879 |
see \secref{sec:syn-trans}. |
|
46282 | 880 |
|
62106 | 881 |
\<^descr> \<open>parse_ast_translation\<close> and \<open>print_ast_translation\<close> list sets of |
882 |
constants that invoke translation functions for abstract syntax trees, |
|
883 |
which are only required in very special situations; see |
|
884 |
\secref{sec:tr-funs}. |
|
28777 | 885 |
|
62106 | 886 |
\<^descr> \<open>parse_translation\<close> and \<open>print_translation\<close> list the sets of constants |
887 |
that invoke regular translation functions; see \secref{sec:tr-funs}. |
|
58618 | 888 |
\<close> |
28774 | 889 |
|
28770
93a372e2dc7a
added section "The Pure grammar" (incomplete version, based on old ref manual);
wenzelm
parents:
28769
diff
changeset
|
890 |
|
58618 | 891 |
subsection \<open>Ambiguity of parsed expressions\<close> |
46291 | 892 |
|
58618 | 893 |
text \<open> |
46291 | 894 |
\begin{tabular}{rcll} |
61493 | 895 |
@{attribute_def syntax_ambiguity_warning} & : & \<open>attribute\<close> & default \<open>true\<close> \\ |
896 |
@{attribute_def syntax_ambiguity_limit} & : & \<open>attribute\<close> & default \<open>10\<close> \\ |
|
46291 | 897 |
\end{tabular} |
898 |
||
62106 | 899 |
Depending on the grammar and the given input, parsing may be ambiguous. |
900 |
Isabelle lets the Earley parser enumerate all possible parse trees, and then |
|
901 |
tries to make the best out of the situation. Terms that cannot be |
|
902 |
type-checked are filtered out, which often leads to a unique result in the |
|
903 |
end. Unlike regular type reconstruction, which is applied to the whole |
|
904 |
collection of input terms simultaneously, the filtering stage only treats |
|
905 |
each given term in isolation. Filtering is also not attempted for individual |
|
46291 | 906 |
types or raw ASTs (as required for @{command translations}). |
907 |
||
62106 | 908 |
Certain warning or error messages are printed, depending on the situation |
909 |
and the given configuration options. Parsing ultimately fails, if multiple |
|
910 |
results remain after the filtering phase. |
|
46291 | 911 |
|
62106 | 912 |
\<^descr> @{attribute syntax_ambiguity_warning} controls output of explicit warning |
913 |
messages about syntax ambiguity. |
|
46291 | 914 |
|
62106 | 915 |
\<^descr> @{attribute syntax_ambiguity_limit} determines the number of resulting |
916 |
parse trees that are shown as part of the printed message in case of an |
|
917 |
ambiguity. |
|
58618 | 918 |
\<close> |
46291 | 919 |
|
920 |
||
58618 | 921 |
section \<open>Syntax transformations \label{sec:syntax-transformations}\<close> |
48113 | 922 |
|
62106 | 923 |
text \<open> |
924 |
The inner syntax engine of Isabelle provides separate mechanisms to |
|
925 |
transform parse trees either via rewrite systems on first-order ASTs |
|
926 |
(\secref{sec:syn-trans}), or ML functions on ASTs or syntactic \<open>\<lambda>\<close>-terms |
|
927 |
(\secref{sec:tr-funs}). This works both for parsing and printing, as |
|
928 |
outlined in \figref{fig:parse-print}. |
|
48113 | 929 |
|
930 |
\begin{figure}[htbp] |
|
931 |
\begin{center} |
|
932 |
\begin{tabular}{cl} |
|
933 |
string & \\ |
|
61493 | 934 |
\<open>\<down>\<close> & lexer + parser \\ |
48113 | 935 |
parse tree & \\ |
61493 | 936 |
\<open>\<down>\<close> & parse AST translation \\ |
48113 | 937 |
AST & \\ |
61493 | 938 |
\<open>\<down>\<close> & AST rewriting (macros) \\ |
48113 | 939 |
AST & \\ |
61493 | 940 |
\<open>\<down>\<close> & parse translation \\ |
48113 | 941 |
--- pre-term --- & \\ |
61493 | 942 |
\<open>\<down>\<close> & print translation \\ |
48113 | 943 |
AST & \\ |
61493 | 944 |
\<open>\<down>\<close> & AST rewriting (macros) \\ |
48113 | 945 |
AST & \\ |
61493 | 946 |
\<open>\<down>\<close> & print AST translation \\ |
48113 | 947 |
string & |
948 |
\end{tabular} |
|
949 |
\end{center} |
|
950 |
\caption{Parsing and printing with translations}\label{fig:parse-print} |
|
951 |
\end{figure} |
|
952 |
||
62106 | 953 |
These intermediate syntax tree formats eventually lead to a pre-term with |
954 |
all names and binding scopes resolved, but most type information still |
|
955 |
missing. Explicit type constraints might be given by the user, or implicit |
|
956 |
position information by the system --- both need to be passed-through |
|
957 |
carefully by syntax transformations. |
|
48113 | 958 |
|
62106 | 959 |
Pre-terms are further processed by the so-called \<^emph>\<open>check\<close> and \<^emph>\<open>uncheck\<close> |
960 |
phases that are intertwined with type-inference (see also @{cite |
|
961 |
"isabelle-implementation"}). The latter allows to operate on higher-order |
|
962 |
abstract syntax with proper binding and type information already available. |
|
48113 | 963 |
|
62106 | 964 |
As a rule of thumb, anything that manipulates bindings of variables or |
965 |
constants needs to be implemented as syntax transformation (see below). |
|
966 |
Anything else is better done via check/uncheck: a prominent example |
|
967 |
application is the @{command abbreviation} concept of Isabelle/Pure. |
|
968 |
\<close> |
|
48113 | 969 |
|
970 |
||
58618 | 971 |
subsection \<open>Abstract syntax trees \label{sec:ast}\<close> |
48113 | 972 |
|
62106 | 973 |
text \<open> |
974 |
The ML datatype @{ML_type Ast.ast} explicitly represents the intermediate |
|
975 |
AST format that is used for syntax rewriting (\secref{sec:syn-trans}). It is |
|
976 |
defined in ML as follows: |
|
61408
9020a3ba6c9a
@{verbatim [display]} supersedes old alltt/ttbox;
wenzelm
parents:
61143
diff
changeset
|
977 |
@{verbatim [display] |
9020a3ba6c9a
@{verbatim [display]} supersedes old alltt/ttbox;
wenzelm
parents:
61143
diff
changeset
|
978 |
\<open>datatype ast = |
9020a3ba6c9a
@{verbatim [display]} supersedes old alltt/ttbox;
wenzelm
parents:
61143
diff
changeset
|
979 |
Constant of string | |
9020a3ba6c9a
@{verbatim [display]} supersedes old alltt/ttbox;
wenzelm
parents:
61143
diff
changeset
|
980 |
Variable of string | |
9020a3ba6c9a
@{verbatim [display]} supersedes old alltt/ttbox;
wenzelm
parents:
61143
diff
changeset
|
981 |
Appl of ast list\<close>} |
48114 | 982 |
|
62106 | 983 |
An AST is either an atom (constant or variable) or a list of (at least two) |
984 |
subtrees. Occasional diagnostic output of ASTs uses notation that resembles |
|
985 |
S-expressions of LISP. Constant atoms are shown as quoted strings, variable |
|
986 |
atoms as non-quoted strings and applications as a parenthesized list of |
|
987 |
subtrees. For example, the AST |
|
58724 | 988 |
@{ML [display] \<open>Ast.Appl [Ast.Constant "_abs", Ast.Variable "x", Ast.Variable "t"]\<close>} |
62106 | 989 |
is pretty-printed as \<^verbatim>\<open>("_abs" x t)\<close>. Note that \<^verbatim>\<open>()\<close> and \<^verbatim>\<open>(x)\<close> are |
990 |
excluded as ASTs, because they have too few subtrees. |
|
48114 | 991 |
|
61421 | 992 |
\<^medskip> |
62106 | 993 |
AST application is merely a pro-forma mechanism to indicate certain |
994 |
syntactic structures. Thus \<^verbatim>\<open>(c a b)\<close> could mean either term application or |
|
995 |
type application, depending on the syntactic context. |
|
48114 | 996 |
|
62106 | 997 |
Nested application like \<^verbatim>\<open>(("_abs" x t) u)\<close> is also possible, but ASTs are |
998 |
definitely first-order: the syntax constant \<^verbatim>\<open>"_abs"\<close> does not bind the \<^verbatim>\<open>x\<close> |
|
999 |
in any way. Proper bindings are introduced in later stages of the term |
|
1000 |
syntax, where \<^verbatim>\<open>("_abs" x t)\<close> becomes an @{ML Abs} node and occurrences of |
|
1001 |
\<^verbatim>\<open>x\<close> in \<^verbatim>\<open>t\<close> are replaced by bound variables (represented as de-Bruijn |
|
1002 |
indices). |
|
58618 | 1003 |
\<close> |
48113 | 1004 |
|
1005 |
||
58618 | 1006 |
subsubsection \<open>AST constants versus variables\<close> |
48114 | 1007 |
|
62106 | 1008 |
text \<open> |
1009 |
Depending on the situation --- input syntax, output syntax, translation |
|
1010 |
patterns --- the distinction of atomic ASTs as @{ML Ast.Constant} versus |
|
1011 |
@{ML Ast.Variable} serves slightly different purposes. |
|
48114 | 1012 |
|
62106 | 1013 |
Input syntax of a term such as \<open>f a b = c\<close> does not yet indicate the scopes |
1014 |
of atomic entities \<open>f, a, b, c\<close>: they could be global constants or local |
|
1015 |
variables, even bound ones depending on the context of the term. @{ML |
|
1016 |
Ast.Variable} leaves this choice still open: later syntax layers (or |
|
1017 |
translation functions) may capture such a variable to determine its role |
|
1018 |
specifically, to make it a constant, bound variable, free variable etc. In |
|
1019 |
contrast, syntax translations that introduce already known constants would |
|
1020 |
rather do it via @{ML Ast.Constant} to prevent accidental re-interpretation |
|
1021 |
later on. |
|
48114 | 1022 |
|
62106 | 1023 |
Output syntax turns term constants into @{ML Ast.Constant} and variables |
1024 |
(free or schematic) into @{ML Ast.Variable}. This information is precise |
|
1025 |
when printing fully formal \<open>\<lambda>\<close>-terms. |
|
48114 | 1026 |
|
61421 | 1027 |
\<^medskip> |
62106 | 1028 |
AST translation patterns (\secref{sec:syn-trans}) that represent terms |
1029 |
cannot distinguish constants and variables syntactically. Explicit |
|
1030 |
indication of \<open>CONST c\<close> inside the term language is required, unless \<open>c\<close> is |
|
1031 |
known as special \<^emph>\<open>syntax constant\<close> (see also @{command syntax}). It is also |
|
1032 |
possible to use @{command syntax} declarations (without mixfix annotation) |
|
1033 |
to enforce that certain unqualified names are always treated as constant |
|
1034 |
within the syntax machinery. |
|
48114 | 1035 |
|
62106 | 1036 |
The situation is simpler for ASTs that represent types or sorts, since the |
1037 |
concrete syntax already distinguishes type variables from type constants |
|
1038 |
(constructors). So \<open>('a, 'b) foo\<close> corresponds to an AST application of some |
|
1039 |
constant for \<open>foo\<close> and variable arguments for \<open>'a\<close> and \<open>'b\<close>. Note that the |
|
1040 |
postfix application is merely a feature of the concrete syntax, while in the |
|
1041 |
AST the constructor occurs in head position. |
|
1042 |
\<close> |
|
48114 | 1043 |
|
1044 |
||
58618 | 1045 |
subsubsection \<open>Authentic syntax names\<close> |
48114 | 1046 |
|
62106 | 1047 |
text \<open> |
1048 |
Naming constant entities within ASTs is another delicate issue. Unqualified |
|
1049 |
names are resolved in the name space tables in the last stage of parsing, |
|
1050 |
after all translations have been applied. Since syntax transformations do |
|
1051 |
not know about this later name resolution, there can be surprises in |
|
1052 |
boundary cases. |
|
48114 | 1053 |
|
62106 | 1054 |
\<^emph>\<open>Authentic syntax names\<close> for @{ML Ast.Constant} avoid this problem: the |
1055 |
fully-qualified constant name with a special prefix for its formal category |
|
1056 |
(\<open>class\<close>, \<open>type\<close>, \<open>const\<close>, \<open>fixed\<close>) represents the information faithfully |
|
1057 |
within the untyped AST format. Accidental overlap with free or bound |
|
1058 |
variables is excluded as well. Authentic syntax names work implicitly in the |
|
1059 |
following situations: |
|
48114 | 1060 |
|
62106 | 1061 |
\<^item> Input of term constants (or fixed variables) that are introduced by |
1062 |
concrete syntax via @{command notation}: the correspondence of a |
|
1063 |
particular grammar production to some known term entity is preserved. |
|
48114 | 1064 |
|
62106 | 1065 |
\<^item> Input of type constants (constructors) and type classes --- thanks to |
1066 |
explicit syntactic distinction independently of the context. |
|
48114 | 1067 |
|
62106 | 1068 |
\<^item> Output of term constants, type constants, type classes --- this |
1069 |
information is already available from the internal term to be printed. |
|
48114 | 1070 |
|
62106 | 1071 |
In other words, syntax transformations that operate on input terms written |
1072 |
as prefix applications are difficult to make robust. Luckily, this case |
|
1073 |
rarely occurs in practice, because syntax forms to be translated usually |
|
1074 |
correspond to some concrete notation. |
|
1075 |
\<close> |
|
48114 | 1076 |
|
1077 |
||
58618 | 1078 |
subsection \<open>Raw syntax and translations \label{sec:syn-trans}\<close> |
28762 | 1079 |
|
58618 | 1080 |
text \<open> |
48117 | 1081 |
\begin{tabular}{rcll} |
61493 | 1082 |
@{command_def "nonterminal"} & : & \<open>theory \<rightarrow> theory\<close> \\ |
1083 |
@{command_def "syntax"} & : & \<open>theory \<rightarrow> theory\<close> \\ |
|
1084 |
@{command_def "no_syntax"} & : & \<open>theory \<rightarrow> theory\<close> \\ |
|
1085 |
@{command_def "translations"} & : & \<open>theory \<rightarrow> theory\<close> \\ |
|
1086 |
@{command_def "no_translations"} & : & \<open>theory \<rightarrow> theory\<close> \\ |
|
1087 |
@{attribute_def syntax_ast_trace} & : & \<open>attribute\<close> & default \<open>false\<close> \\ |
|
1088 |
@{attribute_def syntax_ast_stats} & : & \<open>attribute\<close> & default \<open>false\<close> \\ |
|
48117 | 1089 |
\end{tabular} |
61421 | 1090 |
\<^medskip> |
59783
00b62aa9f430
tuned syntax diagrams -- no duplication of "target";
wenzelm
parents:
58842
diff
changeset
|
1091 |
|
62106 | 1092 |
Unlike mixfix notation for existing formal entities (\secref{sec:notation}), |
1093 |
raw syntax declarations provide full access to the priority grammar of the |
|
1094 |
inner syntax, without any sanity checks. This includes additional syntactic |
|
1095 |
categories (via @{command nonterminal}) and free-form grammar productions |
|
1096 |
(via @{command syntax}). Additional syntax translations (or macros, via |
|
1097 |
@{command translations}) are required to turn resulting parse trees into |
|
1098 |
proper representations of formal entities again. |
|
46292 | 1099 |
|
55112
b1a5d603fd12
prefer rail cartouche -- avoid back-slashed quotes;
wenzelm
parents:
55108
diff
changeset
|
1100 |
@{rail \<open> |
42596
6c621a9d612a
modernized rail diagrams using @{rail} antiquotation;
wenzelm
parents:
42358
diff
changeset
|
1101 |
@@{command nonterminal} (@{syntax name} + @'and') |
28762 | 1102 |
; |
46494
ea2ae63336f3
clarified outer syntax "constdecl", which is only local to some rail diagrams;
wenzelm
parents:
46483
diff
changeset
|
1103 |
(@@{command syntax} | @@{command no_syntax}) @{syntax mode}? (constdecl +) |
28762 | 1104 |
; |
42596
6c621a9d612a
modernized rail diagrams using @{rail} antiquotation;
wenzelm
parents:
42358
diff
changeset
|
1105 |
(@@{command translations} | @@{command no_translations}) |
6c621a9d612a
modernized rail diagrams using @{rail} antiquotation;
wenzelm
parents:
42358
diff
changeset
|
1106 |
(transpat ('==' | '=>' | '<=' | '\<rightleftharpoons>' | '\<rightharpoonup>' | '\<leftharpoondown>') transpat +) |
28762 | 1107 |
; |
1108 |
||
46494
ea2ae63336f3
clarified outer syntax "constdecl", which is only local to some rail diagrams;
wenzelm
parents:
46483
diff
changeset
|
1109 |
constdecl: @{syntax name} '::' @{syntax type} @{syntax mixfix}? |
ea2ae63336f3
clarified outer syntax "constdecl", which is only local to some rail diagrams;
wenzelm
parents:
46483
diff
changeset
|
1110 |
; |
42596
6c621a9d612a
modernized rail diagrams using @{rail} antiquotation;
wenzelm
parents:
42358
diff
changeset
|
1111 |
mode: ('(' ( @{syntax name} | @'output' | @{syntax name} @'output' ) ')') |
28762 | 1112 |
; |
62969 | 1113 |
transpat: ('(' @{syntax name} ')')? @{syntax string} |
55112
b1a5d603fd12
prefer rail cartouche -- avoid back-slashed quotes;
wenzelm
parents:
55108
diff
changeset
|
1114 |
\<close>} |
28762 | 1115 |
|
62106 | 1116 |
\<^descr> @{command "nonterminal"}~\<open>c\<close> declares a type constructor \<open>c\<close> (without |
1117 |
arguments) to act as purely syntactic type: a nonterminal symbol of the |
|
1118 |
inner syntax. |
|
28762 | 1119 |
|
62106 | 1120 |
\<^descr> @{command "syntax"}~\<open>(mode) c :: \<sigma> (mx)\<close> augments the priority grammar and |
1121 |
the pretty printer table for the given print mode (default \<^verbatim>\<open>""\<close>). An |
|
1122 |
optional keyword @{keyword_ref "output"} means that only the pretty printer |
|
1123 |
table is affected. |
|
46292 | 1124 |
|
62106 | 1125 |
Following \secref{sec:mixfix}, the mixfix annotation \<open>mx = template ps q\<close> |
1126 |
together with type \<open>\<sigma> = \<tau>\<^sub>1 \<Rightarrow> \<dots> \<tau>\<^sub>n \<Rightarrow> \<tau>\<close> specify a grammar production. |
|
1127 |
The \<open>template\<close> contains delimiter tokens that surround \<open>n\<close> argument |
|
1128 |
positions (\<^verbatim>\<open>_\<close>). The latter correspond to nonterminal symbols \<open>A\<^sub>i\<close> derived |
|
1129 |
from the argument types \<open>\<tau>\<^sub>i\<close> as follows: |
|
46292 | 1130 |
|
61493 | 1131 |
\<^item> \<open>prop\<close> if \<open>\<tau>\<^sub>i = prop\<close> |
46292 | 1132 |
|
62106 | 1133 |
\<^item> \<open>logic\<close> if \<open>\<tau>\<^sub>i = (\<dots>)\<kappa>\<close> for logical type constructor \<open>\<kappa> \<noteq> prop\<close> |
46292 | 1134 |
|
61493 | 1135 |
\<^item> \<open>any\<close> if \<open>\<tau>\<^sub>i = \<alpha>\<close> for type variables |
46292 | 1136 |
|
62106 | 1137 |
\<^item> \<open>\<kappa>\<close> if \<open>\<tau>\<^sub>i = \<kappa>\<close> for nonterminal \<open>\<kappa>\<close> (syntactic type constructor) |
46292 | 1138 |
|
62106 | 1139 |
Each \<open>A\<^sub>i\<close> is decorated by priority \<open>p\<^sub>i\<close> from the given list \<open>ps\<close>; missing |
1140 |
priorities default to 0. |
|
46292 | 1141 |
|
62106 | 1142 |
The resulting nonterminal of the production is determined similarly from |
1143 |
type \<open>\<tau>\<close>, with priority \<open>q\<close> and default 1000. |
|
46292 | 1144 |
|
61421 | 1145 |
\<^medskip> |
62106 | 1146 |
Parsing via this production produces parse trees \<open>t\<^sub>1, \<dots>, t\<^sub>n\<close> for the |
1147 |
argument slots. The resulting parse tree is composed as \<open>c t\<^sub>1 \<dots> t\<^sub>n\<close>, by |
|
1148 |
using the syntax constant \<open>c\<close> of the syntax declaration. |
|
46292 | 1149 |
|
62106 | 1150 |
Such syntactic constants are invented on the spot, without formal check |
1151 |
wrt.\ existing declarations. It is conventional to use plain identifiers |
|
1152 |
prefixed by a single underscore (e.g.\ \<open>_foobar\<close>). Names should be chosen |
|
1153 |
with care, to avoid clashes with other syntax declarations. |
|
46292 | 1154 |
|
61421 | 1155 |
\<^medskip> |
62106 | 1156 |
The special case of copy production is specified by \<open>c =\<close>~\<^verbatim>\<open>""\<close> (empty |
1157 |
string). It means that the resulting parse tree \<open>t\<close> is copied directly, |
|
1158 |
without any further decoration. |
|
46282 | 1159 |
|
62106 | 1160 |
\<^descr> @{command "no_syntax"}~\<open>(mode) decls\<close> removes grammar declarations (and |
1161 |
translations) resulting from \<open>decls\<close>, which are interpreted in the same |
|
1162 |
manner as for @{command "syntax"} above. |
|
1163 |
||
1164 |
\<^descr> @{command "translations"}~\<open>rules\<close> specifies syntactic translation rules |
|
1165 |
(i.e.\ macros) as first-order rewrite rules on ASTs (\secref{sec:ast}). The |
|
1166 |
theory context maintains two independent lists of translation rules: parse |
|
1167 |
rules (\<^verbatim>\<open>=>\<close> or \<open>\<rightharpoonup>\<close>) and print rules (\<^verbatim>\<open><=\<close> or \<open>\<leftharpoondown>\<close>). For convenience, both |
|
1168 |
can be specified simultaneously as parse~/ print rules (\<^verbatim>\<open>==\<close> or \<open>\<rightleftharpoons>\<close>). |
|
46282 | 1169 |
|
62106 | 1170 |
Translation patterns may be prefixed by the syntactic category to be used |
1171 |
for parsing; the default is \<open>logic\<close> which means that regular term syntax is |
|
1172 |
used. Both sides of the syntax translation rule undergo parsing and parse |
|
1173 |
AST translations (\secref{sec:tr-funs}), in order to perform some fundamental |
|
1174 |
normalization like \<open>\<lambda>x y. b \<leadsto> \<lambda>x. \<lambda>y. b\<close>, but other AST translation rules |
|
1175 |
are \<^emph>\<open>not\<close> applied recursively here. |
|
48115 | 1176 |
|
62106 | 1177 |
When processing AST patterns, the inner syntax lexer runs in a different |
1178 |
mode that allows identifiers to start with underscore. This accommodates the |
|
1179 |
usual naming convention for auxiliary syntax constants --- those that do not |
|
1180 |
have a logical counterpart --- by allowing to specify arbitrary AST |
|
1181 |
applications within the term syntax, independently of the corresponding |
|
1182 |
concrete syntax. |
|
48115 | 1183 |
|
1184 |
Atomic ASTs are distinguished as @{ML Ast.Constant} versus @{ML |
|
62106 | 1185 |
Ast.Variable} as follows: a qualified name or syntax constant declared via |
1186 |
@{command syntax}, or parse tree head of concrete notation becomes @{ML |
|
1187 |
Ast.Constant}, anything else @{ML Ast.Variable}. Note that \<open>CONST\<close> and |
|
1188 |
\<open>XCONST\<close> within the term language (\secref{sec:pure-grammar}) allow to |
|
1189 |
enforce treatment as constants. |
|
48115 | 1190 |
|
62106 | 1191 |
AST rewrite rules \<open>(lhs, rhs)\<close> need to obey the following side-conditions: |
48115 | 1192 |
|
62106 | 1193 |
\<^item> Rules must be left linear: \<open>lhs\<close> must not contain repeated |
1194 |
variables.\<^footnote>\<open>The deeper reason for this is that AST equality is not |
|
1195 |
well-defined: different occurrences of the ``same'' AST could be decorated |
|
1196 |
differently by accidental type-constraints or source position information, |
|
1197 |
for example.\<close> |
|
48115 | 1198 |
|
61493 | 1199 |
\<^item> Every variable in \<open>rhs\<close> must also occur in \<open>lhs\<close>. |
48115 | 1200 |
|
62106 | 1201 |
\<^descr> @{command "no_translations"}~\<open>rules\<close> removes syntactic translation rules, |
1202 |
which are interpreted in the same manner as for @{command "translations"} |
|
1203 |
above. |
|
28762 | 1204 |
|
62106 | 1205 |
\<^descr> @{attribute syntax_ast_trace} and @{attribute syntax_ast_stats} control |
1206 |
diagnostic output in the AST normalization process, when translation rules |
|
1207 |
are applied to concrete input or output. |
|
48117 | 1208 |
|
46293 | 1209 |
|
62106 | 1210 |
Raw syntax and translations provide a slightly more low-level access to the |
1211 |
grammar and the form of resulting parse trees. It is often possible to avoid |
|
1212 |
this untyped macro mechanism, and use type-safe @{command abbreviation} or |
|
1213 |
@{command notation} instead. Some important situations where @{command |
|
1214 |
syntax} and @{command translations} are really needed are as follows: |
|
46293 | 1215 |
|
62106 | 1216 |
\<^item> Iterated replacement via recursive @{command translations}. For example, |
1217 |
consider list enumeration @{term "[a, b, c, d]"} as defined in theory |
|
68484 | 1218 |
@{theory HOL.List}. |
46293 | 1219 |
|
62106 | 1220 |
\<^item> Change of binding status of variables: anything beyond the built-in |
1221 |
@{keyword "binder"} mixfix annotation requires explicit syntax translations. |
|
68249
949d93804740
First step to remove nonstandard "[x <- xs. P]" syntax: only input
nipkow
parents:
67718
diff
changeset
|
1222 |
For example, consider the set comprehension syntax @{term "{x. P}"} as |
68484 | 1223 |
defined in theory @{theory HOL.Set}. |
61458 | 1224 |
\<close> |
46293 | 1225 |
|
28762 | 1226 |
|
58618 | 1227 |
subsubsection \<open>Applying translation rules\<close> |
48117 | 1228 |
|
62106 | 1229 |
text \<open> |
1230 |
As a term is being parsed or printed, an AST is generated as an intermediate |
|
1231 |
form according to \figref{fig:parse-print}. The AST is normalized by |
|
1232 |
applying translation rules in the manner of a first-order term rewriting |
|
1233 |
system. We first examine how a single rule is applied. |
|
48117 | 1234 |
|
62106 | 1235 |
Let \<open>t\<close> be the abstract syntax tree to be normalized and \<open>(lhs, rhs)\<close> some |
1236 |
translation rule. A subtree \<open>u\<close> of \<open>t\<close> is called \<^emph>\<open>redex\<close> if it is an |
|
1237 |
instance of \<open>lhs\<close>; in this case the pattern \<open>lhs\<close> is said to match the |
|
1238 |
object \<open>u\<close>. A redex matched by \<open>lhs\<close> may be replaced by the corresponding |
|
1239 |
instance of \<open>rhs\<close>, thus \<^emph>\<open>rewriting\<close> the AST \<open>t\<close>. Matching requires some |
|
1240 |
notion of \<^emph>\<open>place-holders\<close> in rule patterns: @{ML Ast.Variable} serves this |
|
1241 |
purpose. |
|
48117 | 1242 |
|
62106 | 1243 |
More precisely, the matching of the object \<open>u\<close> against the pattern \<open>lhs\<close> is |
1244 |
performed as follows: |
|
48117 | 1245 |
|
62106 | 1246 |
\<^item> Objects of the form @{ML Ast.Variable}~\<open>x\<close> or @{ML Ast.Constant}~\<open>x\<close> are |
1247 |
matched by pattern @{ML Ast.Constant}~\<open>x\<close>. Thus all atomic ASTs in the |
|
1248 |
object are treated as (potential) constants, and a successful match makes |
|
1249 |
them actual constants even before name space resolution (see also |
|
1250 |
\secref{sec:ast}). |
|
48117 | 1251 |
|
62106 | 1252 |
\<^item> Object \<open>u\<close> is matched by pattern @{ML Ast.Variable}~\<open>x\<close>, binding \<open>x\<close> to |
1253 |
\<open>u\<close>. |
|
48117 | 1254 |
|
62106 | 1255 |
\<^item> Object @{ML Ast.Appl}~\<open>us\<close> is matched by @{ML Ast.Appl}~\<open>ts\<close> if \<open>us\<close> and |
1256 |
\<open>ts\<close> have the same length and each corresponding subtree matches. |
|
48117 | 1257 |
|
62106 | 1258 |
\<^item> In every other case, matching fails. |
48117 | 1259 |
|
62106 | 1260 |
A successful match yields a substitution that is applied to \<open>rhs\<close>, |
1261 |
generating the instance that replaces \<open>u\<close>. |
|
48117 | 1262 |
|
62106 | 1263 |
Normalizing an AST involves repeatedly applying translation rules until none |
1264 |
are applicable. This works yoyo-like: top-down, bottom-up, top-down, etc. At |
|
1265 |
each subtree position, rules are chosen in order of appearance in the theory |
|
1266 |
definitions. |
|
48117 | 1267 |
|
62106 | 1268 |
The configuration options @{attribute syntax_ast_trace} and @{attribute |
1269 |
syntax_ast_stats} might help to understand this process and diagnose |
|
1270 |
problems. |
|
48117 | 1271 |
|
1272 |
\begin{warn} |
|
62106 | 1273 |
If syntax translation rules work incorrectly, the output of @{command_ref |
1274 |
print_syntax} with its \<^emph>\<open>rules\<close> sections reveals the actual internal forms |
|
1275 |
of AST patterns, without potentially confusing concrete syntax. Recall that |
|
1276 |
AST constants appear as quoted strings and variables without quotes. |
|
48117 | 1277 |
\end{warn} |
1278 |
||
1279 |
\begin{warn} |
|
62106 | 1280 |
If @{attribute_ref eta_contract} is set to \<open>true\<close>, terms will be |
1281 |
\<open>\<eta>\<close>-contracted \<^emph>\<open>before\<close> the AST rewriter sees them. Thus some abstraction |
|
1282 |
nodes needed for print rules to match may vanish. For example, \<open>Ball A (\<lambda>x. |
|
1283 |
P x)\<close> would contract to \<open>Ball A P\<close> and the standard print rule would fail to |
|
1284 |
apply. This problem can be avoided by hand-written ML translation functions |
|
1285 |
(see also \secref{sec:tr-funs}), which is in fact the same mechanism used in |
|
1286 |
built-in @{keyword "binder"} declarations. |
|
48117 | 1287 |
\end{warn} |
58618 | 1288 |
\<close> |
48117 | 1289 |
|
28762 | 1290 |
|
58618 | 1291 |
subsection \<open>Syntax translation functions \label{sec:tr-funs}\<close> |
28762 | 1292 |
|
58618 | 1293 |
text \<open> |
28762 | 1294 |
\begin{matharray}{rcl} |
61493 | 1295 |
@{command_def "parse_ast_translation"} & : & \<open>theory \<rightarrow> theory\<close> \\ |
1296 |
@{command_def "parse_translation"} & : & \<open>theory \<rightarrow> theory\<close> \\ |
|
1297 |
@{command_def "print_translation"} & : & \<open>theory \<rightarrow> theory\<close> \\ |
|
1298 |
@{command_def "typed_print_translation"} & : & \<open>theory \<rightarrow> theory\<close> \\ |
|
1299 |
@{command_def "print_ast_translation"} & : & \<open>theory \<rightarrow> theory\<close> \\ |
|
1300 |
@{ML_antiquotation_def "class_syntax"} & : & \<open>ML antiquotation\<close> \\ |
|
1301 |
@{ML_antiquotation_def "type_syntax"} & : & \<open>ML antiquotation\<close> \\ |
|
1302 |
@{ML_antiquotation_def "const_syntax"} & : & \<open>ML antiquotation\<close> \\ |
|
1303 |
@{ML_antiquotation_def "syntax_const"} & : & \<open>ML antiquotation\<close> \\ |
|
28762 | 1304 |
\end{matharray} |
1305 |
||
48118 | 1306 |
Syntax translation functions written in ML admit almost arbitrary |
1307 |
manipulations of inner syntax, at the expense of some complexity and |
|
1308 |
obscurity in the implementation. |
|
1309 |
||
55112
b1a5d603fd12
prefer rail cartouche -- avoid back-slashed quotes;
wenzelm
parents:
55108
diff
changeset
|
1310 |
@{rail \<open> |
42596
6c621a9d612a
modernized rail diagrams using @{rail} antiquotation;
wenzelm
parents:
42358
diff
changeset
|
1311 |
( @@{command parse_ast_translation} | @@{command parse_translation} | |
6c621a9d612a
modernized rail diagrams using @{rail} antiquotation;
wenzelm
parents:
42358
diff
changeset
|
1312 |
@@{command print_translation} | @@{command typed_print_translation} | |
52143 | 1313 |
@@{command print_ast_translation}) @{syntax text} |
48119
55c305e29f4b
cover @{class_syntax}, @{type_syntax}, @{const_syntax}, @{syntax_const} in isar-ref, in contrast to other ML antiquotations in implementation manual;
wenzelm
parents:
48118
diff
changeset
|
1314 |
; |
55c305e29f4b
cover @{class_syntax}, @{type_syntax}, @{const_syntax}, @{syntax_const} in isar-ref, in contrast to other ML antiquotations in implementation manual;
wenzelm
parents:
48118
diff
changeset
|
1315 |
(@@{ML_antiquotation class_syntax} | |
55c305e29f4b
cover @{class_syntax}, @{type_syntax}, @{const_syntax}, @{syntax_const} in isar-ref, in contrast to other ML antiquotations in implementation manual;
wenzelm
parents:
48118
diff
changeset
|
1316 |
@@{ML_antiquotation type_syntax} | |
55c305e29f4b
cover @{class_syntax}, @{type_syntax}, @{const_syntax}, @{syntax_const} in isar-ref, in contrast to other ML antiquotations in implementation manual;
wenzelm
parents:
48118
diff
changeset
|
1317 |
@@{ML_antiquotation const_syntax} | |
67146 | 1318 |
@@{ML_antiquotation syntax_const}) embedded |
55112
b1a5d603fd12
prefer rail cartouche -- avoid back-slashed quotes;
wenzelm
parents:
55108
diff
changeset
|
1319 |
\<close>} |
28762 | 1320 |
|
62106 | 1321 |
\<^descr> @{command parse_translation} etc. declare syntax translation functions to |
1322 |
the theory. Each of these commands has a single @{syntax text} argument that |
|
1323 |
refers to an ML expression of appropriate type as follows: |
|
48118 | 1324 |
|
61421 | 1325 |
\<^medskip> |
48119
55c305e29f4b
cover @{class_syntax}, @{type_syntax}, @{const_syntax}, @{syntax_const} in isar-ref, in contrast to other ML antiquotations in implementation manual;
wenzelm
parents:
48118
diff
changeset
|
1326 |
{\footnotesize |
52143 | 1327 |
\begin{tabular}{l} |
1328 |
@{command parse_ast_translation} : \\ |
|
1329 |
\quad @{ML_type "(string * (Proof.context -> Ast.ast list -> Ast.ast)) list"} \\ |
|
1330 |
@{command parse_translation} : \\ |
|
1331 |
\quad @{ML_type "(string * (Proof.context -> term list -> term)) list"} \\ |
|
1332 |
@{command print_translation} : \\ |
|
1333 |
\quad @{ML_type "(string * (Proof.context -> term list -> term)) list"} \\ |
|
1334 |
@{command typed_print_translation} : \\ |
|
1335 |
\quad @{ML_type "(string * (Proof.context -> typ -> term list -> term)) list"} \\ |
|
1336 |
@{command print_ast_translation} : \\ |
|
1337 |
\quad @{ML_type "(string * (Proof.context -> Ast.ast list -> Ast.ast)) list"} \\ |
|
48118 | 1338 |
\end{tabular}} |
61421 | 1339 |
\<^medskip> |
28762 | 1340 |
|
62106 | 1341 |
The argument list consists of \<open>(c, tr)\<close> pairs, where \<open>c\<close> is the syntax name |
1342 |
of the formal entity involved, and \<open>tr\<close> a function that translates a syntax |
|
1343 |
form \<open>c args\<close> into \<open>tr ctxt args\<close> (depending on the context). The |
|
1344 |
Isabelle/ML naming convention for parse translations is \<open>c_tr\<close> and for print |
|
1345 |
translations \<open>c_tr'\<close>. |
|
48118 | 1346 |
|
1347 |
The @{command_ref print_syntax} command displays the sets of names |
|
62106 | 1348 |
associated with the translation functions of a theory under |
1349 |
\<open>parse_ast_translation\<close> etc. |
|
48118 | 1350 |
|
62106 | 1351 |
\<^descr> \<open>@{class_syntax c}\<close>, \<open>@{type_syntax c}\<close>, \<open>@{const_syntax c}\<close> inline the |
1352 |
authentic syntax name of the given formal entities into the ML source. This |
|
1353 |
is the fully-qualified logical name prefixed by a special marker to indicate |
|
1354 |
its kind: thus different logical name spaces are properly distinguished |
|
1355 |
within parse trees. |
|
48119
55c305e29f4b
cover @{class_syntax}, @{type_syntax}, @{const_syntax}, @{syntax_const} in isar-ref, in contrast to other ML antiquotations in implementation manual;
wenzelm
parents:
48118
diff
changeset
|
1356 |
|
62106 | 1357 |
\<^descr> \<open>@{syntax_const c}\<close> inlines the name \<open>c\<close> of the given syntax constant, |
1358 |
having checked that it has been declared via some @{command syntax} commands |
|
1359 |
within the theory context. Note that the usual naming convention makes |
|
1360 |
syntax constants start with underscore, to reduce the chance of accidental |
|
1361 |
clashes with other names occurring in parse trees (unqualified constants |
|
1362 |
etc.). |
|
58618 | 1363 |
\<close> |
48118 | 1364 |
|
48119
55c305e29f4b
cover @{class_syntax}, @{type_syntax}, @{const_syntax}, @{syntax_const} in isar-ref, in contrast to other ML antiquotations in implementation manual;
wenzelm
parents:
48118
diff
changeset
|
1365 |
|
58618 | 1366 |
subsubsection \<open>The translation strategy\<close> |
28762 | 1367 |
|
62106 | 1368 |
text \<open> |
1369 |
The different kinds of translation functions are invoked during the |
|
1370 |
transformations between parse trees, ASTs and syntactic terms (cf.\ |
|
1371 |
\figref{fig:parse-print}). Whenever a combination of the form \<open>c x\<^sub>1 \<dots> x\<^sub>n\<close> |
|
1372 |
is encountered, and a translation function \<open>f\<close> of appropriate kind is |
|
1373 |
declared for \<open>c\<close>, the result is produced by evaluation of \<open>f [x\<^sub>1, \<dots>, x\<^sub>n]\<close> |
|
1374 |
in ML. |
|
48118 | 1375 |
|
62106 | 1376 |
For AST translations, the arguments \<open>x\<^sub>1, \<dots>, x\<^sub>n\<close> are ASTs. A combination |
1377 |
has the form @{ML "Ast.Constant"}~\<open>c\<close> or @{ML "Ast.Appl"}~\<open>[\<close>@{ML |
|
1378 |
Ast.Constant}~\<open>c, x\<^sub>1, \<dots>, x\<^sub>n]\<close>. For term translations, the arguments are |
|
1379 |
terms and a combination has the form @{ML Const}~\<open>(c, \<tau>)\<close> or @{ML |
|
1380 |
Const}~\<open>(c, \<tau>) $ x\<^sub>1 $ \<dots> $ x\<^sub>n\<close>. Terms allow more sophisticated |
|
1381 |
transformations than ASTs do, typically involving abstractions and bound |
|
1382 |
variables. \<^emph>\<open>Typed\<close> print translations may even peek at the type \<open>\<tau>\<close> of the |
|
1383 |
constant they are invoked on, although some information might have been |
|
1384 |
suppressed for term output already. |
|
48118 | 1385 |
|
62106 | 1386 |
Regardless of whether they act on ASTs or terms, translation functions |
1387 |
called during the parsing process differ from those for printing in their |
|
1388 |
overall behaviour: |
|
48118 | 1389 |
|
62106 | 1390 |
\<^descr>[Parse translations] are applied bottom-up. The arguments are already in |
1391 |
translated form. The translations must not fail; exceptions trigger an |
|
1392 |
error message. There may be at most one function associated with any |
|
1393 |
syntactic name. |
|
46294 | 1394 |
|
62106 | 1395 |
\<^descr>[Print translations] are applied top-down. They are supplied with |
1396 |
arguments that are partly still in internal form. The result again |
|
1397 |
undergoes translation; therefore a print translation should not introduce |
|
1398 |
as head the very constant that invoked it. The function may raise |
|
1399 |
exception @{ML Match} to indicate failure; in this event it has no effect. |
|
1400 |
Multiple functions associated with some syntactic name are tried in the |
|
1401 |
order of declaration in the theory. |
|
48118 | 1402 |
|
62106 | 1403 |
Only constant atoms --- constructor @{ML Ast.Constant} for ASTs and @{ML |
1404 |
Const} for terms --- can invoke translation functions. This means that parse |
|
1405 |
translations can only be associated with parse tree heads of concrete |
|
1406 |
syntax, or syntactic constants introduced via other translations. For plain |
|
1407 |
identifiers within the term language, the status of constant versus variable |
|
1408 |
is not yet known during parsing. This is in contrast to print translations, |
|
1409 |
where constants are explicitly known from the given term in its fully |
|
1410 |
internal form. |
|
58618 | 1411 |
\<close> |
28762 | 1412 |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1413 |
|
58618 | 1414 |
subsection \<open>Built-in syntax transformations\<close> |
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1415 |
|
58618 | 1416 |
text \<open> |
62106 | 1417 |
Here are some further details of the main syntax transformation phases of |
1418 |
\figref{fig:parse-print}. |
|
58618 | 1419 |
\<close> |
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1420 |
|
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1421 |
|
58618 | 1422 |
subsubsection \<open>Transforming parse trees to ASTs\<close> |
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1423 |
|
62106 | 1424 |
text \<open> |
1425 |
The parse tree is the raw output of the parser. It is transformed into an |
|
1426 |
AST according to some basic scheme that may be augmented by AST translation |
|
1427 |
functions as explained in \secref{sec:tr-funs}. |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1428 |
|
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1429 |
The parse tree is constructed by nesting the right-hand sides of the |
62106 | 1430 |
productions used to recognize the input. Such parse trees are simply lists |
1431 |
of tokens and constituent parse trees, the latter representing the |
|
1432 |
nonterminals of the productions. Ignoring AST translation functions, parse |
|
1433 |
trees are transformed to ASTs by stripping out delimiters and copy |
|
1434 |
productions, while retaining some source position information from input |
|
1435 |
tokens. |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1436 |
|
62106 | 1437 |
The Pure syntax provides predefined AST translations to make the basic |
1438 |
\<open>\<lambda>\<close>-term structure more apparent within the (first-order) AST |
|
1439 |
representation, and thus facilitate the use of @{command translations} (see |
|
1440 |
also \secref{sec:syn-trans}). This covers ordinary term application, type |
|
1441 |
application, nested abstraction, iterated meta implications and function |
|
1442 |
types. The effect is illustrated on some representative input strings as |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1443 |
follows: |
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1444 |
|
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1445 |
\begin{center} |
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1446 |
\begin{tabular}{ll} |
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1447 |
input source & AST \\ |
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1448 |
\hline |
61503 | 1449 |
\<open>f x y z\<close> & \<^verbatim>\<open>(f x y z)\<close> \\ |
1450 |
\<open>'a ty\<close> & \<^verbatim>\<open>(ty 'a)\<close> \\ |
|
1451 |
\<open>('a, 'b)ty\<close> & \<^verbatim>\<open>(ty 'a 'b)\<close> \\ |
|
1452 |
\<open>\<lambda>x y z. t\<close> & \<^verbatim>\<open>("_abs" x ("_abs" y ("_abs" z t)))\<close> \\ |
|
1453 |
\<open>\<lambda>x :: 'a. t\<close> & \<^verbatim>\<open>("_abs" ("_constrain" x 'a) t)\<close> \\ |
|
1454 |
\<open>\<lbrakk>P; Q; R\<rbrakk> \<Longrightarrow> S\<close> & \<^verbatim>\<open>("Pure.imp" P ("Pure.imp" Q ("Pure.imp" R S)))\<close> \\ |
|
1455 |
\<open>['a, 'b, 'c] \<Rightarrow> 'd\<close> & \<^verbatim>\<open>("fun" 'a ("fun" 'b ("fun" 'c 'd)))\<close> \\ |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1456 |
\end{tabular} |
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1457 |
\end{center} |
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1458 |
|
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1459 |
Note that type and sort constraints may occur in further places --- |
62106 | 1460 |
translations need to be ready to cope with them. The built-in syntax |
1461 |
transformation from parse trees to ASTs inserts additional constraints that |
|
1462 |
represent source positions. |
|
58618 | 1463 |
\<close> |
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1464 |
|
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1465 |
|
58618 | 1466 |
subsubsection \<open>Transforming ASTs to terms\<close> |
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1467 |
|
62106 | 1468 |
text \<open> |
1469 |
After application of macros (\secref{sec:syn-trans}), the AST is transformed |
|
1470 |
into a term. This term still lacks proper type information, but it might |
|
1471 |
contain some constraints consisting of applications with head \<^verbatim>\<open>_constrain\<close>, |
|
1472 |
where the second argument is a type encoded as a pre-term within the syntax. |
|
1473 |
Type inference later introduces correct types, or indicates type errors in |
|
1474 |
the input. |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1475 |
|
62106 | 1476 |
Ignoring parse translations, ASTs are transformed to terms by mapping AST |
1477 |
constants to term constants, AST variables to term variables or constants |
|
1478 |
(according to the name space), and AST applications to iterated term |
|
1479 |
applications. |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1480 |
|
62106 | 1481 |
The outcome is still a first-order term. Proper abstractions and bound |
1482 |
variables are introduced by parse translations associated with certain |
|
1483 |
syntax constants. Thus \<^verbatim>\<open>("_abs" x x)\<close> eventually becomes a de-Bruijn term |
|
1484 |
\<^verbatim>\<open>Abs ("x", _, Bound 0)\<close>. |
|
58618 | 1485 |
\<close> |
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1486 |
|
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1487 |
|
58618 | 1488 |
subsubsection \<open>Printing of terms\<close> |
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1489 |
|
62106 | 1490 |
text \<open> |
1491 |
The output phase is essentially the inverse of the input phase. Terms are |
|
1492 |
translated via abstract syntax trees into pretty-printed text. |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1493 |
|
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1494 |
Ignoring print translations, the transformation maps term constants, |
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1495 |
variables and applications to the corresponding constructs on ASTs. |
62106 | 1496 |
Abstractions are mapped to applications of the special constant \<^verbatim>\<open>_abs\<close> as |
1497 |
seen before. Type constraints are represented via special \<^verbatim>\<open>_constrain\<close> |
|
1498 |
forms, according to various policies of type annotation determined |
|
1499 |
elsewhere. Sort constraints of type variables are handled in a similar |
|
1500 |
fashion. |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1501 |
|
62106 | 1502 |
After application of macros (\secref{sec:syn-trans}), the AST is finally |
1503 |
pretty-printed. The built-in print AST translations reverse the |
|
1504 |
corresponding parse AST translations. |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1505 |
|
61421 | 1506 |
\<^medskip> |
1507 |
For the actual printing process, the priority grammar |
|
62106 | 1508 |
(\secref{sec:priority-grammar}) plays a vital role: productions are used as |
1509 |
templates for pretty printing, with argument slots stemming from |
|
1510 |
nonterminals, and syntactic sugar stemming from literal tokens. |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1511 |
|
62106 | 1512 |
Each AST application with constant head \<open>c\<close> and arguments \<open>t\<^sub>1\<close>, \dots, |
1513 |
\<open>t\<^sub>n\<close> (for \<open>n = 0\<close> the AST is just the constant \<open>c\<close> itself) is printed |
|
1514 |
according to the first grammar production of result name \<open>c\<close>. The required |
|
1515 |
syntax priority of the argument slot is given by its nonterminal \<open>A\<^sup>(\<^sup>p\<^sup>)\<close>. |
|
1516 |
The argument \<open>t\<^sub>i\<close> that corresponds to the position of \<open>A\<^sup>(\<^sup>p\<^sup>)\<close> is printed |
|
1517 |
recursively, and then put in parentheses \<^emph>\<open>if\<close> its priority \<open>p\<close> requires |
|
1518 |
this. The resulting output is concatenated with the syntactic sugar |
|
1519 |
according to the grammar production. |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1520 |
|
62106 | 1521 |
If an AST application \<open>(c x\<^sub>1 \<dots> x\<^sub>m)\<close> has more arguments than the |
1522 |
corresponding production, it is first split into \<open>((c x\<^sub>1 \<dots> x\<^sub>n) x\<^sub>n\<^sub>+\<^sub>1 \<dots> |
|
1523 |
x\<^sub>m)\<close> and then printed recursively as above. |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1524 |
|
62106 | 1525 |
Applications with too few arguments or with non-constant head or without a |
1526 |
corresponding production are printed in prefix-form like \<open>f t\<^sub>1 \<dots> t\<^sub>n\<close> for |
|
1527 |
terms. |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1528 |
|
62106 | 1529 |
Multiple productions associated with some name \<open>c\<close> are tried in order of |
1530 |
appearance within the grammar. An occurrence of some AST variable \<open>x\<close> is |
|
1531 |
printed as \<open>x\<close> outright. |
|
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1532 |
|
61421 | 1533 |
\<^medskip> |
62106 | 1534 |
White space is \<^emph>\<open>not\<close> inserted automatically. If blanks (or breaks) are |
1535 |
required to separate tokens, they need to be specified in the mixfix |
|
1536 |
declaration (\secref{sec:mixfix}). |
|
58618 | 1537 |
\<close> |
52414
8429123bc58a
more on built-in syntax transformations, based on reduced version of old material;
wenzelm
parents:
52413
diff
changeset
|
1538 |
|
28762 | 1539 |
end |