isabelle: comparison doc-src/IsarRef/syntax.tex

equal deleted inserted replaced

-:f59d33c6e1c7
+:c70d3402fef5
+%FIXME
+% - examples (!?)
 \chapter{Isar document syntax}
-\section{Inner versus outer syntax}
+FIXME important note: inner versus outer syntax
 \section{Lexical matters}
 \section{Common syntax entities}
-\subsection{Atoms}
+The Isar proof and theory language syntax has been carefully designed with
+orthogonality in mind.  Many common syntax entities such that those for names,
+terms, types etc.\ have been factored out.  Some of these basic syntactic
+entities usually represent the level of abstraction for error messages: e.g.\
+some higher syntax element such as $\CONSTS$ referring to \railtoken{name} or
+\railtoken{type}, would really report a missing \railtoken{name} or
+\railtoken{type} rather than any of its constituent primitive tokens (as
+defined below).  These quasi-tokens are represented in the syntax diagrams
+below using the same font as actual tokens (such as \railtoken{string}).
+\subsection{Names}
+Entity \railtoken{name} usually refers to any name of types, constants,
+theorems, etc.\ to be \emph{declared} or \emph{defined} (so qualified
+identifiers are excluded).  Already existing objects are typically referenced
+by \railtoken{nameref}.
+\indexoutertoken{name}\indexoutertoken{nameref}
 \begin{rail}
 name : ident | symident | string
 ;
 nameref : name | longident
 ;
+\end{rail}
-text : nameref | verbatim
-;
-\end{rail}
 \subsection{Comments}
-\begin{rail}
+Large chunks of verbatim \railtoken{text} are usually given
+\railtoken{verbatim}, i.e.\ enclosed in \verb|{*|~\verb|*}|; for convenience,
+any of the smaller text entities (\railtoken{ident}, \railtoken{string} etc.)
+are admitted as well.  Almost any of the Isar commands may be annotated by a
+marginal comment: \texttt{--} \railtoken{text}.  Note that this kind of
+comment is actually part of the language, while source level comments
+\verb|(*|~\verb|*)| are already stripped at the lexical level.  A few commands
+such as $\PROOFNAME$ admit some parts to be mark with a ``level of interest'':
+currently only \texttt{\%} for ``boring, don't read this''.
+\indexoutertoken{text}\indexouternonterm{comment}\indexouternonterm{interest}
+\begin{rail}
+text : verbatim | nameref
+;
 comment : (() | '--' text)
 ;
 interest : (() | '\%')
 ;
 \end{rail}
 \subsection{Sorts and arities}
+The syntax of sorts and arities is given directly at the outer level.  Note
+that this in contrast to that types and terms (see below).  Only few commands
+ever refer to sorts or arities explicitly.
+\indexouternonterm{sort}\indexouternonterm{arity}\indexouternonterm{simplearity}
 \begin{rail}
 sort : nameref | lbrace (nameref * ',') rbrace
 ;
 arity : ( () | '(' (sort + ',') ')' ) sort
 ;
-simple\-arity : ( () | '(' (sort + ',') ')' ) nameref
+simplearity : ( () | '(' (sort + ',') ')' ) nameref
 ;
 \end{rail}
-\subsection{Terms and Types}
+\subsection{Types and terms}
-\begin{rail}
+The actual inner Isabelle syntax, i.e.\ that of types and terms, is far too
+flexible in order to be modeled explicitly at the outer theory level.
-\end{rail}
+Basically, any such entity would have to be quoted at the outer level to turn
+it into a single token, with the actual parsing deferred to some functions
+that read and type-check terms etc.\ (note that \railtoken{prop}s will be
+handled differently from plain \railtoken{term}s here).  For convenience, the
+quotes may be omitted for any \emph{atomic} term or type (e.g.\ a single
+variable).
+\indexoutertoken{type}\indexoutertoken{term}\indexoutertoken{prop}
+\begin{rail}
+type : ident | longident | symident | typefree | typevar | string
+;
+term : ident | longident | symident | var | textvar | nat | string
+;
+prop : term
+;
+\end{rail}
+Type definitions etc.\ usually refer to \railnonterm{typespec} on the
+left-hand side.  This models basic type constructor application at the outer
+syntax level.  Note that only plain postfix notation is available here, but no
+infixes.
+\indexouternonterm{typespec}
+\begin{rail}
+typespec : (() | typefree | '(' ( typefree + ',' ) ')') name
+;
+\end{rail}
+\subsection{Term patterns}
+Statements like $\SHOWNAME$ involve propositions, some others like $\DEFNAME$
+plain terms.  Any of these usually admit automatic binding of schematic text
+variables by giving (optional) patterns $\IS{p@1 \dots p@n}$.  For
+\railtoken{prop}s the $\CONCLNAME$ part refers to the conclusion only, in case
+actual rules are involved, rather than atomic propositions.
+\indexouternonterm{termpat}\indexouternonterm{proppat}
+\begin{rail}
+termpat : '(' (term + 'is' ) ')'
+;
+proppat : '(' (() | (prop + 'is' )) (() | 'concl' (prop + 'is' )) ')'
+;
+\end{rail}
 \subsection{Mixfix annotations}
+Mixfix annotations specify concrete \emph{inner} syntax.  Some commands such
-\subsection{}
+as $\TYPES$ admit infixes only, while $\CONSTS$ etc.\ support the full range
+of general mixfixes and binders.
-\subsection{}
+\indexouternonterm{infix}\indexouternonterm{mixfix}
-\subsection{}
+\begin{rail}
+infix : '(' ('infixl' | 'infixr') (() | string) nat ')'
+;
+mixfix : infix | string (() | '[' (nat + ',') ']') (() | nat) |
+'binder' string (() | '[' (nat + ',') ']') nat
+;
+\end{rail}
+\subsection{Attributes and theorem specifications}\label{sec:syn-att}
+Attributes (and proof methods, see \S\ref{sec:syn-meth}) have their own
+``semi-inner'' syntax, which does not have to be atomic at the outer level
+unlike that of types and terms.  Instead, the attribute argument
+specifications may be any sequence of atomic entities (identifiers, strings
+etc.), or properly bracketed argument lists.  Below \railtoken{atom} refers to
+any atomic entity (\railtoken{ident}, \railtoken{longident},
+\railtoken{symident} etc.), including keywords that conform to
+\railtoken{symident}, but do not coincide with actual command names.
+\indexoutertoken{atom}\indexouternonterm{args}\indexouternonterm{attributes}
+\begin{rail}
+args : (atom | '(' (args *) ')' | '[' (args *) ']' | lbrace (args *) rbrace) *
+;
+attributes : '[' (name args + ',') ']'
+;
+\end{rail}
+Theorem specifications come in three flavours: \railnonterm{thmdecl} usually
+refers to the result of a goal statement (such as $\SHOWNAME$),
+\railnonterm{thmdef} collects lists of existing theorems (as in $\NOTENAME$),
+\railnonterm{thmref} refers to any list of existing theorems (e.g.\ occurring
+as proof method arguments).  Any of these may include lists of attributes,
+which are applied to the preceding theorem or list of theorems.
+\indexouternonterm{thmdecl}\indexouternonterm{thmdef}\indexouternonterm{thmref}
+\begin{rail}
+thmdecl : (() | name) (() | attributes) ':'
+;
+thmdef : (() | name) (() | attributes) '='
+;
+thmref : (name (() | attributes) +)
+;
+\end{rail}
+\subsection{Proof methods}\label{sec:syn-meth}
+Proof methods are either basic ones, or expressions composed of methods via
+``\texttt{,}'' (sequential composition), ``\texttt{|}'' (alternatives),
+``\texttt{?}'' (try), ``\texttt{*}'' (repeat, ${} \ge 0$ times),
+``\texttt{+}'' (repeat, ${} > 0$ times).  In practice, proof methods are
+typically just a comma separeted list of \railtoken{name}~\railtoken{args}
+specifications.  Thus the syntax is similar to that of attributes, with plain
+parentheses instead of square brackets (see also \S\ref{sec:syn-att}).  Note
+that parentheses may be dropped for methods without arguments.
+\indexouternonterm{method}
+\begin{rail}
+method : (name args | (name | '(' method ')') (() | '?' | '*' | '+')) + (',' | '|')
+;
+\end{rail}
 %%% Local Variables:
 %%% mode: latex
 %%% TeX-master: "isar-ref"

changeset 7050	c70d3402fef5
parent 7046	9f755ff43cff
child 7134	320b412e5800