doc-src/IsarRef/syntax.tex
author wenzelm
Tue, 20 Jul 1999 18:50:46 +0200
changeset 7050 c70d3402fef5
parent 7046 9f755ff43cff
child 7134 320b412e5800
permissions -rw-r--r--
checkpoint;


%FIXME
% - examples (!?)


\chapter{Isar document syntax}

FIXME important note: inner versus outer syntax

\section{Lexical matters}

\section{Common syntax entities}

The Isar proof and theory language syntax has been carefully designed with
orthogonality in mind.  Many common syntax entities such that those for names,
terms, types etc.\ have been factored out.  Some of these basic syntactic
entities usually represent the level of abstraction for error messages: e.g.\ 
some higher syntax element such as $\CONSTS$ referring to \railtoken{name} or
\railtoken{type}, would really report a missing \railtoken{name} or
\railtoken{type} rather than any of its constituent primitive tokens (as
defined below).  These quasi-tokens are represented in the syntax diagrams
below using the same font as actual tokens (such as \railtoken{string}).


\subsection{Names}

Entity \railtoken{name} usually refers to any name of types, constants,
theorems, etc.\ to be \emph{declared} or \emph{defined} (so qualified
identifiers are excluded).  Already existing objects are typically referenced
by \railtoken{nameref}.

\indexoutertoken{name}\indexoutertoken{nameref}
\begin{rail}
  name : ident | symident | string
  ;
  nameref : name | longident
  ;
\end{rail}


\subsection{Comments}

Large chunks of verbatim \railtoken{text} are usually given
\railtoken{verbatim}, i.e.\ enclosed in \verb|{*|~\verb|*}|; for convenience,
any of the smaller text entities (\railtoken{ident}, \railtoken{string} etc.)
are admitted as well.  Almost any of the Isar commands may be annotated by a
marginal comment: \texttt{--} \railtoken{text}.  Note that this kind of
comment is actually part of the language, while source level comments
\verb|(*|~\verb|*)| are already stripped at the lexical level.  A few commands
such as $\PROOFNAME$ admit some parts to be mark with a ``level of interest'':
currently only \texttt{\%} for ``boring, don't read this''.

\indexoutertoken{text}\indexouternonterm{comment}\indexouternonterm{interest}
\begin{rail}
  text : verbatim | nameref
  ;
  comment : (() | '--' text)
  ;
  interest : (() | '\%')
  ;
\end{rail}


\subsection{Sorts and arities}

The syntax of sorts and arities is given directly at the outer level.  Note
that this in contrast to that types and terms (see below).  Only few commands
ever refer to sorts or arities explicitly.

\indexouternonterm{sort}\indexouternonterm{arity}\indexouternonterm{simplearity}
\begin{rail}
  sort : nameref | lbrace (nameref * ',') rbrace
  ;
  arity : ( () | '(' (sort + ',') ')' ) sort
  ;
  simplearity : ( () | '(' (sort + ',') ')' ) nameref
  ;
\end{rail}


\subsection{Types and terms}

The actual inner Isabelle syntax, i.e.\ that of types and terms, is far too
flexible in order to be modeled explicitly at the outer theory level.
Basically, any such entity would have to be quoted at the outer level to turn
it into a single token, with the actual parsing deferred to some functions
that read and type-check terms etc.\ (note that \railtoken{prop}s will be
handled differently from plain \railtoken{term}s here).  For convenience, the
quotes may be omitted for any \emph{atomic} term or type (e.g.\ a single
variable).

\indexoutertoken{type}\indexoutertoken{term}\indexoutertoken{prop}
\begin{rail}
  type : ident | longident | symident | typefree | typevar | string
  ;
  term : ident | longident | symident | var | textvar | nat | string
  ;
  prop : term
  ;
\end{rail}

Type definitions etc.\ usually refer to \railnonterm{typespec} on the
left-hand side.  This models basic type constructor application at the outer
syntax level.  Note that only plain postfix notation is available here, but no
infixes.

\indexouternonterm{typespec}
\begin{rail}
  typespec : (() | typefree | '(' ( typefree + ',' ) ')') name
  ;
\end{rail}


\subsection{Term patterns}

Statements like $\SHOWNAME$ involve propositions, some others like $\DEFNAME$
plain terms.  Any of these usually admit automatic binding of schematic text
variables by giving (optional) patterns $\IS{p@1 \dots p@n}$.  For
\railtoken{prop}s the $\CONCLNAME$ part refers to the conclusion only, in case
actual rules are involved, rather than atomic propositions.

\indexouternonterm{termpat}\indexouternonterm{proppat}
\begin{rail}
  termpat : '(' (term + 'is' ) ')'
  ;
  proppat : '(' (() | (prop + 'is' )) (() | 'concl' (prop + 'is' )) ')'
  ;
\end{rail}


\subsection{Mixfix annotations}

Mixfix annotations specify concrete \emph{inner} syntax.  Some commands such
as $\TYPES$ admit infixes only, while $\CONSTS$ etc.\ support the full range
of general mixfixes and binders.

\indexouternonterm{infix}\indexouternonterm{mixfix}
\begin{rail}
  infix : '(' ('infixl' | 'infixr') (() | string) nat ')'
  ;

  mixfix : infix | string (() | '[' (nat + ',') ']') (() | nat) |
  'binder' string (() | '[' (nat + ',') ']') nat
  ;
\end{rail}


\subsection{Attributes and theorem specifications}\label{sec:syn-att}

Attributes (and proof methods, see \S\ref{sec:syn-meth}) have their own
``semi-inner'' syntax, which does not have to be atomic at the outer level
unlike that of types and terms.  Instead, the attribute argument
specifications may be any sequence of atomic entities (identifiers, strings
etc.), or properly bracketed argument lists.  Below \railtoken{atom} refers to
any atomic entity (\railtoken{ident}, \railtoken{longident},
\railtoken{symident} etc.), including keywords that conform to
\railtoken{symident}, but do not coincide with actual command names.

\indexoutertoken{atom}\indexouternonterm{args}\indexouternonterm{attributes}
\begin{rail}
  args : (atom | '(' (args *) ')' | '[' (args *) ']' | lbrace (args *) rbrace) *
  ;
  attributes : '[' (name args + ',') ']'
  ;
\end{rail}

Theorem specifications come in three flavours: \railnonterm{thmdecl} usually
refers to the result of a goal statement (such as $\SHOWNAME$),
\railnonterm{thmdef} collects lists of existing theorems (as in $\NOTENAME$),
\railnonterm{thmref} refers to any list of existing theorems (e.g.\ occurring
as proof method arguments).  Any of these may include lists of attributes,
which are applied to the preceding theorem or list of theorems.

\indexouternonterm{thmdecl}\indexouternonterm{thmdef}\indexouternonterm{thmref}
\begin{rail}
  thmdecl : (() | name) (() | attributes) ':'
  ;
  thmdef : (() | name) (() | attributes) '='
  ;
  thmref : (name (() | attributes) +)
  ;
\end{rail}


\subsection{Proof methods}\label{sec:syn-meth}

Proof methods are either basic ones, or expressions composed of methods via
``\texttt{,}'' (sequential composition), ``\texttt{|}'' (alternatives),
``\texttt{?}'' (try), ``\texttt{*}'' (repeat, ${} \ge 0$ times),
``\texttt{+}'' (repeat, ${} > 0$ times).  In practice, proof methods are
typically just a comma separeted list of \railtoken{name}~\railtoken{args}
specifications.  Thus the syntax is similar to that of attributes, with plain
parentheses instead of square brackets (see also \S\ref{sec:syn-att}).  Note
that parentheses may be dropped for methods without arguments.

\indexouternonterm{method}
\begin{rail}
  method : (name args | (name | '(' method ')') (() | '?' | '*' | '+')) + (',' | '|')
  ;
\end{rail}


%%% Local Variables: 
%%% mode: latex
%%% TeX-master: "isar-ref"
%%% End: