doc-src/IsarRef/syntax.tex
author wenzelm
Wed, 04 Aug 1999 18:20:24 +0200
changeset 7175 8263d0b50e12
parent 7167 0b2e3ef1d8f4
child 7315 76a39a3784b5
permissions -rw-r--r--
tuned;


%FIXME
% - examples (!?)


\chapter{Isar document syntax}

FIXME shortcut

FIXME important note: inner versus outer syntax

\section{Lexical matters}

\section{Common syntax entities}

The Isar proof and theory language syntax has been carefully designed with
orthogonality in mind.  Subsequently, we introduce several basic syntactic
entities, such as names, terms, theorem specifications, which have been
factored out of the actual Isar language elements described later.

Note that some of the basic syntactic entities introduced below act much like
tokens rather than nonterminals, in particular for the sake of error messages.
E.g.\ syntax elements such as $\CONSTS$ referring to \railqtoken{name} or
\railqtoken{type} would really report a missing name or type rather than any
of the constituent primitive tokens such as \railtoken{ident} or
\railtoken{string}.


\subsection{Names}

Entity \railqtoken{name} usually refers to any name of types, constants,
theorems etc.\ that are to be \emph{declared} or \emph{defined} (so qualified
identifiers are excluded).  Quoted strings provide an escape for
non-identifier names or those ruled out by outer syntax keywords (e.g.\ 
\verb|"let"|).  Already existing objects are usually referenced by
\railqtoken{nameref}.

\indexoutertoken{name}\indexoutertoken{parname}\indexoutertoken{nameref}
\begin{rail}
  name: ident | symident | string
  ;
  parname: '(' name ')'
  ;
  nameref: name | longident
  ;
\end{rail}


\subsection{Comments}

Large chunks of plain \railqtoken{text} are usually given
\railtoken{verbatim}, i.e.\ enclosed in \verb|{*|\dots\verb|*}|.  For
convenience, any of the smaller text units conforming to \railqtoken{nameref}
are admitted as well.  Almost any of the Isar commands may be annotated by
some marginal \railnonterm{comment} of the form \texttt{--} \railqtoken{text}.
Note that this kind of comment is actually part of the language, while source
level comments \verb|(*|\dots\verb|*)| are already stripped at the lexical
level.  A few commands such as $\PROOFNAME$ admit additional markup with a
``level of interest'': \texttt{\%} followed by an optional number $n$ (default
$n = 1$) indicates that the respective part of the document becomes $n$ levels
more boring or obscure; \texttt{\%\%} means that the interest drops by
$\infty$ --- abandon every hope, who enter here.

\indexoutertoken{text}\indexouternonterm{comment}\indexouternonterm{interest}
\begin{rail}
  text: verbatim | nameref
  ;
  comment: '--' text
  ;
  interest: percent nat? | ppercent
  ;
\end{rail}


\subsection{Type classes, Sorts and arities}

The syntax of sorts and arities is given directly at the outer level.  Note
that this is in contrast to that types and terms (see \ref{sec:types-terms}).

\indexouternonterm{sort}\indexouternonterm{arity}\indexouternonterm{simplearity}
\indexouternonterm{classdecl}
\begin{rail}
  classdecl: name ('<' (nameref ',' +))?
  ;
  sort: nameref | lbrace (nameref * ',') rbrace
  ;
  arity: ('(' (sort + ',') ')')? sort
  ;
  simplearity: ('(' (sort + ',') ')')? nameref
  ;
\end{rail}


\subsection{Types and terms}\label{sec:types-terms}

The actual inner Isabelle syntax, that of types and terms of the logic, is far
too flexible in order to be modeled explicitly at the outer theory level.
Basically, any such entity has to be quoted at the outer level to turn it into
a single token (the parsing and type-checking is performed later).  For
convenience, a slightly more liberal convention is adopted: quotes may be
omitted for any type or term that is already \emph{atomic at the outer level}.
E.g.\ one may write just \texttt{x} instead of \texttt{"x"}.

\indexoutertoken{type}\indexoutertoken{term}\indexoutertoken{prop}
\begin{rail}
  type: nameref | typefree | typevar
  ;
  term: nameref | var | textvar | nat
  ;
  prop: term
  ;
\end{rail}

Type declarations and definitions usually refer to \railnonterm{typespec} on
the left-hand side.  This models basic type constructor application at the
outer syntax level.  Note that only plain postfix notation is available here,
but no infixes.

\indexouternonterm{typespec}
\begin{rail}
  typespec: (() | typefree | '(' ( typefree + ',' ) ')') name
  ;
\end{rail}


\subsection{Term patterns}

Assumptions and goal statements usually admit automatic binding of schematic
text variables by giving (optional) patterns of the form $\IS{p@1 \dots p@n}$.
There are separate versions available for \railqtoken{term}s and
\railqtoken{prop}s.  The latter provides a $\CONCLNAME$ part with patterns
referring the (atomic) conclusion of a rule.

\indexouternonterm{termpat}\indexouternonterm{proppat}
\begin{rail}
  termpat: '(' ('is' term +) ')'
  ;
  proppat: '(' (('is' prop +) | 'concl' ('is' prop +) | ('is' prop +) 'concl' ('is' prop +)) ')'
  ;
\end{rail}


\subsection{Mixfix annotations}

Mixfix annotations specify concrete \emph{inner} syntax of Isabelle types and
terms.  Some commands such as $\TYPES$ admit infixes only, while $\CONSTS$ and
$\isarkeyword{syntax}$ support the full range of general mixfixes and binders.

\indexouternonterm{infix}\indexouternonterm{mixfix}
\begin{rail}
  infix: '(' ('infixl' | 'infixr') string? nat ')'
  ;
  mixfix: infix | '(' string prios? nat? ')' | '(' 'binder' string prios? nat ')'
  ;

  prios: '[' (nat + ',') ']'
  ;
\end{rail}


\subsection{Attributes and theorems}\label{sec:syn-att}

Attributes (and proof methods, see \S\ref{sec:syn-meth}) have their own
``semi-inner'' syntax, which does not have to be atomic at the outer level
unlike that of types and terms.  Instead, the attribute argument
specifications may be any sequence of atomic entities (identifiers, strings
etc.), or properly bracketed argument lists.  Below \railqtoken{atom} refers
to any atomic entity, including \railtoken{keyword}s conforming to
\railtoken{symident}.

\indexoutertoken{atom}\indexouternonterm{args}\indexouternonterm{attributes}
\begin{rail}
  atom: nameref | typefree | typevar | var | textvar | nat | keyword
  ;
  arg: atom | '(' args ')' | '[' args ']' | lbrace args rbrace
  ;
  args: arg *
  ;
  attributes: '[' (nameref args * ',') ']'
  ;
\end{rail}

Theorem specifications come in several flavors: \railnonterm{axmdecl} and
\railnonterm{thmdecl} usually refer to axioms, assumptions or results of goal
statements, \railnonterm{thmdef} collects lists of existing theorems.
Existing theorems are given by \railnonterm{thmref} and \railnonterm{thmrefs}
(the former requires an actual singleton result).  Any of these theorem
specifications may include lists of attributes both on the left and right hand
sides; attributes are applied to the any immediately preceding theorem.

\indexouternonterm{thmdecl}\indexouternonterm{axmdecl}
\indexouternonterm{thmdef}\indexouternonterm{thmrefs}
\begin{rail}
  axmdecl: name attributes? ':'
  ;
  thmdecl: thmname ':'
  ;
  thmdef: thmname '='
  ;
  thmref: nameref attributes?
  ;
  thmrefs: thmref +
  ;

  thmname: name attributes | name | attributes
  ;
\end{rail}


\subsection{Proof methods}\label{sec:syn-meth}

Proof methods are either basic ones, or expressions composed of methods via
``\texttt{,}'' (sequential composition), ``\texttt{|}'' (alternative choices),
``\texttt{?}'' (try), ``\texttt{*}'' (repeat ${} \ge 0$ times), ``\texttt{+}''
(repeat ${} > 0$ times).  In practice, proof methods are usually just a comma
separated list of (\railqtoken{nameref}~\railnonterm{args}) specifications.
Thus the syntax is similar to that of attributes, with plain parentheses
instead of square brackets (see also \S\ref{sec:syn-att}).  Note that
parentheses may be dropped for single method specifications without arguments.

\indexouternonterm{method}
\begin{rail}
  method: (nameref | '(' methods ')') (() | '?' | '*' | '+')
  ;
  methods: (nameref args | method) + (',' | '|')
  ;
\end{rail}


%%% Local Variables: 
%%% mode: latex
%%% TeX-master: "isar-ref"
%%% End: