--- a/doc-src/IsarRef/syntax.tex	Sun Aug 22 18:21:36 1999 +0200
+++ b/doc-src/IsarRef/syntax.tex	Sun Aug 22 21:13:20 1999 +0200
@@ -1,15 +1,69 @@
-
-%FIXME
-% - examples (!?)
-
 
 \chapter{Isar document syntax}
 
-FIXME shortcut
+We give a complete reference of all basic syntactic entities underlying the
+the Isabelle/Isar document syntax.  This chapter will not introduce any actual
+theory and proof commands, though (cf.\ chapter~\ref{ch:pure-syntax} and
+later).
+
+\medskip
+
+In order to get started with writing well-formed Isabelle/Isar documents, the
+most important aspect to be noted is the difference of \emph{inner} versus
+\emph{outer} syntax.  Inner syntax is that of Isabelle types and terms of the
+logic, while outer syntax is that of Isabelle/Isar theories (and proofs).  As
+a general rule, inner syntax entities may occur only as \emph{atomic entities}
+within outer syntax.  For example, quoted string \texttt{"x + y"} and
+identifier \texttt{z} are legal term specifications, while \texttt{x + y} is
+not.
+
+\begin{warn}
+  Note that old-style Isabelle theories used to fake parts of the inner type
+  syntax, with complicated rules when quotes may be omitted.  Despite the
+  minor drawback of requiring quotes more often, Isabelle/Isar is simpler and
+  more robust in that respect.
+\end{warn}
+
+
+\section{Lexical matters}\label{sec:lex-syntax}
+
+The Isabelle/Isar outer syntax provides token classes as presented below.
+Note that some of these coincide (by full intention) with inner lexical syntax
+as given in \cite{isabelle-ref}.  These different levels of syntax should not
+be confused, though.
 
-FIXME important note: inner versus outer syntax
+\begin{matharray}{rcl}
+  ident & = & letter~quasiletter^* \\
+  longident & = & ident\verb,.,ident~\dots~ident \\
+  symident & = & sym^+ \\
+  nat & = & digit^+ \\
+  var & = & \verb,?,ident ~|~ \verb,?,ident\verb,.,nat \\
+  textvar & = & \verb,??,ident \\
+  typefree & = & \verb,',ident \\
+  typevar & = & \verb,?,typefree ~|~ \verb,?,typefree\verb,.,nat \\
+  string & = & \verb,", ~\dots~ \verb,", \\
+  verbatim & = & \verb,{*, ~\dots~ \verb,*}, \\[2ex]
 
-\section{Lexical matters}
+  letter & = & \verb,a, ~|~ \dots ~|~ \verb,z, ~|~ \verb,A, ~|~ \dots ~|~ \verb,Z, \\
+  digit & = & \verb,0, ~|~ \dots ~|~ \verb,9, \\
+  quasiletter & = & letter ~|~ digit ~|~ \verb,_, ~|~ \verb,', \\
+  sym & = & \verb,!, ~|~ \verb,#, ~|~ \verb,$, ~|~ \verb,%, ~|~ \verb,&, ~|~  %$
+   \verb,*, ~|~ \verb,+, ~|~ \verb,-, ~|~ \verb,/, ~|~ \verb,:, ~|~ \verb,<, ~|~
+   \verb,=, ~|~ \verb,>, ~|~ \verb,?, ~|~ \mathtt{\at} ~|~ \verb,^, ~|~ \verb,_, ~|~
+   \verb,`, ~|~ \verb,|, ~|~ \verb,~, \\
+\end{matharray}
+
+The syntax of \texttt{string} admits any characters, including newlines;
+\verb|"| and \verb|\| have to be escaped by a backslash, though.  Note that
+ML-style control character notation is not supported.  The body of
+\texttt{verbatim} may consist of any text not containing \verb|*}|.
+
+Comments take the form \texttt{(*~\dots~*)} and may be nested, just as in ML.
+Note that these are \emph{source} comments only, which are stripped after
+lexical analysis of the input.  The Isar document syntax also provides several
+elements of \emph{formal comments} that are actually part of the text (see
+\S\ref{sec:comments}, \S\ref{sec:formal-cmt-thy}, \S\ref{sec:formal-cmt-prf}).
+
 
 \section{Common syntax entities}
 
@@ -46,7 +100,7 @@
 \end{rail}
 
 
-\subsection{Comments}
+\subsection{Comments}\label{sec:comments}
 
 Large chunks of plain \railqtoken{text} are usually given
 \railtoken{verbatim}, i.e.\ enclosed in \verb|{*|\dots\verb|*}|.  For
@@ -58,8 +112,8 @@
 level.  A few commands such as $\PROOFNAME$ admit additional markup with a
 ``level of interest'': \texttt{\%} followed by an optional number $n$ (default
 $n = 1$) indicates that the respective part of the document becomes $n$ levels
-more boring or obscure; \texttt{\%\%} means that the interest drops by
-$\infty$ --- abandon every hope, who enter here.
+more obscure; \texttt{\%\%} means that interest drops by $\infty$ ---
+\emph{abandon every hope, who enter here}.
 
 \indexoutertoken{text}\indexouternonterm{comment}\indexouternonterm{interest}
 \begin{rail}
@@ -94,7 +148,7 @@
 \subsection{Types and terms}\label{sec:types-terms}
 
 The actual inner Isabelle syntax, that of types and terms of the logic, is far
-too flexible in order to be modeled explicitly at the outer theory level.
+too flexible in order to be modelled explicitly at the outer theory level.
 Basically, any such entity has to be quoted at the outer level to turn it into
 a single token (the parsing and type-checking is performed later).  For
 convenience, a slightly more liberal convention is adopted: quotes may be
@@ -123,7 +177,7 @@
 \end{rail}
 
 
-\subsection{Term patterns}
+\subsection{Term patterns}\label{sec:term-pats}
 
 Assumptions and goal statements usually admit automatic binding of schematic
 text variables by giving (optional) patterns of the form $\IS{p@1 \dots p@n}$.
@@ -161,8 +215,8 @@
 \subsection{Attributes and theorems}\label{sec:syn-att}
 
 Attributes (and proof methods, see \S\ref{sec:syn-meth}) have their own
-``semi-inner'' syntax, which does not have to be atomic at the outer level
-unlike that of types and terms.  Instead, the attribute argument
+``semi-inner'' syntax, in the sense that input conforming \railnonterm{args}
+below are parsed by the attribute a second time.  The attribute argument
 specifications may be any sequence of atomic entities (identifiers, strings
 etc.), or properly bracketed argument lists.  Below \railqtoken{atom} refers
 to any atomic entity, including \railtoken{keyword}s conforming to
@@ -180,7 +234,7 @@
   ;
 \end{rail}
 
-Theorem specifications come in several flavors: \railnonterm{axmdecl} and
+Theorem specifications come in several flavours: \railnonterm{axmdecl} and
 \railnonterm{thmdecl} usually refer to axioms, assumptions or results of goal
 statements, \railnonterm{thmdef} collects lists of existing theorems.
 Existing theorems are given by \railnonterm{thmref} and \railnonterm{thmrefs}
changeset 7315	76a39a3784b5
parent 7175	8263d0b50e12
child 7319	3907d597cae6