doc-src/IsarRef/Thy/Outer_Syntax.thy
changeset 28775 d25fe9601dbd
parent 28774 0e25ef17b06b
child 28776 e4090e51b8b9
equal deleted inserted replaced
28774:0e25ef17b06b 28775:d25fe9601dbd
    66 *}
    66 *}
    67 
    67 
    68 
    68 
    69 section {* Lexical matters \label{sec:outer-lex} *}
    69 section {* Lexical matters \label{sec:outer-lex} *}
    70 
    70 
    71 text {* The Isabelle/Isar outer syntax provides token classes as
    71 text {* The outer lexical syntax consists of three main categories of
    72   presented below; most of these coincide with the inner lexical
    72   tokens:
    73   syntax as defined in \secref{sec:inner-lex}.
    73 
    74 
    74   \begin{enumerate}
    75   \begin{matharray}{rcl}
    75 
    76     @{syntax_def ident} & = & letter\,quasiletter^* \\
    76   \item \emph{major keywords} --- the command names that are available
    77     @{syntax_def longident} & = & ident (\verb,.,ident)^+ \\
    77   in the present logic session;
    78     @{syntax_def symident} & = & sym^+ ~|~ \verb,\,\verb,<,ident\verb,>, \\
    78 
    79     @{syntax_def nat} & = & digit^+ \\
    79   \item \emph{minor keywords} --- additional literal tokens required
    80     @{syntax_def var} & = & ident ~|~ \verb,?,ident ~|~ \verb,?,ident\verb,.,nat \\
    80   by the syntax of commands;
    81     @{syntax_def typefree} & = & \verb,',ident \\
    81 
    82     @{syntax_def typevar} & = & typefree ~|~ \verb,?,typefree ~|~ \verb,?,typefree\verb,.,nat \\
    82   \item \emph{named tokens} --- various categories of identifiers,
    83     @{syntax_def string} & = & \verb,", ~\dots~ \verb,", \\
    83   string tokens etc.
    84     @{syntax_def altstring} & = & \backquote ~\dots~ \backquote \\
    84 
    85     @{syntax_def verbatim} & = & \verb,{*, ~\dots~ \verb,*,\verb,}, \\[1ex]
    85   \end{enumerate}
    86 
    86 
    87     letter & = & latin ~|~ \verb,\,\verb,<,latin\verb,>, ~|~ \verb,\,\verb,<,latin\,latin\verb,>, ~|~ greek ~|~ \\
    87   Minor keywords and major keywords are guaranteed to be disjoint.
    88            &   & \verb,\<^isub>, ~|~ \verb,\<^isup>, \\
    88   This helps user-interfaces to determine the overall structure of a
    89     quasiletter & = & letter ~|~ digit ~|~ \verb,_, ~|~ \verb,', \\
    89   theory text, without knowing the full details of command syntax.
    90     latin & = & \verb,a, ~|~ \dots ~|~ \verb,z, ~|~ \verb,A, ~|~ \dots ~|~ \verb,Z, \\
    90 
    91     digit & = & \verb,0, ~|~ \dots ~|~ \verb,9, \\
    91   Keywords override named tokens.  For example, the presence of a
    92     sym & = & \verb,!, ~|~ \verb,#, ~|~ \verb,$, ~|~ \verb,%, ~|~ \verb,&, ~|~
    92   command called @{verbatim term} inhibits the identifier @{verbatim
    93      \verb,*, ~|~ \verb,+, ~|~ \verb,-, ~|~ \verb,/, ~|~ \\
    93   term}, but the string @{verbatim "\"term\""} can be used instead.
    94     & & \verb,<, ~|~ \verb,=, ~|~ \verb,>, ~|~ \verb,?, ~|~ \texttt{\at} ~|~
    94   By convention, the outer syntax always allows quoted strings in
    95     \verb,^, ~|~ \verb,_, ~|~ \verb,|, ~|~ \verb,~, \\
    95   addition to identifiers, wherever a named entity is expected.
    96     greek & = & \verb,\<alpha>, ~|~ \verb,\<beta>, ~|~ \verb,\<gamma>, ~|~ \verb,\<delta>, ~| \\
    96 
    97           &   & \verb,\<epsilon>, ~|~ \verb,\<zeta>, ~|~ \verb,\<eta>, ~|~ \verb,\<theta>, ~| \\
    97   \medskip The categories for named tokens are defined once and for
    98           &   & \verb,\<iota>, ~|~ \verb,\<kappa>, ~|~ \verb,\<mu>, ~|~ \verb,\<nu>, ~| \\
    98   all as follows.
    99           &   & \verb,\<xi>, ~|~ \verb,\<pi>, ~|~ \verb,\<rho>, ~|~ \verb,\<sigma>, ~|~ \verb,\<tau>, ~| \\
    99 
   100           &   & \verb,\<upsilon>, ~|~ \verb,\<phi>, ~|~ \verb,\<chi>, ~|~ \verb,\<psi>, ~| \\
   100   \smallskip
   101           &   & \verb,\<omega>, ~|~ \verb,\<Gamma>, ~|~ \verb,\<Delta>, ~|~ \verb,\<Theta>, ~| \\
   101   \begin{supertabular}{rcl}
   102           &   & \verb,\<Lambda>, ~|~ \verb,\<Xi>, ~|~ \verb,\<Pi>, ~|~ \verb,\<Sigma>, ~| \\
   102     @{syntax_def ident} & = & @{text "letter quasiletter\<^sup>*"} \\
   103           &   & \verb,\<Upsilon>, ~|~ \verb,\<Phi>, ~|~ \verb,\<Psi>, ~|~ \verb,\<Omega>, \\
   103     @{syntax_def longident} & = & @{text "ident("}@{verbatim "."}@{text "ident)\<^sup>+"} \\
   104   \end{matharray}
   104     @{syntax_def symident} & = & @{text "sym\<^sup>+  |  "}@{verbatim "\\"}@{verbatim "<"}@{text ident}@{verbatim ">"} \\
       
   105     @{syntax_def nat} & = & @{text "digit\<^sup>+"} \\
       
   106     @{syntax_def var} & = & @{verbatim "?"}@{text "ident  |  "}@{verbatim "?"}@{text ident}@{verbatim "."}@{text nat} \\
       
   107     @{syntax_def typefree} & = & @{verbatim "'"}@{text ident} \\
       
   108     @{syntax_def typevar} & = & @{verbatim "?"}@{text "typefree  |  "}@{verbatim "?"}@{text typefree}@{verbatim "."}@{text nat} \\
       
   109     @{syntax_def string} & = & @{verbatim "\""} @{text "\<dots>"} @{verbatim "\""} \\
       
   110     @{syntax_def altstring} & = & @{verbatim "`"} @{text "\<dots>"} @{verbatim "`"} \\
       
   111     @{syntax_def verbatim} & = & @{verbatim "{*"} @{text "\<dots>"} @{verbatim "*"}@{verbatim "}"} \\[1ex]
       
   112 
       
   113     @{text letter} & = & @{text "latin  |  "}@{verbatim "\\"}@{verbatim "<"}@{text latin}@{verbatim ">"}@{text "  |  "}@{verbatim "\\"}@{verbatim "<"}@{text "latin latin"}@{verbatim ">"}@{text "  |  greek  |"} \\
       
   114           &   & @{verbatim "\<^isub>"}@{text "  |  "}@{verbatim "\<^isup>"} \\
       
   115     @{text quasiletter} & = & @{text "letter  |  digit  |  "}@{verbatim "_"}@{text "  |  "}@{verbatim "'"} \\
       
   116     @{text latin} & = & @{verbatim a}@{text "  | \<dots> |  "}@{verbatim z}@{text "  |  "}@{verbatim A}@{text "  |  \<dots> |  "}@{verbatim Z} \\
       
   117     @{text digit} & = & @{verbatim "0"}@{text "  |  \<dots> |  "}@{verbatim "9"} \\
       
   118     @{text sym} & = & @{verbatim "!"}@{text "  |  "}@{verbatim "#"}@{text "  |  "}@{verbatim "$"}@{text "  |  "}@{verbatim "%"}@{text "  |  "}@{verbatim "&"}@{text "  |  "}@{verbatim "*"}@{text "  |  "}@{verbatim "+"}@{text "  |  "}@{verbatim "-"}@{text "  |  "}@{verbatim "/"}@{text "  |"} \\
       
   119     & & @{verbatim "<"}@{text "  |  "}@{verbatim "="}@{text "  |  "}@{verbatim ">"}@{text "  |  "}@{verbatim "?"}@{text "  |  "}@{verbatim "@"}@{text "  |  "}@{verbatim "^"}@{text "  |  "}@{verbatim "_"}@{text "  |  "}@{verbatim "|"}@{text "  |  "}@{verbatim "~"} \\
       
   120     @{text greek} & = & @{verbatim "\<alpha>"}@{text "  |  "}@{verbatim "\<beta>"}@{text "  |  "}@{verbatim "\<gamma>"}@{text "  |  "}@{verbatim "\<delta>"}@{text "  |"} \\
       
   121           &   & @{verbatim "\<epsilon>"}@{text "  |  "}@{verbatim "\<zeta>"}@{text "  |  "}@{verbatim "\<eta>"}@{text "  |  "}@{verbatim "\<theta>"}@{text "  |"} \\
       
   122           &   & @{verbatim "\<iota>"}@{text "  |  "}@{verbatim "\<kappa>"}@{text "  |  "}@{verbatim "\<mu>"}@{text "  |  "}@{verbatim "\<nu>"}@{text "  |"} \\
       
   123           &   & @{verbatim "\<xi>"}@{text "  |  "}@{verbatim "\<pi>"}@{text "  |  "}@{verbatim "\<rho>"}@{text "  |  "}@{verbatim "\<sigma>"}@{text "  |  "}@{verbatim "\<tau>"}@{text "  |"} \\
       
   124           &   & @{verbatim "\<upsilon>"}@{text "  |  "}@{verbatim "\<phi>"}@{text "  |  "}@{verbatim "\<chi>"}@{text "  |  "}@{verbatim "\<psi>"}@{text "  |"} \\
       
   125           &   & @{verbatim "\<omega>"}@{text "  |  "}@{verbatim "\<Gamma>"}@{text "  |  "}@{verbatim "\<Delta>"}@{text "  |  "}@{verbatim "\<Theta>"}@{text "  |"} \\
       
   126           &   & @{verbatim "\<Lambda>"}@{text "  |  "}@{verbatim "\<Xi>"}@{text "  |  "}@{verbatim "\<Pi>"}@{text "  |  "}@{verbatim "\<Sigma>"}@{text "  |"} \\
       
   127           &   & @{verbatim "\<Upsilon>"}@{text "  |  "}@{verbatim "\<Phi>"}@{text "  |  "}@{verbatim "\<Psi>"}@{text "  |  "}@{verbatim "\<Omega>"} \\
       
   128   \end{supertabular}
       
   129   \medskip
   105 
   130 
   106   The syntax of @{syntax string} admits any characters, including
   131   The syntax of @{syntax string} admits any characters, including
   107   newlines; ``@{verbatim "\""}'' (double-quote) and ``@{verbatim
   132   newlines; ``@{verbatim "\""}'' (double-quote) and ``@{verbatim
   108   "\\"}'' (backslash) need to be escaped by a backslash; arbitrary
   133   "\\"}'' (backslash) need to be escaped by a backslash; arbitrary
   109   character codes may be specified as ``@{verbatim "\\"}@{text ddd}'',
   134   character codes may be specified as ``@{verbatim "\\"}@{text ddd}'',
   121   tools such as {\LaTeX} or Proof~General with the X-Symbol package.
   146   tools such as {\LaTeX} or Proof~General with the X-Symbol package.
   122   A list of standard Isabelle symbols that work well with these tools
   147   A list of standard Isabelle symbols that work well with these tools
   123   is given in \cite[appendix~A]{isabelle-sys}.
   148   is given in \cite[appendix~A]{isabelle-sys}.
   124   
   149   
   125   Source comments take the form @{verbatim "(*"}~@{text
   150   Source comments take the form @{verbatim "(*"}~@{text
   126   "\<dots>"}~@{verbatim "*)"} and may be nested, although user-interface
   151   "\<dots>"}~@{verbatim "*)"} and may be nested, although the user-interface
   127   tools might prevent this.  Note that this form indicates source
   152   might prevent this.  Note that this form indicates source comments
   128   comments only, which are stripped after lexical analysis of the
   153   only, which are stripped after lexical analysis of the input.  The
   129   input.  The Isar syntax also provides proper \emph{document
   154   Isar syntax also provides proper \emph{document comments} that are
   130   comments} that are considered as part of the text (see
   155   considered as part of the text (see \secref{sec:comments}).
   131   \secref{sec:comments}).
       
   132 *}
   156 *}
   133 
   157 
   134 
   158 
   135 section {* Common syntax entities *}
   159 section {* Common syntax entities *}
   136 
   160