src/Doc/Isar_Ref/Inner_Syntax.thy
author wenzelm
Wed Mar 25 11:39:52 2015 +0100 (2015-03-25)
changeset 59809 87641097d0f3
parent 59783 00b62aa9f430
child 60254 52110106c0ca
permissions -rw-r--r--
tuned signature;
     1 theory Inner_Syntax
     2 imports Base Main
     3 begin
     4 
     5 chapter \<open>Inner syntax --- the term language \label{ch:inner-syntax}\<close>
     6 
     7 text \<open>The inner syntax of Isabelle provides concrete notation for
     8   the main entities of the logical framework, notably @{text
     9   "\<lambda>"}-terms with types and type classes.  Applications may either
    10   extend existing syntactic categories by additional notation, or
    11   define new sub-languages that are linked to the standard term
    12   language via some explicit markers.  For example @{verbatim
    13   FOO}~@{text "foo"} could embed the syntax corresponding for some
    14   user-defined nonterminal @{text "foo"} --- within the bounds of the
    15   given lexical syntax of Isabelle/Pure.
    16 
    17   The most basic way to specify concrete syntax for logical entities
    18   works via mixfix annotations (\secref{sec:mixfix}), which may be
    19   usually given as part of the original declaration or via explicit
    20   notation commands later on (\secref{sec:notation}).  This already
    21   covers many needs of concrete syntax without having to understand
    22   the full complexity of inner syntax layers.
    23 
    24   Further details of the syntax engine involves the classical
    25   distinction of lexical language versus context-free grammar (see
    26   \secref{sec:pure-syntax}), and various mechanisms for \emph{syntax
    27   transformations} (see \secref{sec:syntax-transformations}).
    28 \<close>
    29 
    30 
    31 section \<open>Printing logical entities\<close>
    32 
    33 subsection \<open>Diagnostic commands \label{sec:print-diag}\<close>
    34 
    35 text \<open>
    36   \begin{matharray}{rcl}
    37     @{command_def "typ"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    38     @{command_def "term"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    39     @{command_def "prop"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    40     @{command_def "thm"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    41     @{command_def "prf"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    42     @{command_def "full_prf"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    43     @{command_def "print_state"}@{text "\<^sup>*"} & : & @{text "any \<rightarrow>"} \\
    44   \end{matharray}
    45 
    46   These diagnostic commands assist interactive development by printing
    47   internal logical entities in a human-readable fashion.
    48 
    49   @{rail \<open>
    50     @@{command typ} @{syntax modes}? @{syntax type} ('::' @{syntax sort})?
    51     ;
    52     @@{command term} @{syntax modes}? @{syntax term}
    53     ;
    54     @@{command prop} @{syntax modes}? @{syntax prop}
    55     ;
    56     @@{command thm} @{syntax modes}? @{syntax thmrefs}
    57     ;
    58     ( @@{command prf} | @@{command full_prf} ) @{syntax modes}? @{syntax thmrefs}?
    59     ;
    60     @@{command print_state} @{syntax modes}?
    61     ;
    62     @{syntax_def modes}: '(' (@{syntax name} + ) ')'
    63   \<close>}
    64 
    65   \begin{description}
    66 
    67   \item @{command "typ"}~@{text \<tau>} reads and prints a type expression
    68   according to the current context.
    69 
    70   \item @{command "typ"}~@{text "\<tau> :: s"} uses type-inference to
    71   determine the most general way to make @{text "\<tau>"} conform to sort
    72   @{text "s"}.  For concrete @{text "\<tau>"} this checks if the type
    73   belongs to that sort.  Dummy type parameters ``@{text "_"}''
    74   (underscore) are assigned to fresh type variables with most general
    75   sorts, according the the principles of type-inference.
    76 
    77   \item @{command "term"}~@{text t} and @{command "prop"}~@{text \<phi>}
    78   read, type-check and print terms or propositions according to the
    79   current theory or proof context; the inferred type of @{text t} is
    80   output as well.  Note that these commands are also useful in
    81   inspecting the current environment of term abbreviations.
    82 
    83   \item @{command "thm"}~@{text "a\<^sub>1 \<dots> a\<^sub>n"} retrieves
    84   theorems from the current theory or proof context.  Note that any
    85   attributes included in the theorem specifications are applied to a
    86   temporary context derived from the current theory or proof; the
    87   result is discarded, i.e.\ attributes involved in @{text "a\<^sub>1,
    88   \<dots>, a\<^sub>n"} do not have any permanent effect.
    89 
    90   \item @{command "prf"} displays the (compact) proof term of the
    91   current proof state (if present), or of the given theorems. Note
    92   that this requires proof terms to be switched on for the current
    93   object logic (see the ``Proof terms'' section of the Isabelle
    94   reference manual for information on how to do this).
    95 
    96   \item @{command "full_prf"} is like @{command "prf"}, but displays
    97   the full proof term, i.e.\ also displays information omitted in the
    98   compact proof term, which is denoted by ``@{text _}'' placeholders
    99   there.
   100 
   101   \item @{command "print_state"} prints the current proof state (if
   102   present), including current facts and goals.
   103 
   104   \end{description}
   105 
   106   All of the diagnostic commands above admit a list of @{text modes}
   107   to be specified, which is appended to the current print mode; see
   108   also \secref{sec:print-modes}.  Thus the output behavior may be
   109   modified according particular print mode features.  For example,
   110   @{command "print_state"}~@{text "(latex xsymbols)"} prints the
   111   current proof state with mathematical symbols and special characters
   112   represented in {\LaTeX} source, according to the Isabelle style
   113   @{cite "isabelle-sys"}.
   114 
   115   Note that antiquotations (cf.\ \secref{sec:antiq}) provide a more
   116   systematic way to include formal items into the printed text
   117   document.
   118 \<close>
   119 
   120 
   121 subsection \<open>Details of printed content\<close>
   122 
   123 text \<open>
   124   \begin{tabular}{rcll}
   125     @{attribute_def show_markup} & : & @{text attribute} \\
   126     @{attribute_def show_types} & : & @{text attribute} & default @{text false} \\
   127     @{attribute_def show_sorts} & : & @{text attribute} & default @{text false} \\
   128     @{attribute_def show_consts} & : & @{text attribute} & default @{text false} \\
   129     @{attribute_def show_abbrevs} & : & @{text attribute} & default @{text true} \\
   130     @{attribute_def show_brackets} & : & @{text attribute} & default @{text false} \\
   131     @{attribute_def names_long} & : & @{text attribute} & default @{text false} \\
   132     @{attribute_def names_short} & : & @{text attribute} & default @{text false} \\
   133     @{attribute_def names_unique} & : & @{text attribute} & default @{text true} \\
   134     @{attribute_def eta_contract} & : & @{text attribute} & default @{text true} \\
   135     @{attribute_def goals_limit} & : & @{text attribute} & default @{text 10} \\
   136     @{attribute_def show_main_goal} & : & @{text attribute} & default @{text false} \\
   137     @{attribute_def show_hyps} & : & @{text attribute} & default @{text false} \\
   138     @{attribute_def show_tags} & : & @{text attribute} & default @{text false} \\
   139     @{attribute_def show_question_marks} & : & @{text attribute} & default @{text true} \\
   140   \end{tabular}
   141   \medskip
   142 
   143   These configuration options control the detail of information that
   144   is displayed for types, terms, theorems, goals etc.  See also
   145   \secref{sec:config}.
   146 
   147   \begin{description}
   148 
   149   \item @{attribute show_markup} controls direct inlining of markup
   150   into the printed representation of formal entities --- notably type
   151   and sort constraints.  This enables Prover IDE users to retrieve
   152   that information via tooltips or popups while hovering with the
   153   mouse over the output window, for example.  Consequently, this
   154   option is enabled by default for Isabelle/jEdit.
   155 
   156   \item @{attribute show_types} and @{attribute show_sorts} control
   157   printing of type constraints for term variables, and sort
   158   constraints for type variables.  By default, neither of these are
   159   shown in output.  If @{attribute show_sorts} is enabled, types are
   160   always shown as well.  In Isabelle/jEdit, manual setting of these
   161   options is normally not required thanks to @{attribute show_markup}
   162   above.
   163 
   164   Note that displaying types and sorts may explain why a polymorphic
   165   inference rule fails to resolve with some goal, or why a rewrite
   166   rule does not apply as expected.
   167 
   168   \item @{attribute show_consts} controls printing of types of
   169   constants when displaying a goal state.
   170 
   171   Note that the output can be enormous, because polymorphic constants
   172   often occur at several different type instances.
   173 
   174   \item @{attribute show_abbrevs} controls folding of constant
   175   abbreviations.
   176 
   177   \item @{attribute show_brackets} controls bracketing in pretty
   178   printed output.  If enabled, all sub-expressions of the pretty
   179   printing tree will be parenthesized, even if this produces malformed
   180   term syntax!  This crude way of showing the internal structure of
   181   pretty printed entities may occasionally help to diagnose problems
   182   with operator priorities, for example.
   183 
   184   \item @{attribute names_long}, @{attribute names_short}, and
   185   @{attribute names_unique} control the way of printing fully
   186   qualified internal names in external form.  See also
   187   \secref{sec:antiq} for the document antiquotation options of the
   188   same names.
   189 
   190   \item @{attribute eta_contract} controls @{text "\<eta>"}-contracted
   191   printing of terms.
   192 
   193   The @{text \<eta>}-contraction law asserts @{prop "(\<lambda>x. f x) \<equiv> f"},
   194   provided @{text x} is not free in @{text f}.  It asserts
   195   \emph{extensionality} of functions: @{prop "f \<equiv> g"} if @{prop "f x \<equiv>
   196   g x"} for all @{text x}.  Higher-order unification frequently puts
   197   terms into a fully @{text \<eta>}-expanded form.  For example, if @{text
   198   F} has type @{text "(\<tau> \<Rightarrow> \<tau>) \<Rightarrow> \<tau>"} then its expanded form is @{term
   199   "\<lambda>h. F (\<lambda>x. h x)"}.
   200 
   201   Enabling @{attribute eta_contract} makes Isabelle perform @{text
   202   \<eta>}-contractions before printing, so that @{term "\<lambda>h. F (\<lambda>x. h x)"}
   203   appears simply as @{text F}.
   204 
   205   Note that the distinction between a term and its @{text \<eta>}-expanded
   206   form occasionally matters.  While higher-order resolution and
   207   rewriting operate modulo @{text "\<alpha>\<beta>\<eta>"}-conversion, some other tools
   208   might look at terms more discretely.
   209 
   210   \item @{attribute goals_limit} controls the maximum number of
   211   subgoals to be printed.
   212 
   213   \item @{attribute show_main_goal} controls whether the main result
   214   to be proven should be displayed.  This information might be
   215   relevant for schematic goals, to inspect the current claim that has
   216   been synthesized so far.
   217 
   218   \item @{attribute show_hyps} controls printing of implicit
   219   hypotheses of local facts.  Normally, only those hypotheses are
   220   displayed that are \emph{not} covered by the assumptions of the
   221   current context: this situation indicates a fault in some tool being
   222   used.
   223 
   224   By enabling @{attribute show_hyps}, output of \emph{all} hypotheses
   225   can be enforced, which is occasionally useful for diagnostic
   226   purposes.
   227 
   228   \item @{attribute show_tags} controls printing of extra annotations
   229   within theorems, such as internal position information, or the case
   230   names being attached by the attribute @{attribute case_names}.
   231 
   232   Note that the @{attribute tagged} and @{attribute untagged}
   233   attributes provide low-level access to the collection of tags
   234   associated with a theorem.
   235 
   236   \item @{attribute show_question_marks} controls printing of question
   237   marks for schematic variables, such as @{text ?x}.  Only the leading
   238   question mark is affected, the remaining text is unchanged
   239   (including proper markup for schematic variables that might be
   240   relevant for user interfaces).
   241 
   242   \end{description}
   243 \<close>
   244 
   245 
   246 subsection \<open>Alternative print modes \label{sec:print-modes}\<close>
   247 
   248 text \<open>
   249   \begin{mldecls}
   250     @{index_ML print_mode_value: "unit -> string list"} \\
   251     @{index_ML Print_Mode.with_modes: "string list -> ('a -> 'b) -> 'a -> 'b"} \\
   252   \end{mldecls}
   253 
   254   The \emph{print mode} facility allows to modify various operations
   255   for printing.  Commands like @{command typ}, @{command term},
   256   @{command thm} (see \secref{sec:print-diag}) take additional print
   257   modes as optional argument.  The underlying ML operations are as
   258   follows.
   259 
   260   \begin{description}
   261 
   262   \item @{ML "print_mode_value ()"} yields the list of currently
   263   active print mode names.  This should be understood as symbolic
   264   representation of certain individual features for printing (with
   265   precedence from left to right).
   266 
   267   \item @{ML Print_Mode.with_modes}~@{text "modes f x"} evaluates
   268   @{text "f x"} in an execution context where the print mode is
   269   prepended by the given @{text "modes"}.  This provides a thread-safe
   270   way to augment print modes.  It is also monotonic in the set of mode
   271   names: it retains the default print mode that certain
   272   user-interfaces might have installed for their proper functioning!
   273 
   274   \end{description}
   275 
   276   \medskip The pretty printer for inner syntax maintains alternative
   277   mixfix productions for any print mode name invented by the user, say
   278   in commands like @{command notation} or @{command abbreviation}.
   279   Mode names can be arbitrary, but the following ones have a specific
   280   meaning by convention:
   281 
   282   \begin{itemize}
   283 
   284   \item @{verbatim \<open>""\<close>} (the empty string): default mode;
   285   implicitly active as last element in the list of modes.
   286 
   287   \item @{verbatim input}: dummy print mode that is never active; may
   288   be used to specify notation that is only available for input.
   289 
   290   \item @{verbatim internal} dummy print mode that is never active;
   291   used internally in Isabelle/Pure.
   292 
   293   \item @{verbatim xsymbols}: enable proper mathematical symbols
   294   instead of ASCII art.\footnote{This traditional mode name stems from
   295   the ``X-Symbol'' package for classic Proof~General with XEmacs.}
   296 
   297   \item @{verbatim HTML}: additional mode that is active in HTML
   298   presentation of Isabelle theory sources; allows to provide
   299   alternative output notation.
   300 
   301   \item @{verbatim latex}: additional mode that is active in {\LaTeX}
   302   document preparation of Isabelle theory sources; allows to provide
   303   alternative output notation.
   304 
   305   \end{itemize}
   306 \<close>
   307 
   308 
   309 section \<open>Mixfix annotations \label{sec:mixfix}\<close>
   310 
   311 text \<open>Mixfix annotations specify concrete \emph{inner syntax} of
   312   Isabelle types and terms.  Locally fixed parameters in toplevel
   313   theorem statements, locale and class specifications also admit
   314   mixfix annotations in a fairly uniform manner.  A mixfix annotation
   315   describes the concrete syntax, the translation to abstract
   316   syntax, and the pretty printing.  Special case annotations provide a
   317   simple means of specifying infix operators and binders.
   318 
   319   Isabelle mixfix syntax is inspired by {\OBJ} @{cite OBJ}.  It allows
   320   to specify any context-free priority grammar, which is more general
   321   than the fixity declarations of ML and Prolog.
   322 
   323   @{rail \<open>
   324     @{syntax_def mixfix}: '('
   325       (@{syntax template} prios? @{syntax nat}? |
   326         (@'infix' | @'infixl' | @'infixr') @{syntax template} @{syntax nat} |
   327         @'binder' @{syntax template} prios? @{syntax nat} |
   328         @'structure') ')'
   329     ;
   330     template: string
   331     ;
   332     prios: '[' (@{syntax nat} + ',') ']'
   333   \<close>}
   334 
   335   The string given as @{text template} may include literal text,
   336   spacing, blocks, and arguments (denoted by ``@{text _}''); the
   337   special symbol ``@{verbatim "\<index>"}'' (printed as ``@{text "\<index>"}'')
   338   represents an index argument that specifies an implicit @{keyword
   339   "structure"} reference (see also \secref{sec:locale}).  Only locally
   340   fixed variables may be declared as @{keyword "structure"}.
   341 
   342   Infix and binder declarations provide common abbreviations for
   343   particular mixfix declarations.  So in practice, mixfix templates
   344   mostly degenerate to literal text for concrete syntax, such as
   345   ``@{verbatim "++"}'' for an infix symbol.\<close>
   346 
   347 
   348 subsection \<open>The general mixfix form\<close>
   349 
   350 text \<open>In full generality, mixfix declarations work as follows.
   351   Suppose a constant @{text "c :: \<tau>\<^sub>1 \<Rightarrow> \<dots> \<tau>\<^sub>n \<Rightarrow> \<tau>"} is annotated by
   352   @{text "(mixfix [p\<^sub>1, \<dots>, p\<^sub>n] p)"}, where @{text "mixfix"} is a string
   353   @{text "d\<^sub>0 _ d\<^sub>1 _ \<dots> _ d\<^sub>n"} consisting of delimiters that surround
   354   argument positions as indicated by underscores.
   355 
   356   Altogether this determines a production for a context-free priority
   357   grammar, where for each argument @{text "i"} the syntactic category
   358   is determined by @{text "\<tau>\<^sub>i"} (with priority @{text "p\<^sub>i"}), and the
   359   result category is determined from @{text "\<tau>"} (with priority @{text
   360   "p"}).  Priority specifications are optional, with default 0 for
   361   arguments and 1000 for the result.\footnote{Omitting priorities is
   362   prone to syntactic ambiguities unless the delimiter tokens determine
   363   fully bracketed notation, as in @{text "if _ then _ else _ fi"}.}
   364 
   365   Since @{text "\<tau>"} may be again a function type, the constant
   366   type scheme may have more argument positions than the mixfix
   367   pattern.  Printing a nested application @{text "c t\<^sub>1 \<dots> t\<^sub>m"} for
   368   @{text "m > n"} works by attaching concrete notation only to the
   369   innermost part, essentially by printing @{text "(c t\<^sub>1 \<dots> t\<^sub>n) \<dots> t\<^sub>m"}
   370   instead.  If a term has fewer arguments than specified in the mixfix
   371   template, the concrete syntax is ignored.
   372 
   373   \medskip A mixfix template may also contain additional directives
   374   for pretty printing, notably spaces, blocks, and breaks.  The
   375   general template format is a sequence over any of the following
   376   entities.
   377 
   378   \begin{description}
   379 
   380   \item @{text "d"} is a delimiter, namely a non-empty sequence of
   381   characters other than the following special characters:
   382 
   383   \smallskip
   384   \begin{tabular}{ll}
   385     @{verbatim "'"} & single quote \\
   386     @{verbatim "_"} & underscore \\
   387     @{text "\<index>"} & index symbol \\
   388     @{verbatim "("} & open parenthesis \\
   389     @{verbatim ")"} & close parenthesis \\
   390     @{verbatim "/"} & slash \\
   391   \end{tabular}
   392   \medskip
   393 
   394   \item @{verbatim "'"} escapes the special meaning of these
   395   meta-characters, producing a literal version of the following
   396   character, unless that is a blank.
   397 
   398   A single quote followed by a blank separates delimiters, without
   399   affecting printing, but input tokens may have additional white space
   400   here.
   401 
   402   \item @{verbatim "_"} is an argument position, which stands for a
   403   certain syntactic category in the underlying grammar.
   404 
   405   \item @{text "\<index>"} is an indexed argument position; this is the place
   406   where implicit structure arguments can be attached.
   407 
   408   \item @{text "s"} is a non-empty sequence of spaces for printing.
   409   This and the following specifications do not affect parsing at all.
   410 
   411   \item @{verbatim "("}@{text n} opens a pretty printing block.  The
   412   optional number specifies how much indentation to add when a line
   413   break occurs within the block.  If the parenthesis is not followed
   414   by digits, the indentation defaults to 0.  A block specified via
   415   @{verbatim "(00"} is unbreakable.
   416 
   417   \item @{verbatim ")"} closes a pretty printing block.
   418 
   419   \item @{verbatim "//"} forces a line break.
   420 
   421   \item @{verbatim "/"}@{text s} allows a line break.  Here @{text s}
   422   stands for the string of spaces (zero or more) right after the
   423   slash.  These spaces are printed if the break is \emph{not} taken.
   424 
   425   \end{description}
   426 
   427   The general idea of pretty printing with blocks and breaks is also
   428   described in @{cite "paulson-ml2"}; it goes back to @{cite "Oppen:1980"}.
   429 \<close>
   430 
   431 
   432 subsection \<open>Infixes\<close>
   433 
   434 text \<open>Infix operators are specified by convenient short forms that
   435   abbreviate general mixfix annotations as follows:
   436 
   437   \begin{center}
   438   \begin{tabular}{lll}
   439 
   440   @{verbatim "("}@{keyword_def "infix"}~@{verbatim \<open>"\<close>}@{text sy}@{verbatim \<open>"\<close>} @{text "p"}@{verbatim ")"}
   441   & @{text "\<mapsto>"} &
   442   @{verbatim \<open>("(_\<close>}~@{text sy}@{verbatim \<open>/ _)" [\<close>}@{text "p + 1"}@{verbatim ","}~@{text "p + 1"}@{verbatim "]"}~@{text "p"}@{verbatim ")"} \\
   443   @{verbatim "("}@{keyword_def "infixl"}~@{verbatim \<open>"\<close>}@{text sy}@{verbatim \<open>"\<close>} @{text "p"}@{verbatim ")"}
   444   & @{text "\<mapsto>"} &
   445   @{verbatim \<open>("(_\<close>}~@{text sy}@{verbatim \<open>/ _)" [\<close>}@{text "p"}@{verbatim ","}~@{text "p + 1"}@{verbatim "]"}~@{text "p"}@{verbatim ")"} \\
   446   @{verbatim "("}@{keyword_def "infixr"}~@{verbatim \<open>"\<close>}@{text sy}@{verbatim \<open>"\<close>}~@{text "p"}@{verbatim ")"}
   447   & @{text "\<mapsto>"} &
   448   @{verbatim \<open>("(_\<close>}~@{text sy}@{verbatim \<open>/ _)" [\<close>}@{text "p + 1"}@{verbatim ","}~@{text "p"}@{verbatim "]"}~@{text "p"}@{verbatim ")"} \\
   449 
   450   \end{tabular}
   451   \end{center}
   452 
   453   The mixfix template @{verbatim \<open>"(_\<close>}~@{text sy}@{verbatim \<open>/ _)"\<close>}
   454   specifies two argument positions; the delimiter is preceded by a
   455   space and followed by a space or line break; the entire phrase is a
   456   pretty printing block.
   457 
   458   The alternative notation @{verbatim "op"}~@{text sy} is introduced
   459   in addition.  Thus any infix operator may be written in prefix form
   460   (as in ML), independently of the number of arguments in the term.
   461 \<close>
   462 
   463 
   464 subsection \<open>Binders\<close>
   465 
   466 text \<open>A \emph{binder} is a variable-binding construct such as a
   467   quantifier.  The idea to formalize @{text "\<forall>x. b"} as @{text "All
   468   (\<lambda>x. b)"} for @{text "All :: ('a \<Rightarrow> bool) \<Rightarrow> bool"} already goes back
   469   to @{cite church40}.  Isabelle declarations of certain higher-order
   470   operators may be annotated with @{keyword_def "binder"} annotations
   471   as follows:
   472 
   473   \begin{center}
   474   @{text "c :: "}@{verbatim \<open>"\<close>}@{text "(\<tau>\<^sub>1 \<Rightarrow> \<tau>\<^sub>2) \<Rightarrow> \<tau>\<^sub>3"}@{verbatim \<open>"  (\<close>}@{keyword "binder"}~@{verbatim \<open>"\<close>}@{text "sy"}@{verbatim \<open>" [\<close>}@{text "p"}@{verbatim "]"}~@{text "q"}@{verbatim ")"}
   475   \end{center}
   476 
   477   This introduces concrete binder syntax @{text "sy x. b"}, where
   478   @{text x} is a bound variable of type @{text "\<tau>\<^sub>1"}, the body @{text
   479   b} has type @{text "\<tau>\<^sub>2"} and the whole term has type @{text "\<tau>\<^sub>3"}.
   480   The optional integer @{text p} specifies the syntactic priority of
   481   the body; the default is @{text "q"}, which is also the priority of
   482   the whole construct.
   483 
   484   Internally, the binder syntax is expanded to something like this:
   485   \begin{center}
   486   @{text "c_binder :: "}@{verbatim \<open>"\<close>}@{text "idts \<Rightarrow> \<tau>\<^sub>2 \<Rightarrow> \<tau>\<^sub>3"}@{verbatim \<open>"  ("(3\<close>}@{text sy}@{verbatim \<open>_./ _)" [0,\<close>}~@{text "p"}@{verbatim "]"}~@{text "q"}@{verbatim ")"}
   487   \end{center}
   488 
   489   Here @{syntax (inner) idts} is the nonterminal symbol for a list of
   490   identifiers with optional type constraints (see also
   491   \secref{sec:pure-grammar}).  The mixfix template @{verbatim
   492   \<open>"(3\<close>}@{text sy}@{verbatim \<open>_./ _)"\<close>} defines argument positions
   493   for the bound identifiers and the body, separated by a dot with
   494   optional line break; the entire phrase is a pretty printing block of
   495   indentation level 3.  Note that there is no extra space after @{text
   496   "sy"}, so it needs to be included user specification if the binder
   497   syntax ends with a token that may be continued by an identifier
   498   token at the start of @{syntax (inner) idts}.
   499 
   500   Furthermore, a syntax translation to transforms @{text "c_binder x\<^sub>1
   501   \<dots> x\<^sub>n b"} into iterated application @{text "c (\<lambda>x\<^sub>1. \<dots> c (\<lambda>x\<^sub>n. b)\<dots>)"}.
   502   This works in both directions, for parsing and printing.\<close>
   503 
   504 
   505 section \<open>Explicit notation \label{sec:notation}\<close>
   506 
   507 text \<open>
   508   \begin{matharray}{rcll}
   509     @{command_def "type_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   510     @{command_def "no_type_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   511     @{command_def "notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   512     @{command_def "no_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   513     @{command_def "write"} & : & @{text "proof(state) \<rightarrow> proof(state)"} \\
   514   \end{matharray}
   515 
   516   Commands that introduce new logical entities (terms or types)
   517   usually allow to provide mixfix annotations on the spot, which is
   518   convenient for default notation.  Nonetheless, the syntax may be
   519   modified later on by declarations for explicit notation.  This
   520   allows to add or delete mixfix annotations for of existing logical
   521   entities within the current context.
   522 
   523   @{rail \<open>
   524     (@@{command type_notation} | @@{command no_type_notation}) @{syntax mode}? \<newline>
   525       (@{syntax nameref} @{syntax mixfix} + @'and')
   526     ;
   527     (@@{command notation} | @@{command no_notation}) @{syntax mode}? \<newline>
   528       (@{syntax nameref} @{syntax mixfix} + @'and')
   529     ;
   530     @@{command write} @{syntax mode}? (@{syntax nameref} @{syntax mixfix} + @'and')
   531   \<close>}
   532 
   533   \begin{description}
   534 
   535   \item @{command "type_notation"}~@{text "c (mx)"} associates mixfix
   536   syntax with an existing type constructor.  The arity of the
   537   constructor is retrieved from the context.
   538 
   539   \item @{command "no_type_notation"} is similar to @{command
   540   "type_notation"}, but removes the specified syntax annotation from
   541   the present context.
   542 
   543   \item @{command "notation"}~@{text "c (mx)"} associates mixfix
   544   syntax with an existing constant or fixed variable.  The type
   545   declaration of the given entity is retrieved from the context.
   546 
   547   \item @{command "no_notation"} is similar to @{command "notation"},
   548   but removes the specified syntax annotation from the present
   549   context.
   550 
   551   \item @{command "write"} is similar to @{command "notation"}, but
   552   works within an Isar proof body.
   553 
   554   \end{description}
   555 \<close>
   556 
   557 
   558 section \<open>The Pure syntax \label{sec:pure-syntax}\<close>
   559 
   560 subsection \<open>Lexical matters \label{sec:inner-lex}\<close>
   561 
   562 text \<open>The inner lexical syntax vaguely resembles the outer one
   563   (\secref{sec:outer-lex}), but some details are different.  There are
   564   two main categories of inner syntax tokens:
   565 
   566   \begin{enumerate}
   567 
   568   \item \emph{delimiters} --- the literal tokens occurring in
   569   productions of the given priority grammar (cf.\
   570   \secref{sec:priority-grammar});
   571 
   572   \item \emph{named tokens} --- various categories of identifiers etc.
   573 
   574   \end{enumerate}
   575 
   576   Delimiters override named tokens and may thus render certain
   577   identifiers inaccessible.  Sometimes the logical context admits
   578   alternative ways to refer to the same entity, potentially via
   579   qualified names.
   580 
   581   \medskip The categories for named tokens are defined once and for
   582   all as follows, reusing some categories of the outer token syntax
   583   (\secref{sec:outer-lex}).
   584 
   585   \begin{center}
   586   \begin{supertabular}{rcl}
   587     @{syntax_def (inner) id} & = & @{syntax_ref ident} \\
   588     @{syntax_def (inner) longid} & = & @{syntax_ref longident} \\
   589     @{syntax_def (inner) var} & = & @{syntax_ref var} \\
   590     @{syntax_def (inner) tid} & = & @{syntax_ref typefree} \\
   591     @{syntax_def (inner) tvar} & = & @{syntax_ref typevar} \\
   592     @{syntax_def (inner) num_token} & = & @{syntax_ref nat} \\
   593     @{syntax_def (inner) float_token} & = & @{syntax_ref nat}@{verbatim "."}@{syntax_ref nat} \\
   594     @{syntax_def (inner) str_token} & = & @{verbatim "''"} @{text "\<dots>"} @{verbatim "''"} \\
   595     @{syntax_def (inner) string_token} & = & @{verbatim \<open>"\<close>} @{text "\<dots>"} @{verbatim \<open>"\<close>} \\
   596     @{syntax_def (inner) cartouche} & = & @{verbatim "\<open>"} @{text "\<dots>"} @{verbatim "\<close>"} \\
   597   \end{supertabular}
   598   \end{center}
   599 
   600   The token categories @{syntax (inner) num_token}, @{syntax (inner)
   601   float_token}, @{syntax (inner) str_token}, @{syntax (inner) string_token},
   602   and @{syntax (inner) cartouche} are not used in Pure. Object-logics may
   603   implement numerals and string literals by adding appropriate syntax
   604   declarations, together with some translation functions (e.g.\ see @{file
   605   "~~/src/HOL/Tools/string_syntax.ML"}).
   606 
   607   The derived categories @{syntax_def (inner) num_const}, and @{syntax_def
   608   (inner) float_const}, provide robust access to the respective tokens: the
   609   syntax tree holds a syntactic constant instead of a free variable.
   610 \<close>
   611 
   612 
   613 subsection \<open>Priority grammars \label{sec:priority-grammar}\<close>
   614 
   615 text \<open>A context-free grammar consists of a set of \emph{terminal
   616   symbols}, a set of \emph{nonterminal symbols} and a set of
   617   \emph{productions}.  Productions have the form @{text "A = \<gamma>"},
   618   where @{text A} is a nonterminal and @{text \<gamma>} is a string of
   619   terminals and nonterminals.  One designated nonterminal is called
   620   the \emph{root symbol}.  The language defined by the grammar
   621   consists of all strings of terminals that can be derived from the
   622   root symbol by applying productions as rewrite rules.
   623 
   624   The standard Isabelle parser for inner syntax uses a \emph{priority
   625   grammar}.  Each nonterminal is decorated by an integer priority:
   626   @{text "A\<^sup>(\<^sup>p\<^sup>)"}.  In a derivation, @{text "A\<^sup>(\<^sup>p\<^sup>)"} may be rewritten
   627   using a production @{text "A\<^sup>(\<^sup>q\<^sup>) = \<gamma>"} only if @{text "p \<le> q"}.  Any
   628   priority grammar can be translated into a normal context-free
   629   grammar by introducing new nonterminals and productions.
   630 
   631   \medskip Formally, a set of context free productions @{text G}
   632   induces a derivation relation @{text "\<longrightarrow>\<^sub>G"} as follows.  Let @{text
   633   \<alpha>} and @{text \<beta>} denote strings of terminal or nonterminal symbols.
   634   Then @{text "\<alpha> A\<^sup>(\<^sup>p\<^sup>) \<beta> \<longrightarrow>\<^sub>G \<alpha> \<gamma> \<beta>"} holds if and only if @{text G}
   635   contains some production @{text "A\<^sup>(\<^sup>q\<^sup>) = \<gamma>"} for @{text "p \<le> q"}.
   636 
   637   \medskip The following grammar for arithmetic expressions
   638   demonstrates how binding power and associativity of operators can be
   639   enforced by priorities.
   640 
   641   \begin{center}
   642   \begin{tabular}{rclr}
   643   @{text "A\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "="} & @{verbatim "("} @{text "A\<^sup>(\<^sup>0\<^sup>)"} @{verbatim ")"} \\
   644   @{text "A\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "="} & @{verbatim 0} \\
   645   @{text "A\<^sup>(\<^sup>0\<^sup>)"} & @{text "="} & @{text "A\<^sup>(\<^sup>0\<^sup>)"} @{verbatim "+"} @{text "A\<^sup>(\<^sup>1\<^sup>)"} \\
   646   @{text "A\<^sup>(\<^sup>2\<^sup>)"} & @{text "="} & @{text "A\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "*"} @{text "A\<^sup>(\<^sup>2\<^sup>)"} \\
   647   @{text "A\<^sup>(\<^sup>3\<^sup>)"} & @{text "="} & @{verbatim "-"} @{text "A\<^sup>(\<^sup>3\<^sup>)"} \\
   648   \end{tabular}
   649   \end{center}
   650   The choice of priorities determines that @{verbatim "-"} binds
   651   tighter than @{verbatim "*"}, which binds tighter than @{verbatim
   652   "+"}.  Furthermore @{verbatim "+"} associates to the left and
   653   @{verbatim "*"} to the right.
   654 
   655   \medskip For clarity, grammars obey these conventions:
   656   \begin{itemize}
   657 
   658   \item All priorities must lie between 0 and 1000.
   659 
   660   \item Priority 0 on the right-hand side and priority 1000 on the
   661   left-hand side may be omitted.
   662 
   663   \item The production @{text "A\<^sup>(\<^sup>p\<^sup>) = \<alpha>"} is written as @{text "A = \<alpha>
   664   (p)"}, i.e.\ the priority of the left-hand side actually appears in
   665   a column on the far right.
   666 
   667   \item Alternatives are separated by @{text "|"}.
   668 
   669   \item Repetition is indicated by dots @{text "(\<dots>)"} in an informal
   670   but obvious way.
   671 
   672   \end{itemize}
   673 
   674   Using these conventions, the example grammar specification above
   675   takes the form:
   676   \begin{center}
   677   \begin{tabular}{rclc}
   678     @{text A} & @{text "="} & @{verbatim "("} @{text A} @{verbatim ")"} \\
   679               & @{text "|"} & @{verbatim 0} & \qquad\qquad \\
   680               & @{text "|"} & @{text A} @{verbatim "+"} @{text "A\<^sup>(\<^sup>1\<^sup>)"} & @{text "(0)"} \\
   681               & @{text "|"} & @{text "A\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "*"} @{text "A\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
   682               & @{text "|"} & @{verbatim "-"} @{text "A\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
   683   \end{tabular}
   684   \end{center}
   685 \<close>
   686 
   687 
   688 subsection \<open>The Pure grammar \label{sec:pure-grammar}\<close>
   689 
   690 text \<open>The priority grammar of the @{text "Pure"} theory is defined
   691   approximately like this:
   692 
   693   \begin{center}
   694   \begin{supertabular}{rclr}
   695 
   696   @{syntax_def (inner) any} & = & @{text "prop  |  logic"} \\\\
   697 
   698   @{syntax_def (inner) prop} & = & @{verbatim "("} @{text prop} @{verbatim ")"} \\
   699     & @{text "|"} & @{text "prop\<^sup>(\<^sup>4\<^sup>)"} @{verbatim "::"} @{text type} & @{text "(3)"} \\
   700     & @{text "|"} & @{text "any\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "=="} @{text "any\<^sup>(\<^sup>3\<^sup>)"} & @{text "(2)"} \\
   701     & @{text "|"} & @{text "any\<^sup>(\<^sup>3\<^sup>)"} @{text "\<equiv>"} @{text "any\<^sup>(\<^sup>3\<^sup>)"} & @{text "(2)"} \\
   702     & @{text "|"} & @{text "prop\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "&&&"} @{text "prop\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
   703     & @{text "|"} & @{text "prop\<^sup>(\<^sup>2\<^sup>)"} @{verbatim "==>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
   704     & @{text "|"} & @{text "prop\<^sup>(\<^sup>2\<^sup>)"} @{text "\<Longrightarrow>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
   705     & @{text "|"} & @{verbatim "[|"} @{text prop} @{verbatim ";"} @{text "\<dots>"} @{verbatim ";"} @{text prop} @{verbatim "|]"} @{verbatim "==>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
   706     & @{text "|"} & @{text "\<lbrakk>"} @{text prop} @{verbatim ";"} @{text "\<dots>"} @{verbatim ";"} @{text prop} @{text "\<rbrakk>"} @{text "\<Longrightarrow>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
   707     & @{text "|"} & @{verbatim "!!"} @{text idts} @{verbatim "."} @{text prop} & @{text "(0)"} \\
   708     & @{text "|"} & @{text "\<And>"} @{text idts} @{verbatim "."} @{text prop} & @{text "(0)"} \\
   709     & @{text "|"} & @{verbatim OFCLASS} @{verbatim "("} @{text type} @{verbatim ","} @{text logic} @{verbatim ")"} \\
   710     & @{text "|"} & @{verbatim SORT_CONSTRAINT} @{verbatim "("} @{text type} @{verbatim ")"} \\
   711     & @{text "|"} & @{verbatim TERM} @{text logic} \\
   712     & @{text "|"} & @{verbatim PROP} @{text aprop} \\\\
   713 
   714   @{syntax_def (inner) aprop} & = & @{verbatim "("} @{text aprop} @{verbatim ")"} \\
   715     & @{text "|"} & @{text "id  |  longid  |  var  |  "}@{verbatim "_"}@{text "  |  "}@{verbatim "..."} \\
   716     & @{text "|"} & @{verbatim CONST} @{text "id  |  "}@{verbatim CONST} @{text "longid"} \\
   717     & @{text "|"} & @{verbatim XCONST} @{text "id  |  "}@{verbatim XCONST} @{text "longid"} \\
   718     & @{text "|"} & @{text "logic\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)  any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) \<dots> any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "(999)"} \\\\
   719 
   720   @{syntax_def (inner) logic} & = & @{verbatim "("} @{text logic} @{verbatim ")"} \\
   721     & @{text "|"} & @{text "logic\<^sup>(\<^sup>4\<^sup>)"} @{verbatim "::"} @{text type} & @{text "(3)"} \\
   722     & @{text "|"} & @{text "id  |  longid  |  var  |  "}@{verbatim "_"}@{text "  |  "}@{verbatim "..."} \\
   723     & @{text "|"} & @{verbatim CONST} @{text "id  |  "}@{verbatim CONST} @{text "longid"} \\
   724     & @{text "|"} & @{verbatim XCONST} @{text "id  |  "}@{verbatim XCONST} @{text "longid"} \\
   725     & @{text "|"} & @{text "logic\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)  any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) \<dots> any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "(999)"} \\
   726     & @{text "|"} & @{text "\<struct> index\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} \\
   727     & @{text "|"} & @{verbatim "%"} @{text pttrns} @{verbatim "."} @{text "any\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
   728     & @{text "|"} & @{text \<lambda>} @{text pttrns} @{verbatim "."} @{text "any\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
   729     & @{text "|"} & @{verbatim op} @{verbatim "=="}@{text "  |  "}@{verbatim op} @{text "\<equiv>"}@{text "  |  "}@{verbatim op} @{verbatim "&&&"} \\
   730     & @{text "|"} & @{verbatim op} @{verbatim "==>"}@{text "  |  "}@{verbatim op} @{text "\<Longrightarrow>"} \\
   731     & @{text "|"} & @{verbatim TYPE} @{verbatim "("} @{text type} @{verbatim ")"} \\\\
   732 
   733   @{syntax_def (inner) idt} & = & @{verbatim "("} @{text idt} @{verbatim ")"}@{text "  |  id  |  "}@{verbatim "_"} \\
   734     & @{text "|"} & @{text id} @{verbatim "::"} @{text type} & @{text "(0)"} \\
   735     & @{text "|"} & @{verbatim "_"} @{verbatim "::"} @{text type} & @{text "(0)"} \\\\
   736 
   737   @{syntax_def (inner) index} & = & @{verbatim "\<^bsub>"} @{text "logic\<^sup>(\<^sup>0\<^sup>)"} @{verbatim "\<^esub>"}@{text "  |  |  \<index>"} \\\\
   738 
   739   @{syntax_def (inner) idts} & = & @{text "idt  |  idt\<^sup>(\<^sup>1\<^sup>) idts"} & @{text "(0)"} \\\\
   740 
   741   @{syntax_def (inner) pttrn} & = & @{text idt} \\\\
   742 
   743   @{syntax_def (inner) pttrns} & = & @{text "pttrn  |  pttrn\<^sup>(\<^sup>1\<^sup>) pttrns"} & @{text "(0)"} \\\\
   744 
   745   @{syntax_def (inner) type} & = & @{verbatim "("} @{text type} @{verbatim ")"} \\
   746     & @{text "|"} & @{text "tid  |  tvar  |  "}@{verbatim "_"} \\
   747     & @{text "|"} & @{text "tid"} @{verbatim "::"} @{text "sort  |  tvar  "}@{verbatim "::"} @{text "sort  |  "}@{verbatim "_"} @{verbatim "::"} @{text "sort"} \\
   748     & @{text "|"} & @{text "type_name  |  type\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) type_name"} \\
   749     & @{text "|"} & @{verbatim "("} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim ")"} @{text type_name} \\
   750     & @{text "|"} & @{text "type\<^sup>(\<^sup>1\<^sup>)"} @{verbatim "=>"} @{text type} & @{text "(0)"} \\
   751     & @{text "|"} & @{text "type\<^sup>(\<^sup>1\<^sup>)"} @{text "\<Rightarrow>"} @{text type} & @{text "(0)"} \\
   752     & @{text "|"} & @{verbatim "["} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim "]"} @{verbatim "=>"} @{text type} & @{text "(0)"} \\
   753     & @{text "|"} & @{verbatim "["} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim "]"} @{text "\<Rightarrow>"} @{text type} & @{text "(0)"} \\
   754   @{syntax_def (inner) type_name} & = & @{text "id  |  longid"} \\\\
   755 
   756   @{syntax_def (inner) sort} & = & @{syntax class_name}~@{text "  |  "}@{verbatim "{}"} \\
   757     & @{text "|"} & @{verbatim "{"} @{syntax class_name} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{syntax class_name} @{verbatim "}"} \\
   758   @{syntax_def (inner) class_name} & = & @{text "id  |  longid"} \\
   759   \end{supertabular}
   760   \end{center}
   761 
   762   \medskip Here literal terminals are printed @{verbatim "verbatim"};
   763   see also \secref{sec:inner-lex} for further token categories of the
   764   inner syntax.  The meaning of the nonterminals defined by the above
   765   grammar is as follows:
   766 
   767   \begin{description}
   768 
   769   \item @{syntax_ref (inner) any} denotes any term.
   770 
   771   \item @{syntax_ref (inner) prop} denotes meta-level propositions,
   772   which are terms of type @{typ prop}.  The syntax of such formulae of
   773   the meta-logic is carefully distinguished from usual conventions for
   774   object-logics.  In particular, plain @{text "\<lambda>"}-term notation is
   775   \emph{not} recognized as @{syntax (inner) prop}.
   776 
   777   \item @{syntax_ref (inner) aprop} denotes atomic propositions, which
   778   are embedded into regular @{syntax (inner) prop} by means of an
   779   explicit @{verbatim PROP} token.
   780 
   781   Terms of type @{typ prop} with non-constant head, e.g.\ a plain
   782   variable, are printed in this form.  Constants that yield type @{typ
   783   prop} are expected to provide their own concrete syntax; otherwise
   784   the printed version will appear like @{syntax (inner) logic} and
   785   cannot be parsed again as @{syntax (inner) prop}.
   786 
   787   \item @{syntax_ref (inner) logic} denotes arbitrary terms of a
   788   logical type, excluding type @{typ prop}.  This is the main
   789   syntactic category of object-logic entities, covering plain @{text
   790   \<lambda>}-term notation (variables, abstraction, application), plus
   791   anything defined by the user.
   792 
   793   When specifying notation for logical entities, all logical types
   794   (excluding @{typ prop}) are \emph{collapsed} to this single category
   795   of @{syntax (inner) logic}.
   796 
   797   \item @{syntax_ref (inner) index} denotes an optional index term for
   798   indexed syntax.  If omitted, it refers to the first @{keyword_ref
   799   "structure"} variable in the context.  The special dummy ``@{text
   800   "\<index>"}'' serves as pattern variable in mixfix annotations that
   801   introduce indexed notation.
   802 
   803   \item @{syntax_ref (inner) idt} denotes identifiers, possibly
   804   constrained by types.
   805 
   806   \item @{syntax_ref (inner) idts} denotes a sequence of @{syntax_ref
   807   (inner) idt}.  This is the most basic category for variables in
   808   iterated binders, such as @{text "\<lambda>"} or @{text "\<And>"}.
   809 
   810   \item @{syntax_ref (inner) pttrn} and @{syntax_ref (inner) pttrns}
   811   denote patterns for abstraction, cases bindings etc.  In Pure, these
   812   categories start as a merely copy of @{syntax (inner) idt} and
   813   @{syntax (inner) idts}, respectively.  Object-logics may add
   814   additional productions for binding forms.
   815 
   816   \item @{syntax_ref (inner) type} denotes types of the meta-logic.
   817 
   818   \item @{syntax_ref (inner) sort} denotes meta-level sorts.
   819 
   820   \end{description}
   821 
   822   Here are some further explanations of certain syntax features.
   823 
   824   \begin{itemize}
   825 
   826   \item In @{syntax (inner) idts}, note that @{text "x :: nat y"} is
   827   parsed as @{text "x :: (nat y)"}, treating @{text y} like a type
   828   constructor applied to @{text nat}.  To avoid this interpretation,
   829   write @{text "(x :: nat) y"} with explicit parentheses.
   830 
   831   \item Similarly, @{text "x :: nat y :: nat"} is parsed as @{text "x ::
   832   (nat y :: nat)"}.  The correct form is @{text "(x :: nat) (y ::
   833   nat)"}, or @{text "(x :: nat) y :: nat"} if @{text y} is last in the
   834   sequence of identifiers.
   835 
   836   \item Type constraints for terms bind very weakly.  For example,
   837   @{text "x < y :: nat"} is normally parsed as @{text "(x < y) ::
   838   nat"}, unless @{text "<"} has a very low priority, in which case the
   839   input is likely to be ambiguous.  The correct form is @{text "x < (y
   840   :: nat)"}.
   841 
   842   \item Constraints may be either written with two literal colons
   843   ``@{verbatim "::"}'' or the double-colon symbol @{verbatim "\<Colon>"},
   844   which actually looks exactly the same in some {\LaTeX} styles.
   845 
   846   \item Dummy variables (written as underscore) may occur in different
   847   roles.
   848 
   849   \begin{description}
   850 
   851   \item A type ``@{text "_"}'' or ``@{text "_ :: sort"}'' acts like an
   852   anonymous inference parameter, which is filled-in according to the
   853   most general type produced by the type-checking phase.
   854 
   855   \item A bound ``@{text "_"}'' refers to a vacuous abstraction, where
   856   the body does not refer to the binding introduced here.  As in the
   857   term @{term "\<lambda>x _. x"}, which is @{text "\<alpha>"}-equivalent to @{text
   858   "\<lambda>x y. x"}.
   859 
   860   \item A free ``@{text "_"}'' refers to an implicit outer binding.
   861   Higher definitional packages usually allow forms like @{text "f x _
   862   = x"}.
   863 
   864   \item A schematic ``@{text "_"}'' (within a term pattern, see
   865   \secref{sec:term-decls}) refers to an anonymous variable that is
   866   implicitly abstracted over its context of locally bound variables.
   867   For example, this allows pattern matching of @{text "{x. f x = g
   868   x}"} against @{text "{x. _ = _}"}, or even @{text "{_. _ = _}"} by
   869   using both bound and schematic dummies.
   870 
   871   \end{description}
   872 
   873   \item The three literal dots ``@{verbatim "..."}'' may be also
   874   written as ellipsis symbol @{verbatim "\<dots>"}.  In both cases this
   875   refers to a special schematic variable, which is bound in the
   876   context.  This special term abbreviation works nicely with
   877   calculational reasoning (\secref{sec:calculation}).
   878 
   879   \item @{verbatim CONST} ensures that the given identifier is treated
   880   as constant term, and passed through the parse tree in fully
   881   internalized form.  This is particularly relevant for translation
   882   rules (\secref{sec:syn-trans}), notably on the RHS.
   883 
   884   \item @{verbatim XCONST} is similar to @{verbatim CONST}, but
   885   retains the constant name as given.  This is only relevant to
   886   translation rules (\secref{sec:syn-trans}), notably on the LHS.
   887 
   888   \end{itemize}
   889 \<close>
   890 
   891 
   892 subsection \<open>Inspecting the syntax\<close>
   893 
   894 text \<open>
   895   \begin{matharray}{rcl}
   896     @{command_def "print_syntax"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
   897   \end{matharray}
   898 
   899   \begin{description}
   900 
   901   \item @{command "print_syntax"} prints the inner syntax of the
   902   current context.  The output can be quite large; the most important
   903   sections are explained below.
   904 
   905   \begin{description}
   906 
   907   \item @{text "lexicon"} lists the delimiters of the inner token
   908   language; see \secref{sec:inner-lex}.
   909 
   910   \item @{text "prods"} lists the productions of the underlying
   911   priority grammar; see \secref{sec:priority-grammar}.
   912 
   913   The nonterminal @{text "A\<^sup>(\<^sup>p\<^sup>)"} is rendered in plain text as @{text
   914   "A[p]"}; delimiters are quoted.  Many productions have an extra
   915   @{text "\<dots> => name"}.  These names later become the heads of parse
   916   trees; they also guide the pretty printer.
   917 
   918   Productions without such parse tree names are called \emph{copy
   919   productions}.  Their right-hand side must have exactly one
   920   nonterminal symbol (or named token).  The parser does not create a
   921   new parse tree node for copy productions, but simply returns the
   922   parse tree of the right-hand symbol.
   923 
   924   If the right-hand side of a copy production consists of a single
   925   nonterminal without any delimiters, then it is called a \emph{chain
   926   production}.  Chain productions act as abbreviations: conceptually,
   927   they are removed from the grammar by adding new productions.
   928   Priority information attached to chain productions is ignored; only
   929   the dummy value @{text "-1"} is displayed.
   930 
   931   \item @{text "print modes"} lists the alternative print modes
   932   provided by this grammar; see \secref{sec:print-modes}.
   933 
   934   \item @{text "parse_rules"} and @{text "print_rules"} relate to
   935   syntax translations (macros); see \secref{sec:syn-trans}.
   936 
   937   \item @{text "parse_ast_translation"} and @{text
   938   "print_ast_translation"} list sets of constants that invoke
   939   translation functions for abstract syntax trees, which are only
   940   required in very special situations; see \secref{sec:tr-funs}.
   941 
   942   \item @{text "parse_translation"} and @{text "print_translation"}
   943   list the sets of constants that invoke regular translation
   944   functions; see \secref{sec:tr-funs}.
   945 
   946   \end{description}
   947 
   948   \end{description}
   949 \<close>
   950 
   951 
   952 subsection \<open>Ambiguity of parsed expressions\<close>
   953 
   954 text \<open>
   955   \begin{tabular}{rcll}
   956     @{attribute_def syntax_ambiguity_warning} & : & @{text attribute} & default @{text true} \\
   957     @{attribute_def syntax_ambiguity_limit} & : & @{text attribute} & default @{text 10} \\
   958   \end{tabular}
   959 
   960   Depending on the grammar and the given input, parsing may be
   961   ambiguous.  Isabelle lets the Earley parser enumerate all possible
   962   parse trees, and then tries to make the best out of the situation.
   963   Terms that cannot be type-checked are filtered out, which often
   964   leads to a unique result in the end.  Unlike regular type
   965   reconstruction, which is applied to the whole collection of input
   966   terms simultaneously, the filtering stage only treats each given
   967   term in isolation.  Filtering is also not attempted for individual
   968   types or raw ASTs (as required for @{command translations}).
   969 
   970   Certain warning or error messages are printed, depending on the
   971   situation and the given configuration options.  Parsing ultimately
   972   fails, if multiple results remain after the filtering phase.
   973 
   974   \begin{description}
   975 
   976   \item @{attribute syntax_ambiguity_warning} controls output of
   977   explicit warning messages about syntax ambiguity.
   978 
   979   \item @{attribute syntax_ambiguity_limit} determines the number of
   980   resulting parse trees that are shown as part of the printed message
   981   in case of an ambiguity.
   982 
   983   \end{description}
   984 \<close>
   985 
   986 
   987 section \<open>Syntax transformations \label{sec:syntax-transformations}\<close>
   988 
   989 text \<open>The inner syntax engine of Isabelle provides separate
   990   mechanisms to transform parse trees either via rewrite systems on
   991   first-order ASTs (\secref{sec:syn-trans}), or ML functions on ASTs
   992   or syntactic @{text "\<lambda>"}-terms (\secref{sec:tr-funs}).  This works
   993   both for parsing and printing, as outlined in
   994   \figref{fig:parse-print}.
   995 
   996   \begin{figure}[htbp]
   997   \begin{center}
   998   \begin{tabular}{cl}
   999   string          & \\
  1000   @{text "\<down>"}     & lexer + parser \\
  1001   parse tree      & \\
  1002   @{text "\<down>"}     & parse AST translation \\
  1003   AST             & \\
  1004   @{text "\<down>"}     & AST rewriting (macros) \\
  1005   AST             & \\
  1006   @{text "\<down>"}     & parse translation \\
  1007   --- pre-term ---    & \\
  1008   @{text "\<down>"}     & print translation \\
  1009   AST             & \\
  1010   @{text "\<down>"}     & AST rewriting (macros) \\
  1011   AST             & \\
  1012   @{text "\<down>"}     & print AST translation \\
  1013   string          &
  1014   \end{tabular}
  1015   \end{center}
  1016   \caption{Parsing and printing with translations}\label{fig:parse-print}
  1017   \end{figure}
  1018 
  1019   These intermediate syntax tree formats eventually lead to a pre-term
  1020   with all names and binding scopes resolved, but most type
  1021   information still missing.  Explicit type constraints might be given by
  1022   the user, or implicit position information by the system --- both
  1023   need to be passed-through carefully by syntax transformations.
  1024 
  1025   Pre-terms are further processed by the so-called \emph{check} and
  1026   \emph{unckeck} phases that are intertwined with type-inference (see
  1027   also @{cite "isabelle-implementation"}).  The latter allows to operate
  1028   on higher-order abstract syntax with proper binding and type
  1029   information already available.
  1030 
  1031   As a rule of thumb, anything that manipulates bindings of variables
  1032   or constants needs to be implemented as syntax transformation (see
  1033   below).  Anything else is better done via check/uncheck: a prominent
  1034   example application is the @{command abbreviation} concept of
  1035   Isabelle/Pure.\<close>
  1036 
  1037 
  1038 subsection \<open>Abstract syntax trees \label{sec:ast}\<close>
  1039 
  1040 text \<open>The ML datatype @{ML_type Ast.ast} explicitly represents the
  1041   intermediate AST format that is used for syntax rewriting
  1042   (\secref{sec:syn-trans}).  It is defined in ML as follows:
  1043   \begin{ttbox}
  1044   datatype ast =
  1045     Constant of string |
  1046     Variable of string |
  1047     Appl of ast list
  1048   \end{ttbox}
  1049 
  1050   An AST is either an atom (constant or variable) or a list of (at
  1051   least two) subtrees.  Occasional diagnostic output of ASTs uses
  1052   notation that resembles S-expression of LISP.  Constant atoms are
  1053   shown as quoted strings, variable atoms as non-quoted strings and
  1054   applications as a parenthesized list of subtrees.  For example, the
  1055   AST
  1056   @{ML [display] \<open>Ast.Appl [Ast.Constant "_abs", Ast.Variable "x", Ast.Variable "t"]\<close>}
  1057   is pretty-printed as @{verbatim \<open>("_abs" x t)\<close>}.  Note that
  1058   @{verbatim "()"} and @{verbatim "(x)"} are excluded as ASTs, because
  1059   they have too few subtrees.
  1060 
  1061   \medskip AST application is merely a pro-forma mechanism to indicate
  1062   certain syntactic structures.  Thus @{verbatim "(c a b)"} could mean
  1063   either term application or type application, depending on the
  1064   syntactic context.
  1065 
  1066   Nested application like @{verbatim \<open>(("_abs" x t) u)\<close>} is also
  1067   possible, but ASTs are definitely first-order: the syntax constant
  1068   @{verbatim \<open>"_abs"\<close>} does not bind the @{verbatim x} in any way.
  1069   Proper bindings are introduced in later stages of the term syntax,
  1070   where @{verbatim \<open>("_abs" x t)\<close>} becomes an @{ML Abs} node and
  1071   occurrences of @{verbatim x} in @{verbatim t} are replaced by bound
  1072   variables (represented as de-Bruijn indices).
  1073 \<close>
  1074 
  1075 
  1076 subsubsection \<open>AST constants versus variables\<close>
  1077 
  1078 text \<open>Depending on the situation --- input syntax, output syntax,
  1079   translation patterns --- the distinction of atomic ASTs as @{ML
  1080   Ast.Constant} versus @{ML Ast.Variable} serves slightly different
  1081   purposes.
  1082 
  1083   Input syntax of a term such as @{text "f a b = c"} does not yet
  1084   indicate the scopes of atomic entities @{text "f, a, b, c"}: they
  1085   could be global constants or local variables, even bound ones
  1086   depending on the context of the term.  @{ML Ast.Variable} leaves
  1087   this choice still open: later syntax layers (or translation
  1088   functions) may capture such a variable to determine its role
  1089   specifically, to make it a constant, bound variable, free variable
  1090   etc.  In contrast, syntax translations that introduce already known
  1091   constants would rather do it via @{ML Ast.Constant} to prevent
  1092   accidental re-interpretation later on.
  1093 
  1094   Output syntax turns term constants into @{ML Ast.Constant} and
  1095   variables (free or schematic) into @{ML Ast.Variable}.  This
  1096   information is precise when printing fully formal @{text "\<lambda>"}-terms.
  1097 
  1098   \medskip AST translation patterns (\secref{sec:syn-trans}) that
  1099   represent terms cannot distinguish constants and variables
  1100   syntactically.  Explicit indication of @{text "CONST c"} inside the
  1101   term language is required, unless @{text "c"} is known as special
  1102   \emph{syntax constant} (see also @{command syntax}).  It is also
  1103   possible to use @{command syntax} declarations (without mixfix
  1104   annotation) to enforce that certain unqualified names are always
  1105   treated as constant within the syntax machinery.
  1106 
  1107   The situation is simpler for ASTs that represent types or sorts,
  1108   since the concrete syntax already distinguishes type variables from
  1109   type constants (constructors).  So @{text "('a, 'b) foo"}
  1110   corresponds to an AST application of some constant for @{text foo}
  1111   and variable arguments for @{text "'a"} and @{text "'b"}.  Note that
  1112   the postfix application is merely a feature of the concrete syntax,
  1113   while in the AST the constructor occurs in head position.\<close>
  1114 
  1115 
  1116 subsubsection \<open>Authentic syntax names\<close>
  1117 
  1118 text \<open>Naming constant entities within ASTs is another delicate
  1119   issue.  Unqualified names are resolved in the name space tables in
  1120   the last stage of parsing, after all translations have been applied.
  1121   Since syntax transformations do not know about this later name
  1122   resolution, there can be surprises in boundary cases.
  1123 
  1124   \emph{Authentic syntax names} for @{ML Ast.Constant} avoid this
  1125   problem: the fully-qualified constant name with a special prefix for
  1126   its formal category (@{text "class"}, @{text "type"}, @{text
  1127   "const"}, @{text "fixed"}) represents the information faithfully
  1128   within the untyped AST format.  Accidental overlap with free or
  1129   bound variables is excluded as well.  Authentic syntax names work
  1130   implicitly in the following situations:
  1131 
  1132   \begin{itemize}
  1133 
  1134   \item Input of term constants (or fixed variables) that are
  1135   introduced by concrete syntax via @{command notation}: the
  1136   correspondence of a particular grammar production to some known term
  1137   entity is preserved.
  1138 
  1139   \item Input of type constants (constructors) and type classes ---
  1140   thanks to explicit syntactic distinction independently on the
  1141   context.
  1142 
  1143   \item Output of term constants, type constants, type classes ---
  1144   this information is already available from the internal term to be
  1145   printed.
  1146 
  1147   \end{itemize}
  1148 
  1149   In other words, syntax transformations that operate on input terms
  1150   written as prefix applications are difficult to make robust.
  1151   Luckily, this case rarely occurs in practice, because syntax forms
  1152   to be translated usually correspond to some concrete notation.\<close>
  1153 
  1154 
  1155 subsection \<open>Raw syntax and translations \label{sec:syn-trans}\<close>
  1156 
  1157 text \<open>
  1158   \begin{tabular}{rcll}
  1159     @{command_def "nonterminal"} & : & @{text "theory \<rightarrow> theory"} \\
  1160     @{command_def "syntax"} & : & @{text "theory \<rightarrow> theory"} \\
  1161     @{command_def "no_syntax"} & : & @{text "theory \<rightarrow> theory"} \\
  1162     @{command_def "translations"} & : & @{text "theory \<rightarrow> theory"} \\
  1163     @{command_def "no_translations"} & : & @{text "theory \<rightarrow> theory"} \\
  1164     @{attribute_def syntax_ast_trace} & : & @{text attribute} & default @{text false} \\
  1165     @{attribute_def syntax_ast_stats} & : & @{text attribute} & default @{text false} \\
  1166   \end{tabular}
  1167 
  1168   \medskip
  1169 
  1170   Unlike mixfix notation for existing formal entities
  1171   (\secref{sec:notation}), raw syntax declarations provide full access
  1172   to the priority grammar of the inner syntax, without any sanity
  1173   checks.  This includes additional syntactic categories (via
  1174   @{command nonterminal}) and free-form grammar productions (via
  1175   @{command syntax}).  Additional syntax translations (or macros, via
  1176   @{command translations}) are required to turn resulting parse trees
  1177   into proper representations of formal entities again.
  1178 
  1179   @{rail \<open>
  1180     @@{command nonterminal} (@{syntax name} + @'and')
  1181     ;
  1182     (@@{command syntax} | @@{command no_syntax}) @{syntax mode}? (constdecl +)
  1183     ;
  1184     (@@{command translations} | @@{command no_translations})
  1185       (transpat ('==' | '=>' | '<=' | '\<rightleftharpoons>' | '\<rightharpoonup>' | '\<leftharpoondown>') transpat +)
  1186     ;
  1187 
  1188     constdecl: @{syntax name} '::' @{syntax type} @{syntax mixfix}?
  1189     ;
  1190     mode: ('(' ( @{syntax name} | @'output' | @{syntax name} @'output' ) ')')
  1191     ;
  1192     transpat: ('(' @{syntax nameref} ')')? @{syntax string}
  1193   \<close>}
  1194 
  1195   \begin{description}
  1196 
  1197   \item @{command "nonterminal"}~@{text c} declares a type
  1198   constructor @{text c} (without arguments) to act as purely syntactic
  1199   type: a nonterminal symbol of the inner syntax.
  1200 
  1201   \item @{command "syntax"}~@{text "(mode) c :: \<sigma> (mx)"} augments the
  1202   priority grammar and the pretty printer table for the given print
  1203   mode (default @{verbatim \<open>""\<close>}). An optional keyword @{keyword_ref
  1204   "output"} means that only the pretty printer table is affected.
  1205 
  1206   Following \secref{sec:mixfix}, the mixfix annotation @{text "mx =
  1207   template ps q"} together with type @{text "\<sigma> = \<tau>\<^sub>1 \<Rightarrow> \<dots> \<tau>\<^sub>n \<Rightarrow> \<tau>"} and
  1208   specify a grammar production.  The @{text template} contains
  1209   delimiter tokens that surround @{text "n"} argument positions
  1210   (@{verbatim "_"}).  The latter correspond to nonterminal symbols
  1211   @{text "A\<^sub>i"} derived from the argument types @{text "\<tau>\<^sub>i"} as
  1212   follows:
  1213   \begin{itemize}
  1214 
  1215   \item @{text "prop"} if @{text "\<tau>\<^sub>i = prop"}
  1216 
  1217   \item @{text "logic"} if @{text "\<tau>\<^sub>i = (\<dots>)\<kappa>"} for logical type
  1218   constructor @{text "\<kappa> \<noteq> prop"}
  1219 
  1220   \item @{text any} if @{text "\<tau>\<^sub>i = \<alpha>"} for type variables
  1221 
  1222   \item @{text "\<kappa>"} if @{text "\<tau>\<^sub>i = \<kappa>"} for nonterminal @{text "\<kappa>"}
  1223   (syntactic type constructor)
  1224 
  1225   \end{itemize}
  1226 
  1227   Each @{text "A\<^sub>i"} is decorated by priority @{text "p\<^sub>i"} from the
  1228   given list @{text "ps"}; missing priorities default to 0.
  1229 
  1230   The resulting nonterminal of the production is determined similarly
  1231   from type @{text "\<tau>"}, with priority @{text "q"} and default 1000.
  1232 
  1233   \medskip Parsing via this production produces parse trees @{text
  1234   "t\<^sub>1, \<dots>, t\<^sub>n"} for the argument slots.  The resulting parse tree is
  1235   composed as @{text "c t\<^sub>1 \<dots> t\<^sub>n"}, by using the syntax constant @{text
  1236   "c"} of the syntax declaration.
  1237 
  1238   Such syntactic constants are invented on the spot, without formal
  1239   check wrt.\ existing declarations.  It is conventional to use plain
  1240   identifiers prefixed by a single underscore (e.g.\ @{text
  1241   "_foobar"}).  Names should be chosen with care, to avoid clashes
  1242   with other syntax declarations.
  1243 
  1244   \medskip The special case of copy production is specified by @{text
  1245   "c = "}@{verbatim \<open>""\<close>} (empty string).  It means that the
  1246   resulting parse tree @{text "t"} is copied directly, without any
  1247   further decoration.
  1248 
  1249   \item @{command "no_syntax"}~@{text "(mode) decls"} removes grammar
  1250   declarations (and translations) resulting from @{text decls}, which
  1251   are interpreted in the same manner as for @{command "syntax"} above.
  1252 
  1253   \item @{command "translations"}~@{text rules} specifies syntactic
  1254   translation rules (i.e.\ macros) as first-order rewrite rules on
  1255   ASTs (\secref{sec:ast}).  The theory context maintains two
  1256   independent lists translation rules: parse rules (@{verbatim "=>"}
  1257   or @{text "\<rightharpoonup>"}) and print rules (@{verbatim "<="} or @{text "\<leftharpoondown>"}).
  1258   For convenience, both can be specified simultaneously as parse~/
  1259   print rules (@{verbatim "=="} or @{text "\<rightleftharpoons>"}).
  1260 
  1261   Translation patterns may be prefixed by the syntactic category to be
  1262   used for parsing; the default is @{text logic} which means that
  1263   regular term syntax is used.  Both sides of the syntax translation
  1264   rule undergo parsing and parse AST translations
  1265   \secref{sec:tr-funs}, in order to perform some fundamental
  1266   normalization like @{text "\<lambda>x y. b \<leadsto> \<lambda>x. \<lambda>y. b"}, but other AST
  1267   translation rules are \emph{not} applied recursively here.
  1268 
  1269   When processing AST patterns, the inner syntax lexer runs in a
  1270   different mode that allows identifiers to start with underscore.
  1271   This accommodates the usual naming convention for auxiliary syntax
  1272   constants --- those that do not have a logical counter part --- by
  1273   allowing to specify arbitrary AST applications within the term
  1274   syntax, independently of the corresponding concrete syntax.
  1275 
  1276   Atomic ASTs are distinguished as @{ML Ast.Constant} versus @{ML
  1277   Ast.Variable} as follows: a qualified name or syntax constant
  1278   declared via @{command syntax}, or parse tree head of concrete
  1279   notation becomes @{ML Ast.Constant}, anything else @{ML
  1280   Ast.Variable}.  Note that @{text CONST} and @{text XCONST} within
  1281   the term language (\secref{sec:pure-grammar}) allow to enforce
  1282   treatment as constants.
  1283 
  1284   AST rewrite rules @{text "(lhs, rhs)"} need to obey the following
  1285   side-conditions:
  1286 
  1287   \begin{itemize}
  1288 
  1289   \item Rules must be left linear: @{text "lhs"} must not contain
  1290   repeated variables.\footnote{The deeper reason for this is that AST
  1291   equality is not well-defined: different occurrences of the ``same''
  1292   AST could be decorated differently by accidental type-constraints or
  1293   source position information, for example.}
  1294 
  1295   \item Every variable in @{text "rhs"} must also occur in @{text
  1296   "lhs"}.
  1297 
  1298   \end{itemize}
  1299 
  1300   \item @{command "no_translations"}~@{text rules} removes syntactic
  1301   translation rules, which are interpreted in the same manner as for
  1302   @{command "translations"} above.
  1303 
  1304   \item @{attribute syntax_ast_trace} and @{attribute
  1305   syntax_ast_stats} control diagnostic output in the AST normalization
  1306   process, when translation rules are applied to concrete input or
  1307   output.
  1308 
  1309   \end{description}
  1310 
  1311   Raw syntax and translations provides a slightly more low-level
  1312   access to the grammar and the form of resulting parse trees.  It is
  1313   often possible to avoid this untyped macro mechanism, and use
  1314   type-safe @{command abbreviation} or @{command notation} instead.
  1315   Some important situations where @{command syntax} and @{command
  1316   translations} are really need are as follows:
  1317 
  1318   \begin{itemize}
  1319 
  1320   \item Iterated replacement via recursive @{command translations}.
  1321   For example, consider list enumeration @{term "[a, b, c, d]"} as
  1322   defined in theory @{theory List} in Isabelle/HOL.
  1323 
  1324   \item Change of binding status of variables: anything beyond the
  1325   built-in @{keyword "binder"} mixfix annotation requires explicit
  1326   syntax translations.  For example, consider list filter
  1327   comprehension @{term "[x \<leftarrow> xs . P]"} as defined in theory @{theory
  1328   List} in Isabelle/HOL.
  1329 
  1330   \end{itemize}
  1331 \<close>
  1332 
  1333 subsubsection \<open>Applying translation rules\<close>
  1334 
  1335 text \<open>As a term is being parsed or printed, an AST is generated as
  1336   an intermediate form according to \figref{fig:parse-print}.  The AST
  1337   is normalized by applying translation rules in the manner of a
  1338   first-order term rewriting system.  We first examine how a single
  1339   rule is applied.
  1340 
  1341   Let @{text "t"} be the abstract syntax tree to be normalized and
  1342   @{text "(lhs, rhs)"} some translation rule.  A subtree @{text "u"}
  1343   of @{text "t"} is called \emph{redex} if it is an instance of @{text
  1344   "lhs"}; in this case the pattern @{text "lhs"} is said to match the
  1345   object @{text "u"}.  A redex matched by @{text "lhs"} may be
  1346   replaced by the corresponding instance of @{text "rhs"}, thus
  1347   \emph{rewriting} the AST @{text "t"}.  Matching requires some notion
  1348   of \emph{place-holders} in rule patterns: @{ML Ast.Variable} serves
  1349   this purpose.
  1350 
  1351   More precisely, the matching of the object @{text "u"} against the
  1352   pattern @{text "lhs"} is performed as follows:
  1353 
  1354   \begin{itemize}
  1355 
  1356   \item Objects of the form @{ML Ast.Variable}~@{text "x"} or @{ML
  1357   Ast.Constant}~@{text "x"} are matched by pattern @{ML
  1358   Ast.Constant}~@{text "x"}.  Thus all atomic ASTs in the object are
  1359   treated as (potential) constants, and a successful match makes them
  1360   actual constants even before name space resolution (see also
  1361   \secref{sec:ast}).
  1362 
  1363   \item Object @{text "u"} is matched by pattern @{ML
  1364   Ast.Variable}~@{text "x"}, binding @{text "x"} to @{text "u"}.
  1365 
  1366   \item Object @{ML Ast.Appl}~@{text "us"} is matched by @{ML
  1367   Ast.Appl}~@{text "ts"} if @{text "us"} and @{text "ts"} have the
  1368   same length and each corresponding subtree matches.
  1369 
  1370   \item In every other case, matching fails.
  1371 
  1372   \end{itemize}
  1373 
  1374   A successful match yields a substitution that is applied to @{text
  1375   "rhs"}, generating the instance that replaces @{text "u"}.
  1376 
  1377   Normalizing an AST involves repeatedly applying translation rules
  1378   until none are applicable.  This works yoyo-like: top-down,
  1379   bottom-up, top-down, etc.  At each subtree position, rules are
  1380   chosen in order of appearance in the theory definitions.
  1381 
  1382   The configuration options @{attribute syntax_ast_trace} and
  1383   @{attribute syntax_ast_stats} might help to understand this process
  1384   and diagnose problems.
  1385 
  1386   \begin{warn}
  1387   If syntax translation rules work incorrectly, the output of
  1388   @{command_ref print_syntax} with its \emph{rules} sections reveals the
  1389   actual internal forms of AST pattern, without potentially confusing
  1390   concrete syntax.  Recall that AST constants appear as quoted strings
  1391   and variables without quotes.
  1392   \end{warn}
  1393 
  1394   \begin{warn}
  1395   If @{attribute_ref eta_contract} is set to @{text "true"}, terms
  1396   will be @{text "\<eta>"}-contracted \emph{before} the AST rewriter sees
  1397   them.  Thus some abstraction nodes needed for print rules to match
  1398   may vanish.  For example, @{text "Ball A (\<lambda>x. P x)"} would contract
  1399   to @{text "Ball A P"} and the standard print rule would fail to
  1400   apply.  This problem can be avoided by hand-written ML translation
  1401   functions (see also \secref{sec:tr-funs}), which is in fact the same
  1402   mechanism used in built-in @{keyword "binder"} declarations.
  1403   \end{warn}
  1404 \<close>
  1405 
  1406 
  1407 subsection \<open>Syntax translation functions \label{sec:tr-funs}\<close>
  1408 
  1409 text \<open>
  1410   \begin{matharray}{rcl}
  1411     @{command_def "parse_ast_translation"} & : & @{text "theory \<rightarrow> theory"} \\
  1412     @{command_def "parse_translation"} & : & @{text "theory \<rightarrow> theory"} \\
  1413     @{command_def "print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
  1414     @{command_def "typed_print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
  1415     @{command_def "print_ast_translation"} & : & @{text "theory \<rightarrow> theory"} \\
  1416     @{ML_antiquotation_def "class_syntax"} & : & @{text "ML antiquotation"} \\
  1417     @{ML_antiquotation_def "type_syntax"} & : & @{text "ML antiquotation"} \\
  1418     @{ML_antiquotation_def "const_syntax"} & : & @{text "ML antiquotation"} \\
  1419     @{ML_antiquotation_def "syntax_const"} & : & @{text "ML antiquotation"} \\
  1420   \end{matharray}
  1421 
  1422   Syntax translation functions written in ML admit almost arbitrary
  1423   manipulations of inner syntax, at the expense of some complexity and
  1424   obscurity in the implementation.
  1425 
  1426   @{rail \<open>
  1427   ( @@{command parse_ast_translation} | @@{command parse_translation} |
  1428     @@{command print_translation} | @@{command typed_print_translation} |
  1429     @@{command print_ast_translation}) @{syntax text}
  1430   ;
  1431   (@@{ML_antiquotation class_syntax} |
  1432    @@{ML_antiquotation type_syntax} |
  1433    @@{ML_antiquotation const_syntax} |
  1434    @@{ML_antiquotation syntax_const}) name
  1435   \<close>}
  1436 
  1437   \begin{description}
  1438 
  1439   \item @{command parse_translation} etc. declare syntax translation
  1440   functions to the theory.  Any of these commands have a single
  1441   @{syntax text} argument that refers to an ML expression of
  1442   appropriate type as follows:
  1443 
  1444   \medskip
  1445   {\footnotesize
  1446   \begin{tabular}{l}
  1447   @{command parse_ast_translation} : \\
  1448   \quad @{ML_type "(string * (Proof.context -> Ast.ast list -> Ast.ast)) list"} \\
  1449   @{command parse_translation} : \\
  1450   \quad @{ML_type "(string * (Proof.context -> term list -> term)) list"} \\
  1451   @{command print_translation} : \\
  1452   \quad @{ML_type "(string * (Proof.context -> term list -> term)) list"} \\
  1453   @{command typed_print_translation} : \\
  1454   \quad @{ML_type "(string * (Proof.context -> typ -> term list -> term)) list"} \\
  1455   @{command print_ast_translation} : \\
  1456   \quad @{ML_type "(string * (Proof.context -> Ast.ast list -> Ast.ast)) list"} \\
  1457   \end{tabular}}
  1458   \medskip
  1459 
  1460   The argument list consists of @{text "(c, tr)"} pairs, where @{text
  1461   "c"} is the syntax name of the formal entity involved, and @{text
  1462   "tr"} a function that translates a syntax form @{text "c args"} into
  1463   @{text "tr ctxt args"} (depending on the context).  The Isabelle/ML
  1464   naming convention for parse translations is @{text "c_tr"} and for
  1465   print translations @{text "c_tr'"}.
  1466 
  1467   The @{command_ref print_syntax} command displays the sets of names
  1468   associated with the translation functions of a theory under @{text
  1469   "parse_ast_translation"} etc.
  1470 
  1471   \item @{text "@{class_syntax c}"}, @{text "@{type_syntax c}"},
  1472   @{text "@{const_syntax c}"} inline the authentic syntax name of the
  1473   given formal entities into the ML source.  This is the
  1474   fully-qualified logical name prefixed by a special marker to
  1475   indicate its kind: thus different logical name spaces are properly
  1476   distinguished within parse trees.
  1477 
  1478   \item @{text "@{const_syntax c}"} inlines the name @{text "c"} of
  1479   the given syntax constant, having checked that it has been declared
  1480   via some @{command syntax} commands within the theory context.  Note
  1481   that the usual naming convention makes syntax constants start with
  1482   underscore, to reduce the chance of accidental clashes with other
  1483   names occurring in parse trees (unqualified constants etc.).
  1484 
  1485   \end{description}
  1486 \<close>
  1487 
  1488 
  1489 subsubsection \<open>The translation strategy\<close>
  1490 
  1491 text \<open>The different kinds of translation functions are invoked during
  1492   the transformations between parse trees, ASTs and syntactic terms
  1493   (cf.\ \figref{fig:parse-print}).  Whenever a combination of the form
  1494   @{text "c x\<^sub>1 \<dots> x\<^sub>n"} is encountered, and a translation function
  1495   @{text "f"} of appropriate kind is declared for @{text "c"}, the
  1496   result is produced by evaluation of @{text "f [x\<^sub>1, \<dots>, x\<^sub>n]"} in ML.
  1497 
  1498   For AST translations, the arguments @{text "x\<^sub>1, \<dots>, x\<^sub>n"} are ASTs.  A
  1499   combination has the form @{ML "Ast.Constant"}~@{text "c"} or @{ML
  1500   "Ast.Appl"}~@{text "["}@{ML Ast.Constant}~@{text "c, x\<^sub>1, \<dots>, x\<^sub>n]"}.
  1501   For term translations, the arguments are terms and a combination has
  1502   the form @{ML Const}~@{text "(c, \<tau>)"} or @{ML Const}~@{text "(c, \<tau>)
  1503   $ x\<^sub>1 $ \<dots> $ x\<^sub>n"}.  Terms allow more sophisticated transformations
  1504   than ASTs do, typically involving abstractions and bound
  1505   variables. \emph{Typed} print translations may even peek at the type
  1506   @{text "\<tau>"} of the constant they are invoked on, although some
  1507   information might have been suppressed for term output already.
  1508 
  1509   Regardless of whether they act on ASTs or terms, translation
  1510   functions called during the parsing process differ from those for
  1511   printing in their overall behaviour:
  1512 
  1513   \begin{description}
  1514 
  1515   \item [Parse translations] are applied bottom-up.  The arguments are
  1516   already in translated form.  The translations must not fail;
  1517   exceptions trigger an error message.  There may be at most one
  1518   function associated with any syntactic name.
  1519 
  1520   \item [Print translations] are applied top-down.  They are supplied
  1521   with arguments that are partly still in internal form.  The result
  1522   again undergoes translation; therefore a print translation should
  1523   not introduce as head the very constant that invoked it.  The
  1524   function may raise exception @{ML Match} to indicate failure; in
  1525   this event it has no effect.  Multiple functions associated with
  1526   some syntactic name are tried in the order of declaration in the
  1527   theory.
  1528 
  1529   \end{description}
  1530 
  1531   Only constant atoms --- constructor @{ML Ast.Constant} for ASTs and
  1532   @{ML Const} for terms --- can invoke translation functions.  This
  1533   means that parse translations can only be associated with parse tree
  1534   heads of concrete syntax, or syntactic constants introduced via
  1535   other translations.  For plain identifiers within the term language,
  1536   the status of constant versus variable is not yet know during
  1537   parsing.  This is in contrast to print translations, where constants
  1538   are explicitly known from the given term in its fully internal form.
  1539 \<close>
  1540 
  1541 
  1542 subsection \<open>Built-in syntax transformations\<close>
  1543 
  1544 text \<open>
  1545   Here are some further details of the main syntax transformation
  1546   phases of \figref{fig:parse-print}.
  1547 \<close>
  1548 
  1549 
  1550 subsubsection \<open>Transforming parse trees to ASTs\<close>
  1551 
  1552 text \<open>The parse tree is the raw output of the parser.  It is
  1553   transformed into an AST according to some basic scheme that may be
  1554   augmented by AST translation functions as explained in
  1555   \secref{sec:tr-funs}.
  1556 
  1557   The parse tree is constructed by nesting the right-hand sides of the
  1558   productions used to recognize the input.  Such parse trees are
  1559   simply lists of tokens and constituent parse trees, the latter
  1560   representing the nonterminals of the productions.  Ignoring AST
  1561   translation functions, parse trees are transformed to ASTs by
  1562   stripping out delimiters and copy productions, while retaining some
  1563   source position information from input tokens.
  1564 
  1565   The Pure syntax provides predefined AST translations to make the
  1566   basic @{text "\<lambda>"}-term structure more apparent within the
  1567   (first-order) AST representation, and thus facilitate the use of
  1568   @{command translations} (see also \secref{sec:syn-trans}).  This
  1569   covers ordinary term application, type application, nested
  1570   abstraction, iterated meta implications and function types.  The
  1571   effect is illustrated on some representative input strings is as
  1572   follows:
  1573 
  1574   \begin{center}
  1575   \begin{tabular}{ll}
  1576   input source & AST \\
  1577   \hline
  1578   @{text "f x y z"} & @{verbatim "(f x y z)"} \\
  1579   @{text "'a ty"} & @{verbatim "(ty 'a)"} \\
  1580   @{text "('a, 'b)ty"} & @{verbatim "(ty 'a 'b)"} \\
  1581   @{text "\<lambda>x y z. t"} & @{verbatim \<open>("_abs" x ("_abs" y ("_abs" z t)))\<close>} \\
  1582   @{text "\<lambda>x :: 'a. t"} & @{verbatim \<open>("_abs" ("_constrain" x 'a) t)\<close>} \\
  1583   @{text "\<lbrakk>P; Q; R\<rbrakk> \<Longrightarrow> S"} & @{verbatim \<open>("Pure.imp" P ("Pure.imp" Q ("Pure.imp" R S)))\<close>} \\
  1584    @{text "['a, 'b, 'c] \<Rightarrow> 'd"} & @{verbatim \<open>("fun" 'a ("fun" 'b ("fun" 'c 'd)))\<close>} \\
  1585   \end{tabular}
  1586   \end{center}
  1587 
  1588   Note that type and sort constraints may occur in further places ---
  1589   translations need to be ready to cope with them.  The built-in
  1590   syntax transformation from parse trees to ASTs insert additional
  1591   constraints that represent source positions.
  1592 \<close>
  1593 
  1594 
  1595 subsubsection \<open>Transforming ASTs to terms\<close>
  1596 
  1597 text \<open>After application of macros (\secref{sec:syn-trans}), the AST
  1598   is transformed into a term.  This term still lacks proper type
  1599   information, but it might contain some constraints consisting of
  1600   applications with head @{verbatim "_constrain"}, where the second
  1601   argument is a type encoded as a pre-term within the syntax.  Type
  1602   inference later introduces correct types, or indicates type errors
  1603   in the input.
  1604 
  1605   Ignoring parse translations, ASTs are transformed to terms by
  1606   mapping AST constants to term constants, AST variables to term
  1607   variables or constants (according to the name space), and AST
  1608   applications to iterated term applications.
  1609 
  1610   The outcome is still a first-order term.  Proper abstractions and
  1611   bound variables are introduced by parse translations associated with
  1612   certain syntax constants.  Thus @{verbatim \<open>("_abs" x x)\<close>} eventually
  1613   becomes a de-Bruijn term @{verbatim \<open>Abs ("x", _, Bound 0)\<close>}.
  1614 \<close>
  1615 
  1616 
  1617 subsubsection \<open>Printing of terms\<close>
  1618 
  1619 text \<open>The output phase is essentially the inverse of the input
  1620   phase.  Terms are translated via abstract syntax trees into
  1621   pretty-printed text.
  1622 
  1623   Ignoring print translations, the transformation maps term constants,
  1624   variables and applications to the corresponding constructs on ASTs.
  1625   Abstractions are mapped to applications of the special constant
  1626   @{verbatim "_abs"} as seen before.  Type constraints are represented
  1627   via special @{verbatim "_constrain"} forms, according to various
  1628   policies of type annotation determined elsewhere.  Sort constraints
  1629   of type variables are handled in a similar fashion.
  1630 
  1631   After application of macros (\secref{sec:syn-trans}), the AST is
  1632   finally pretty-printed.  The built-in print AST translations reverse
  1633   the corresponding parse AST translations.
  1634 
  1635   \medskip For the actual printing process, the priority grammar
  1636   (\secref{sec:priority-grammar}) plays a vital role: productions are
  1637   used as templates for pretty printing, with argument slots stemming
  1638   from nonterminals, and syntactic sugar stemming from literal tokens.
  1639 
  1640   Each AST application with constant head @{text "c"} and arguments
  1641   @{text "t\<^sub>1"}, \dots, @{text "t\<^sub>n"} (for @{text "n = 0"} the AST is
  1642   just the constant @{text "c"} itself) is printed according to the
  1643   first grammar production of result name @{text "c"}.  The required
  1644   syntax priority of the argument slot is given by its nonterminal
  1645   @{text "A\<^sup>(\<^sup>p\<^sup>)"}.  The argument @{text "t\<^sub>i"} that corresponds to the
  1646   position of @{text "A\<^sup>(\<^sup>p\<^sup>)"} is printed recursively, and then put in
  1647   parentheses \emph{if} its priority @{text "p"} requires this.  The
  1648   resulting output is concatenated with the syntactic sugar according
  1649   to the grammar production.
  1650 
  1651   If an AST application @{text "(c x\<^sub>1 \<dots> x\<^sub>m)"} has more arguments than
  1652   the corresponding production, it is first split into @{text "((c x\<^sub>1
  1653   \<dots> x\<^sub>n) x\<^sub>n\<^sub>+\<^sub>1 \<dots> x\<^sub>m)"} and then printed recursively as above.
  1654 
  1655   Applications with too few arguments or with non-constant head or
  1656   without a corresponding production are printed in prefix-form like
  1657   @{text "f t\<^sub>1 \<dots> t\<^sub>n"} for terms.
  1658 
  1659   Multiple productions associated with some name @{text "c"} are tried
  1660   in order of appearance within the grammar.  An occurrence of some
  1661   AST variable @{text "x"} is printed as @{text "x"} outright.
  1662 
  1663   \medskip White space is \emph{not} inserted automatically.  If
  1664   blanks (or breaks) are required to separate tokens, they need to be
  1665   specified in the mixfix declaration (\secref{sec:mixfix}).
  1666 \<close>
  1667 
  1668 end