src/Doc/Isar_Ref/Inner_Syntax.thy
author wenzelm
Wed Mar 25 11:39:52 2015 +0100 (2015-03-25)
changeset 59809 87641097d0f3
parent 59783 00b62aa9f430
child 60254 52110106c0ca
permissions -rw-r--r--
tuned signature;
wenzelm@28762
     1
theory Inner_Syntax
wenzelm@42651
     2
imports Base Main
wenzelm@28762
     3
begin
wenzelm@28762
     4
wenzelm@58618
     5
chapter \<open>Inner syntax --- the term language \label{ch:inner-syntax}\<close>
wenzelm@28762
     6
wenzelm@58618
     7
text \<open>The inner syntax of Isabelle provides concrete notation for
wenzelm@46282
     8
  the main entities of the logical framework, notably @{text
wenzelm@46282
     9
  "\<lambda>"}-terms with types and type classes.  Applications may either
wenzelm@46282
    10
  extend existing syntactic categories by additional notation, or
wenzelm@46282
    11
  define new sub-languages that are linked to the standard term
wenzelm@46282
    12
  language via some explicit markers.  For example @{verbatim
wenzelm@46282
    13
  FOO}~@{text "foo"} could embed the syntax corresponding to some
wenzelm@46282
    14
  user-defined nonterminal @{text "foo"} --- within the bounds of the
wenzelm@46282
    15
  given lexical syntax of Isabelle/Pure.
wenzelm@46282
    16
wenzelm@46282
    17
  The most basic way to specify concrete syntax for logical entities
wenzelm@46282
    18
  works via mixfix annotations (\secref{sec:mixfix}), which are
wenzelm@46282
    19
  usually given as part of the original declaration or via explicit
wenzelm@46282
    20
  notation commands later on (\secref{sec:notation}).  This already
wenzelm@46282
    21
  covers many needs of concrete syntax without having to understand
wenzelm@46282
    22
  the full complexity of inner syntax layers.
wenzelm@46282
    23
wenzelm@46282
    24
  Further details of the syntax engine involve the classical
wenzelm@46282
    25
  distinction of lexical language versus context-free grammar (see
wenzelm@46282
    26
  \secref{sec:pure-syntax}), and various mechanisms for \emph{syntax
wenzelm@48113
    27
  transformations} (see \secref{sec:syntax-transformations}).
wenzelm@58618
    28
\<close>
wenzelm@46282
    29
wenzelm@46282
    30
wenzelm@58618
    31
section \<open>Printing logical entities\<close>
wenzelm@28762
    32
wenzelm@58618
    33
subsection \<open>Diagnostic commands \label{sec:print-diag}\<close>
wenzelm@28762
    34
wenzelm@58618
    35
text \<open>
wenzelm@28762
    36
  \begin{matharray}{rcl}
wenzelm@28766
    37
    @{command_def "typ"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28762
    38
    @{command_def "term"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28762
    39
    @{command_def "prop"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28766
    40
    @{command_def "thm"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28762
    41
    @{command_def "prf"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28762
    42
    @{command_def "full_prf"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@52430
    43
    @{command_def "print_state"}@{text "\<^sup>*"} & : & @{text "any \<rightarrow>"} \\
wenzelm@28762
    44
  \end{matharray}
wenzelm@28762
    45
wenzelm@28762
    46
  These diagnostic commands assist interactive development by printing
wenzelm@28762
    47
  internal logical entities in a human-readable fashion.
wenzelm@28762
    48
wenzelm@55112
    49
  @{rail \<open>
wenzelm@48792
    50
    @@{command typ} @{syntax modes}? @{syntax type} ('::' @{syntax sort})?
wenzelm@28762
    51
    ;
wenzelm@42596
    52
    @@{command term} @{syntax modes}? @{syntax term}
wenzelm@28762
    53
    ;
wenzelm@42596
    54
    @@{command prop} @{syntax modes}? @{syntax prop}
wenzelm@28762
    55
    ;
wenzelm@42596
    56
    @@{command thm} @{syntax modes}? @{syntax thmrefs}
wenzelm@28762
    57
    ;
wenzelm@42596
    58
    ( @@{command prf} | @@{command full_prf} ) @{syntax modes}? @{syntax thmrefs}?
wenzelm@28762
    59
    ;
wenzelm@52430
    60
    @@{command print_state} @{syntax modes}?
wenzelm@28762
    61
    ;
wenzelm@42596
    62
    @{syntax_def modes}: '(' (@{syntax name} + ) ')'
wenzelm@55112
    63
  \<close>}
wenzelm@28762
    64
wenzelm@28762
    65
  \begin{description}
wenzelm@28762
    66
wenzelm@48792
    67
  \item @{command "typ"}~@{text \<tau>} reads and prints a type expression
wenzelm@48792
    68
  according to the current context.
wenzelm@48792
    69
wenzelm@48792
    70
  \item @{command "typ"}~@{text "\<tau> :: s"} uses type-inference to
wenzelm@48792
    71
  determine the most general way to make @{text "\<tau>"} conform to sort
wenzelm@48792
    72
  @{text "s"}.  For concrete @{text "\<tau>"} this checks if the type
wenzelm@48792
    73
  belongs to that sort.  Dummy type parameters ``@{text "_"}''
wenzelm@48792
    74
  (underscore) are assigned to fresh type variables with most general
wenzelm@48792
    75
  sorts, according to the principles of type-inference.
wenzelm@28766
    76
wenzelm@28766
    77
  \item @{command "term"}~@{text t} and @{command "prop"}~@{text \<phi>}
wenzelm@28766
    78
  read, type-check and print terms or propositions according to the
wenzelm@28766
    79
  current theory or proof context; the inferred type of @{text t} is
wenzelm@28766
    80
  output as well.  Note that these commands are also useful in
wenzelm@28766
    81
  inspecting the current environment of term abbreviations.
wenzelm@28762
    82
wenzelm@28762
    83
  \item @{command "thm"}~@{text "a\<^sub>1 \<dots> a\<^sub>n"} retrieves
wenzelm@28762
    84
  theorems from the current theory or proof context.  Note that any
wenzelm@28762
    85
  attributes included in the theorem specifications are applied to a
wenzelm@28762
    86
  temporary context derived from the current theory or proof; the
wenzelm@28762
    87
  result is discarded, i.e.\ attributes involved in @{text "a\<^sub>1,
wenzelm@28762
    88
  \<dots>, a\<^sub>n"} do not have any permanent effect.
wenzelm@28762
    89
wenzelm@28762
    90
  \item @{command "prf"} displays the (compact) proof term of the
wenzelm@28762
    91
  current proof state (if present), or of the given theorems. Note
wenzelm@28762
    92
  that this requires proof terms to be switched on for the current
wenzelm@28762
    93
  object logic (see the ``Proof terms'' section of the Isabelle
wenzelm@28762
    94
  reference manual for information on how to do this).
wenzelm@28762
    95
wenzelm@28762
    96
  \item @{command "full_prf"} is like @{command "prf"}, but displays
wenzelm@28762
    97
  the full proof term, i.e.\ also displays information omitted in the
wenzelm@28762
    98
  compact proof term, which is denoted by ``@{text _}'' placeholders
wenzelm@28762
    99
  there.
wenzelm@28762
   100
wenzelm@52430
   101
  \item @{command "print_state"} prints the current proof state (if
wenzelm@52430
   102
  present), including current facts and goals.
wenzelm@28766
   103
wenzelm@28762
   104
  \end{description}
wenzelm@28762
   105
wenzelm@28762
   106
  All of the diagnostic commands above admit a list of @{text modes}
wenzelm@42926
   107
  to be specified, which is appended to the current print mode; see
wenzelm@46284
   108
  also \secref{sec:print-modes}.  Thus the output behavior may be
wenzelm@46284
   109
  modified according to particular print mode features.  For example,
wenzelm@52430
   110
  @{command "print_state"}~@{text "(latex xsymbols)"} prints the
wenzelm@52430
   111
  current proof state with mathematical symbols and special characters
wenzelm@46284
   112
  represented in {\LaTeX} source, according to the Isabelle style
wenzelm@58552
   113
  @{cite "isabelle-sys"}.
wenzelm@28762
   114
wenzelm@28762
   115
  Note that antiquotations (cf.\ \secref{sec:antiq}) provide a more
wenzelm@28762
   116
  systematic way to include formal items into the printed text
wenzelm@28762
   117
  document.
wenzelm@58618
   118
\<close>
wenzelm@28762
   119
wenzelm@28762
   120
wenzelm@58618
   121
subsection \<open>Details of printed content\<close>
wenzelm@28763
   122
wenzelm@58618
   123
text \<open>
wenzelm@42655
   124
  \begin{tabular}{rcll}
wenzelm@49699
   125
    @{attribute_def show_markup} & : & @{text attribute} \\
wenzelm@42655
   126
    @{attribute_def show_types} & : & @{text attribute} & default @{text false} \\
wenzelm@42655
   127
    @{attribute_def show_sorts} & : & @{text attribute} & default @{text false} \\
wenzelm@42655
   128
    @{attribute_def show_consts} & : & @{text attribute} & default @{text false} \\
wenzelm@42655
   129
    @{attribute_def show_abbrevs} & : & @{text attribute} & default @{text true} \\
wenzelm@42655
   130
    @{attribute_def show_brackets} & : & @{text attribute} & default @{text false} \\
wenzelm@42669
   131
    @{attribute_def names_long} & : & @{text attribute} & default @{text false} \\
wenzelm@42669
   132
    @{attribute_def names_short} & : & @{text attribute} & default @{text false} \\
wenzelm@42669
   133
    @{attribute_def names_unique} & : & @{text attribute} & default @{text true} \\
wenzelm@42655
   134
    @{attribute_def eta_contract} & : & @{text attribute} & default @{text true} \\
wenzelm@42655
   135
    @{attribute_def goals_limit} & : & @{text attribute} & default @{text 10} \\
wenzelm@42655
   136
    @{attribute_def show_main_goal} & : & @{text attribute} & default @{text false} \\
wenzelm@42655
   137
    @{attribute_def show_hyps} & : & @{text attribute} & default @{text false} \\
wenzelm@42655
   138
    @{attribute_def show_tags} & : & @{text attribute} & default @{text false} \\
wenzelm@42655
   139
    @{attribute_def show_question_marks} & : & @{text attribute} & default @{text true} \\
wenzelm@42655
   140
  \end{tabular}
wenzelm@42655
   141
  \medskip
wenzelm@28763
   142
wenzelm@42655
   143
  These configuration options control the detail of information that
wenzelm@42655
   144
  is displayed for types, terms, theorems, goals etc.  See also
wenzelm@42655
   145
  \secref{sec:config}.
wenzelm@28765
   146
wenzelm@28763
   147
  \begin{description}
wenzelm@28763
   148
wenzelm@49699
   149
  \item @{attribute show_markup} controls direct inlining of markup
wenzelm@49699
   150
  into the printed representation of formal entities --- notably type
wenzelm@49699
   151
  and sort constraints.  This enables Prover IDE users to retrieve
wenzelm@49699
   152
  that information via tooltips or popups while hovering with the
wenzelm@49699
   153
  mouse over the output window, for example.  Consequently, this
wenzelm@58842
   154
  option is enabled by default for Isabelle/jEdit.
wenzelm@49699
   155
wenzelm@42655
   156
  \item @{attribute show_types} and @{attribute show_sorts} control
wenzelm@42655
   157
  printing of type constraints for term variables, and sort
wenzelm@42655
   158
  constraints for type variables.  By default, neither of these is
wenzelm@42655
   159
  shown in output.  If @{attribute show_sorts} is enabled, types are
wenzelm@49699
   160
  always shown as well.  In Isabelle/jEdit, manual setting of these
wenzelm@49699
   161
  options is normally not required thanks to @{attribute show_markup}
wenzelm@49699
   162
  above.
wenzelm@28763
   163
wenzelm@28763
   164
  Note that displaying types and sorts may explain why a polymorphic
wenzelm@28763
   165
  inference rule fails to resolve with some goal, or why a rewrite
wenzelm@28763
   166
  rule does not apply as expected.
wenzelm@28763
   167
wenzelm@42655
   168
  \item @{attribute show_consts} controls printing of types of
wenzelm@42655
   169
  constants when displaying a goal state.
wenzelm@28765
   170
wenzelm@28765
   171
  Note that the output can be enormous, because polymorphic constants
wenzelm@28765
   172
  often occur at several different type instances.
wenzelm@28763
   173
wenzelm@42655
   174
  \item @{attribute show_abbrevs} controls folding of constant
wenzelm@42655
   175
  abbreviations.
wenzelm@40879
   176
wenzelm@42655
   177
  \item @{attribute show_brackets} controls bracketing in pretty
wenzelm@42655
   178
  printed output.  If enabled, all sub-expressions of the pretty
wenzelm@28765
   179
  printing tree will be parenthesized, even if this produces malformed
wenzelm@28765
   180
  term syntax!  This crude way of showing the internal structure of
wenzelm@28765
   181
  pretty printed entities may occasionally help to diagnose problems
wenzelm@28765
   182
  with operator priorities, for example.
wenzelm@28763
   183
wenzelm@42669
   184
  \item @{attribute names_long}, @{attribute names_short}, and
wenzelm@42669
   185
  @{attribute names_unique} control the way of printing fully
wenzelm@42358
   186
  qualified internal names in external form.  See also
wenzelm@42358
   187
  \secref{sec:antiq} for the document antiquotation options of the
wenzelm@42358
   188
  same names.
wenzelm@42358
   189
wenzelm@42655
   190
  \item @{attribute eta_contract} controls @{text "\<eta>"}-contracted
wenzelm@42655
   191
  printing of terms.
wenzelm@28763
   192
wenzelm@28763
   193
  The @{text \<eta>}-contraction law asserts @{prop "(\<lambda>x. f x) \<equiv> f"},
wenzelm@28763
   194
  provided @{text x} is not free in @{text f}.  It asserts
wenzelm@28763
   195
  \emph{extensionality} of functions: @{prop "f \<equiv> g"} if @{prop "f x \<equiv>
wenzelm@28763
   196
  g x"} for all @{text x}.  Higher-order unification frequently puts
wenzelm@28763
   197
  terms into a fully @{text \<eta>}-expanded form.  For example, if @{text
wenzelm@28763
   198
  F} has type @{text "(\<tau> \<Rightarrow> \<tau>) \<Rightarrow> \<tau>"} then its expanded form is @{term
wenzelm@28763
   199
  "\<lambda>h. F (\<lambda>x. h x)"}.
wenzelm@28763
   200
wenzelm@42655
   201
  Enabling @{attribute eta_contract} makes Isabelle perform @{text
wenzelm@28763
   202
  \<eta>}-contractions before printing, so that @{term "\<lambda>h. F (\<lambda>x. h x)"}
wenzelm@28763
   203
  appears simply as @{text F}.
wenzelm@28763
   204
wenzelm@28763
   205
  Note that the distinction between a term and its @{text \<eta>}-expanded
wenzelm@28765
   206
  form occasionally matters.  While higher-order resolution and
wenzelm@28765
   207
  rewriting operate modulo @{text "\<alpha>\<beta>\<eta>"}-conversion, some other tools
wenzelm@28765
   208
  might look at terms more discretely.
wenzelm@28763
   209
wenzelm@42655
   210
  \item @{attribute goals_limit} controls the maximum number of
wenzelm@51960
   211
  subgoals to be printed.
wenzelm@28763
   212
wenzelm@42655
   213
  \item @{attribute show_main_goal} controls whether the main result
wenzelm@42655
   214
  to be proven should be displayed.  This information might be
wenzelm@39130
   215
  relevant for schematic goals, to inspect the current claim that has
wenzelm@39130
   216
  been synthesized so far.
wenzelm@28763
   217
wenzelm@42655
   218
  \item @{attribute show_hyps} controls printing of implicit
wenzelm@42655
   219
  hypotheses of local facts.  Normally, only those hypotheses are
wenzelm@42655
   220
  displayed that are \emph{not} covered by the assumptions of the
wenzelm@42655
   221
  current context: this situation indicates a fault in some tool being
wenzelm@42655
   222
  used.
wenzelm@28763
   223
wenzelm@42655
   224
  By enabling @{attribute show_hyps}, output of \emph{all} hypotheses
wenzelm@42655
   225
  can be enforced, which is occasionally useful for diagnostic
wenzelm@42655
   226
  purposes.
wenzelm@28763
   227
wenzelm@42655
   228
  \item @{attribute show_tags} controls printing of extra annotations
wenzelm@42655
   229
  within theorems, such as internal position information, or the case
wenzelm@42655
   230
  names being attached by the attribute @{attribute case_names}.
wenzelm@28765
   231
wenzelm@28765
   232
  Note that the @{attribute tagged} and @{attribute untagged}
wenzelm@28765
   233
  attributes provide low-level access to the collection of tags
wenzelm@28765
   234
  associated with a theorem.
wenzelm@28765
   235
wenzelm@42655
   236
  \item @{attribute show_question_marks} controls printing of question
wenzelm@42655
   237
  marks for schematic variables, such as @{text ?x}.  Only the leading
wenzelm@28765
   238
  question mark is affected; the remaining text is unchanged
wenzelm@28765
   239
  (including proper markup for schematic variables that might be
wenzelm@28765
   240
  relevant for user interfaces).
wenzelm@28765
   241
wenzelm@28765
   242
  \end{description}
wenzelm@58618
   243
\<close>
wenzelm@28765
   244
wenzelm@28765
   245
wenzelm@58618
   246
subsection \<open>Alternative print modes \label{sec:print-modes}\<close>
wenzelm@46284
   247
wenzelm@58618
   248
text \<open>
wenzelm@46284
   249
  \begin{mldecls}
wenzelm@46284
   250
    @{index_ML print_mode_value: "unit -> string list"} \\
wenzelm@46284
   251
    @{index_ML Print_Mode.with_modes: "string list -> ('a -> 'b) -> 'a -> 'b"} \\
wenzelm@46284
   252
  \end{mldecls}
wenzelm@46284
   253
wenzelm@46284
   254
  The \emph{print mode} facility allows one to modify various operations
wenzelm@46284
   255
  for printing.  Commands like @{command typ}, @{command term},
wenzelm@46284
   256
  @{command thm} (see \secref{sec:print-diag}) take additional print
wenzelm@46284
   257
  modes as optional argument.  The underlying ML operations are as
wenzelm@46284
   258
  follows.
wenzelm@46284
   259
wenzelm@46284
   260
  \begin{description}
wenzelm@46284
   261
wenzelm@46284
   262
  \item @{ML "print_mode_value ()"} yields the list of currently
wenzelm@46284
   263
  active print mode names.  This should be understood as symbolic
wenzelm@46284
   264
  representation of certain individual features for printing (with
wenzelm@46284
   265
  precedence from left to right).
wenzelm@46284
   266
wenzelm@46284
   267
  \item @{ML Print_Mode.with_modes}~@{text "modes f x"} evaluates
wenzelm@46284
   268
  @{text "f x"} in an execution context where the print mode is
wenzelm@46284
   269
  prepended by the given @{text "modes"}.  This provides a thread-safe
wenzelm@46284
   270
  way to augment print modes.  It is also monotonic in the set of mode
wenzelm@46284
   271
  names: it retains the default print mode that certain
wenzelm@46284
   272
  user-interfaces might have installed for their proper functioning!
wenzelm@46284
   273
wenzelm@46284
   274
  \end{description}
wenzelm@46284
   275
wenzelm@46284
   276
  \medskip The pretty printer for inner syntax maintains alternative
wenzelm@46284
   277
  mixfix productions for any print mode name invented by the user, say
wenzelm@46284
   278
  in commands like @{command notation} or @{command abbreviation}.
wenzelm@46284
   279
  Mode names can be arbitrary, but the following ones have a specific
wenzelm@46284
   280
  meaning by convention:
wenzelm@46284
   281
wenzelm@46284
   282
  \begin{itemize}
wenzelm@46284
   283
wenzelm@58724
   284
  \item @{verbatim \<open>""\<close>} (the empty string): default mode;
wenzelm@46284
   285
  implicitly active as last element in the list of modes.
wenzelm@46284
   286
wenzelm@46284
   287
  \item @{verbatim input}: dummy print mode that is never active; may
wenzelm@46284
   288
  be used to specify notation that is only available for input.
wenzelm@46284
   289
wenzelm@46284
   290
  \item @{verbatim internal}: dummy print mode that is never active;
wenzelm@46284
   291
  used internally in Isabelle/Pure.
wenzelm@46284
   292
wenzelm@46284
   293
  \item @{verbatim xsymbols}: enable proper mathematical symbols
wenzelm@46284
   294
  instead of ASCII art.\footnote{This traditional mode name stems from
wenzelm@58842
   295
  the ``X-Symbol'' package for classic Proof~General with XEmacs.}
wenzelm@46284
   296
wenzelm@46284
   297
  \item @{verbatim HTML}: additional mode that is active in HTML
wenzelm@46284
   298
  presentation of Isabelle theory sources; allows one to provide
wenzelm@46284
   299
  alternative output notation.
wenzelm@46284
   300
wenzelm@46284
   301
  \item @{verbatim latex}: additional mode that is active in {\LaTeX}
wenzelm@46284
   302
  document preparation of Isabelle theory sources; allows one to provide
wenzelm@46284
   303
  alternative output notation.
wenzelm@46284
   304
wenzelm@46284
   305
  \end{itemize}
wenzelm@58618
   306
\<close>
wenzelm@46284
   307
wenzelm@46284
   308
wenzelm@58618
   309
section \<open>Mixfix annotations \label{sec:mixfix}\<close>
wenzelm@28762
   310
wenzelm@58618
   311
text \<open>Mixfix annotations specify concrete \emph{inner syntax} of
wenzelm@35351
   312
  Isabelle types and terms.  Locally fixed parameters in toplevel
wenzelm@46290
   313
  theorem statements, locale and class specifications also admit
wenzelm@46290
   314
  mixfix annotations in a fairly uniform manner.  A mixfix annotation
wenzelm@50635
   315
  describes the concrete syntax, the translation to abstract
wenzelm@46290
   316
  syntax, and the pretty printing.  Special case annotations provide a
wenzelm@46290
   317
  simple means of specifying infix operators and binders.
wenzelm@46290
   318
wenzelm@58552
   319
  Isabelle mixfix syntax is inspired by {\OBJ} @{cite OBJ}.  It allows one
wenzelm@46290
   320
  to specify any context-free priority grammar, which is more general
wenzelm@46290
   321
  than the fixity declarations of ML and Prolog.
wenzelm@28762
   322
wenzelm@55112
   323
  @{rail \<open>
wenzelm@51654
   324
    @{syntax_def mixfix}: '('
wenzelm@58761
   325
      (@{syntax template} prios? @{syntax nat}? |
wenzelm@58761
   326
        (@'infix' | @'infixl' | @'infixr') @{syntax template} @{syntax nat} |
wenzelm@58761
   327
        @'binder' @{syntax template} prios? @{syntax nat} |
wenzelm@58761
   328
        @'structure') ')'
wenzelm@46290
   329
    ;
wenzelm@46290
   330
    template: string
wenzelm@46289
   331
    ;
wenzelm@42596
   332
    prios: '[' (@{syntax nat} + ',') ']'
wenzelm@55112
   333
  \<close>}
wenzelm@28762
   334
wenzelm@46290
   335
  The string given as @{text template} may include literal text,
wenzelm@46290
   336
  spacing, blocks, and arguments (denoted by ``@{text _}''); the
wenzelm@46290
   337
  special symbol ``@{verbatim "\<index>"}'' (printed as ``@{text "\<index>"}'')
wenzelm@51657
   338
  represents an index argument that specifies an implicit @{keyword
wenzelm@51657
   339
  "structure"} reference (see also \secref{sec:locale}).  Only locally
wenzelm@51657
   340
  fixed variables may be declared as @{keyword "structure"}.
wenzelm@51657
   341
wenzelm@51657
   342
  Infix and binder declarations provide common abbreviations for
wenzelm@51657
   343
  particular mixfix declarations.  So in practice, mixfix templates
wenzelm@51657
   344
  mostly degenerate to literal text for concrete syntax, such as
wenzelm@58618
   345
  ``@{verbatim "++"}'' for an infix symbol.\<close>
wenzelm@28762
   346
wenzelm@46290
   347
wenzelm@58618
   348
subsection \<open>The general mixfix form\<close>
wenzelm@46290
   349
wenzelm@58618
   350
text \<open>In full generality, mixfix declarations work as follows.
wenzelm@46290
   351
  Suppose a constant @{text "c :: \<tau>\<^sub>1 \<Rightarrow> \<dots> \<tau>\<^sub>n \<Rightarrow> \<tau>"} is annotated by
wenzelm@46290
   352
  @{text "(mixfix [p\<^sub>1, \<dots>, p\<^sub>n] p)"}, where @{text "mixfix"} is a string
wenzelm@46290
   353
  @{text "d\<^sub>0 _ d\<^sub>1 _ \<dots> _ d\<^sub>n"} consisting of delimiters that surround
wenzelm@46290
   354
  argument positions as indicated by underscores.
wenzelm@28762
   355
wenzelm@28762
   356
  Altogether this determines a production for a context-free priority
wenzelm@28762
   357
  grammar, where for each argument @{text "i"} the syntactic category
wenzelm@46292
   358
  is determined by @{text "\<tau>\<^sub>i"} (with priority @{text "p\<^sub>i"}), and the
wenzelm@46292
   359
  result category is determined from @{text "\<tau>"} (with priority @{text
wenzelm@46292
   360
  "p"}).  Priority specifications are optional, with default 0 for
wenzelm@46292
   361
  arguments and 1000 for the result.\footnote{Omitting priorities is
wenzelm@46292
   362
  prone to syntactic ambiguities unless the delimiter tokens determine
wenzelm@46292
   363
  fully bracketed notation, as in @{text "if _ then _ else _ fi"}.}
wenzelm@28762
   364
wenzelm@28762
   365
  Since @{text "\<tau>"} may be again a function type, the constant
wenzelm@28762
   366
  type scheme may have more argument positions than the mixfix
wenzelm@28762
   367
  pattern.  Printing a nested application @{text "c t\<^sub>1 \<dots> t\<^sub>m"} for
wenzelm@28762
   368
  @{text "m > n"} works by attaching concrete notation only to the
wenzelm@28762
   369
  innermost part, essentially by printing @{text "(c t\<^sub>1 \<dots> t\<^sub>n) \<dots> t\<^sub>m"}
wenzelm@28762
   370
  instead.  If a term has fewer arguments than specified in the mixfix
wenzelm@28762
   371
  template, the concrete syntax is ignored.
wenzelm@28762
   372
wenzelm@28762
   373
  \medskip A mixfix template may also contain additional directives
wenzelm@28762
   374
  for pretty printing, notably spaces, blocks, and breaks.  The
wenzelm@28762
   375
  general template format is a sequence over any of the following
wenzelm@28762
   376
  entities.
wenzelm@28762
   377
wenzelm@28778
   378
  \begin{description}
wenzelm@28762
   379
wenzelm@28771
   380
  \item @{text "d"} is a delimiter, namely a non-empty sequence of
wenzelm@28771
   381
  characters other than the following special characters:
wenzelm@28762
   382
wenzelm@28771
   383
  \smallskip
wenzelm@28771
   384
  \begin{tabular}{ll}
wenzelm@28771
   385
    @{verbatim "'"} & single quote \\
wenzelm@28771
   386
    @{verbatim "_"} & underscore \\
wenzelm@28771
   387
    @{text "\<index>"} & index symbol \\
wenzelm@28771
   388
    @{verbatim "("} & open parenthesis \\
wenzelm@28771
   389
    @{verbatim ")"} & close parenthesis \\
wenzelm@28771
   390
    @{verbatim "/"} & slash \\
wenzelm@28771
   391
  \end{tabular}
wenzelm@28771
   392
  \medskip
wenzelm@28762
   393
wenzelm@28771
   394
  \item @{verbatim "'"} escapes the special meaning of these
wenzelm@28771
   395
  meta-characters, producing a literal version of the following
wenzelm@28771
   396
  character, unless that is a blank.
wenzelm@28771
   397
wenzelm@28771
   398
  A single quote followed by a blank separates delimiters, without
wenzelm@28771
   399
  affecting printing, but input tokens may have additional white space
wenzelm@28771
   400
  here.
wenzelm@28771
   401
wenzelm@28771
   402
  \item @{verbatim "_"} is an argument position, which stands for a
wenzelm@28762
   403
  certain syntactic category in the underlying grammar.
wenzelm@28762
   404
wenzelm@28771
   405
  \item @{text "\<index>"} is an indexed argument position; this is the place
wenzelm@28771
   406
  where implicit structure arguments can be attached.
wenzelm@28762
   407
wenzelm@28771
   408
  \item @{text "s"} is a non-empty sequence of spaces for printing.
wenzelm@28771
   409
  This and the following specifications do not affect parsing at all.
wenzelm@28762
   410
wenzelm@28771
   411
  \item @{verbatim "("}@{text n} opens a pretty printing block.  The
wenzelm@28762
   412
  optional number specifies how much indentation to add when a line
wenzelm@28762
   413
  break occurs within the block.  If the parenthesis is not followed
wenzelm@28762
   414
  by digits, the indentation defaults to 0.  A block specified via
wenzelm@28771
   415
  @{verbatim "(00"} is unbreakable.
wenzelm@28762
   416
wenzelm@28771
   417
  \item @{verbatim ")"} closes a pretty printing block.
wenzelm@28762
   418
wenzelm@28771
   419
  \item @{verbatim "//"} forces a line break.
wenzelm@28762
   420
wenzelm@28771
   421
  \item @{verbatim "/"}@{text s} allows a line break.  Here @{text s}
wenzelm@28771
   422
  stands for the string of spaces (zero or more) right after the
wenzelm@28771
   423
  slash.  These spaces are printed if the break is \emph{not} taken.
wenzelm@28762
   424
wenzelm@28778
   425
  \end{description}
wenzelm@28762
   426
wenzelm@28762
   427
  The general idea of pretty printing with blocks and breaks is also
wenzelm@58552
   428
  described in @{cite "paulson-ml2"}; it goes back to @{cite "Oppen:1980"}.
wenzelm@58618
   429
\<close>
wenzelm@28762
   430
wenzelm@28762
   431
wenzelm@58618
   432
subsection \<open>Infixes\<close>
wenzelm@46290
   433
wenzelm@58618
   434
text \<open>Infix operators are specified by convenient short forms that
wenzelm@46290
   435
  abbreviate general mixfix annotations as follows:
wenzelm@46290
   436
wenzelm@46290
   437
  \begin{center}
wenzelm@46290
   438
  \begin{tabular}{lll}
wenzelm@46290
   439
wenzelm@58724
   440
  @{verbatim "("}@{keyword_def "infix"}~@{verbatim \<open>"\<close>}@{text sy}@{verbatim \<open>"\<close>} @{text "p"}@{verbatim ")"}
wenzelm@46290
   441
  & @{text "\<mapsto>"} &
wenzelm@58724
   442
  @{verbatim \<open>("(_\<close>}~@{text sy}@{verbatim \<open>/ _)" [\<close>}@{text "p + 1"}@{verbatim ","}~@{text "p + 1"}@{verbatim "]"}~@{text "p"}@{verbatim ")"} \\
wenzelm@58724
   443
  @{verbatim "("}@{keyword_def "infixl"}~@{verbatim \<open>"\<close>}@{text sy}@{verbatim \<open>"\<close>} @{text "p"}@{verbatim ")"}
wenzelm@46290
   444
  & @{text "\<mapsto>"} &
wenzelm@58724
   445
  @{verbatim \<open>("(_\<close>}~@{text sy}@{verbatim \<open>/ _)" [\<close>}@{text "p"}@{verbatim ","}~@{text "p + 1"}@{verbatim "]"}~@{text "p"}@{verbatim ")"} \\
wenzelm@58724
   446
  @{verbatim "("}@{keyword_def "infixr"}~@{verbatim \<open>"\<close>}@{text sy}@{verbatim \<open>"\<close>}~@{text "p"}@{verbatim ")"}
wenzelm@46290
   447
  & @{text "\<mapsto>"} &
wenzelm@58724
   448
  @{verbatim \<open>("(_\<close>}~@{text sy}@{verbatim \<open>/ _)" [\<close>}@{text "p + 1"}@{verbatim ","}~@{text "p"}@{verbatim "]"}~@{text "p"}@{verbatim ")"} \\
wenzelm@46290
   449
wenzelm@46290
   450
  \end{tabular}
wenzelm@46290
   451
  \end{center}
wenzelm@46290
   452
wenzelm@58724
   453
  The mixfix template @{verbatim \<open>"(_\<close>}~@{text sy}@{verbatim \<open>/ _)"\<close>}
wenzelm@46292
   454
  specifies two argument positions; the delimiter is preceded by a
wenzelm@46292
   455
  space and followed by a space or line break; the entire phrase is a
wenzelm@46292
   456
  pretty printing block.
wenzelm@46290
   457
wenzelm@46290
   458
  The alternative notation @{verbatim "op"}~@{text sy} is introduced
wenzelm@46290
   459
  in addition.  Thus any infix operator may be written in prefix form
wenzelm@46290
   460
  (as in ML), independently of the number of arguments in the term.
wenzelm@58618
   461
\<close>
wenzelm@46290
   462
wenzelm@46290
   463
wenzelm@58618
   464
subsection \<open>Binders\<close>
wenzelm@46290
   465
wenzelm@58618
   466
text \<open>A \emph{binder} is a variable-binding construct such as a
wenzelm@46290
   467
  quantifier.  The idea to formalize @{text "\<forall>x. b"} as @{text "All
wenzelm@46290
   468
  (\<lambda>x. b)"} for @{text "All :: ('a \<Rightarrow> bool) \<Rightarrow> bool"} already goes back
wenzelm@58552
   469
  to @{cite church40}.  Isabelle declarations of certain higher-order
wenzelm@46292
   470
  operators may be annotated with @{keyword_def "binder"} annotations
wenzelm@46292
   471
  as follows:
wenzelm@46290
   472
wenzelm@46290
   473
  \begin{center}
wenzelm@58724
   474
  @{text "c :: "}@{verbatim \<open>"\<close>}@{text "(\<tau>\<^sub>1 \<Rightarrow> \<tau>\<^sub>2) \<Rightarrow> \<tau>\<^sub>3"}@{verbatim \<open>"  (\<close>}@{keyword "binder"}~@{verbatim \<open>"\<close>}@{text "sy"}@{verbatim \<open>" [\<close>}@{text "p"}@{verbatim "]"}~@{text "q"}@{verbatim ")"}
wenzelm@46290
   475
  \end{center}
wenzelm@46290
   476
wenzelm@46290
   477
  This introduces concrete binder syntax @{text "sy x. b"}, where
wenzelm@46290
   478
  @{text x} is a bound variable of type @{text "\<tau>\<^sub>1"}, the body @{text
wenzelm@46290
   479
  b} has type @{text "\<tau>\<^sub>2"} and the whole term has type @{text "\<tau>\<^sub>3"}.
wenzelm@46290
   480
  The optional integer @{text p} specifies the syntactic priority of
wenzelm@46290
   481
  the body; the default is @{text "q"}, which is also the priority of
wenzelm@46290
   482
  the whole construct.
wenzelm@46290
   483
wenzelm@46290
   484
  Internally, the binder syntax is expanded to something like this:
wenzelm@46290
   485
  \begin{center}
wenzelm@58724
   486
  @{text "c_binder :: "}@{verbatim \<open>"\<close>}@{text "idts \<Rightarrow> \<tau>\<^sub>2 \<Rightarrow> \<tau>\<^sub>3"}@{verbatim \<open>"  ("(3\<close>}@{text sy}@{verbatim \<open>_./ _)" [0,\<close>}~@{text "p"}@{verbatim "]"}~@{text "q"}@{verbatim ")"}
wenzelm@46290
   487
  \end{center}
wenzelm@46290
   488
wenzelm@46290
   489
  Here @{syntax (inner) idts} is the nonterminal symbol for a list of
wenzelm@46290
   490
  identifiers with optional type constraints (see also
wenzelm@46290
   491
  \secref{sec:pure-grammar}).  The mixfix template @{verbatim
wenzelm@58724
   492
  \<open>"(3\<close>}@{text sy}@{verbatim \<open>_./ _)"\<close>} defines argument positions
wenzelm@46290
   493
  for the bound identifiers and the body, separated by a dot with
wenzelm@46290
   494
  optional line break; the entire phrase is a pretty printing block of
wenzelm@46290
   495
  indentation level 3.  Note that there is no extra space after @{text
wenzelm@46290
   496
  "sy"}, so it needs to be included in the user specification if the binder
wenzelm@46290
   497
  syntax ends with a token that may be continued by an identifier
wenzelm@46290
   498
  token at the start of @{syntax (inner) idts}.
wenzelm@46290
   499
wenzelm@46290
   500
  Furthermore, a syntax translation transforms @{text "c_binder x\<^sub>1
wenzelm@46290
   501
  \<dots> x\<^sub>n b"} into iterated application @{text "c (\<lambda>x\<^sub>1. \<dots> c (\<lambda>x\<^sub>n. b)\<dots>)"}.
wenzelm@58618
   502
  This works in both directions, for parsing and printing.\<close>
wenzelm@46290
   503
wenzelm@46290
   504
wenzelm@58618
   505
section \<open>Explicit notation \label{sec:notation}\<close>
wenzelm@28762
   506
wenzelm@58618
   507
text \<open>
wenzelm@28762
   508
  \begin{matharray}{rcll}
wenzelm@35413
   509
    @{command_def "type_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
wenzelm@35413
   510
    @{command_def "no_type_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
wenzelm@28762
   511
    @{command_def "notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
wenzelm@28762
   512
    @{command_def "no_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
wenzelm@36508
   513
    @{command_def "write"} & : & @{text "proof(state) \<rightarrow> proof(state)"} \\
wenzelm@28762
   514
  \end{matharray}
wenzelm@28762
   515
wenzelm@46288
   516
  Commands that introduce new logical entities (terms or types)
wenzelm@46288
   517
  usually allow providing mixfix annotations on the spot, which is
wenzelm@46288
   518
  convenient for default notation.  Nonetheless, the syntax may be
wenzelm@46288
   519
  modified later on by declarations for explicit notation.  This
wenzelm@46288
   520
  allows one to add or delete mixfix annotations for existing logical
wenzelm@46288
   521
  entities within the current context.
wenzelm@46288
   522
wenzelm@55112
   523
  @{rail \<open>
wenzelm@59783
   524
    (@@{command type_notation} | @@{command no_type_notation}) @{syntax mode}? \<newline>
wenzelm@59783
   525
      (@{syntax nameref} @{syntax mixfix} + @'and')
wenzelm@35413
   526
    ;
wenzelm@59783
   527
    (@@{command notation} | @@{command no_notation}) @{syntax mode}? \<newline>
wenzelm@51654
   528
      (@{syntax nameref} @{syntax mixfix} + @'and')
wenzelm@28762
   529
    ;
wenzelm@51654
   530
    @@{command write} @{syntax mode}? (@{syntax nameref} @{syntax mixfix} + @'and')
wenzelm@55112
   531
  \<close>}
wenzelm@28762
   532
wenzelm@28762
   533
  \begin{description}
wenzelm@28762
   534
wenzelm@35413
   535
  \item @{command "type_notation"}~@{text "c (mx)"} associates mixfix
wenzelm@35413
   536
  syntax with an existing type constructor.  The arity of the
wenzelm@35413
   537
  constructor is retrieved from the context.
wenzelm@46282
   538
wenzelm@35413
   539
  \item @{command "no_type_notation"} is similar to @{command
wenzelm@35413
   540
  "type_notation"}, but removes the specified syntax annotation from
wenzelm@35413
   541
  the present context.
wenzelm@35413
   542
wenzelm@28762
   543
  \item @{command "notation"}~@{text "c (mx)"} associates mixfix
wenzelm@35413
   544
  syntax with an existing constant or fixed variable.  The type
wenzelm@35413
   545
  declaration of the given entity is retrieved from the context.
wenzelm@46282
   546
wenzelm@28762
   547
  \item @{command "no_notation"} is similar to @{command "notation"},
wenzelm@28762
   548
  but removes the specified syntax annotation from the present
wenzelm@28762
   549
  context.
wenzelm@28762
   550
wenzelm@36508
   551
  \item @{command "write"} is similar to @{command "notation"}, but
wenzelm@36508
   552
  works within an Isar proof body.
wenzelm@36508
   553
wenzelm@28762
   554
  \end{description}
wenzelm@58618
   555
\<close>
wenzelm@28762
   556
wenzelm@28778
   557
wenzelm@58618
   558
section \<open>The Pure syntax \label{sec:pure-syntax}\<close>
wenzelm@28769
   559
wenzelm@58618
   560
subsection \<open>Lexical matters \label{sec:inner-lex}\<close>
wenzelm@46282
   561
wenzelm@58618
   562
text \<open>The inner lexical syntax vaguely resembles the outer one
wenzelm@46282
   563
  (\secref{sec:outer-lex}), but some details are different.  There are
wenzelm@46282
   564
  two main categories of inner syntax tokens:
wenzelm@46282
   565
wenzelm@46282
   566
  \begin{enumerate}
wenzelm@46282
   567
wenzelm@46282
   568
  \item \emph{delimiters} --- the literal tokens occurring in
wenzelm@46282
   569
  productions of the given priority grammar (cf.\
wenzelm@46282
   570
  \secref{sec:priority-grammar});
wenzelm@46282
   571
wenzelm@46282
   572
  \item \emph{named tokens} --- various categories of identifiers etc.
wenzelm@46282
   573
wenzelm@46282
   574
  \end{enumerate}
wenzelm@46282
   575
wenzelm@46282
   576
  Delimiters override named tokens and may thus render certain
wenzelm@46282
   577
  identifiers inaccessible.  Sometimes the logical context admits
wenzelm@46282
   578
  alternative ways to refer to the same entity, potentially via
wenzelm@46282
   579
  qualified names.
wenzelm@46282
   580
wenzelm@46282
   581
  \medskip The categories for named tokens are defined once and for
wenzelm@46282
   582
  all as follows, reusing some categories of the outer token syntax
wenzelm@46282
   583
  (\secref{sec:outer-lex}).
wenzelm@46282
   584
wenzelm@46282
   585
  \begin{center}
wenzelm@46282
   586
  \begin{supertabular}{rcl}
wenzelm@46282
   587
    @{syntax_def (inner) id} & = & @{syntax_ref ident} \\
wenzelm@46282
   588
    @{syntax_def (inner) longid} & = & @{syntax_ref longident} \\
wenzelm@46282
   589
    @{syntax_def (inner) var} & = & @{syntax_ref var} \\
wenzelm@46282
   590
    @{syntax_def (inner) tid} & = & @{syntax_ref typefree} \\
wenzelm@46282
   591
    @{syntax_def (inner) tvar} & = & @{syntax_ref typevar} \\
haftmann@58410
   592
    @{syntax_def (inner) num_token} & = & @{syntax_ref nat} \\
haftmann@58410
   593
    @{syntax_def (inner) float_token} & = & @{syntax_ref nat}@{verbatim "."}@{syntax_ref nat} \\
wenzelm@46483
   594
    @{syntax_def (inner) str_token} & = & @{verbatim "''"} @{text "\<dots>"} @{verbatim "''"} \\
wenzelm@58724
   595
    @{syntax_def (inner) string_token} & = & @{verbatim \<open>"\<close>} @{text "\<dots>"} @{verbatim \<open>"\<close>} \\
wenzelm@55033
   596
    @{syntax_def (inner) cartouche} & = & @{verbatim "\<open>"} @{text "\<dots>"} @{verbatim "\<close>"} \\
wenzelm@46282
   597
  \end{supertabular}
wenzelm@46282
   598
  \end{center}
wenzelm@46282
   599
wenzelm@46282
   600
  The token categories @{syntax (inner) num_token}, @{syntax (inner)
wenzelm@58421
   601
  float_token}, @{syntax (inner) str_token}, @{syntax (inner) string_token},
wenzelm@58421
   602
  and @{syntax (inner) cartouche} are not used in Pure. Object-logics may
wenzelm@58421
   603
  implement numerals and string literals by adding appropriate syntax
wenzelm@58421
   604
  declarations, together with some translation functions (e.g.\ see @{file
wenzelm@58421
   605
  "~~/src/HOL/Tools/string_syntax.ML"}).
wenzelm@46282
   606
wenzelm@58421
   607
  The derived categories @{syntax_def (inner) num_const} and @{syntax_def
wenzelm@58421
   608
  (inner) float_const} provide robust access to the respective tokens: the
wenzelm@58421
   609
  syntax tree holds a syntactic constant instead of a free variable.
wenzelm@58618
   610
\<close>
wenzelm@46282
   611
wenzelm@46282
   612
wenzelm@58618
   613
subsection \<open>Priority grammars \label{sec:priority-grammar}\<close>
wenzelm@28769
   614
wenzelm@58618
   615
text \<open>A context-free grammar consists of a set of \emph{terminal
wenzelm@28769
   616
  symbols}, a set of \emph{nonterminal symbols} and a set of
wenzelm@28769
   617
  \emph{productions}.  Productions have the form @{text "A = \<gamma>"},
wenzelm@28769
   618
  where @{text A} is a nonterminal and @{text \<gamma>} is a string of
wenzelm@28769
   619
  terminals and nonterminals.  One designated nonterminal is called
wenzelm@28769
   620
  the \emph{root symbol}.  The language defined by the grammar
wenzelm@28769
   621
  consists of all strings of terminals that can be derived from the
wenzelm@28769
   622
  root symbol by applying productions as rewrite rules.
wenzelm@28769
   623
wenzelm@28769
   624
  The standard Isabelle parser for inner syntax uses a \emph{priority
wenzelm@28769
   625
  grammar}.  Each nonterminal is decorated by an integer priority:
wenzelm@28769
   626
  @{text "A\<^sup>(\<^sup>p\<^sup>)"}.  In a derivation, @{text "A\<^sup>(\<^sup>p\<^sup>)"} may be rewritten
wenzelm@28769
   627
  using a production @{text "A\<^sup>(\<^sup>q\<^sup>) = \<gamma>"} only if @{text "p \<le> q"}.  Any
wenzelm@28769
   628
  priority grammar can be translated into a normal context-free
wenzelm@28769
   629
  grammar by introducing new nonterminals and productions.
wenzelm@28769
   630
wenzelm@28769
   631
  \medskip Formally, a set of context-free productions @{text G}
wenzelm@28769
   632
  induces a derivation relation @{text "\<longrightarrow>\<^sub>G"} as follows.  Let @{text
wenzelm@28769
   633
  \<alpha>} and @{text \<beta>} denote strings of terminal or nonterminal symbols.
wenzelm@28774
   634
  Then @{text "\<alpha> A\<^sup>(\<^sup>p\<^sup>) \<beta> \<longrightarrow>\<^sub>G \<alpha> \<gamma> \<beta>"} holds if and only if @{text G}
wenzelm@28774
   635
  contains some production @{text "A\<^sup>(\<^sup>q\<^sup>) = \<gamma>"} for @{text "p \<le> q"}.
wenzelm@28769
   636
wenzelm@28769
   637
  \medskip The following grammar for arithmetic expressions
wenzelm@28769
   638
  demonstrates how binding power and associativity of operators can be
wenzelm@28769
   639
  enforced by priorities.
wenzelm@28769
   640
wenzelm@28769
   641
  \begin{center}
wenzelm@28769
   642
  \begin{tabular}{rclr}
wenzelm@28774
   643
  @{text "A\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "="} & @{verbatim "("} @{text "A\<^sup>(\<^sup>0\<^sup>)"} @{verbatim ")"} \\
wenzelm@28769
   644
  @{text "A\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "="} & @{verbatim 0} \\
wenzelm@28769
   645
  @{text "A\<^sup>(\<^sup>0\<^sup>)"} & @{text "="} & @{text "A\<^sup>(\<^sup>0\<^sup>)"} @{verbatim "+"} @{text "A\<^sup>(\<^sup>1\<^sup>)"} \\
wenzelm@28769
   646
  @{text "A\<^sup>(\<^sup>2\<^sup>)"} & @{text "="} & @{text "A\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "*"} @{text "A\<^sup>(\<^sup>2\<^sup>)"} \\
wenzelm@28769
   647
  @{text "A\<^sup>(\<^sup>3\<^sup>)"} & @{text "="} & @{verbatim "-"} @{text "A\<^sup>(\<^sup>3\<^sup>)"} \\
wenzelm@28769
   648
  \end{tabular}
wenzelm@28769
   649
  \end{center}
wenzelm@28769
   650
  The choice of priorities determines that @{verbatim "-"} binds
wenzelm@28769
   651
  tighter than @{verbatim "*"}, which binds tighter than @{verbatim
wenzelm@28769
   652
  "+"}.  Furthermore @{verbatim "+"} associates to the left and
wenzelm@28769
   653
  @{verbatim "*"} to the right.
wenzelm@28769
   654
wenzelm@28769
   655
  \medskip For clarity, grammars obey these conventions:
wenzelm@28769
   656
  \begin{itemize}
wenzelm@28769
   657
wenzelm@28769
   658
  \item All priorities must lie between 0 and 1000.
wenzelm@28769
   659
wenzelm@28769
   660
  \item Priority 0 on the right-hand side and priority 1000 on the
wenzelm@28769
   661
  left-hand side may be omitted.
wenzelm@28769
   662
wenzelm@28769
   663
  \item The production @{text "A\<^sup>(\<^sup>p\<^sup>) = \<alpha>"} is written as @{text "A = \<alpha>
wenzelm@28769
   664
  (p)"}, i.e.\ the priority of the left-hand side actually appears in
wenzelm@28769
   665
  a column on the far right.
wenzelm@28769
   666
wenzelm@28769
   667
  \item Alternatives are separated by @{text "|"}.
wenzelm@28769
   668
wenzelm@28769
   669
  \item Repetition is indicated by dots @{text "(\<dots>)"} in an informal
wenzelm@28769
   670
  but obvious way.
wenzelm@28769
   671
wenzelm@28769
   672
  \end{itemize}
wenzelm@28769
   673
wenzelm@28769
   674
  Using these conventions, the example grammar specification above
wenzelm@28769
   675
  takes the form:
wenzelm@28769
   676
  \begin{center}
wenzelm@28769
   677
  \begin{tabular}{rclc}
wenzelm@28774
   678
    @{text A} & @{text "="} & @{verbatim "("} @{text A} @{verbatim ")"} \\
wenzelm@28774
   679
              & @{text "|"} & @{verbatim 0} & \qquad\qquad \\
wenzelm@28769
   680
              & @{text "|"} & @{text A} @{verbatim "+"} @{text "A\<^sup>(\<^sup>1\<^sup>)"} & @{text "(0)"} \\
wenzelm@28769
   681
              & @{text "|"} & @{text "A\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "*"} @{text "A\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
wenzelm@28769
   682
              & @{text "|"} & @{verbatim "-"} @{text "A\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
wenzelm@28769
   683
  \end{tabular}
wenzelm@28769
   684
  \end{center}
wenzelm@58618
   685
\<close>
wenzelm@28769
   686
wenzelm@28769
   687
wenzelm@58618
   688
subsection \<open>The Pure grammar \label{sec:pure-grammar}\<close>
wenzelm@28770
   689
wenzelm@58618
   690
text \<open>The priority grammar of the @{text "Pure"} theory is defined
wenzelm@46287
   691
  approximately like this:
wenzelm@28774
   692
wenzelm@28770
   693
  \begin{center}
wenzelm@28773
   694
  \begin{supertabular}{rclr}
wenzelm@28770
   695
wenzelm@28778
   696
  @{syntax_def (inner) any} & = & @{text "prop  |  logic"} \\\\
wenzelm@28772
   697
wenzelm@28778
   698
  @{syntax_def (inner) prop} & = & @{verbatim "("} @{text prop} @{verbatim ")"} \\
wenzelm@28772
   699
    & @{text "|"} & @{text "prop\<^sup>(\<^sup>4\<^sup>)"} @{verbatim "::"} @{text type} & @{text "(3)"} \\
wenzelm@50636
   700
    & @{text "|"} & @{text "any\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "=="} @{text "any\<^sup>(\<^sup>3\<^sup>)"} & @{text "(2)"} \\
wenzelm@50636
   701
    & @{text "|"} & @{text "any\<^sup>(\<^sup>3\<^sup>)"} @{text "\<equiv>"} @{text "any\<^sup>(\<^sup>3\<^sup>)"} & @{text "(2)"} \\
wenzelm@28856
   702
    & @{text "|"} & @{text "prop\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "&&&"} @{text "prop\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
wenzelm@28772
   703
    & @{text "|"} & @{text "prop\<^sup>(\<^sup>2\<^sup>)"} @{verbatim "==>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
wenzelm@28773
   704
    & @{text "|"} & @{text "prop\<^sup>(\<^sup>2\<^sup>)"} @{text "\<Longrightarrow>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
wenzelm@28772
   705
    & @{text "|"} & @{verbatim "[|"} @{text prop} @{verbatim ";"} @{text "\<dots>"} @{verbatim ";"} @{text prop} @{verbatim "|]"} @{verbatim "==>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
wenzelm@28773
   706
    & @{text "|"} & @{text "\<lbrakk>"} @{text prop} @{verbatim ";"} @{text "\<dots>"} @{verbatim ";"} @{text prop} @{text "\<rbrakk>"} @{text "\<Longrightarrow>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
wenzelm@28772
   707
    & @{text "|"} & @{verbatim "!!"} @{text idts} @{verbatim "."} @{text prop} & @{text "(0)"} \\
wenzelm@28773
   708
    & @{text "|"} & @{text "\<And>"} @{text idts} @{verbatim "."} @{text prop} & @{text "(0)"} \\
wenzelm@28773
   709
    & @{text "|"} & @{verbatim OFCLASS} @{verbatim "("} @{text type} @{verbatim ","} @{text logic} @{verbatim ")"} \\
wenzelm@28773
   710
    & @{text "|"} & @{verbatim SORT_CONSTRAINT} @{verbatim "("} @{text type} @{verbatim ")"} \\
wenzelm@28856
   711
    & @{text "|"} & @{verbatim TERM} @{text logic} \\
wenzelm@28773
   712
    & @{text "|"} & @{verbatim PROP} @{text aprop} \\\\
wenzelm@28772
   713
wenzelm@28856
   714
  @{syntax_def (inner) aprop} & = & @{verbatim "("} @{text aprop} @{verbatim ")"} \\
wenzelm@28856
   715
    & @{text "|"} & @{text "id  |  longid  |  var  |  "}@{verbatim "_"}@{text "  |  "}@{verbatim "..."} \\
wenzelm@28856
   716
    & @{text "|"} & @{verbatim CONST} @{text "id  |  "}@{verbatim CONST} @{text "longid"} \\
wenzelm@46287
   717
    & @{text "|"} & @{verbatim XCONST} @{text "id  |  "}@{verbatim XCONST} @{text "longid"} \\
wenzelm@28773
   718
    & @{text "|"} & @{text "logic\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)  any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) \<dots> any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "(999)"} \\\\
wenzelm@28770
   719
wenzelm@28778
   720
  @{syntax_def (inner) logic} & = & @{verbatim "("} @{text logic} @{verbatim ")"} \\
wenzelm@28772
   721
    & @{text "|"} & @{text "logic\<^sup>(\<^sup>4\<^sup>)"} @{verbatim "::"} @{text type} & @{text "(3)"} \\
wenzelm@28773
   722
    & @{text "|"} & @{text "id  |  longid  |  var  |  "}@{verbatim "_"}@{text "  |  "}@{verbatim "..."} \\
wenzelm@28856
   723
    & @{text "|"} & @{verbatim CONST} @{text "id  |  "}@{verbatim CONST} @{text "longid"} \\
wenzelm@46287
   724
    & @{text "|"} & @{verbatim XCONST} @{text "id  |  "}@{verbatim XCONST} @{text "longid"} \\
wenzelm@28773
   725
    & @{text "|"} & @{text "logic\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)  any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) \<dots> any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "(999)"} \\
wenzelm@46287
   726
    & @{text "|"} & @{text "\<struct> index\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} \\
wenzelm@28772
   727
    & @{text "|"} & @{verbatim "%"} @{text pttrns} @{verbatim "."} @{text "any\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
wenzelm@28773
   728
    & @{text "|"} & @{text \<lambda>} @{text pttrns} @{verbatim "."} @{text "any\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
wenzelm@46287
   729
    & @{text "|"} & @{verbatim op} @{verbatim "=="}@{text "  |  "}@{verbatim op} @{text "\<equiv>"}@{text "  |  "}@{verbatim op} @{verbatim "&&&"} \\
wenzelm@46287
   730
    & @{text "|"} & @{verbatim op} @{verbatim "==>"}@{text "  |  "}@{verbatim op} @{text "\<Longrightarrow>"} \\
wenzelm@28772
   731
    & @{text "|"} & @{verbatim TYPE} @{verbatim "("} @{text type} @{verbatim ")"} \\\\
wenzelm@28772
   732
wenzelm@28778
   733
  @{syntax_def (inner) idt} & = & @{verbatim "("} @{text idt} @{verbatim ")"}@{text "  |  id  |  "}@{verbatim "_"} \\
wenzelm@28773
   734
    & @{text "|"} & @{text id} @{verbatim "::"} @{text type} & @{text "(0)"} \\
wenzelm@28773
   735
    & @{text "|"} & @{verbatim "_"} @{verbatim "::"} @{text type} & @{text "(0)"} \\\\
wenzelm@28772
   736
wenzelm@46287
   737
  @{syntax_def (inner) index} & = & @{verbatim "\<^bsub>"} @{text "logic\<^sup>(\<^sup>0\<^sup>)"} @{verbatim "\<^esub>"}@{text "  |  |  \<index>"} \\\\
wenzelm@46287
   738
wenzelm@28778
   739
  @{syntax_def (inner) idts} & = & @{text "idt  |  idt\<^sup>(\<^sup>1\<^sup>) idts"} & @{text "(0)"} \\\\
wenzelm@28772
   740
wenzelm@28778
   741
  @{syntax_def (inner) pttrn} & = & @{text idt} \\\\
wenzelm@28772
   742
wenzelm@28778
   743
  @{syntax_def (inner) pttrns} & = & @{text "pttrn  |  pttrn\<^sup>(\<^sup>1\<^sup>) pttrns"} & @{text "(0)"} \\\\
wenzelm@28774
   744
wenzelm@28778
   745
  @{syntax_def (inner) type} & = & @{verbatim "("} @{text type} @{verbatim ")"} \\
wenzelm@28773
   746
    & @{text "|"} & @{text "tid  |  tvar  |  "}@{verbatim "_"} \\
wenzelm@28773
   747
    & @{text "|"} & @{text "tid"} @{verbatim "::"} @{text "sort  |  tvar  "}@{verbatim "::"} @{text "sort  |  "}@{verbatim "_"} @{verbatim "::"} @{text "sort"} \\
wenzelm@46287
   748
    & @{text "|"} & @{text "type_name  |  type\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) type_name"} \\
wenzelm@46287
   749
    & @{text "|"} & @{verbatim "("} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim ")"} @{text type_name} \\
wenzelm@28772
   750
    & @{text "|"} & @{text "type\<^sup>(\<^sup>1\<^sup>)"} @{verbatim "=>"} @{text type} & @{text "(0)"} \\
wenzelm@28773
   751
    & @{text "|"} & @{text "type\<^sup>(\<^sup>1\<^sup>)"} @{text "\<Rightarrow>"} @{text type} & @{text "(0)"} \\
wenzelm@28773
   752
    & @{text "|"} & @{verbatim "["} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim "]"} @{verbatim "=>"} @{text type} & @{text "(0)"} \\
wenzelm@46287
   753
    & @{text "|"} & @{verbatim "["} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim "]"} @{text "\<Rightarrow>"} @{text type} & @{text "(0)"} \\
wenzelm@46287
   754
  @{syntax_def (inner) type_name} & = & @{text "id  |  longid"} \\\\
wenzelm@28772
   755
wenzelm@46287
   756
  @{syntax_def (inner) sort} & = & @{syntax class_name}~@{text "  |  "}@{verbatim "{}"} \\
wenzelm@46287
   757
    & @{text "|"} & @{verbatim "{"} @{syntax class_name} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{syntax class_name} @{verbatim "}"} \\
wenzelm@46287
   758
  @{syntax_def (inner) class_name} & = & @{text "id  |  longid"} \\
wenzelm@28773
   759
  \end{supertabular}
wenzelm@28770
   760
  \end{center}
wenzelm@28770
   761
wenzelm@28774
   762
  \medskip Here literal terminals are printed @{verbatim "verbatim"};
wenzelm@28774
   763
  see also \secref{sec:inner-lex} for further token categories of the
wenzelm@28774
   764
  inner syntax.  The meaning of the nonterminals defined by the above
wenzelm@28774
   765
  grammar is as follows:
wenzelm@28770
   766
wenzelm@28770
   767
  \begin{description}
wenzelm@28770
   768
wenzelm@28778
   769
  \item @{syntax_ref (inner) any} denotes any term.
wenzelm@28770
   770
wenzelm@28778
   771
  \item @{syntax_ref (inner) prop} denotes meta-level propositions,
wenzelm@28778
   772
  which are terms of type @{typ prop}.  The syntax of such formulae of
wenzelm@28778
   773
  the meta-logic is carefully distinguished from usual conventions for
wenzelm@28778
   774
  object-logics.  In particular, plain @{text "\<lambda>"}-term notation is
wenzelm@28778
   775
  \emph{not} recognized as @{syntax (inner) prop}.
wenzelm@28770
   776
wenzelm@28778
   777
  \item @{syntax_ref (inner) aprop} denotes atomic propositions, which
wenzelm@28778
   778
  are embedded into regular @{syntax (inner) prop} by means of an
wenzelm@28778
   779
  explicit @{verbatim PROP} token.
wenzelm@28770
   780
wenzelm@28770
   781
  Terms of type @{typ prop} with non-constant head, e.g.\ a plain
wenzelm@28770
   782
  variable, are printed in this form.  Constants that yield type @{typ
wenzelm@28770
   783
  prop} are expected to provide their own concrete syntax; otherwise
wenzelm@28778
   784
  the printed version will appear like @{syntax (inner) logic} and
wenzelm@28778
   785
  cannot be parsed again as @{syntax (inner) prop}.
wenzelm@28770
   786
wenzelm@28778
   787
  \item @{syntax_ref (inner) logic} denotes arbitrary terms of a
wenzelm@28778
   788
  logical type, excluding type @{typ prop}.  This is the main
wenzelm@28778
   789
  syntactic category of object-logic entities, covering plain @{text
wenzelm@28778
   790
  \<lambda>}-term notation (variables, abstraction, application), plus
wenzelm@28778
   791
  anything defined by the user.
wenzelm@28770
   792
wenzelm@28770
   793
  When specifying notation for logical entities, all logical types
wenzelm@28770
   794
  (excluding @{typ prop}) are \emph{collapsed} to this single category
wenzelm@28778
   795
  of @{syntax (inner) logic}.
wenzelm@28770
   796
wenzelm@46287
   797
  \item @{syntax_ref (inner) index} denotes an optional index term for
wenzelm@51657
   798
  indexed syntax.  If omitted, it refers to the first @{keyword_ref
wenzelm@46287
   799
  "structure"} variable in the context.  The special dummy ``@{text
wenzelm@46287
   800
  "\<index>"}'' serves as pattern variable in mixfix annotations that
wenzelm@46287
   801
  introduce indexed notation.
wenzelm@46287
   802
wenzelm@28778
   803
  \item @{syntax_ref (inner) idt} denotes identifiers, possibly
wenzelm@28778
   804
  constrained by types.
wenzelm@28770
   805
wenzelm@28778
   806
  \item @{syntax_ref (inner) idts} denotes a sequence of @{syntax_ref
wenzelm@28778
   807
  (inner) idt}.  This is the most basic category for variables in
wenzelm@28778
   808
  iterated binders, such as @{text "\<lambda>"} or @{text "\<And>"}.
wenzelm@28770
   809
wenzelm@28778
   810
  \item @{syntax_ref (inner) pttrn} and @{syntax_ref (inner) pttrns}
wenzelm@28778
   811
  denote patterns for abstraction, cases bindings etc.  In Pure, these
wenzelm@28778
   812
  categories start as a mere copy of @{syntax (inner) idt} and
wenzelm@28778
   813
  @{syntax (inner) idts}, respectively.  Object-logics may add
wenzelm@28778
   814
  additional productions for binding forms.
wenzelm@28770
   815
wenzelm@28778
   816
  \item @{syntax_ref (inner) type} denotes types of the meta-logic.
wenzelm@28770
   817
wenzelm@28778
   818
  \item @{syntax_ref (inner) sort} denotes meta-level sorts.
wenzelm@28770
   819
wenzelm@28770
   820
  \end{description}
wenzelm@28770
   821
wenzelm@28774
   822
  Here are some further explanations of certain syntax features.
wenzelm@28773
   823
wenzelm@28773
   824
  \begin{itemize}
wenzelm@28770
   825
wenzelm@28778
   826
  \item In @{syntax (inner) idts}, note that @{text "x :: nat y"} is
wenzelm@28778
   827
  parsed as @{text "x :: (nat y)"}, treating @{text y} like a type
wenzelm@28778
   828
  constructor applied to @{text nat}.  To avoid this interpretation,
wenzelm@28778
   829
  write @{text "(x :: nat) y"} with explicit parentheses.
wenzelm@28773
   830
wenzelm@28773
   831
  \item Similarly, @{text "x :: nat y :: nat"} is parsed as @{text "x ::
wenzelm@28770
   832
  (nat y :: nat)"}.  The correct form is @{text "(x :: nat) (y ::
wenzelm@28770
   833
  nat)"}, or @{text "(x :: nat) y :: nat"} if @{text y} is last in the
wenzelm@28770
   834
  sequence of identifiers.
wenzelm@28773
   835
wenzelm@28773
   836
  \item Type constraints for terms bind very weakly.  For example,
wenzelm@28773
   837
  @{text "x < y :: nat"} is normally parsed as @{text "(x < y) ::
wenzelm@28773
   838
  nat"}, unless @{text "<"} has a very low priority, in which case the
wenzelm@28773
   839
  input is likely to be ambiguous.  The correct form is @{text "x < (y
wenzelm@28773
   840
  :: nat)"}.
wenzelm@28773
   841
wenzelm@28773
   842
  \item Constraints may be either written with two literal colons
wenzelm@28773
   843
  ``@{verbatim "::"}'' or the double-colon symbol @{verbatim "\<Colon>"},
wenzelm@28774
   844
  which actually looks exactly the same in some {\LaTeX} styles.
wenzelm@28773
   845
wenzelm@28774
   846
  \item Dummy variables (written as underscore) may occur in different
wenzelm@28774
   847
  roles.
wenzelm@28773
   848
wenzelm@28773
   849
  \begin{description}
wenzelm@28773
   850
wenzelm@28774
   851
  \item A type ``@{text "_"}'' or ``@{text "_ :: sort"}'' acts like an
wenzelm@28774
   852
  anonymous inference parameter, which is filled in according to the
wenzelm@28774
   853
  most general type produced by the type-checking phase.
wenzelm@28770
   854
wenzelm@28774
   855
  \item A bound ``@{text "_"}'' refers to a vacuous abstraction, where
wenzelm@28774
   856
  the body does not refer to the binding introduced here.  As in the
wenzelm@28774
   857
  term @{term "\<lambda>x _. x"}, which is @{text "\<alpha>"}-equivalent to @{text
wenzelm@28774
   858
  "\<lambda>x y. x"}.
wenzelm@28773
   859
wenzelm@28774
   860
  \item A free ``@{text "_"}'' refers to an implicit outer binding.
wenzelm@28774
   861
  Higher definitional packages usually allow forms like @{text "f x _
wenzelm@28774
   862
  = x"}.
wenzelm@28773
   863
wenzelm@28774
   864
  \item A schematic ``@{text "_"}'' (within a term pattern, see
wenzelm@28774
   865
  \secref{sec:term-decls}) refers to an anonymous variable that is
wenzelm@28774
   866
  implicitly abstracted over its context of locally bound variables.
wenzelm@28774
   867
  For example, this allows pattern matching of @{text "{x. f x = g
wenzelm@28774
   868
  x}"} against @{text "{x. _ = _}"}, or even @{text "{_. _ = _}"} by
wenzelm@28774
   869
  using both bound and schematic dummies.
wenzelm@28773
   870
wenzelm@28773
   871
  \end{description}
wenzelm@28773
   872
wenzelm@28774
   873
  \item The three literal dots ``@{verbatim "..."}'' may also be
wenzelm@28774
   874
  written as ellipsis symbol @{verbatim "\<dots>"}.  In both cases this
wenzelm@28774
   875
  refers to a special schematic variable, which is bound in the
wenzelm@28774
   876
  context.  This special term abbreviation works nicely with
wenzelm@28774
   877
  calculational reasoning (\secref{sec:calculation}).
wenzelm@28774
   878
wenzelm@46287
   879
  \item @{verbatim CONST} ensures that the given identifier is treated
wenzelm@46287
   880
  as a constant term, and passed through the parse tree in fully
wenzelm@46287
   881
  internalized form.  This is particularly relevant for translation
wenzelm@46287
   882
  rules (\secref{sec:syn-trans}), notably on the RHS.
wenzelm@46287
   883
wenzelm@46287
   884
  \item @{verbatim XCONST} is similar to @{verbatim CONST}, but
wenzelm@46287
   885
  retains the constant name as given.  This is only relevant to
wenzelm@46287
   886
  translation rules (\secref{sec:syn-trans}), notably on the LHS.
wenzelm@46287
   887
wenzelm@28773
   888
  \end{itemize}
wenzelm@58618
   889
\<close>
wenzelm@28770
   890
wenzelm@28777
   891
wenzelm@58618
   892
subsection \<open>Inspecting the syntax\<close>
wenzelm@28777
   893
wenzelm@58618
   894
text \<open>
wenzelm@46282
   895
  \begin{matharray}{rcl}
wenzelm@46282
   896
    @{command_def "print_syntax"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@46282
   897
  \end{matharray}
wenzelm@28777
   898
wenzelm@46282
   899
  \begin{description}
wenzelm@46282
   900
wenzelm@46282
   901
  \item @{command "print_syntax"} prints the inner syntax of the
wenzelm@46282
   902
  current context.  The output can be quite large; the most important
wenzelm@46282
   903
  sections are explained below.
wenzelm@28777
   904
wenzelm@46282
   905
  \begin{description}
wenzelm@28777
   906
wenzelm@46282
   907
  \item @{text "lexicon"} lists the delimiters of the inner token
wenzelm@46282
   908
  language; see \secref{sec:inner-lex}.
wenzelm@28777
   909
wenzelm@46282
   910
  \item @{text "prods"} lists the productions of the underlying
wenzelm@46282
   911
  priority grammar; see \secref{sec:priority-grammar}.
wenzelm@28777
   912
wenzelm@46282
   913
  The nonterminal @{text "A\<^sup>(\<^sup>p\<^sup>)"} is rendered in plain text as @{text
wenzelm@46282
   914
  "A[p]"}; delimiters are quoted.  Many productions have an extra
wenzelm@46282
   915
  @{text "\<dots> => name"}.  These names later become the heads of parse
wenzelm@46282
   916
  trees; they also guide the pretty printer.
wenzelm@28777
   917
wenzelm@46282
   918
  Productions without such parse tree names are called \emph{copy
wenzelm@46282
   919
  productions}.  Their right-hand side must have exactly one
wenzelm@46282
   920
  nonterminal symbol (or named token).  The parser does not create a
wenzelm@46282
   921
  new parse tree node for copy productions, but simply returns the
wenzelm@46282
   922
  parse tree of the right-hand symbol.
wenzelm@46282
   923
wenzelm@46282
   924
  If the right-hand side of a copy production consists of a single
wenzelm@46282
   925
  nonterminal without any delimiters, then it is called a \emph{chain
wenzelm@46282
   926
  production}.  Chain productions act as abbreviations: conceptually,
wenzelm@46282
   927
  they are removed from the grammar by adding new productions.
wenzelm@46282
   928
  Priority information attached to chain productions is ignored; only
wenzelm@46282
   929
  the dummy value @{text "-1"} is displayed.
wenzelm@46282
   930
wenzelm@46282
   931
  \item @{text "print modes"} lists the alternative print modes
wenzelm@46282
   932
  provided by this grammar; see \secref{sec:print-modes}.
wenzelm@28777
   933
wenzelm@46282
   934
  \item @{text "parse_rules"} and @{text "print_rules"} relate to
wenzelm@46282
   935
  syntax translations (macros); see \secref{sec:syn-trans}.
wenzelm@46282
   936
wenzelm@46282
   937
  \item @{text "parse_ast_translation"} and @{text
wenzelm@46282
   938
  "print_ast_translation"} list sets of constants that invoke
wenzelm@46282
   939
  translation functions for abstract syntax trees, which are only
wenzelm@46282
   940
  required in very special situations; see \secref{sec:tr-funs}.
wenzelm@28777
   941
wenzelm@46282
   942
  \item @{text "parse_translation"} and @{text "print_translation"}
wenzelm@46282
   943
  list the sets of constants that invoke regular translation
wenzelm@46282
   944
  functions; see \secref{sec:tr-funs}.
wenzelm@29157
   945
wenzelm@46282
   946
  \end{description}
wenzelm@46282
   947
wenzelm@46282
   948
  \end{description}
wenzelm@58618
   949
\<close>
wenzelm@28774
   950
wenzelm@28770
   951
wenzelm@58618
   952
subsection \<open>Ambiguity of parsed expressions\<close>
wenzelm@46291
   953
wenzelm@58618
   954
text \<open>
wenzelm@46291
   955
  \begin{tabular}{rcll}
wenzelm@46512
   956
    @{attribute_def syntax_ambiguity_warning} & : & @{text attribute} & default @{text true} \\
wenzelm@46506
   957
    @{attribute_def syntax_ambiguity_limit} & : & @{text attribute} & default @{text 10} \\
wenzelm@46291
   958
  \end{tabular}
wenzelm@46291
   959
wenzelm@46291
   960
  Depending on the grammar and the given input, parsing may be
wenzelm@46291
   961
  ambiguous.  Isabelle lets the Earley parser enumerate all possible
wenzelm@46291
   962
  parse trees, and then tries to make the best out of the situation.
wenzelm@46291
   963
  Terms that cannot be type-checked are filtered out, which often
wenzelm@46291
   964
  leads to a unique result in the end.  Unlike regular type
wenzelm@46291
   965
  reconstruction, which is applied to the whole collection of input
wenzelm@46291
   966
  terms simultaneously, the filtering stage only treats each given
wenzelm@46291
   967
  term in isolation.  Filtering is also not attempted for individual
wenzelm@46291
   968
  types or raw ASTs (as required for @{command translations}).
wenzelm@46291
   969
wenzelm@46291
   970
  Certain warning or error messages are printed, depending on the
wenzelm@46291
   971
  situation and the given configuration options.  Parsing ultimately
wenzelm@46291
   972
  fails, if multiple results remain after the filtering phase.
wenzelm@46291
   973
wenzelm@46291
   974
  \begin{description}
wenzelm@46291
   975
wenzelm@46512
   976
  \item @{attribute syntax_ambiguity_warning} controls output of
wenzelm@46512
   977
  explicit warning messages about syntax ambiguity.
wenzelm@46291
   978
wenzelm@46506
   979
  \item @{attribute syntax_ambiguity_limit} determines the number of
wenzelm@46291
   980
  resulting parse trees that are shown as part of the printed message
wenzelm@46291
   981
  in case of an ambiguity.
wenzelm@46291
   982
wenzelm@46291
   983
  \end{description}
wenzelm@58618
   984
\<close>
wenzelm@46291
   985
wenzelm@46291
   986
wenzelm@58618
   987
section \<open>Syntax transformations \label{sec:syntax-transformations}\<close>
wenzelm@48113
   988
wenzelm@58618
   989
text \<open>The inner syntax engine of Isabelle provides separate
wenzelm@52413
   990
  mechanisms to transform parse trees either via rewrite systems on
wenzelm@48113
   991
  first-order ASTs (\secref{sec:syn-trans}), or ML functions on ASTs
wenzelm@48113
   992
  or syntactic @{text "\<lambda>"}-terms (\secref{sec:tr-funs}).  This works
wenzelm@48113
   993
  both for parsing and printing, as outlined in
wenzelm@48113
   994
  \figref{fig:parse-print}.
wenzelm@48113
   995
wenzelm@48113
   996
  \begin{figure}[htbp]
wenzelm@48113
   997
  \begin{center}
wenzelm@48113
   998
  \begin{tabular}{cl}
wenzelm@48113
   999
  string          & \\
wenzelm@48113
  1000
  @{text "\<down>"}     & lexer + parser \\
wenzelm@48113
  1001
  parse tree      & \\
wenzelm@48113
  1002
  @{text "\<down>"}     & parse AST translation \\
wenzelm@48113
  1003
  AST             & \\
wenzelm@48113
  1004
  @{text "\<down>"}     & AST rewriting (macros) \\
wenzelm@48113
  1005
  AST             & \\
wenzelm@48113
  1006
  @{text "\<down>"}     & parse translation \\
wenzelm@48113
  1007
  --- pre-term ---    & \\
wenzelm@48113
  1008
  @{text "\<down>"}     & print translation \\
wenzelm@48113
  1009
  AST             & \\
wenzelm@48113
  1010
  @{text "\<down>"}     & AST rewriting (macros) \\
wenzelm@48113
  1011
  AST             & \\
wenzelm@48113
  1012
  @{text "\<down>"}     & print AST translation \\
wenzelm@48113
  1013
  string          &
wenzelm@48113
  1014
  \end{tabular}
wenzelm@48113
  1015
  \end{center}
wenzelm@48113
  1016
  \caption{Parsing and printing with translations}\label{fig:parse-print}
wenzelm@48113
  1017
  \end{figure}
wenzelm@48113
  1018
wenzelm@48113
  1019
  These intermediate syntax tree formats eventually lead to a pre-term
wenzelm@48113
  1020
  with all names and binding scopes resolved, but most type
wenzelm@48113
  1021
  information still missing.  Explicit type constraints might be given by
wenzelm@48113
  1022
  the user, or implicit position information by the system --- both
wenzelm@48816
  1023
  need to be passed through carefully by syntax transformations.
wenzelm@48113
  1024
wenzelm@48113
  1025
  Pre-terms are further processed by the so-called \emph{check} and
wenzelm@48113
  1026
  \emph{uncheck} phases that are intertwined with type-inference (see
wenzelm@58552
  1027
  also @{cite "isabelle-implementation"}).  The latter allows to operate
wenzelm@48113
  1028
  on higher-order abstract syntax with proper binding and type
wenzelm@48113
  1029
  information already available.
wenzelm@48113
  1030
wenzelm@48113
  1031
  As a rule of thumb, anything that manipulates bindings of variables
wenzelm@48113
  1032
  or constants needs to be implemented as syntax transformation (see
wenzelm@48113
  1033
  below).  Anything else is better done via check/uncheck: a prominent
wenzelm@48113
  1034
  example application is the @{command abbreviation} concept of
wenzelm@58618
  1035
  Isabelle/Pure.\<close>
wenzelm@48113
  1036
wenzelm@48113
  1037
wenzelm@58618
  1038
subsection \<open>Abstract syntax trees \label{sec:ast}\<close>
wenzelm@48113
  1039
wenzelm@58618
  1040
text \<open>The ML datatype @{ML_type Ast.ast} explicitly represents the
wenzelm@48114
  1041
  intermediate AST format that is used for syntax rewriting
wenzelm@48114
  1042
  (\secref{sec:syn-trans}).  It is defined in ML as follows:
wenzelm@48114
  1043
  \begin{ttbox}
wenzelm@48114
  1044
  datatype ast =
wenzelm@48114
  1045
    Constant of string |
wenzelm@48114
  1046
    Variable of string |
wenzelm@48114
  1047
    Appl of ast list
wenzelm@48114
  1048
  \end{ttbox}
wenzelm@48114
  1049
wenzelm@48114
  1050
  An AST is either an atom (constant or variable) or a list of (at
wenzelm@48114
  1051
  least two) subtrees.  Occasional diagnostic output of ASTs uses
wenzelm@48114
  1052
  notation that resembles S-expressions of LISP.  Constant atoms are
wenzelm@48114
  1053
  shown as quoted strings, variable atoms as non-quoted strings and
wenzelm@48114
  1054
  applications as a parenthesized list of subtrees.  For example, the
wenzelm@48114
  1055
  AST
wenzelm@58724
  1056
  @{ML [display] \<open>Ast.Appl [Ast.Constant "_abs", Ast.Variable "x", Ast.Variable "t"]\<close>}
wenzelm@58724
  1057
  is pretty-printed as @{verbatim \<open>("_abs" x t)\<close>}.  Note that
wenzelm@48114
  1058
  @{verbatim "()"} and @{verbatim "(x)"} are excluded as ASTs, because
wenzelm@48114
  1059
  they have too few subtrees.
wenzelm@48114
  1060
wenzelm@48114
  1061
  \medskip AST application is merely a pro-forma mechanism to indicate
wenzelm@48114
  1062
  certain syntactic structures.  Thus @{verbatim "(c a b)"} could mean
wenzelm@48114
  1063
  either term application or type application, depending on the
wenzelm@48114
  1064
  syntactic context.
wenzelm@48114
  1065
wenzelm@58724
  1066
  Nested application like @{verbatim \<open>(("_abs" x t) u)\<close>} is also
wenzelm@48114
  1067
  possible, but ASTs are definitely first-order: the syntax constant
wenzelm@58724
  1068
  @{verbatim \<open>"_abs"\<close>} does not bind the @{verbatim x} in any way.
wenzelm@48114
  1069
  Proper bindings are introduced in later stages of the term syntax,
wenzelm@58724
  1070
  where @{verbatim \<open>("_abs" x t)\<close>} becomes an @{ML Abs} node and
wenzelm@48114
  1071
  occurrences of @{verbatim x} in @{verbatim t} are replaced by bound
wenzelm@48114
  1072
  variables (represented as de-Bruijn indices).
wenzelm@58618
  1073
\<close>
wenzelm@48113
  1074
wenzelm@48113
  1075
wenzelm@58618
  1076
subsubsection \<open>AST constants versus variables\<close>
wenzelm@48114
  1077
wenzelm@58618
  1078
text \<open>Depending on the situation --- input syntax, output syntax,
wenzelm@56582
  1079
  translation patterns --- the distinction of atomic ASTs as @{ML
wenzelm@48114
  1080
  Ast.Constant} versus @{ML Ast.Variable} serves slightly different
wenzelm@48114
  1081
  purposes.
wenzelm@48114
  1082
wenzelm@48114
  1083
  Input syntax of a term such as @{text "f a b = c"} does not yet
wenzelm@48114
  1084
  indicate the scopes of atomic entities @{text "f, a, b, c"}: they
wenzelm@48114
  1085
  could be global constants or local variables, even bound ones
wenzelm@48114
  1086
  depending on the context of the term.  @{ML Ast.Variable} leaves
wenzelm@48114
  1087
  this choice still open: later syntax layers (or translation
wenzelm@48114
  1088
  functions) may capture such a variable to determine its role
wenzelm@48114
  1089
  specifically, to make it a constant, bound variable, free variable
wenzelm@48114
  1090
  etc.  In contrast, syntax translations that introduce already known
wenzelm@48114
  1091
  constants would rather do it via @{ML Ast.Constant} to prevent
wenzelm@48114
  1092
  accidental re-interpretation later on.
wenzelm@48114
  1093
wenzelm@48114
  1094
  Output syntax turns term constants into @{ML Ast.Constant} and
wenzelm@48114
  1095
  variables (free or schematic) into @{ML Ast.Variable}.  This
wenzelm@48114
  1096
  information is precise when printing fully formal @{text "\<lambda>"}-terms.
wenzelm@48114
  1097
wenzelm@52413
  1098
  \medskip AST translation patterns (\secref{sec:syn-trans}) that
wenzelm@52413
  1099
  represent terms cannot distinguish constants and variables
wenzelm@52413
  1100
  syntactically.  Explicit indication of @{text "CONST c"} inside the
wenzelm@52413
  1101
  term language is required, unless @{text "c"} is known as special
wenzelm@52413
  1102
  \emph{syntax constant} (see also @{command syntax}).  It is also
wenzelm@52413
  1103
  possible to use @{command syntax} declarations (without mixfix
wenzelm@52413
  1104
  annotation) to enforce that certain unqualified names are always
wenzelm@52413
  1105
  treated as constant within the syntax machinery.
wenzelm@48114
  1106
wenzelm@52413
  1107
  The situation is simpler for ASTs that represent types or sorts,
wenzelm@52413
  1108
  since the concrete syntax already distinguishes type variables from
wenzelm@52413
  1109
  type constants (constructors).  So @{text "('a, 'b) foo"}
wenzelm@52413
  1110
  corresponds to an AST application of some constant for @{text foo}
wenzelm@52413
  1111
  and variable arguments for @{text "'a"} and @{text "'b"}.  Note that
wenzelm@52413
  1112
  the postfix application is merely a feature of the concrete syntax,
wenzelm@58618
  1113
  while in the AST the constructor occurs in head position.\<close>
wenzelm@48114
  1114
wenzelm@48114
  1115
wenzelm@58618
  1116
subsubsection \<open>Authentic syntax names\<close>
wenzelm@48114
  1117
wenzelm@58618
  1118
text \<open>Naming constant entities within ASTs is another delicate
wenzelm@52413
  1119
  issue.  Unqualified names are resolved in the name space tables in
wenzelm@48114
  1120
  the last stage of parsing, after all translations have been applied.
wenzelm@48114
  1121
  Since syntax transformations do not know about this later name
wenzelm@52413
  1122
  resolution, there can be surprises in boundary cases.
wenzelm@48114
  1123
wenzelm@48114
  1124
  \emph{Authentic syntax names} for @{ML Ast.Constant} avoid this
wenzelm@48114
  1125
  problem: the fully-qualified constant name with a special prefix for
wenzelm@48114
  1126
  its formal category (@{text "class"}, @{text "type"}, @{text
wenzelm@48114
  1127
  "const"}, @{text "fixed"}) represents the information faithfully
wenzelm@48114
  1128
  within the untyped AST format.  Accidental overlap with free or
wenzelm@48114
  1129
  bound variables is excluded as well.  Authentic syntax names work
wenzelm@48114
  1130
  implicitly in the following situations:
wenzelm@48114
  1131
wenzelm@48114
  1132
  \begin{itemize}
wenzelm@48114
  1133
wenzelm@48114
  1134
  \item Input of term constants (or fixed variables) that are
wenzelm@48114
  1135
  introduced by concrete syntax via @{command notation}: the
wenzelm@48114
  1136
  correspondence of a particular grammar production to some known term
wenzelm@48114
  1137
  entity is preserved.
wenzelm@48114
  1138
wenzelm@48816
  1139
  \item Input of type constants (constructors) and type classes ---
wenzelm@48114
  1140
  thanks to explicit syntactic distinction independently of the
wenzelm@48114
  1141
  context.
wenzelm@48114
  1142
wenzelm@48114
  1143
  \item Output of term constants, type constants, type classes ---
wenzelm@48114
  1144
  this information is already available from the internal term to be
wenzelm@48114
  1145
  printed.
wenzelm@48114
  1146
wenzelm@48114
  1147
  \end{itemize}
wenzelm@48114
  1148
wenzelm@48114
  1149
  In other words, syntax transformations that operate on input terms
wenzelm@48816
  1150
  written as prefix applications are difficult to make robust.
wenzelm@48816
  1151
  Luckily, this case rarely occurs in practice, because syntax forms
wenzelm@58618
  1152
  to be translated usually correspond to some concrete notation.\<close>
wenzelm@48114
  1153
wenzelm@48114
  1154
wenzelm@58618
  1155
subsection \<open>Raw syntax and translations \label{sec:syn-trans}\<close>
wenzelm@28762
  1156
wenzelm@58618
  1157
text \<open>
wenzelm@48117
  1158
  \begin{tabular}{rcll}
wenzelm@41229
  1159
    @{command_def "nonterminal"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1160
    @{command_def "syntax"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1161
    @{command_def "no_syntax"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1162
    @{command_def "translations"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1163
    @{command_def "no_translations"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@48117
  1164
    @{attribute_def syntax_ast_trace} & : & @{text attribute} & default @{text false} \\
wenzelm@48117
  1165
    @{attribute_def syntax_ast_stats} & : & @{text attribute} & default @{text false} \\
wenzelm@48117
  1166
  \end{tabular}
wenzelm@28762
  1167
wenzelm@59783
  1168
  \medskip
wenzelm@59783
  1169
wenzelm@46292
  1170
  Unlike mixfix notation for existing formal entities
wenzelm@46292
  1171
  (\secref{sec:notation}), raw syntax declarations provide full access
wenzelm@48115
  1172
  to the priority grammar of the inner syntax, without any sanity
wenzelm@48115
  1173
  checks.  This includes additional syntactic categories (via
wenzelm@48115
  1174
  @{command nonterminal}) and free-form grammar productions (via
wenzelm@48115
  1175
  @{command syntax}).  Additional syntax translations (or macros, via
wenzelm@48115
  1176
  @{command translations}) are required to turn resulting parse trees
wenzelm@48115
  1177
  into proper representations of formal entities again.
wenzelm@46292
  1178
wenzelm@55112
  1179
  @{rail \<open>
wenzelm@42596
  1180
    @@{command nonterminal} (@{syntax name} + @'and')
wenzelm@28762
  1181
    ;
wenzelm@46494
  1182
    (@@{command syntax} | @@{command no_syntax}) @{syntax mode}? (constdecl +)
wenzelm@28762
  1183
    ;
wenzelm@42596
  1184
    (@@{command translations} | @@{command no_translations})
wenzelm@42596
  1185
      (transpat ('==' | '=>' | '<=' | '\<rightleftharpoons>' | '\<rightharpoonup>' | '\<leftharpoondown>') transpat +)
wenzelm@28762
  1186
    ;
wenzelm@28762
  1187
wenzelm@46494
  1188
    constdecl: @{syntax name} '::' @{syntax type} @{syntax mixfix}?
wenzelm@46494
  1189
    ;
wenzelm@42596
  1190
    mode: ('(' ( @{syntax name} | @'output' | @{syntax name} @'output' ) ')')
wenzelm@28762
  1191
    ;
wenzelm@42596
  1192
    transpat: ('(' @{syntax nameref} ')')? @{syntax string}
wenzelm@55112
  1193
  \<close>}
wenzelm@28762
  1194
wenzelm@28762
  1195
  \begin{description}
wenzelm@46282
  1196
wenzelm@41229
  1197
  \item @{command "nonterminal"}~@{text c} declares a type
wenzelm@28762
  1198
  constructor @{text c} (without arguments) to act as purely syntactic
wenzelm@28762
  1199
  type: a nonterminal symbol of the inner syntax.
wenzelm@28762
  1200
wenzelm@46292
  1201
  \item @{command "syntax"}~@{text "(mode) c :: \<sigma> (mx)"} augments the
wenzelm@46292
  1202
  priority grammar and the pretty printer table for the given print
wenzelm@58724
  1203
  mode (default @{verbatim \<open>""\<close>}). An optional keyword @{keyword_ref
wenzelm@46292
  1204
  "output"} means that only the pretty printer table is affected.
wenzelm@46292
  1205
wenzelm@46292
  1206
  Following \secref{sec:mixfix}, the mixfix annotation @{text "mx =
wenzelm@46292
  1207
  template ps q"} together with type @{text "\<sigma> = \<tau>\<^sub>1 \<Rightarrow> \<dots> \<tau>\<^sub>n \<Rightarrow> \<tau>"}
wenzelm@46292
  1208
  specify a grammar production.  The @{text template} contains
wenzelm@46292
  1209
  delimiter tokens that surround @{text "n"} argument positions
wenzelm@46292
  1210
  (@{verbatim "_"}).  The latter correspond to nonterminal symbols
wenzelm@46292
  1211
  @{text "A\<^sub>i"} derived from the argument types @{text "\<tau>\<^sub>i"} as
wenzelm@46292
  1212
  follows:
wenzelm@46292
  1213
  \begin{itemize}
wenzelm@46292
  1214
wenzelm@46292
  1215
  \item @{text "prop"} if @{text "\<tau>\<^sub>i = prop"}
wenzelm@46292
  1216
wenzelm@46292
  1217
  \item @{text "logic"} if @{text "\<tau>\<^sub>i = (\<dots>)\<kappa>"} for logical type
wenzelm@46292
  1218
  constructor @{text "\<kappa> \<noteq> prop"}
wenzelm@46292
  1219
wenzelm@46292
  1220
  \item @{text any} if @{text "\<tau>\<^sub>i = \<alpha>"} for type variables
wenzelm@46292
  1221
wenzelm@46292
  1222
  \item @{text "\<kappa>"} if @{text "\<tau>\<^sub>i = \<kappa>"} for nonterminal @{text "\<kappa>"}
wenzelm@46292
  1223
  (syntactic type constructor)
wenzelm@46292
  1224
wenzelm@46292
  1225
  \end{itemize}
wenzelm@46292
  1226
wenzelm@46292
  1227
  Each @{text "A\<^sub>i"} is decorated by priority @{text "p\<^sub>i"} from the
wenzelm@56582
  1228
  given list @{text "ps"}; missing priorities default to 0.
wenzelm@46292
  1229
wenzelm@46292
  1230
  The resulting nonterminal of the production is determined similarly
wenzelm@46292
  1231
  from type @{text "\<tau>"}, with priority @{text "q"} and default 1000.
wenzelm@46292
  1232
wenzelm@46292
  1233
  \medskip Parsing via this production produces parse trees @{text
wenzelm@46292
  1234
  "t\<^sub>1, \<dots>, t\<^sub>n"} for the argument slots.  The resulting parse tree is
wenzelm@46292
  1235
  composed as @{text "c t\<^sub>1 \<dots> t\<^sub>n"}, by using the syntax constant @{text
wenzelm@46292
  1236
  "c"} of the syntax declaration.
wenzelm@46292
  1237
wenzelm@46292
  1238
  Such syntactic constants are invented on the spot, without formal
wenzelm@46292
  1239
  check wrt.\ existing declarations.  It is conventional to use plain
wenzelm@46292
  1240
  identifiers prefixed by a single underscore (e.g.\ @{text
wenzelm@46292
  1241
  "_foobar"}).  Names should be chosen with care, to avoid clashes
wenzelm@48816
  1242
  with other syntax declarations.
wenzelm@46292
  1243
wenzelm@46292
  1244
  \medskip The special case of copy production is specified by @{text
wenzelm@58724
  1245
  "c = "}@{verbatim \<open>""\<close>} (empty string).  It means that the
wenzelm@46292
  1246
  resulting parse tree @{text "t"} is copied directly, without any
wenzelm@46292
  1247
  further decoration.
wenzelm@46282
  1248
wenzelm@28762
  1249
  \item @{command "no_syntax"}~@{text "(mode) decls"} removes grammar
wenzelm@28762
  1250
  declarations (and translations) resulting from @{text decls}, which
wenzelm@28762
  1251
  are interpreted in the same manner as for @{command "syntax"} above.
wenzelm@46282
  1252
wenzelm@28762
  1253
  \item @{command "translations"}~@{text rules} specifies syntactic
wenzelm@48115
  1254
  translation rules (i.e.\ macros) as first-order rewrite rules on
wenzelm@48816
  1255
  ASTs (\secref{sec:ast}).  The theory context maintains two
wenzelm@48115
  1256
  independent lists of translation rules: parse rules (@{verbatim "=>"}
wenzelm@48115
  1257
  or @{text "\<rightharpoonup>"}) and print rules (@{verbatim "<="} or @{text "\<leftharpoondown>"}).
wenzelm@48115
  1258
  For convenience, both can be specified simultaneously as parse~/
wenzelm@48115
  1259
  print rules (@{verbatim "=="} or @{text "\<rightleftharpoons>"}).
wenzelm@48115
  1260
wenzelm@28762
  1261
  Translation patterns may be prefixed by the syntactic category to be
wenzelm@48115
  1262
  used for parsing; the default is @{text logic} which means that
wenzelm@48115
  1263
  regular term syntax is used.  Both sides of the syntax translation
wenzelm@48115
  1264
  rule undergo parsing and parse AST translations
wenzelm@48115
  1265
  (\secref{sec:tr-funs}), in order to perform some fundamental
wenzelm@48115
  1266
  normalization like @{text "\<lambda>x y. b \<leadsto> \<lambda>x. \<lambda>y. b"}, but other AST
wenzelm@48115
  1267
  translation rules are \emph{not} applied recursively here.
wenzelm@48115
  1268
wenzelm@48115
  1269
  When processing AST patterns, the inner syntax lexer runs in a
wenzelm@48115
  1270
  different mode that allows identifiers to start with underscore.
wenzelm@48115
  1271
  This accommodates the usual naming convention for auxiliary syntax
wenzelm@48115
  1272
  constants --- those that do not have a logical counterpart --- by
wenzelm@48115
  1273
  allowing to specify arbitrary AST applications within the term
wenzelm@48115
  1274
  syntax, independently of the corresponding concrete syntax.
wenzelm@48115
  1275
wenzelm@48115
  1276
  Atomic ASTs are distinguished as @{ML Ast.Constant} versus @{ML
wenzelm@48115
  1277
  Ast.Variable} as follows: a qualified name or syntax constant
wenzelm@48115
  1278
  declared via @{command syntax}, or parse tree head of concrete
wenzelm@48115
  1279
  notation becomes @{ML Ast.Constant}, anything else @{ML
wenzelm@48115
  1280
  Ast.Variable}.  Note that @{text CONST} and @{text XCONST} within
wenzelm@48115
  1281
  the term language (\secref{sec:pure-grammar}) allow to enforce
wenzelm@48115
  1282
  treatment as constants.
wenzelm@48115
  1283
wenzelm@48115
  1284
  AST rewrite rules @{text "(lhs, rhs)"} need to obey the following
wenzelm@48115
  1285
  side-conditions:
wenzelm@48115
  1286
wenzelm@48115
  1287
  \begin{itemize}
wenzelm@48115
  1288
wenzelm@48115
  1289
  \item Rules must be left linear: @{text "lhs"} must not contain
wenzelm@48115
  1290
  repeated variables.\footnote{The deeper reason for this is that AST
wenzelm@48115
  1291
  equality is not well-defined: different occurrences of the ``same''
wenzelm@48115
  1292
  AST could be decorated differently by accidental type-constraints or
wenzelm@48115
  1293
  source position information, for example.}
wenzelm@48115
  1294
wenzelm@48115
  1295
  \item Every variable in @{text "rhs"} must also occur in @{text
wenzelm@48115
  1296
  "lhs"}.
wenzelm@48115
  1297
wenzelm@48115
  1298
  \end{itemize}
wenzelm@46282
  1299
wenzelm@28762
  1300
  \item @{command "no_translations"}~@{text rules} removes syntactic
wenzelm@28762
  1301
  translation rules, which are interpreted in the same manner as for
wenzelm@28762
  1302
  @{command "translations"} above.
wenzelm@28762
  1303
wenzelm@48117
  1304
  \item @{attribute syntax_ast_trace} and @{attribute
wenzelm@48117
  1305
  syntax_ast_stats} control diagnostic output in the AST normalization
wenzelm@48117
  1306
  process, when translation rules are applied to concrete input or
wenzelm@48117
  1307
  output.
wenzelm@48117
  1308
wenzelm@28762
  1309
  \end{description}
wenzelm@46293
  1310
wenzelm@46293
  1311
  Raw syntax and translations provide a slightly more low-level
wenzelm@46293
  1312
  access to the grammar and the form of resulting parse trees.  It is
wenzelm@46293
  1313
  often possible to avoid this untyped macro mechanism, and use
wenzelm@46293
  1314
  type-safe @{command abbreviation} or @{command notation} instead.
wenzelm@46293
  1315
  Some important situations where @{command syntax} and @{command
wenzelm@46293
  1316
  translations} are really needed are as follows:
wenzelm@46293
  1317
wenzelm@46293
  1318
  \begin{itemize}
wenzelm@46293
  1319
wenzelm@46293
  1320
  \item Iterated replacement via recursive @{command translations}.
wenzelm@46293
  1321
  For example, consider list enumeration @{term "[a, b, c, d]"} as
wenzelm@46293
  1322
  defined in theory @{theory List} in Isabelle/HOL.
wenzelm@46293
  1323
wenzelm@46293
  1324
  \item Change of binding status of variables: anything beyond the
wenzelm@46293
  1325
  built-in @{keyword "binder"} mixfix annotation requires explicit
wenzelm@46293
  1326
  syntax translations.  For example, consider list filter
wenzelm@46293
  1327
  comprehension @{term "[x \<leftarrow> xs . P]"} as defined in theory @{theory
wenzelm@46293
  1328
  List} in Isabelle/HOL.
wenzelm@46293
  1329
wenzelm@46293
  1330
  \end{itemize}
wenzelm@58618
  1331
\<close>
wenzelm@28762
  1332
wenzelm@58618
  1333
subsubsection \<open>Applying translation rules\<close>
wenzelm@48117
  1334
wenzelm@58618
  1335
text \<open>As a term is being parsed or printed, an AST is generated as
wenzelm@48117
  1336
  an intermediate form according to \figref{fig:parse-print}.  The AST
wenzelm@48117
  1337
  is normalized by applying translation rules in the manner of a
wenzelm@48117
  1338
  first-order term rewriting system.  We first examine how a single
wenzelm@48117
  1339
  rule is applied.
wenzelm@48117
  1340
wenzelm@48117
  1341
  Let @{text "t"} be the abstract syntax tree to be normalized and
wenzelm@48117
  1342
  @{text "(lhs, rhs)"} some translation rule.  A subtree @{text "u"}
wenzelm@48117
  1343
  of @{text "t"} is called \emph{redex} if it is an instance of @{text
wenzelm@48117
  1344
  "lhs"}; in this case the pattern @{text "lhs"} is said to match the
wenzelm@48117
  1345
  object @{text "u"}.  A redex matched by @{text "lhs"} may be
wenzelm@48117
  1346
  replaced by the corresponding instance of @{text "rhs"}, thus
wenzelm@48117
  1347
  \emph{rewriting} the AST @{text "t"}.  Matching requires some notion
wenzelm@48117
  1348
  of \emph{place-holders} in rule patterns: @{ML Ast.Variable} serves
wenzelm@48117
  1349
  this purpose.
wenzelm@48117
  1350
wenzelm@48117
  1351
  More precisely, the matching of the object @{text "u"} against the
wenzelm@48117
  1352
  pattern @{text "lhs"} is performed as follows:
wenzelm@48117
  1353
wenzelm@48117
  1354
  \begin{itemize}
wenzelm@48117
  1355
wenzelm@48117
  1356
  \item Objects of the form @{ML Ast.Variable}~@{text "x"} or @{ML
wenzelm@48117
  1357
  Ast.Constant}~@{text "x"} are matched by pattern @{ML
wenzelm@48117
  1358
  Ast.Constant}~@{text "x"}.  Thus all atomic ASTs in the object are
wenzelm@48117
  1359
  treated as (potential) constants, and a successful match makes them
wenzelm@48117
  1360
  actual constants even before name space resolution (see also
wenzelm@48117
  1361
  \secref{sec:ast}).
wenzelm@48117
  1362
wenzelm@48117
  1363
  \item Object @{text "u"} is matched by pattern @{ML
wenzelm@48117
  1364
  Ast.Variable}~@{text "x"}, binding @{text "x"} to @{text "u"}.
wenzelm@48117
  1365
wenzelm@48117
  1366
  \item Object @{ML Ast.Appl}~@{text "us"} is matched by @{ML
wenzelm@48117
  1367
  Ast.Appl}~@{text "ts"} if @{text "us"} and @{text "ts"} have the
wenzelm@48117
  1368
  same length and each corresponding subtree matches.
wenzelm@48117
  1369
wenzelm@48117
  1370
  \item In every other case, matching fails.
wenzelm@48117
  1371
wenzelm@48117
  1372
  \end{itemize}
wenzelm@48117
  1373
wenzelm@48117
  1374
  A successful match yields a substitution that is applied to @{text
wenzelm@48117
  1375
  "rhs"}, generating the instance that replaces @{text "u"}.
wenzelm@48117
  1376
wenzelm@48117
  1377
  Normalizing an AST involves repeatedly applying translation rules
wenzelm@48117
  1378
  until none are applicable.  This works yoyo-like: top-down,
wenzelm@48117
  1379
  bottom-up, top-down, etc.  At each subtree position, rules are
wenzelm@48117
  1380
  chosen in order of appearance in the theory definitions.
wenzelm@48117
  1381
wenzelm@48117
  1382
  The configuration options @{attribute syntax_ast_trace} and
wenzelm@48816
  1383
  @{attribute syntax_ast_stats} might help to understand this process
wenzelm@48117
  1384
  and diagnose problems.
wenzelm@48117
  1385
wenzelm@48117
  1386
  \begin{warn}
wenzelm@48117
  1387
  If syntax translation rules work incorrectly, the output of
wenzelm@48118
  1388
  @{command_ref print_syntax} with its \emph{rules} sections reveals the
wenzelm@48117
  1389
  actual internal forms of AST patterns, without potentially confusing
wenzelm@48117
  1390
  concrete syntax.  Recall that AST constants appear as quoted strings
wenzelm@48117
  1391
  and variables without quotes.
wenzelm@48117
  1392
  \end{warn}
wenzelm@48117
  1393
wenzelm@48117
  1394
  \begin{warn}
wenzelm@48117
  1395
  If @{attribute_ref eta_contract} is set to @{text "true"}, terms
wenzelm@48117
  1396
  will be @{text "\<eta>"}-contracted \emph{before} the AST rewriter sees
wenzelm@48117
  1397
  them.  Thus some abstraction nodes needed for print rules to match
wenzelm@48117
  1398
  may vanish.  For example, @{text "Ball A (\<lambda>x. P x)"} would contract
wenzelm@48117
  1399
  to @{text "Ball A P"} and the standard print rule would fail to
wenzelm@48117
  1400
  apply.  This problem can be avoided by hand-written ML translation
wenzelm@48117
  1401
  functions (see also \secref{sec:tr-funs}), which is in fact the same
wenzelm@48117
  1402
  mechanism used in built-in @{keyword "binder"} declarations.
wenzelm@48117
  1403
  \end{warn}
wenzelm@58618
  1404
\<close>
wenzelm@48117
  1405
wenzelm@28762
  1406
wenzelm@58618
  1407
subsection \<open>Syntax translation functions \label{sec:tr-funs}\<close>
wenzelm@28762
  1408
wenzelm@58618
  1409
text \<open>
wenzelm@28762
  1410
  \begin{matharray}{rcl}
wenzelm@28762
  1411
    @{command_def "parse_ast_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1412
    @{command_def "parse_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1413
    @{command_def "print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1414
    @{command_def "typed_print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1415
    @{command_def "print_ast_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@56186
  1416
    @{ML_antiquotation_def "class_syntax"} & : & @{text "ML antiquotation"} \\
wenzelm@56186
  1417
    @{ML_antiquotation_def "type_syntax"} & : & @{text "ML antiquotation"} \\
wenzelm@56186
  1418
    @{ML_antiquotation_def "const_syntax"} & : & @{text "ML antiquotation"} \\
wenzelm@56186
  1419
    @{ML_antiquotation_def "syntax_const"} & : & @{text "ML antiquotation"} \\
wenzelm@28762
  1420
  \end{matharray}
wenzelm@28762
  1421
wenzelm@48118
  1422
  Syntax translation functions written in ML admit almost arbitrary
wenzelm@48118
  1423
  manipulations of inner syntax, at the expense of some complexity and
wenzelm@48118
  1424
  obscurity in the implementation.
wenzelm@48118
  1425
wenzelm@55112
  1426
  @{rail \<open>
wenzelm@42596
  1427
  ( @@{command parse_ast_translation} | @@{command parse_translation} |
wenzelm@42596
  1428
    @@{command print_translation} | @@{command typed_print_translation} |
wenzelm@52143
  1429
    @@{command print_ast_translation}) @{syntax text}
wenzelm@48119
  1430
  ;
wenzelm@48119
  1431
  (@@{ML_antiquotation class_syntax} |
wenzelm@48119
  1432
   @@{ML_antiquotation type_syntax} |
wenzelm@48119
  1433
   @@{ML_antiquotation const_syntax} |
wenzelm@48119
  1434
   @@{ML_antiquotation syntax_const}) name
wenzelm@55112
  1435
  \<close>}
wenzelm@28762
  1436
wenzelm@48119
  1437
  \begin{description}
wenzelm@48119
  1438
wenzelm@48119
  1439
  \item @{command parse_translation} etc. declare syntax translation
wenzelm@48119
  1440
  functions to the theory.  Each of these commands has a single
wenzelm@48119
  1441
  @{syntax text} argument that refers to an ML expression of
wenzelm@52413
  1442
  appropriate type as follows:
wenzelm@48118
  1443
wenzelm@48118
  1444
  \medskip
wenzelm@48119
  1445
  {\footnotesize
wenzelm@52143
  1446
  \begin{tabular}{l}
wenzelm@52143
  1447
  @{command parse_ast_translation} : \\
wenzelm@52143
  1448
  \quad @{ML_type "(string * (Proof.context -> Ast.ast list -> Ast.ast)) list"} \\
wenzelm@52143
  1449
  @{command parse_translation} : \\
wenzelm@52143
  1450
  \quad @{ML_type "(string * (Proof.context -> term list -> term)) list"} \\
wenzelm@52143
  1451
  @{command print_translation} : \\
wenzelm@52143
  1452
  \quad @{ML_type "(string * (Proof.context -> term list -> term)) list"} \\
wenzelm@52143
  1453
  @{command typed_print_translation} : \\
wenzelm@52143
  1454
  \quad @{ML_type "(string * (Proof.context -> typ -> term list -> term)) list"} \\
wenzelm@52143
  1455
  @{command print_ast_translation} : \\
wenzelm@52143
  1456
  \quad @{ML_type "(string * (Proof.context -> Ast.ast list -> Ast.ast)) list"} \\
wenzelm@48118
  1457
  \end{tabular}}
wenzelm@48118
  1458
  \medskip
wenzelm@28762
  1459
wenzelm@48816
  1460
  The argument list consists of @{text "(c, tr)"} pairs, where @{text
wenzelm@48816
  1461
  "c"} is the syntax name of the formal entity involved, and @{text
wenzelm@48816
  1462
  "tr"} a function that translates a syntax form @{text "c args"} into
wenzelm@52413
  1463
  @{text "tr ctxt args"} (depending on the context).  The Isabelle/ML
wenzelm@52413
  1464
  naming convention for parse translations is @{text "c_tr"} and for
wenzelm@52413
  1465
  print translations @{text "c_tr'"}.
wenzelm@48118
  1466
wenzelm@48118
  1467
  The @{command_ref print_syntax} command displays the sets of names
wenzelm@48118
  1468
  associated with the translation functions of a theory under @{text
wenzelm@48118
  1469
  "parse_ast_translation"} etc.
wenzelm@48118
  1470
wenzelm@48119
  1471
  \item @{text "@{class_syntax c}"}, @{text "@{type_syntax c}"},
wenzelm@48119
  1472
  @{text "@{const_syntax c}"} inline the authentic syntax name of the
wenzelm@48119
  1473
  given formal entities into the ML source.  This is the
wenzelm@48119
  1474
  fully-qualified logical name prefixed by a special marker to
wenzelm@48119
  1475
  indicate its kind: thus different logical name spaces are properly
wenzelm@48119
  1476
  distinguished within parse trees.
wenzelm@48119
  1477
wenzelm@48119
  1478
  \item @{text "@{syntax_const c}"} inlines the name @{text "c"} of
wenzelm@48119
  1479
  the given syntax constant, having checked that it has been declared
wenzelm@48119
  1480
  via some @{command syntax} commands within the theory context.  Note
wenzelm@48119
  1481
  that the usual naming convention makes syntax constants start with
wenzelm@48119
  1482
  underscore, to reduce the chance of accidental clashes with other
wenzelm@48119
  1483
  names occurring in parse trees (unqualified constants etc.).
wenzelm@48119
  1484
wenzelm@48119
  1485
  \end{description}
wenzelm@58618
  1486
\<close>
wenzelm@48118
  1487
wenzelm@48119
  1488
wenzelm@58618
  1489
subsubsection \<open>The translation strategy\<close>
wenzelm@28762
  1490
wenzelm@58618
  1491
text \<open>The different kinds of translation functions are invoked during
wenzelm@48118
  1492
  the transformations between parse trees, ASTs and syntactic terms
wenzelm@48118
  1493
  (cf.\ \figref{fig:parse-print}).  Whenever a combination of the form
wenzelm@48118
  1494
  @{text "c x\<^sub>1 \<dots> x\<^sub>n"} is encountered, and a translation function
wenzelm@48118
  1495
  @{text "f"} of appropriate kind is declared for @{text "c"}, the
wenzelm@48118
  1496
  result is produced by evaluation of @{text "f [x\<^sub>1, \<dots>, x\<^sub>n]"} in ML.
wenzelm@48118
  1497
wenzelm@48118
  1498
  For AST translations, the arguments @{text "x\<^sub>1, \<dots>, x\<^sub>n"} are ASTs.  A
wenzelm@48118
  1499
  combination has the form @{ML "Ast.Constant"}~@{text "c"} or @{ML
wenzelm@48118
  1500
  "Ast.Appl"}~@{text "["}@{ML Ast.Constant}~@{text "c, x\<^sub>1, \<dots>, x\<^sub>n]"}.
wenzelm@48118
  1501
  For term translations, the arguments are terms and a combination has
wenzelm@48118
  1502
  the form @{ML Const}~@{text "(c, \<tau>)"} or @{ML Const}~@{text "(c, \<tau>)
wenzelm@48118
  1503
  $ x\<^sub>1 $ \<dots> $ x\<^sub>n"}.  Terms allow more sophisticated transformations
wenzelm@48118
  1504
  than ASTs do, typically involving abstractions and bound
wenzelm@48118
  1505
  variables. \emph{Typed} print translations may even peek at the type
wenzelm@52413
  1506
  @{text "\<tau>"} of the constant they are invoked on, although some
wenzelm@52413
  1507
  information might have been suppressed for term output already.
wenzelm@48118
  1508
wenzelm@48118
  1509
  Regardless of whether they act on ASTs or terms, translation
wenzelm@48118
  1510
  functions called during the parsing process differ from those for
wenzelm@48118
  1511
  printing in their overall behaviour:
wenzelm@48118
  1512
wenzelm@48118
  1513
  \begin{description}
wenzelm@28762
  1514
wenzelm@48118
  1515
  \item [Parse translations] are applied bottom-up.  The arguments are
wenzelm@48118
  1516
  already in translated form.  The translations must not fail;
wenzelm@48118
  1517
  exceptions trigger an error message.  There may be at most one
wenzelm@48118
  1518
  function associated with any syntactic name.
wenzelm@46294
  1519
wenzelm@48118
  1520
  \item [Print translations] are applied top-down.  They are supplied
wenzelm@48118
  1521
  with arguments that are partly still in internal form.  The result
wenzelm@48118
  1522
  again undergoes translation; therefore a print translation should
wenzelm@48118
  1523
  not introduce as head the very constant that invoked it.  The
wenzelm@48118
  1524
  function may raise exception @{ML Match} to indicate failure; in
wenzelm@48118
  1525
  this event it has no effect.  Multiple functions associated with
wenzelm@48118
  1526
  some syntactic name are tried in the order of declaration in the
wenzelm@48118
  1527
  theory.
wenzelm@48118
  1528
wenzelm@48118
  1529
  \end{description}
wenzelm@48118
  1530
wenzelm@48118
  1531
  Only constant atoms --- constructor @{ML Ast.Constant} for ASTs and
wenzelm@48118
  1532
  @{ML Const} for terms --- can invoke translation functions.  This
wenzelm@48118
  1533
  means that parse translations can only be associated with parse tree
wenzelm@48118
  1534
  heads of concrete syntax, or syntactic constants introduced via
wenzelm@48118
  1535
  other translations.  For plain identifiers within the term language,
wenzelm@48118
  1536
  the status of constant versus variable is not yet known during
wenzelm@48118
  1537
  parsing.  This is in contrast to print translations, where constants
wenzelm@48118
  1538
  are explicitly known from the given term in its fully internal form.
wenzelm@58618
  1539
\<close>
wenzelm@28762
  1540
wenzelm@52414
  1541
wenzelm@58618
  1542
subsection \<open>Built-in syntax transformations\<close>
wenzelm@52414
  1543
wenzelm@58618
  1544
text \<open>
wenzelm@52414
  1545
  Here are some further details of the main syntax transformation
wenzelm@52414
  1546
  phases of \figref{fig:parse-print}.
wenzelm@58618
  1547
\<close>
wenzelm@52414
  1548
wenzelm@52414
  1549
wenzelm@58618
  1550
subsubsection \<open>Transforming parse trees to ASTs\<close>
wenzelm@52414
  1551
wenzelm@58618
  1552
text \<open>The parse tree is the raw output of the parser.  It is
wenzelm@52414
  1553
  transformed into an AST according to some basic scheme that may be
wenzelm@52414
  1554
  augmented by AST translation functions as explained in
wenzelm@52414
  1555
  \secref{sec:tr-funs}.
wenzelm@52414
  1556
wenzelm@52414
  1557
  The parse tree is constructed by nesting the right-hand sides of the
wenzelm@52414
  1558
  productions used to recognize the input.  Such parse trees are
wenzelm@52414
  1559
  simply lists of tokens and constituent parse trees, the latter
wenzelm@52414
  1560
  representing the nonterminals of the productions.  Ignoring AST
wenzelm@52414
  1561
  translation functions, parse trees are transformed to ASTs by
wenzelm@52414
  1562
  stripping out delimiters and copy productions, while retaining some
wenzelm@52414
  1563
  source position information from input tokens.
wenzelm@52414
  1564
wenzelm@52414
  1565
  The Pure syntax provides predefined AST translations to make the
wenzelm@52414
  1566
  basic @{text "\<lambda>"}-term structure more apparent within the
wenzelm@52414
  1567
  (first-order) AST representation, and thus facilitate the use of
wenzelm@52414
  1568
  @{command translations} (see also \secref{sec:syn-trans}).  This
wenzelm@52414
  1569
  covers ordinary term application, type application, nested
wenzelm@52414
  1570
  abstraction, iterated meta implications and function types.  The
wenzelm@52414
  1571
  effect on some representative input strings is illustrated as
wenzelm@52414
  1572
  follows:
wenzelm@52414
  1573
wenzelm@52414
  1574
  \begin{center}
wenzelm@52414
  1575
  \begin{tabular}{ll}
wenzelm@52414
  1576
  input source & AST \\
wenzelm@52414
  1577
  \hline
wenzelm@52414
  1578
  @{text "f x y z"} & @{verbatim "(f x y z)"} \\
wenzelm@52414
  1579
  @{text "'a ty"} & @{verbatim "(ty 'a)"} \\
wenzelm@52414
  1580
  @{text "('a, 'b)ty"} & @{verbatim "(ty 'a 'b)"} \\
wenzelm@58724
  1581
  @{text "\<lambda>x y z. t"} & @{verbatim \<open>("_abs" x ("_abs" y ("_abs" z t)))\<close>} \\
wenzelm@58724
  1582
  @{text "\<lambda>x :: 'a. t"} & @{verbatim \<open>("_abs" ("_constrain" x 'a) t)\<close>} \\
wenzelm@58726
  1583
  @{text "\<lbrakk>P; Q; R\<rbrakk> \<Longrightarrow> S"} & @{verbatim \<open>("Pure.imp" P ("Pure.imp" Q ("Pure.imp" R S)))\<close>} \\
wenzelm@58724
  1584
   @{text "['a, 'b, 'c] \<Rightarrow> 'd"} & @{verbatim \<open>("fun" 'a ("fun" 'b ("fun" 'c 'd)))\<close>} \\
wenzelm@52414
  1585
  \end{tabular}
wenzelm@52414
  1586
  \end{center}
wenzelm@52414
  1587
wenzelm@52414
  1588
  Note that type and sort constraints may occur in further places ---
wenzelm@52414
  1589
  translations need to be ready to cope with them.  The built-in
wenzelm@52414
  1590
  syntax transformation from parse trees to ASTs inserts additional
wenzelm@52414
  1591
  constraints that represent source positions.
wenzelm@58618
  1592
\<close>
wenzelm@52414
  1593
wenzelm@52414
  1594
wenzelm@58618
  1595
subsubsection \<open>Transforming ASTs to terms\<close>
wenzelm@52414
  1596
wenzelm@58618
  1597
text \<open>After application of macros (\secref{sec:syn-trans}), the AST
wenzelm@52414
  1598
  is transformed into a term.  This term still lacks proper type
wenzelm@52414
  1599
  information, but it might contain some constraints consisting of
wenzelm@52414
  1600
  applications with head @{verbatim "_constrain"}, where the second
wenzelm@52414
  1601
  argument is a type encoded as a pre-term within the syntax.  Type
wenzelm@52414
  1602
  inference later introduces correct types, or indicates type errors
wenzelm@52414
  1603
  in the input.
wenzelm@52414
  1604
wenzelm@52414
  1605
  Ignoring parse translations, ASTs are transformed to terms by
wenzelm@52414
  1606
  mapping AST constants to term constants, AST variables to term
wenzelm@52414
  1607
  variables or constants (according to the name space), and AST
wenzelm@52414
  1608
  applications to iterated term applications.
wenzelm@52414
  1609
wenzelm@52414
  1610
  The outcome is still a first-order term.  Proper abstractions and
wenzelm@52414
  1611
  bound variables are introduced by parse translations associated with
wenzelm@58724
  1612
  certain syntax constants.  Thus @{verbatim \<open>("_abs" x x)\<close>} eventually
wenzelm@58724
  1613
  becomes a de-Bruijn term @{verbatim \<open>Abs ("x", _, Bound 0)\<close>}.
wenzelm@58618
  1614
\<close>
wenzelm@52414
  1615
wenzelm@52414
  1616
wenzelm@58618
  1617
subsubsection \<open>Printing of terms\<close>
wenzelm@52414
  1618
wenzelm@58618
  1619
text \<open>The output phase is essentially the inverse of the input
wenzelm@52414
  1620
  phase.  Terms are translated via abstract syntax trees into
wenzelm@52414
  1621
  pretty-printed text.
wenzelm@52414
  1622
wenzelm@52414
  1623
  Ignoring print translations, the transformation maps term constants,
wenzelm@52414
  1624
  variables and applications to the corresponding constructs on ASTs.
wenzelm@52414
  1625
  Abstractions are mapped to applications of the special constant
wenzelm@52414
  1626
  @{verbatim "_abs"} as seen before.  Type constraints are represented
wenzelm@52414
  1627
  via special @{verbatim "_constrain"} forms, according to various
wenzelm@52414
  1628
  policies of type annotation determined elsewhere.  Sort constraints
wenzelm@52414
  1629
  of type variables are handled in a similar fashion.
wenzelm@52414
  1630
wenzelm@52414
  1631
  After application of macros (\secref{sec:syn-trans}), the AST is
wenzelm@52414
  1632
  finally pretty-printed.  The built-in print AST translations reverse
wenzelm@52414
  1633
  the corresponding parse AST translations.
wenzelm@52414
  1634
wenzelm@52414
  1635
  \medskip For the actual printing process, the priority grammar
wenzelm@52414
  1636
  (\secref{sec:priority-grammar}) plays a vital role: productions are
wenzelm@52414
  1637
  used as templates for pretty printing, with argument slots stemming
wenzelm@52414
  1638
  from nonterminals, and syntactic sugar stemming from literal tokens.
wenzelm@52414
  1639
wenzelm@52414
  1640
  Each AST application with constant head @{text "c"} and arguments
wenzelm@52414
  1641
  @{text "t\<^sub>1"}, \dots, @{text "t\<^sub>n"} (for @{text "n = 0"} the AST is
wenzelm@52414
  1642
  just the constant @{text "c"} itself) is printed according to the
wenzelm@52414
  1643
  first grammar production of result name @{text "c"}.  The required
wenzelm@52414
  1644
  syntax priority of the argument slot is given by its nonterminal
wenzelm@52414
  1645
  @{text "A\<^sup>(\<^sup>p\<^sup>)"}.  The argument @{text "t\<^sub>i"} that corresponds to the
wenzelm@52414
  1646
  position of @{text "A\<^sup>(\<^sup>p\<^sup>)"} is printed recursively, and then put in
wenzelm@52414
  1647
  parentheses \emph{if} its priority @{text "p"} requires this.  The
wenzelm@52414
  1648
  resulting output is concatenated with the syntactic sugar according
wenzelm@52414
  1649
  to the grammar production.
wenzelm@52414
  1650
wenzelm@52414
  1651
  If an AST application @{text "(c x\<^sub>1 \<dots> x\<^sub>m)"} has more arguments than
wenzelm@52414
  1652
  the corresponding production, it is first split into @{text "((c x\<^sub>1
wenzelm@52414
  1653
  \<dots> x\<^sub>n) x\<^sub>n\<^sub>+\<^sub>1 \<dots> x\<^sub>m)"} and then printed recursively as above.
wenzelm@52414
  1654
wenzelm@52414
  1655
  Applications with too few arguments or with non-constant head or
wenzelm@52414
  1656
  without a corresponding production are printed in prefix-form like
wenzelm@52414
  1657
  @{text "f t\<^sub>1 \<dots> t\<^sub>n"} for terms.
wenzelm@52414
  1658
wenzelm@52414
  1659
  Multiple productions associated with some name @{text "c"} are tried
wenzelm@52414
  1660
  in order of appearance within the grammar.  An occurrence of some
wenzelm@52414
  1661
  AST variable @{text "x"} is printed as @{text "x"} outright.
wenzelm@52414
  1662
wenzelm@52414
  1663
  \medskip White space is \emph{not} inserted automatically.  If
wenzelm@52414
  1664
  blanks (or breaks) are required to separate tokens, they need to be
wenzelm@52414
  1665
  specified in the mixfix declaration (\secref{sec:mixfix}).
wenzelm@58618
  1666
\<close>
wenzelm@52414
  1667
wenzelm@28762
  1668
end