doc-src/IsarRef/Thy/Inner_Syntax.thy
author wenzelm
Fri Feb 17 15:42:26 2012 +0100 (2012-02-17)
changeset 46512 4f9f61f9b535
parent 46506 c7faa011bfa7
child 48113 1c4500446ba4
permissions -rw-r--r--
simplified configuration options for syntax ambiguity;
wenzelm@28762
     1
theory Inner_Syntax
wenzelm@42651
     2
imports Base Main
wenzelm@28762
     3
begin
wenzelm@28762
     4
wenzelm@28778
     5
chapter {* Inner syntax --- the term language \label{ch:inner-syntax} *}
wenzelm@28762
     6
wenzelm@46282
     7
text {* The inner syntax of Isabelle provides concrete notation for
wenzelm@46282
     8
  the main entities of the logical framework, notably @{text
wenzelm@46282
     9
  "\<lambda>"}-terms with types and type classes.  Applications may either
wenzelm@46282
    10
  extend existing syntactic categories by additional notation, or
wenzelm@46282
    11
  define new sub-languages that are linked to the standard term
wenzelm@46282
    12
  language via some explicit markers.  For example @{verbatim
wenzelm@46282
    13
  FOO}~@{text "foo"} could embed the syntax corresponding to some
wenzelm@46282
    14
  user-defined nonterminal @{text "foo"} --- within the bounds of the
wenzelm@46282
    15
  given lexical syntax of Isabelle/Pure.
wenzelm@46282
    16
wenzelm@46282
    17
  The most basic way to specify concrete syntax for logical entities
wenzelm@46282
    18
  works via mixfix annotations (\secref{sec:mixfix}), which are
wenzelm@46282
    19
  usually given as part of the original declaration or via explicit
wenzelm@46282
    20
  notation commands later on (\secref{sec:notation}).  This already
wenzelm@46282
    21
  covers many needs of concrete syntax without having to understand
wenzelm@46282
    22
  the full complexity of inner syntax layers.
wenzelm@46282
    23
wenzelm@46282
    24
  Further details of the syntax engine involve the classical
wenzelm@46282
    25
  distinction of lexical language versus context-free grammar (see
wenzelm@46282
    26
  \secref{sec:pure-syntax}), and various mechanisms for \emph{syntax
wenzelm@46282
    27
  translations} --- either as rewrite systems on first-order ASTs
wenzelm@46282
    28
  (\secref{sec:syn-trans}) or ML functions on ASTs or @{text
wenzelm@46282
    29
  "\<lambda>"}-terms that represent parse trees (\secref{sec:tr-funs}).
wenzelm@46282
    30
*}
wenzelm@46282
    31
wenzelm@46282
    32
wenzelm@28762
    33
section {* Printing logical entities *}
wenzelm@28762
    34
wenzelm@46284
    35
subsection {* Diagnostic commands \label{sec:print-diag} *}
wenzelm@28762
    36
wenzelm@28762
    37
text {*
wenzelm@28762
    38
  \begin{matharray}{rcl}
wenzelm@28766
    39
    @{command_def "typ"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28762
    40
    @{command_def "term"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28762
    41
    @{command_def "prop"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28766
    42
    @{command_def "thm"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28762
    43
    @{command_def "prf"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28762
    44
    @{command_def "full_prf"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@28766
    45
    @{command_def "pr"}@{text "\<^sup>*"} & : & @{text "any \<rightarrow>"} \\
wenzelm@28762
    46
  \end{matharray}
wenzelm@28762
    47
wenzelm@28762
    48
  These diagnostic commands assist interactive development by printing
wenzelm@28762
    49
  internal logical entities in a human-readable fashion.
wenzelm@28762
    50
wenzelm@42596
    51
  @{rail "
wenzelm@42596
    52
    @@{command typ} @{syntax modes}? @{syntax type}
wenzelm@28762
    53
    ;
wenzelm@42596
    54
    @@{command term} @{syntax modes}? @{syntax term}
wenzelm@28762
    55
    ;
wenzelm@42596
    56
    @@{command prop} @{syntax modes}? @{syntax prop}
wenzelm@28762
    57
    ;
wenzelm@42596
    58
    @@{command thm} @{syntax modes}? @{syntax thmrefs}
wenzelm@28762
    59
    ;
wenzelm@42596
    60
    ( @@{command prf} | @@{command full_prf} ) @{syntax modes}? @{syntax thmrefs}?
wenzelm@28762
    61
    ;
wenzelm@42596
    62
    @@{command pr} @{syntax modes}? @{syntax nat}?
wenzelm@28762
    63
    ;
wenzelm@28762
    64
wenzelm@42596
    65
    @{syntax_def modes}: '(' (@{syntax name} + ) ')'
wenzelm@42596
    66
  "}
wenzelm@28762
    67
wenzelm@28762
    68
  \begin{description}
wenzelm@28762
    69
wenzelm@28766
    70
  \item @{command "typ"}~@{text \<tau>} reads and prints types of the
wenzelm@28766
    71
  meta-logic according to the current theory or proof context.
wenzelm@28766
    72
wenzelm@28766
    73
  \item @{command "term"}~@{text t} and @{command "prop"}~@{text \<phi>}
wenzelm@28766
    74
  read, type-check and print terms or propositions according to the
wenzelm@28766
    75
  current theory or proof context; the inferred type of @{text t} is
wenzelm@28766
    76
  output as well.  Note that these commands are also useful in
wenzelm@28766
    77
  inspecting the current environment of term abbreviations.
wenzelm@28762
    78
wenzelm@28762
    79
  \item @{command "thm"}~@{text "a\<^sub>1 \<dots> a\<^sub>n"} retrieves
wenzelm@28762
    80
  theorems from the current theory or proof context.  Note that any
wenzelm@28762
    81
  attributes included in the theorem specifications are applied to a
wenzelm@28762
    82
  temporary context derived from the current theory or proof; the
wenzelm@28762
    83
  result is discarded, i.e.\ attributes involved in @{text "a\<^sub>1,
wenzelm@28762
    84
  \<dots>, a\<^sub>n"} do not have any permanent effect.
wenzelm@28762
    85
wenzelm@28762
    86
  \item @{command "prf"} displays the (compact) proof term of the
wenzelm@28762
    87
  current proof state (if present), or of the given theorems. Note
wenzelm@28762
    88
  that this requires proof terms to be switched on for the current
wenzelm@28762
    89
  object logic (see the ``Proof terms'' section of the Isabelle
wenzelm@28762
    90
  reference manual for information on how to do this).
wenzelm@28762
    91
wenzelm@28762
    92
  \item @{command "full_prf"} is like @{command "prf"}, but displays
wenzelm@28762
    93
  the full proof term, i.e.\ also displays information omitted in the
wenzelm@28762
    94
  compact proof term, which is denoted by ``@{text _}'' placeholders
wenzelm@28762
    95
  there.
wenzelm@28762
    96
wenzelm@39165
    97
  \item @{command "pr"}~@{text "goals"} prints the current proof state
wenzelm@39165
    98
  (if present), including current facts and goals.  The optional limit
wenzelm@39165
    99
  arguments affect the number of goals to be displayed, which is
wenzelm@39165
   100
  initially 10.  Omitting the limit value leaves the current setting
wenzelm@39165
   101
  unchanged.
wenzelm@28766
   102
wenzelm@28762
   103
  \end{description}
wenzelm@28762
   104
wenzelm@28762
   105
  All of the diagnostic commands above admit a list of @{text modes}
wenzelm@42926
   106
  to be specified, which is appended to the current print mode; see
wenzelm@46284
   107
  also \secref{sec:print-modes}.  Thus the output behavior may be
wenzelm@46284
   108
  modified according to particular print mode features.  For example,
wenzelm@46284
   109
  @{command "pr"}~@{text "(latex xsymbols)"} would print the current
wenzelm@46284
   110
  proof state with mathematical symbols and special characters
wenzelm@46284
   111
  represented in {\LaTeX} source, according to the Isabelle style
wenzelm@28762
   112
  \cite{isabelle-sys}.
wenzelm@28762
   113
wenzelm@28762
   114
  Note that antiquotations (cf.\ \secref{sec:antiq}) provide a more
wenzelm@28762
   115
  systematic way to include formal items into the printed text
wenzelm@28762
   116
  document.
wenzelm@28762
   117
*}
wenzelm@28762
   118
wenzelm@28762
   119
wenzelm@28763
   120
subsection {* Details of printed content *}
wenzelm@28763
   121
wenzelm@28763
   122
text {*
wenzelm@42655
   123
  \begin{tabular}{rcll}
wenzelm@42655
   124
    @{attribute_def show_types} & : & @{text attribute} & default @{text false} \\
wenzelm@42655
   125
    @{attribute_def show_sorts} & : & @{text attribute} & default @{text false} \\
wenzelm@42655
   126
    @{attribute_def show_consts} & : & @{text attribute} & default @{text false} \\
wenzelm@42655
   127
    @{attribute_def show_abbrevs} & : & @{text attribute} & default @{text true} \\
wenzelm@42655
   128
    @{attribute_def show_brackets} & : & @{text attribute} & default @{text false} \\
wenzelm@42669
   129
    @{attribute_def names_long} & : & @{text attribute} & default @{text false} \\
wenzelm@42669
   130
    @{attribute_def names_short} & : & @{text attribute} & default @{text false} \\
wenzelm@42669
   131
    @{attribute_def names_unique} & : & @{text attribute} & default @{text true} \\
wenzelm@42655
   132
    @{attribute_def eta_contract} & : & @{text attribute} & default @{text true} \\
wenzelm@42655
   133
    @{attribute_def goals_limit} & : & @{text attribute} & default @{text 10} \\
wenzelm@42655
   134
    @{attribute_def show_main_goal} & : & @{text attribute} & default @{text false} \\
wenzelm@42655
   135
    @{attribute_def show_hyps} & : & @{text attribute} & default @{text false} \\
wenzelm@42655
   136
    @{attribute_def show_tags} & : & @{text attribute} & default @{text false} \\
wenzelm@42655
   137
    @{attribute_def show_question_marks} & : & @{text attribute} & default @{text true} \\
wenzelm@42655
   138
  \end{tabular}
wenzelm@42655
   139
  \medskip
wenzelm@28763
   140
wenzelm@42655
   141
  These configuration options control the detail of information that
wenzelm@42655
   142
  is displayed for types, terms, theorems, goals etc.  See also
wenzelm@42655
   143
  \secref{sec:config}.
wenzelm@28765
   144
wenzelm@28763
   145
  \begin{description}
wenzelm@28763
   146
wenzelm@42655
   147
  \item @{attribute show_types} and @{attribute show_sorts} control
wenzelm@42655
   148
  printing of type constraints for term variables, and sort
wenzelm@42655
   149
  constraints for type variables.  By default, neither of these is
wenzelm@42655
   150
  shown in output.  If @{attribute show_sorts} is enabled, types are
wenzelm@42655
   151
  always shown as well.
wenzelm@28763
   152
wenzelm@28763
   153
  Note that displaying types and sorts may explain why a polymorphic
wenzelm@28763
   154
  inference rule fails to resolve with some goal, or why a rewrite
wenzelm@28763
   155
  rule does not apply as expected.
wenzelm@28763
   156
wenzelm@42655
   157
  \item @{attribute show_consts} controls printing of types of
wenzelm@42655
   158
  constants when displaying a goal state.
wenzelm@28765
   159
wenzelm@28765
   160
  Note that the output can be enormous, because polymorphic constants
wenzelm@28765
   161
  often occur at several different type instances.
wenzelm@28763
   162
wenzelm@42655
   163
  \item @{attribute show_abbrevs} controls folding of constant
wenzelm@42655
   164
  abbreviations.
wenzelm@40879
   165
wenzelm@42655
   166
  \item @{attribute show_brackets} controls bracketing in pretty
wenzelm@42655
   167
  printed output.  If enabled, all sub-expressions of the pretty
wenzelm@28765
   168
  printing tree will be parenthesized, even if this produces malformed
wenzelm@28765
   169
  term syntax!  This crude way of showing the internal structure of
wenzelm@28765
   170
  pretty printed entities may occasionally help to diagnose problems
wenzelm@28765
   171
  with operator priorities, for example.
wenzelm@28763
   172
wenzelm@42669
   173
  \item @{attribute names_long}, @{attribute names_short}, and
wenzelm@42669
   174
  @{attribute names_unique} control the way of printing fully
wenzelm@42358
   175
  qualified internal names in external form.  See also
wenzelm@42358
   176
  \secref{sec:antiq} for the document antiquotation options of the
wenzelm@42358
   177
  same names.
wenzelm@42358
   178
wenzelm@42655
   179
  \item @{attribute eta_contract} controls @{text "\<eta>"}-contracted
wenzelm@42655
   180
  printing of terms.
wenzelm@28763
   181
wenzelm@28763
   182
  The @{text \<eta>}-contraction law asserts @{prop "(\<lambda>x. f x) \<equiv> f"},
wenzelm@28763
   183
  provided @{text x} is not free in @{text f}.  It asserts
wenzelm@28763
   184
  \emph{extensionality} of functions: @{prop "f \<equiv> g"} if @{prop "f x \<equiv>
wenzelm@28763
   185
  g x"} for all @{text x}.  Higher-order unification frequently puts
wenzelm@28763
   186
  terms into a fully @{text \<eta>}-expanded form.  For example, if @{text
wenzelm@28763
   187
  F} has type @{text "(\<tau> \<Rightarrow> \<tau>) \<Rightarrow> \<tau>"} then its expanded form is @{term
wenzelm@28763
   188
  "\<lambda>h. F (\<lambda>x. h x)"}.
wenzelm@28763
   189
wenzelm@42655
   190
  Enabling @{attribute eta_contract} makes Isabelle perform @{text
wenzelm@28763
   191
  \<eta>}-contractions before printing, so that @{term "\<lambda>h. F (\<lambda>x. h x)"}
wenzelm@28763
   192
  appears simply as @{text F}.
wenzelm@28763
   193
wenzelm@28763
   194
  Note that the distinction between a term and its @{text \<eta>}-expanded
wenzelm@28765
   195
  form occasionally matters.  While higher-order resolution and
wenzelm@28765
   196
  rewriting operate modulo @{text "\<alpha>\<beta>\<eta>"}-conversion, some other tools
wenzelm@28765
   197
  might look at terms more discretely.
wenzelm@28763
   198
wenzelm@42655
   199
  \item @{attribute goals_limit} controls the maximum number of
wenzelm@39130
   200
  subgoals to be shown in goal output.
wenzelm@28763
   201
wenzelm@42655
   202
  \item @{attribute show_main_goal} controls whether the main result
wenzelm@42655
   203
  to be proven should be displayed.  This information might be
wenzelm@39130
   204
  relevant for schematic goals, to inspect the current claim that has
wenzelm@39130
   205
  been synthesized so far.
wenzelm@28763
   206
wenzelm@42655
   207
  \item @{attribute show_hyps} controls printing of implicit
wenzelm@42655
   208
  hypotheses of local facts.  Normally, only those hypotheses are
wenzelm@42655
   209
  displayed that are \emph{not} covered by the assumptions of the
wenzelm@42655
   210
  current context: this situation indicates a fault in some tool being
wenzelm@42655
   211
  used.
wenzelm@28763
   212
wenzelm@42655
   213
  By enabling @{attribute show_hyps}, output of \emph{all} hypotheses
wenzelm@42655
   214
  can be enforced, which is occasionally useful for diagnostic
wenzelm@42655
   215
  purposes.
wenzelm@28763
   216
wenzelm@42655
   217
  \item @{attribute show_tags} controls printing of extra annotations
wenzelm@42655
   218
  within theorems, such as internal position information, or the case
wenzelm@42655
   219
  names being attached by the attribute @{attribute case_names}.
wenzelm@28765
   220
wenzelm@28765
   221
  Note that the @{attribute tagged} and @{attribute untagged}
wenzelm@28765
   222
  attributes provide low-level access to the collection of tags
wenzelm@28765
   223
  associated with a theorem.
wenzelm@28765
   224
wenzelm@42655
   225
  \item @{attribute show_question_marks} controls printing of question
wenzelm@42655
   226
  marks for schematic variables, such as @{text ?x}.  Only the leading
wenzelm@28765
   227
  question mark is affected, the remaining text is unchanged
wenzelm@28765
   228
  (including proper markup for schematic variables that might be
wenzelm@28765
   229
  relevant for user interfaces).
wenzelm@28765
   230
wenzelm@28765
   231
  \end{description}
wenzelm@28765
   232
*}
wenzelm@28765
   233
wenzelm@28765
   234
wenzelm@46284
   235
subsection {* Alternative print modes \label{sec:print-modes} *}
wenzelm@46284
   236
wenzelm@46284
   237
text {*
wenzelm@46284
   238
  \begin{mldecls}
wenzelm@46284
   239
    @{index_ML print_mode_value: "unit -> string list"} \\
wenzelm@46284
   240
    @{index_ML Print_Mode.with_modes: "string list -> ('a -> 'b) -> 'a -> 'b"} \\
wenzelm@46284
   241
  \end{mldecls}
wenzelm@46284
   242
wenzelm@46284
   243
  The \emph{print mode} facility allows one to modify various operations
wenzelm@46284
   244
  for printing.  Commands like @{command typ}, @{command term},
wenzelm@46284
   245
  @{command thm} (see \secref{sec:print-diag}) take additional print
wenzelm@46284
   246
  modes as optional argument.  The underlying ML operations are as
wenzelm@46284
   247
  follows.
wenzelm@46284
   248
wenzelm@46284
   249
  \begin{description}
wenzelm@46284
   250
wenzelm@46284
   251
  \item @{ML "print_mode_value ()"} yields the list of currently
wenzelm@46284
   252
  active print mode names.  This should be understood as a symbolic
wenzelm@46284
   253
  representation of certain individual features for printing (with
wenzelm@46284
   254
  precedence from left to right).
wenzelm@46284
   255
wenzelm@46284
   256
  \item @{ML Print_Mode.with_modes}~@{text "modes f x"} evaluates
wenzelm@46284
   257
  @{text "f x"} in an execution context where the print mode is
wenzelm@46284
   258
  prepended by the given @{text "modes"}.  This provides a thread-safe
wenzelm@46284
   259
  way to augment print modes.  It is also monotonic in the set of mode
wenzelm@46284
   260
  names: it retains the default print mode that certain
wenzelm@46284
   261
  user-interfaces might have installed for their proper functioning!
wenzelm@46284
   262
wenzelm@46284
   263
  \end{description}
wenzelm@46284
   264
wenzelm@46284
   265
  \begin{warn}
wenzelm@46284
   266
  The old global reference @{ML print_mode} should never be used
wenzelm@46284
   267
  directly in applications.  Its main reason for being publicly
wenzelm@46284
   268
  accessible is to support historic versions of Proof~General.
wenzelm@46284
   269
  \end{warn}
wenzelm@46284
   270
wenzelm@46284
   271
  \medskip The pretty printer for inner syntax maintains alternative
wenzelm@46284
   272
  mixfix productions for any print mode name invented by the user, say
wenzelm@46284
   273
  in commands like @{command notation} or @{command abbreviation}.
wenzelm@46284
   274
  Mode names can be arbitrary, but the following ones have a specific
wenzelm@46284
   275
  meaning by convention:
wenzelm@46284
   276
wenzelm@46284
   277
  \begin{itemize}
wenzelm@46284
   278
wenzelm@46284
   279
  \item @{verbatim "\"\""} (the empty string): default mode;
wenzelm@46284
   280
  implicitly active as last element in the list of modes.
wenzelm@46284
   281
wenzelm@46284
   282
  \item @{verbatim input}: dummy print mode that is never active; may
wenzelm@46284
   283
  be used to specify notation that is only available for input.
wenzelm@46284
   284
wenzelm@46284
   285
  \item @{verbatim internal}: dummy print mode that is never active;
wenzelm@46284
   286
  used internally in Isabelle/Pure.
wenzelm@46284
   287
wenzelm@46284
   288
  \item @{verbatim xsymbols}: enable proper mathematical symbols
wenzelm@46284
   289
  instead of ASCII art.\footnote{This traditional mode name stems from
wenzelm@46284
   290
  the ``X-Symbol'' package for old versions of Proof~General with XEmacs,
wenzelm@46284
   291
  although that package has been superseded by Unicode in recent
wenzelm@46284
   292
  years.}
wenzelm@46284
   293
wenzelm@46284
   294
  \item @{verbatim HTML}: additional mode that is active in HTML
wenzelm@46284
   295
  presentation of Isabelle theory sources; allows one to provide
wenzelm@46284
   296
  alternative output notation.
wenzelm@46284
   297
wenzelm@46284
   298
  \item @{verbatim latex}: additional mode that is active in {\LaTeX}
wenzelm@46284
   299
  document preparation of Isabelle theory sources; allows one to provide
wenzelm@46284
   300
  alternative output notation.
wenzelm@46284
   301
wenzelm@46284
   302
  \end{itemize}
wenzelm@46284
   303
*}
wenzelm@46284
   304
wenzelm@46284
   305
wenzelm@28765
   306
subsection {* Printing limits *}
wenzelm@28765
   307
wenzelm@28765
   308
text {*
wenzelm@28765
   309
  \begin{mldecls}
wenzelm@36745
   310
    @{index_ML Pretty.margin_default: "int Unsynchronized.ref"} \\
wenzelm@28765
   311
    @{index_ML print_depth: "int -> unit"} \\
wenzelm@28765
   312
  \end{mldecls}
wenzelm@28765
   313
wenzelm@28765
   314
  These ML functions set limits for pretty printed text.
wenzelm@28765
   315
wenzelm@28765
   316
  \begin{description}
wenzelm@28765
   317
wenzelm@36745
   318
  \item @{ML Pretty.margin_default} indicates the global default for
wenzelm@36745
   319
  the right margin of the built-in pretty printer, with initial value
wenzelm@36745
   320
  76.  Note that user-interfaces typically control margins
wenzelm@36745
   321
  automatically when resizing windows, or even bypass the formatting
wenzelm@36745
   322
  engine of Isabelle/ML altogether and do it within the front end via
wenzelm@36745
   323
  Isabelle/Scala.
wenzelm@28765
   324
wenzelm@28765
   325
  \item @{ML print_depth}~@{text n} limits the printing depth of the
wenzelm@28765
   326
  ML toplevel pretty printer; the precise effect depends on the ML
wenzelm@28765
   327
  compiler and run-time system.  Typically @{text n} should be less
wenzelm@28765
   328
  than 10.  Bigger values such as 100--1000 are useful for debugging.
wenzelm@28763
   329
wenzelm@28763
   330
  \end{description}
wenzelm@28763
   331
*}
wenzelm@28763
   332
wenzelm@28763
   333
wenzelm@46282
   334
section {* Mixfix annotations \label{sec:mixfix} *}
wenzelm@28762
   335
wenzelm@28762
   336
text {* Mixfix annotations specify concrete \emph{inner syntax} of
wenzelm@35351
   337
  Isabelle types and terms.  Locally fixed parameters in toplevel
wenzelm@46290
   338
  theorem statements, locale and class specifications also admit
wenzelm@46290
   339
  mixfix annotations in a fairly uniform manner.  A mixfix annotation
wenzelm@46290
   340
  describes the concrete syntax, the translation to abstract
wenzelm@46290
   341
  syntax, and the pretty printing.  Special case annotations provide a
wenzelm@46290
   342
  simple means of specifying infix operators and binders.
wenzelm@46290
   343
wenzelm@46290
   344
  Isabelle mixfix syntax is inspired by {\OBJ} \cite{OBJ}.  It allows one
wenzelm@46290
   345
  to specify any context-free priority grammar, which is more general
wenzelm@46290
   346
  than the fixity declarations of ML and Prolog.
wenzelm@28762
   347
wenzelm@42596
   348
  @{rail "
wenzelm@46289
   349
    @{syntax_def mixfix}: '(' mfix ')'
wenzelm@28762
   350
    ;
wenzelm@46289
   351
    @{syntax_def struct_mixfix}: '(' ( mfix | @'structure' ) ')'
wenzelm@28762
   352
    ;
wenzelm@28762
   353
wenzelm@46290
   354
    mfix: @{syntax template} prios? @{syntax nat}? |
wenzelm@46290
   355
      (@'infix' | @'infixl' | @'infixr') @{syntax template} @{syntax nat} |
wenzelm@46290
   356
      @'binder' @{syntax template} prios? @{syntax nat}
wenzelm@46290
   357
    ;
wenzelm@46290
   358
    template: string
wenzelm@46289
   359
    ;
wenzelm@42596
   360
    prios: '[' (@{syntax nat} + ',') ']'
wenzelm@42596
   361
  "}
wenzelm@28762
   362
wenzelm@46290
   363
  The string given as @{text template} may include literal text,
wenzelm@46290
   364
  spacing, blocks, and arguments (denoted by ``@{text _}''); the
wenzelm@46290
   365
  special symbol ``@{verbatim "\<index>"}'' (printed as ``@{text "\<index>"}'')
wenzelm@46290
   366
  represents an index argument that specifies an implicit structure
wenzelm@46290
   367
  reference (see also \secref{sec:locale}).  Infix and binder
wenzelm@46290
   368
  declarations provide common abbreviations for particular mixfix
wenzelm@46290
   369
  declarations.  So in practice, mixfix templates mostly degenerate to
wenzelm@46290
   370
  literal text for concrete syntax, such as ``@{verbatim "++"}'' for
wenzelm@46290
   371
  an infix symbol.
wenzelm@46290
   372
*}
wenzelm@28762
   373
wenzelm@46290
   374
wenzelm@46290
   375
subsection {* The general mixfix form *}
wenzelm@46290
   376
wenzelm@46290
   377
text {* In full generality, mixfix declarations work as follows.
wenzelm@46290
   378
  Suppose a constant @{text "c :: \<tau>\<^sub>1 \<Rightarrow> \<dots> \<tau>\<^sub>n \<Rightarrow> \<tau>"} is annotated by
wenzelm@46290
   379
  @{text "(mixfix [p\<^sub>1, \<dots>, p\<^sub>n] p)"}, where @{text "mixfix"} is a string
wenzelm@46290
   380
  @{text "d\<^sub>0 _ d\<^sub>1 _ \<dots> _ d\<^sub>n"} consisting of delimiters that surround
wenzelm@46290
   381
  argument positions as indicated by underscores.
wenzelm@28762
   382
wenzelm@28762
   383
  Altogether this determines a production for a context-free priority
wenzelm@28762
   384
  grammar, where for each argument @{text "i"} the syntactic category
wenzelm@46292
   385
  is determined by @{text "\<tau>\<^sub>i"} (with priority @{text "p\<^sub>i"}), and the
wenzelm@46292
   386
  result category is determined from @{text "\<tau>"} (with priority @{text
wenzelm@46292
   387
  "p"}).  Priority specifications are optional, with default 0 for
wenzelm@46292
   388
  arguments and 1000 for the result.\footnote{Omitting priorities is
wenzelm@46292
   389
  prone to syntactic ambiguities unless the delimiter tokens determine
wenzelm@46292
   390
  fully bracketed notation, as in @{text "if _ then _ else _ fi"}.}
wenzelm@28762
   391
wenzelm@28762
   392
  Since @{text "\<tau>"} may be again a function type, the constant
wenzelm@28762
   393
  type scheme may have more argument positions than the mixfix
wenzelm@28762
   394
  pattern.  Printing a nested application @{text "c t\<^sub>1 \<dots> t\<^sub>m"} for
wenzelm@28762
   395
  @{text "m > n"} works by attaching concrete notation only to the
wenzelm@28762
   396
  innermost part, essentially by printing @{text "(c t\<^sub>1 \<dots> t\<^sub>n) \<dots> t\<^sub>m"}
wenzelm@28762
   397
  instead.  If a term has fewer arguments than specified in the mixfix
wenzelm@28762
   398
  template, the concrete syntax is ignored.
wenzelm@28762
   399
wenzelm@28762
   400
  \medskip A mixfix template may also contain additional directives
wenzelm@28762
   401
  for pretty printing, notably spaces, blocks, and breaks.  The
wenzelm@28762
   402
  general template format is a sequence over any of the following
wenzelm@28762
   403
  entities.
wenzelm@28762
   404
wenzelm@28778
   405
  \begin{description}
wenzelm@28762
   406
wenzelm@28771
   407
  \item @{text "d"} is a delimiter, namely a non-empty sequence of
wenzelm@28771
   408
  characters other than the following special characters:
wenzelm@28762
   409
wenzelm@28771
   410
  \smallskip
wenzelm@28771
   411
  \begin{tabular}{ll}
wenzelm@28771
   412
    @{verbatim "'"} & single quote \\
wenzelm@28771
   413
    @{verbatim "_"} & underscore \\
wenzelm@28771
   414
    @{text "\<index>"} & index symbol \\
wenzelm@28771
   415
    @{verbatim "("} & open parenthesis \\
wenzelm@28771
   416
    @{verbatim ")"} & close parenthesis \\
wenzelm@28771
   417
    @{verbatim "/"} & slash \\
wenzelm@28771
   418
  \end{tabular}
wenzelm@28771
   419
  \medskip
wenzelm@28762
   420
wenzelm@28771
   421
  \item @{verbatim "'"} escapes the special meaning of these
wenzelm@28771
   422
  meta-characters, producing a literal version of the following
wenzelm@28771
   423
  character, unless that is a blank.
wenzelm@28771
   424
wenzelm@28771
   425
  A single quote followed by a blank separates delimiters, without
wenzelm@28771
   426
  affecting printing, but input tokens may have additional white space
wenzelm@28771
   427
  here.
wenzelm@28771
   428
wenzelm@28771
   429
  \item @{verbatim "_"} is an argument position, which stands for a
wenzelm@28762
   430
  certain syntactic category in the underlying grammar.
wenzelm@28762
   431
wenzelm@28771
   432
  \item @{text "\<index>"} is an indexed argument position; this is the place
wenzelm@28771
   433
  where implicit structure arguments can be attached.
wenzelm@28762
   434
wenzelm@28771
   435
  \item @{text "s"} is a non-empty sequence of spaces for printing.
wenzelm@28771
   436
  This and the following specifications do not affect parsing at all.
wenzelm@28762
   437
wenzelm@28771
   438
  \item @{verbatim "("}@{text n} opens a pretty printing block.  The
wenzelm@28762
   439
  optional number specifies how much indentation to add when a line
wenzelm@28762
   440
  break occurs within the block.  If the parenthesis is not followed
wenzelm@28762
   441
  by digits, the indentation defaults to 0.  A block specified via
wenzelm@28771
   442
  @{verbatim "(00"} is unbreakable.
wenzelm@28762
   443
wenzelm@28771
   444
  \item @{verbatim ")"} closes a pretty printing block.
wenzelm@28762
   445
wenzelm@28771
   446
  \item @{verbatim "//"} forces a line break.
wenzelm@28762
   447
wenzelm@28771
   448
  \item @{verbatim "/"}@{text s} allows a line break.  Here @{text s}
wenzelm@28771
   449
  stands for the string of spaces (zero or more) right after the
wenzelm@28771
   450
  slash.  These spaces are printed if the break is \emph{not} taken.
wenzelm@28762
   451
wenzelm@28778
   452
  \end{description}
wenzelm@28762
   453
wenzelm@28762
   454
  The general idea of pretty printing with blocks and breaks is also
wenzelm@46286
   455
  described in \cite{paulson-ml2}; it goes back to \cite{Oppen:1980}.
wenzelm@28762
   456
*}
wenzelm@28762
   457
wenzelm@28762
   458
wenzelm@46290
   459
subsection {* Infixes *}
wenzelm@46290
   460
wenzelm@46290
   461
text {* Infix operators are specified by convenient short forms that
wenzelm@46290
   462
  abbreviate general mixfix annotations as follows:
wenzelm@46290
   463
wenzelm@46290
   464
  \begin{center}
wenzelm@46290
   465
  \begin{tabular}{lll}
wenzelm@46290
   466
wenzelm@46292
   467
  @{verbatim "("}@{keyword_def "infix"}~@{verbatim "\""}@{text sy}@{verbatim "\""} @{text "p"}@{verbatim ")"}
wenzelm@46290
   468
  & @{text "\<mapsto>"} &
wenzelm@46290
   469
  @{verbatim "(\"(_ "}@{text sy}@{verbatim "/ _)\" ["}@{text "p + 1"}@{verbatim ", "}@{text "p + 1"}@{verbatim "]"}@{text " p"}@{verbatim ")"} \\
wenzelm@46292
   470
  @{verbatim "("}@{keyword_def "infixl"}~@{verbatim "\""}@{text sy}@{verbatim "\""} @{text "p"}@{verbatim ")"}
wenzelm@46290
   471
  & @{text "\<mapsto>"} &
wenzelm@46290
   472
  @{verbatim "(\"(_ "}@{text sy}@{verbatim "/ _)\" ["}@{text "p"}@{verbatim ", "}@{text "p + 1"}@{verbatim "]"}@{text " p"}@{verbatim ")"} \\
wenzelm@46292
   473
  @{verbatim "("}@{keyword_def "infixr"}~@{verbatim "\""}@{text sy}@{verbatim "\""} @{text "p"}@{verbatim ")"}
wenzelm@46290
   474
  & @{text "\<mapsto>"} &
wenzelm@46290
   475
  @{verbatim "(\"(_ "}@{text sy}@{verbatim "/ _)\" ["}@{text "p + 1"}@{verbatim ", "}@{text "p"}@{verbatim "]"}@{text " p"}@{verbatim ")"} \\
wenzelm@46290
   476
wenzelm@46290
   477
  \end{tabular}
wenzelm@46290
   478
  \end{center}
wenzelm@46290
   479
wenzelm@46292
   480
  The mixfix template @{verbatim "\"(_ "}@{text sy}@{verbatim "/ _)\""}
wenzelm@46292
   481
  specifies two argument positions; the delimiter is preceded by a
wenzelm@46292
   482
  space and followed by a space or line break; the entire phrase is a
wenzelm@46292
   483
  pretty printing block.
wenzelm@46290
   484
wenzelm@46290
   485
  The alternative notation @{verbatim "op"}~@{text sy} is introduced
wenzelm@46290
   486
  in addition.  Thus any infix operator may be written in prefix form
wenzelm@46290
   487
  (as in ML), independently of the number of arguments in the term.
wenzelm@46290
   488
*}
wenzelm@46290
   489
wenzelm@46290
   490
wenzelm@46290
   491
subsection {* Binders *}
wenzelm@46290
   492
wenzelm@46290
   493
text {* A \emph{binder} is a variable-binding construct such as a
wenzelm@46290
   494
  quantifier.  The idea to formalize @{text "\<forall>x. b"} as @{text "All
wenzelm@46290
   495
  (\<lambda>x. b)"} for @{text "All :: ('a \<Rightarrow> bool) \<Rightarrow> bool"} already goes back
wenzelm@46290
   496
  to \cite{church40}.  Isabelle declarations of certain higher-order
wenzelm@46292
   497
  operators may be annotated with @{keyword_def "binder"} annotations
wenzelm@46292
   498
  as follows:
wenzelm@46290
   499
wenzelm@46290
   500
  \begin{center}
wenzelm@46290
   501
  @{text "c :: "}@{verbatim "\""}@{text "(\<tau>\<^sub>1 \<Rightarrow> \<tau>\<^sub>2) \<Rightarrow> \<tau>\<^sub>3"}@{verbatim "\"  ("}@{keyword "binder"}@{verbatim " \""}@{text "sy"}@{verbatim "\" ["}@{text "p"}@{verbatim "] "}@{text "q"}@{verbatim ")"}
wenzelm@46290
   502
  \end{center}
wenzelm@46290
   503
wenzelm@46290
   504
  This introduces concrete binder syntax @{text "sy x. b"}, where
wenzelm@46290
   505
  @{text x} is a bound variable of type @{text "\<tau>\<^sub>1"}, the body @{text
wenzelm@46290
   506
  b} has type @{text "\<tau>\<^sub>2"} and the whole term has type @{text "\<tau>\<^sub>3"}.
wenzelm@46290
   507
  The optional integer @{text p} specifies the syntactic priority of
wenzelm@46290
   508
  the body; the default is @{text "q"}, which is also the priority of
wenzelm@46290
   509
  the whole construct.
wenzelm@46290
   510
wenzelm@46290
   511
  Internally, the binder syntax is expanded to something like this:
wenzelm@46290
   512
  \begin{center}
wenzelm@46290
   513
  @{text "c_binder :: "}@{verbatim "\""}@{text "idts \<Rightarrow> \<tau>\<^sub>2 \<Rightarrow> \<tau>\<^sub>3"}@{verbatim "\"  (\"(3"}@{text sy}@{verbatim "_./ _)\" [0, "}@{text "p"}@{verbatim "] "}@{text "q"}@{verbatim ")"}
wenzelm@46290
   514
  \end{center}
wenzelm@46290
   515
wenzelm@46290
   516
  Here @{syntax (inner) idts} is the nonterminal symbol for a list of
wenzelm@46290
   517
  identifiers with optional type constraints (see also
wenzelm@46290
   518
  \secref{sec:pure-grammar}).  The mixfix template @{verbatim
wenzelm@46290
   519
  "\"(3"}@{text sy}@{verbatim "_./ _)\""} defines argument positions
wenzelm@46290
   520
  for the bound identifiers and the body, separated by a dot with
wenzelm@46290
   521
  optional line break; the entire phrase is a pretty printing block of
wenzelm@46290
   522
  indentation level 3.  Note that there is no extra space after @{text
wenzelm@46290
   522
  "sy"}, so it needs to be included in the user specification if the binder
wenzelm@46290
   524
  syntax ends with a token that may be continued by an identifier
wenzelm@46290
   525
  token at the start of @{syntax (inner) idts}.
wenzelm@46290
   526
wenzelm@46290
   527
  Furthermore, a syntax translation transforms @{text "c_binder x\<^sub>1
wenzelm@46290
   528
  \<dots> x\<^sub>n b"} into iterated application @{text "c (\<lambda>x\<^sub>1. \<dots> c (\<lambda>x\<^sub>n. b)\<dots>)"}.
wenzelm@46290
   529
  This works in both directions, for parsing and printing.  *}
wenzelm@46290
   530
wenzelm@46290
   531
wenzelm@46282
   532
section {* Explicit notation \label{sec:notation} *}
wenzelm@28762
   533
wenzelm@28762
   534
text {*
wenzelm@28762
   535
  \begin{matharray}{rcll}
wenzelm@35413
   536
    @{command_def "type_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
wenzelm@35413
   537
    @{command_def "no_type_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
wenzelm@28762
   538
    @{command_def "notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
wenzelm@28762
   539
    @{command_def "no_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
wenzelm@36508
   540
    @{command_def "write"} & : & @{text "proof(state) \<rightarrow> proof(state)"} \\
wenzelm@28762
   541
  \end{matharray}
wenzelm@28762
   542
wenzelm@46288
   543
  Commands that introduce new logical entities (terms or types)
wenzelm@46288
   544
  usually allow to provide mixfix annotations on the spot, which is
wenzelm@46288
   545
  convenient for default notation.  Nonetheless, the syntax may be
wenzelm@46288
   546
  modified later on by declarations for explicit notation.  This
wenzelm@46288
   547
  allows one to add or delete mixfix annotations for existing logical
wenzelm@46288
   548
  entities within the current context.
wenzelm@46288
   549
wenzelm@42596
   550
  @{rail "
wenzelm@42596
   551
    (@@{command type_notation} | @@{command no_type_notation}) @{syntax target}?
wenzelm@42596
   552
      @{syntax mode}? \\ (@{syntax nameref} @{syntax mixfix} + @'and')
wenzelm@35413
   553
    ;
wenzelm@42596
   554
    (@@{command notation} | @@{command no_notation}) @{syntax target}? @{syntax mode}? \\
wenzelm@42705
   555
      (@{syntax nameref} @{syntax struct_mixfix} + @'and')
wenzelm@28762
   556
    ;
wenzelm@42705
   557
    @@{command write} @{syntax mode}? (@{syntax nameref} @{syntax struct_mixfix} + @'and')
wenzelm@42596
   558
  "}
wenzelm@28762
   559
wenzelm@28762
   560
  \begin{description}
wenzelm@28762
   561
wenzelm@35413
   562
  \item @{command "type_notation"}~@{text "c (mx)"} associates mixfix
wenzelm@35413
   563
  syntax with an existing type constructor.  The arity of the
wenzelm@35413
   564
  constructor is retrieved from the context.
wenzelm@46282
   565
wenzelm@35413
   566
  \item @{command "no_type_notation"} is similar to @{command
wenzelm@35413
   567
  "type_notation"}, but removes the specified syntax annotation from
wenzelm@35413
   568
  the present context.
wenzelm@35413
   569
wenzelm@28762
   570
  \item @{command "notation"}~@{text "c (mx)"} associates mixfix
wenzelm@35413
   571
  syntax with an existing constant or fixed variable.  The type
wenzelm@35413
   572
  declaration of the given entity is retrieved from the context.
wenzelm@46282
   573
wenzelm@28762
   574
  \item @{command "no_notation"} is similar to @{command "notation"},
wenzelm@28762
   575
  but removes the specified syntax annotation from the present
wenzelm@28762
   576
  context.
wenzelm@28762
   577
wenzelm@36508
   578
  \item @{command "write"} is similar to @{command "notation"}, but
wenzelm@36508
   579
  works within an Isar proof body.
wenzelm@36508
   580
wenzelm@28762
   581
  \end{description}
wenzelm@28762
   582
*}
wenzelm@28762
   583
wenzelm@28778
   584
wenzelm@28778
   585
section {* The Pure syntax \label{sec:pure-syntax} *}
wenzelm@28769
   586
wenzelm@46282
   587
subsection {* Lexical matters \label{sec:inner-lex} *}
wenzelm@46282
   588
wenzelm@46282
   589
text {* The inner lexical syntax vaguely resembles the outer one
wenzelm@46282
   590
  (\secref{sec:outer-lex}), but some details are different.  There are
wenzelm@46282
   591
  two main categories of inner syntax tokens:
wenzelm@46282
   592
wenzelm@46282
   593
  \begin{enumerate}
wenzelm@46282
   594
wenzelm@46282
   595
  \item \emph{delimiters} --- the literal tokens occurring in
wenzelm@46282
   596
  productions of the given priority grammar (cf.\
wenzelm@46282
   597
  \secref{sec:priority-grammar});
wenzelm@46282
   598
wenzelm@46282
   599
  \item \emph{named tokens} --- various categories of identifiers etc.
wenzelm@46282
   600
wenzelm@46282
   601
  \end{enumerate}
wenzelm@46282
   602
wenzelm@46282
   603
  Delimiters override named tokens and may thus render certain
wenzelm@46282
   604
  identifiers inaccessible.  Sometimes the logical context admits
wenzelm@46282
   605
  alternative ways to refer to the same entity, potentially via
wenzelm@46282
   606
  qualified names.
wenzelm@46282
   607
wenzelm@46282
   608
  \medskip The categories for named tokens are defined once and for
wenzelm@46282
   609
  all as follows, reusing some categories of the outer token syntax
wenzelm@46282
   610
  (\secref{sec:outer-lex}).
wenzelm@46282
   611
wenzelm@46282
   612
  \begin{center}
wenzelm@46282
   613
  \begin{supertabular}{rcl}
wenzelm@46282
   614
    @{syntax_def (inner) id} & = & @{syntax_ref ident} \\
wenzelm@46282
   615
    @{syntax_def (inner) longid} & = & @{syntax_ref longident} \\
wenzelm@46282
   616
    @{syntax_def (inner) var} & = & @{syntax_ref var} \\
wenzelm@46282
   617
    @{syntax_def (inner) tid} & = & @{syntax_ref typefree} \\
wenzelm@46282
   618
    @{syntax_def (inner) tvar} & = & @{syntax_ref typevar} \\
wenzelm@46282
   619
    @{syntax_def (inner) num_token} & = & @{syntax_ref nat}@{text "  |  "}@{verbatim "-"}@{syntax_ref nat} \\
wenzelm@46282
   620
    @{syntax_def (inner) float_token} & = & @{syntax_ref nat}@{verbatim "."}@{syntax_ref nat}@{text "  |  "}@{verbatim "-"}@{syntax_ref nat}@{verbatim "."}@{syntax_ref nat} \\
wenzelm@46282
   621
    @{syntax_def (inner) xnum_token} & = & @{verbatim "#"}@{syntax_ref nat}@{text "  |  "}@{verbatim "#-"}@{syntax_ref nat} \\
wenzelm@46282
   622
wenzelm@46483
   623
    @{syntax_def (inner) str_token} & = & @{verbatim "''"} @{text "\<dots>"} @{verbatim "''"} \\
wenzelm@46282
   624
  \end{supertabular}
wenzelm@46282
   625
  \end{center}
wenzelm@46282
   626
wenzelm@46282
   627
  The token categories @{syntax (inner) num_token}, @{syntax (inner)
wenzelm@46282
   628
  float_token}, @{syntax (inner) xnum_token}, and @{syntax (inner)
wenzelm@46483
   629
  str_token} are not used in Pure.  Object-logics may implement numerals
wenzelm@46282
   630
  and string constants by adding appropriate syntax declarations,
wenzelm@46282
   631
  together with some translation functions (e.g.\ see Isabelle/HOL).
wenzelm@46282
   632
wenzelm@46282
   633
  The derived categories @{syntax_def (inner) num_const}, @{syntax_def
wenzelm@46282
   634
  (inner) float_const}, and @{syntax_def (inner) xnum_const} provide
wenzelm@46282
   635
  robust access to the respective tokens: the syntax tree holds a
wenzelm@46282
   636
  syntactic constant instead of a free variable.
wenzelm@46282
   637
*}
wenzelm@46282
   638
wenzelm@46282
   639
wenzelm@28777
   640
subsection {* Priority grammars \label{sec:priority-grammar} *}
wenzelm@28769
   641
wenzelm@28769
   642
text {* A context-free grammar consists of a set of \emph{terminal
wenzelm@28769
   643
  symbols}, a set of \emph{nonterminal symbols} and a set of
wenzelm@28769
   644
  \emph{productions}.  Productions have the form @{text "A = \<gamma>"},
wenzelm@28769
   645
  where @{text A} is a nonterminal and @{text \<gamma>} is a string of
wenzelm@28769
   646
  terminals and nonterminals.  One designated nonterminal is called
wenzelm@28769
   647
  the \emph{root symbol}.  The language defined by the grammar
wenzelm@28769
   648
  consists of all strings of terminals that can be derived from the
wenzelm@28769
   649
  root symbol by applying productions as rewrite rules.
wenzelm@28769
   650
wenzelm@28769
   651
  The standard Isabelle parser for inner syntax uses a \emph{priority
wenzelm@28769
   652
  grammar}.  Each nonterminal is decorated by an integer priority:
wenzelm@28769
   653
  @{text "A\<^sup>(\<^sup>p\<^sup>)"}.  In a derivation, @{text "A\<^sup>(\<^sup>p\<^sup>)"} may be rewritten
wenzelm@28769
   654
  using a production @{text "A\<^sup>(\<^sup>q\<^sup>) = \<gamma>"} only if @{text "p \<le> q"}.  Any
wenzelm@28769
   655
  priority grammar can be translated into a normal context-free
wenzelm@28769
   656
  grammar by introducing new nonterminals and productions.
wenzelm@28769
   657
wenzelm@28769
   658
  \medskip Formally, a set of context-free productions @{text G}
wenzelm@28769
   659
  induces a derivation relation @{text "\<longrightarrow>\<^sub>G"} as follows.  Let @{text
wenzelm@28769
   660
  \<alpha>} and @{text \<beta>} denote strings of terminal or nonterminal symbols.
wenzelm@28774
   661
  Then @{text "\<alpha> A\<^sup>(\<^sup>p\<^sup>) \<beta> \<longrightarrow>\<^sub>G \<alpha> \<gamma> \<beta>"} holds if and only if @{text G}
wenzelm@28774
   662
  contains some production @{text "A\<^sup>(\<^sup>q\<^sup>) = \<gamma>"} for @{text "p \<le> q"}.
wenzelm@28769
   663
wenzelm@28769
   664
  \medskip The following grammar for arithmetic expressions
wenzelm@28769
   665
  demonstrates how binding power and associativity of operators can be
wenzelm@28769
   666
  enforced by priorities.
wenzelm@28769
   667
wenzelm@28769
   668
  \begin{center}
wenzelm@28769
   669
  \begin{tabular}{rclr}
wenzelm@28774
   670
  @{text "A\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "="} & @{verbatim "("} @{text "A\<^sup>(\<^sup>0\<^sup>)"} @{verbatim ")"} \\
wenzelm@28769
   671
  @{text "A\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "="} & @{verbatim 0} \\
wenzelm@28769
   672
  @{text "A\<^sup>(\<^sup>0\<^sup>)"} & @{text "="} & @{text "A\<^sup>(\<^sup>0\<^sup>)"} @{verbatim "+"} @{text "A\<^sup>(\<^sup>1\<^sup>)"} \\
wenzelm@28769
   673
  @{text "A\<^sup>(\<^sup>2\<^sup>)"} & @{text "="} & @{text "A\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "*"} @{text "A\<^sup>(\<^sup>2\<^sup>)"} \\
wenzelm@28769
   674
  @{text "A\<^sup>(\<^sup>3\<^sup>)"} & @{text "="} & @{verbatim "-"} @{text "A\<^sup>(\<^sup>3\<^sup>)"} \\
wenzelm@28769
   675
  \end{tabular}
wenzelm@28769
   676
  \end{center}
wenzelm@28769
   677
  The choice of priorities determines that @{verbatim "-"} binds
wenzelm@28769
   678
  tighter than @{verbatim "*"}, which binds tighter than @{verbatim
wenzelm@28769
   679
  "+"}.  Furthermore @{verbatim "+"} associates to the left and
wenzelm@28769
   680
  @{verbatim "*"} to the right.
wenzelm@28769
   681
wenzelm@28769
   682
  \medskip For clarity, grammars obey these conventions:
wenzelm@28769
   683
  \begin{itemize}
wenzelm@28769
   684
wenzelm@28769
   685
  \item All priorities must lie between 0 and 1000.
wenzelm@28769
   686
wenzelm@28769
   687
  \item Priority 0 on the right-hand side and priority 1000 on the
wenzelm@28769
   688
  left-hand side may be omitted.
wenzelm@28769
   689
wenzelm@28769
   690
  \item The production @{text "A\<^sup>(\<^sup>p\<^sup>) = \<alpha>"} is written as @{text "A = \<alpha>
wenzelm@28769
   691
  (p)"}, i.e.\ the priority of the left-hand side actually appears in
wenzelm@28769
   692
  a column on the far right.
wenzelm@28769
   693
wenzelm@28769
   694
  \item Alternatives are separated by @{text "|"}.
wenzelm@28769
   695
wenzelm@28769
   696
  \item Repetition is indicated by dots @{text "(\<dots>)"} in an informal
wenzelm@28769
   697
  but obvious way.
wenzelm@28769
   698
wenzelm@28769
   699
  \end{itemize}
wenzelm@28769
   700
wenzelm@28769
   701
  Using these conventions, the example grammar specification above
wenzelm@28769
   702
  takes the form:
wenzelm@28769
   703
  \begin{center}
wenzelm@28769
   704
  \begin{tabular}{rclc}
wenzelm@28774
   705
    @{text A} & @{text "="} & @{verbatim "("} @{text A} @{verbatim ")"} \\
wenzelm@28774
   706
              & @{text "|"} & @{verbatim 0} & \qquad\qquad \\
wenzelm@28769
   707
              & @{text "|"} & @{text A} @{verbatim "+"} @{text "A\<^sup>(\<^sup>1\<^sup>)"} & @{text "(0)"} \\
wenzelm@28769
   708
              & @{text "|"} & @{text "A\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "*"} @{text "A\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
wenzelm@28769
   709
              & @{text "|"} & @{verbatim "-"} @{text "A\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
wenzelm@28769
   710
  \end{tabular}
wenzelm@28769
   711
  \end{center}
wenzelm@28769
   712
*}
wenzelm@28769
   713
wenzelm@28769
   714
wenzelm@46290
   715
subsection {* The Pure grammar \label{sec:pure-grammar} *}
wenzelm@28770
   716
wenzelm@46287
   717
text {* The priority grammar of the @{text "Pure"} theory is defined
wenzelm@46287
   718
  approximately like this:
wenzelm@28774
   719
wenzelm@28770
   720
  \begin{center}
wenzelm@28773
   721
  \begin{supertabular}{rclr}
wenzelm@28770
   722
wenzelm@28778
   723
  @{syntax_def (inner) any} & = & @{text "prop  |  logic"} \\\\
wenzelm@28772
   724
wenzelm@28778
   725
  @{syntax_def (inner) prop} & = & @{verbatim "("} @{text prop} @{verbatim ")"} \\
wenzelm@28772
   726
    & @{text "|"} & @{text "prop\<^sup>(\<^sup>4\<^sup>)"} @{verbatim "::"} @{text type} & @{text "(3)"} \\
wenzelm@28772
   727
    & @{text "|"} & @{text "any\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "=="} @{text "any\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
wenzelm@28773
   728
    & @{text "|"} & @{text "any\<^sup>(\<^sup>3\<^sup>)"} @{text "\<equiv>"} @{text "any\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
wenzelm@28856
   729
    & @{text "|"} & @{text "prop\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "&&&"} @{text "prop\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
wenzelm@28772
   730
    & @{text "|"} & @{text "prop\<^sup>(\<^sup>2\<^sup>)"} @{verbatim "==>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
wenzelm@28773
   731
    & @{text "|"} & @{text "prop\<^sup>(\<^sup>2\<^sup>)"} @{text "\<Longrightarrow>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
wenzelm@28772
   732
    & @{text "|"} & @{verbatim "[|"} @{text prop} @{verbatim ";"} @{text "\<dots>"} @{verbatim ";"} @{text prop} @{verbatim "|]"} @{verbatim "==>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
wenzelm@28773
   733
    & @{text "|"} & @{text "\<lbrakk>"} @{text prop} @{verbatim ";"} @{text "\<dots>"} @{verbatim ";"} @{text prop} @{text "\<rbrakk>"} @{text "\<Longrightarrow>"} @{text "prop\<^sup>(\<^sup>1\<^sup>)"} & @{text "(1)"} \\
wenzelm@28772
   734
    & @{text "|"} & @{verbatim "!!"} @{text idts} @{verbatim "."} @{text prop} & @{text "(0)"} \\
wenzelm@28773
   735
    & @{text "|"} & @{text "\<And>"} @{text idts} @{verbatim "."} @{text prop} & @{text "(0)"} \\
wenzelm@28773
   736
    & @{text "|"} & @{verbatim OFCLASS} @{verbatim "("} @{text type} @{verbatim ","} @{text logic} @{verbatim ")"} \\
wenzelm@28773
   737
    & @{text "|"} & @{verbatim SORT_CONSTRAINT} @{verbatim "("} @{text type} @{verbatim ")"} \\
wenzelm@28856
   738
    & @{text "|"} & @{verbatim TERM} @{text logic} \\
wenzelm@28773
   739
    & @{text "|"} & @{verbatim PROP} @{text aprop} \\\\
wenzelm@28772
   740
wenzelm@28856
   741
  @{syntax_def (inner) aprop} & = & @{verbatim "("} @{text aprop} @{verbatim ")"} \\
wenzelm@28856
   742
    & @{text "|"} & @{text "id  |  longid  |  var  |  "}@{verbatim "_"}@{text "  |  "}@{verbatim "..."} \\
wenzelm@28856
   743
    & @{text "|"} & @{verbatim CONST} @{text "id  |  "}@{verbatim CONST} @{text "longid"} \\
wenzelm@46287
   744
    & @{text "|"} & @{verbatim XCONST} @{text "id  |  "}@{verbatim XCONST} @{text "longid"} \\
wenzelm@28773
   745
    & @{text "|"} & @{text "logic\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)  any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) \<dots> any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "(999)"} \\\\
wenzelm@28770
   746
wenzelm@28778
   747
  @{syntax_def (inner) logic} & = & @{verbatim "("} @{text logic} @{verbatim ")"} \\
wenzelm@28772
   748
    & @{text "|"} & @{text "logic\<^sup>(\<^sup>4\<^sup>)"} @{verbatim "::"} @{text type} & @{text "(3)"} \\
wenzelm@28773
   749
    & @{text "|"} & @{text "id  |  longid  |  var  |  "}@{verbatim "_"}@{text "  |  "}@{verbatim "..."} \\
wenzelm@28856
   750
    & @{text "|"} & @{verbatim CONST} @{text "id  |  "}@{verbatim CONST} @{text "longid"} \\
wenzelm@46287
   751
    & @{text "|"} & @{verbatim XCONST} @{text "id  |  "}@{verbatim XCONST} @{text "longid"} \\
wenzelm@28773
   752
    & @{text "|"} & @{text "logic\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)  any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) \<dots> any\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} & @{text "(999)"} \\
wenzelm@46287
   753
    & @{text "|"} & @{text "\<struct> index\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>)"} \\
wenzelm@28772
   754
    & @{text "|"} & @{verbatim "%"} @{text pttrns} @{verbatim "."} @{text "any\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
wenzelm@28773
   755
    & @{text "|"} & @{text \<lambda>} @{text pttrns} @{verbatim "."} @{text "any\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
wenzelm@46287
   756
    & @{text "|"} & @{verbatim op} @{verbatim "=="}@{text "  |  "}@{verbatim op} @{text "\<equiv>"}@{text "  |  "}@{verbatim op} @{verbatim "&&&"} \\
wenzelm@46287
   757
    & @{text "|"} & @{verbatim op} @{verbatim "==>"}@{text "  |  "}@{verbatim op} @{text "\<Longrightarrow>"} \\
wenzelm@28772
   758
    & @{text "|"} & @{verbatim TYPE} @{verbatim "("} @{text type} @{verbatim ")"} \\\\
wenzelm@28772
   759
wenzelm@28778
   760
  @{syntax_def (inner) idt} & = & @{verbatim "("} @{text idt} @{verbatim ")"}@{text "  |  id  |  "}@{verbatim "_"} \\
wenzelm@28773
   761
    & @{text "|"} & @{text id} @{verbatim "::"} @{text type} & @{text "(0)"} \\
wenzelm@28773
   762
    & @{text "|"} & @{verbatim "_"} @{verbatim "::"} @{text type} & @{text "(0)"} \\\\
wenzelm@28772
   763
wenzelm@46287
   764
  @{syntax_def (inner) index} & = & @{verbatim "\<^bsub>"} @{text "logic\<^sup>(\<^sup>0\<^sup>)"} @{verbatim "\<^esub>"}@{text "  |  |  \<index>"} \\\\
wenzelm@46287
   765
wenzelm@28778
   766
  @{syntax_def (inner) idts} & = & @{text "idt  |  idt\<^sup>(\<^sup>1\<^sup>) idts"} & @{text "(0)"} \\\\
wenzelm@28772
   767
wenzelm@28778
   768
  @{syntax_def (inner) pttrn} & = & @{text idt} \\\\
wenzelm@28772
   769
wenzelm@28778
   770
  @{syntax_def (inner) pttrns} & = & @{text "pttrn  |  pttrn\<^sup>(\<^sup>1\<^sup>) pttrns"} & @{text "(0)"} \\\\
wenzelm@28774
   771
wenzelm@28778
   772
  @{syntax_def (inner) type} & = & @{verbatim "("} @{text type} @{verbatim ")"} \\
wenzelm@28773
   773
    & @{text "|"} & @{text "tid  |  tvar  |  "}@{verbatim "_"} \\
wenzelm@28773
   774
    & @{text "|"} & @{text "tid"} @{verbatim "::"} @{text "sort  |  tvar  "}@{verbatim "::"} @{text "sort  |  "}@{verbatim "_"} @{verbatim "::"} @{text "sort"} \\
wenzelm@46287
   775
    & @{text "|"} & @{text "type_name  |  type\<^sup>(\<^sup>1\<^sup>0\<^sup>0\<^sup>0\<^sup>) type_name"} \\
wenzelm@46287
   776
    & @{text "|"} & @{verbatim "("} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim ")"} @{text type_name} \\
wenzelm@28772
   777
    & @{text "|"} & @{text "type\<^sup>(\<^sup>1\<^sup>)"} @{verbatim "=>"} @{text type} & @{text "(0)"} \\
wenzelm@28773
   778
    & @{text "|"} & @{text "type\<^sup>(\<^sup>1\<^sup>)"} @{text "\<Rightarrow>"} @{text type} & @{text "(0)"} \\
wenzelm@28773
   779
    & @{text "|"} & @{verbatim "["} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim "]"} @{verbatim "=>"} @{text type} & @{text "(0)"} \\
wenzelm@46287
   780
    & @{text "|"} & @{verbatim "["} @{text type} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{text type} @{verbatim "]"} @{text "\<Rightarrow>"} @{text type} & @{text "(0)"} \\
wenzelm@46287
   781
  @{syntax_def (inner) type_name} & = & @{text "id  |  longid"} \\\\
wenzelm@28772
   782
wenzelm@46287
   783
  @{syntax_def (inner) sort} & = & @{syntax class_name}~@{text "  |  "}@{verbatim "{}"} \\
wenzelm@46287
   784
    & @{text "|"} & @{verbatim "{"} @{syntax class_name} @{verbatim ","} @{text "\<dots>"} @{verbatim ","} @{syntax class_name} @{verbatim "}"} \\
wenzelm@46287
   785
  @{syntax_def (inner) class_name} & = & @{text "id  |  longid"} \\
wenzelm@28773
   786
  \end{supertabular}
wenzelm@28770
   787
  \end{center}
wenzelm@28770
   788
wenzelm@28774
   789
  \medskip Here literal terminals are printed @{verbatim "verbatim"};
wenzelm@28774
   790
  see also \secref{sec:inner-lex} for further token categories of the
wenzelm@28774
   791
  inner syntax.  The meaning of the nonterminals defined by the above
wenzelm@28774
   792
  grammar is as follows:
wenzelm@28770
   793
wenzelm@28770
   794
  \begin{description}
wenzelm@28770
   795
wenzelm@28778
   796
  \item @{syntax_ref (inner) any} denotes any term.
wenzelm@28770
   797
wenzelm@28778
   798
  \item @{syntax_ref (inner) prop} denotes meta-level propositions,
wenzelm@28778
   799
  which are terms of type @{typ prop}.  The syntax of such formulae of
wenzelm@28778
   800
  the meta-logic is carefully distinguished from usual conventions for
wenzelm@28778
   801
  object-logics.  In particular, plain @{text "\<lambda>"}-term notation is
wenzelm@28778
   802
  \emph{not} recognized as @{syntax (inner) prop}.
wenzelm@28770
   803
wenzelm@28778
   804
  \item @{syntax_ref (inner) aprop} denotes atomic propositions, which
wenzelm@28778
   805
  are embedded into regular @{syntax (inner) prop} by means of an
wenzelm@28778
   806
  explicit @{verbatim PROP} token.
wenzelm@28770
   807
wenzelm@28770
   808
  Terms of type @{typ prop} with non-constant head, e.g.\ a plain
wenzelm@28770
   809
  variable, are printed in this form.  Constants that yield type @{typ
wenzelm@28770
   810
  prop} are expected to provide their own concrete syntax; otherwise
wenzelm@28778
   811
  the printed version will appear like @{syntax (inner) logic} and
wenzelm@28778
   812
  cannot be parsed again as @{syntax (inner) prop}.
wenzelm@28770
   813
wenzelm@28778
   814
  \item @{syntax_ref (inner) logic} denotes arbitrary terms of a
wenzelm@28778
   815
  logical type, excluding type @{typ prop}.  This is the main
wenzelm@28778
   816
  syntactic category of object-logic entities, covering plain @{text
wenzelm@28778
   817
  \<lambda>}-term notation (variables, abstraction, application), plus
wenzelm@28778
   818
  anything defined by the user.
wenzelm@28770
   819
wenzelm@28770
   820
  When specifying notation for logical entities, all logical types
wenzelm@28770
   821
  (excluding @{typ prop}) are \emph{collapsed} to this single category
wenzelm@28778
   822
  of @{syntax (inner) logic}.
wenzelm@28770
   823
wenzelm@46287
   824
  \item @{syntax_ref (inner) index} denotes an optional index term for
wenzelm@46287
   825
  indexed syntax.  If omitted, it refers to the first @{keyword
wenzelm@46287
   826
  "structure"} variable in the context.  The special dummy ``@{text
wenzelm@46287
   827
  "\<index>"}'' serves as pattern variable in mixfix annotations that
wenzelm@46287
   828
  introduce indexed notation.
wenzelm@46287
   829
wenzelm@28778
   830
  \item @{syntax_ref (inner) idt} denotes identifiers, possibly
wenzelm@28778
   831
  constrained by types.
wenzelm@28770
   832
wenzelm@28778
   833
  \item @{syntax_ref (inner) idts} denotes a sequence of @{syntax_ref
wenzelm@28778
   834
  (inner) idt}.  This is the most basic category for variables in
wenzelm@28778
   835
  iterated binders, such as @{text "\<lambda>"} or @{text "\<And>"}.
wenzelm@28770
   836
wenzelm@28778
   837
  \item @{syntax_ref (inner) pttrn} and @{syntax_ref (inner) pttrns}
wenzelm@28778
   838
  denote patterns for abstraction, cases bindings etc.  In Pure, these
wenzelm@28778
   839
  categories start as a mere copy of @{syntax (inner) idt} and
wenzelm@28778
   840
  @{syntax (inner) idts}, respectively.  Object-logics may add
wenzelm@28778
   841
  additional productions for binding forms.
wenzelm@28770
   842
wenzelm@28778
   843
  \item @{syntax_ref (inner) type} denotes types of the meta-logic.
wenzelm@28770
   844
wenzelm@28778
   845
  \item @{syntax_ref (inner) sort} denotes meta-level sorts.
wenzelm@28770
   846
wenzelm@28770
   847
  \end{description}
wenzelm@28770
   848
wenzelm@28774
   849
  Here are some further explanations of certain syntax features.
wenzelm@28773
   850
wenzelm@28773
   851
  \begin{itemize}
wenzelm@28770
   852
wenzelm@28778
   853
  \item In @{syntax (inner) idts}, note that @{text "x :: nat y"} is
wenzelm@28778
   854
  parsed as @{text "x :: (nat y)"}, treating @{text y} like a type
wenzelm@28778
   855
  constructor applied to @{text nat}.  To avoid this interpretation,
wenzelm@28778
   856
  write @{text "(x :: nat) y"} with explicit parentheses.
wenzelm@28773
   857
wenzelm@28773
   858
  \item Similarly, @{text "x :: nat y :: nat"} is parsed as @{text "x ::
wenzelm@28770
   859
  (nat y :: nat)"}.  The correct form is @{text "(x :: nat) (y ::
wenzelm@28770
   860
  nat)"}, or @{text "(x :: nat) y :: nat"} if @{text y} is last in the
wenzelm@28770
   861
  sequence of identifiers.
wenzelm@28773
   862
wenzelm@28773
   863
  \item Type constraints for terms bind very weakly.  For example,
wenzelm@28773
   864
  @{text "x < y :: nat"} is normally parsed as @{text "(x < y) ::
wenzelm@28773
   865
  nat"}, unless @{text "<"} has a very low priority, in which case the
wenzelm@28773
   866
  input is likely to be ambiguous.  The correct form is @{text "x < (y
wenzelm@28773
   867
  :: nat)"}.
wenzelm@28773
   868
wenzelm@28773
   869
  \item Constraints may be either written with two literal colons
wenzelm@28773
   870
  ``@{verbatim "::"}'' or the double-colon symbol @{verbatim "\<Colon>"},
wenzelm@28774
   871
  which actually looks exactly the same in some {\LaTeX} styles.
wenzelm@28773
   872
wenzelm@28774
   873
  \item Dummy variables (written as underscore) may occur in different
wenzelm@28774
   874
  roles.
wenzelm@28773
   875
wenzelm@28773
   876
  \begin{description}
wenzelm@28773
   877
wenzelm@28774
   878
  \item A type ``@{text "_"}'' or ``@{text "_ :: sort"}'' acts like an
wenzelm@28774
   879
  anonymous inference parameter, which is filled-in according to the
wenzelm@28774
   880
  most general type produced by the type-checking phase.
wenzelm@28770
   881
wenzelm@28774
   882
  \item A bound ``@{text "_"}'' refers to a vacuous abstraction, where
wenzelm@28774
   883
  the body does not refer to the binding introduced here.  As in the
wenzelm@28774
   884
  term @{term "\<lambda>x _. x"}, which is @{text "\<alpha>"}-equivalent to @{text
wenzelm@28774
   885
  "\<lambda>x y. x"}.
wenzelm@28773
   886
wenzelm@28774
   887
  \item A free ``@{text "_"}'' refers to an implicit outer binding.
wenzelm@28774
   888
  Higher definitional packages usually allow forms like @{text "f x _
wenzelm@28774
   889
  = x"}.
wenzelm@28773
   890
wenzelm@28774
   891
  \item A schematic ``@{text "_"}'' (within a term pattern, see
wenzelm@28774
   892
  \secref{sec:term-decls}) refers to an anonymous variable that is
wenzelm@28774
   893
  implicitly abstracted over its context of locally bound variables.
wenzelm@28774
   894
  For example, this allows pattern matching of @{text "{x. f x = g
wenzelm@28774
   895
  x}"} against @{text "{x. _ = _}"}, or even @{text "{_. _ = _}"} by
wenzelm@28774
   896
  using both bound and schematic dummies.
wenzelm@28773
   897
wenzelm@28773
   898
  \end{description}
wenzelm@28773
   899
wenzelm@28774
   900
  \item The three literal dots ``@{verbatim "..."}'' may be also
wenzelm@28774
   901
  written as ellipsis symbol @{verbatim "\<dots>"}.  In both cases this
wenzelm@28774
   902
  refers to a special schematic variable, which is bound in the
wenzelm@28774
   903
  context.  This special term abbreviation works nicely with
wenzelm@28774
   904
  calculational reasoning (\secref{sec:calculation}).
wenzelm@28774
   905
wenzelm@46287
   906
  \item @{verbatim CONST} ensures that the given identifier is treated
wenzelm@46287
   907
  as constant term, and passed through the parse tree in fully
wenzelm@46287
   908
  internalized form.  This is particularly relevant for translation
wenzelm@46287
   909
  rules (\secref{sec:syn-trans}), notably on the RHS.
wenzelm@46287
   910
wenzelm@46287
   911
  \item @{verbatim XCONST} is similar to @{verbatim CONST}, but
wenzelm@46287
   912
  retains the constant name as given.  This is only relevant to
wenzelm@46287
   913
  translation rules (\secref{sec:syn-trans}), notably on the LHS.
wenzelm@46287
   914
wenzelm@28773
   915
  \end{itemize}
wenzelm@28770
   916
*}
wenzelm@28770
   917
wenzelm@28777
   918
wenzelm@46282
   919
subsection {* Inspecting the syntax *}
wenzelm@28777
   920
wenzelm@46282
   921
text {*
wenzelm@46282
   922
  \begin{matharray}{rcl}
wenzelm@46282
   923
    @{command_def "print_syntax"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
wenzelm@46282
   924
  \end{matharray}
wenzelm@28777
   925
wenzelm@46282
   926
  \begin{description}
wenzelm@46282
   927
wenzelm@46282
   928
  \item @{command "print_syntax"} prints the inner syntax of the
wenzelm@46282
   929
  current context.  The output can be quite large; the most important
wenzelm@46282
   930
  sections are explained below.
wenzelm@28777
   931
wenzelm@46282
   932
  \begin{description}
wenzelm@28777
   933
wenzelm@46282
   934
  \item @{text "lexicon"} lists the delimiters of the inner token
wenzelm@46282
   935
  language; see \secref{sec:inner-lex}.
wenzelm@28777
   936
wenzelm@46282
   937
  \item @{text "prods"} lists the productions of the underlying
wenzelm@46282
   938
  priority grammar; see \secref{sec:priority-grammar}.
wenzelm@28777
   939
wenzelm@46282
   940
  The nonterminal @{text "A\<^sup>(\<^sup>p\<^sup>)"} is rendered in plain text as @{text
wenzelm@46282
   941
  "A[p]"}; delimiters are quoted.  Many productions have an extra
wenzelm@46282
   942
  @{text "\<dots> => name"}.  These names later become the heads of parse
wenzelm@46282
   943
  trees; they also guide the pretty printer.
wenzelm@28777
   944
wenzelm@46282
   945
  Productions without such parse tree names are called \emph{copy
wenzelm@46282
   946
  productions}.  Their right-hand side must have exactly one
wenzelm@46282
   947
  nonterminal symbol (or named token).  The parser does not create a
wenzelm@46282
   948
  new parse tree node for copy productions, but simply returns the
wenzelm@46282
   949
  parse tree of the right-hand symbol.
wenzelm@46282
   950
wenzelm@46282
   951
  If the right-hand side of a copy production consists of a single
wenzelm@46282
   952
  nonterminal without any delimiters, then it is called a \emph{chain
wenzelm@46282
   953
  production}.  Chain productions act as abbreviations: conceptually,
wenzelm@46282
   954
  they are removed from the grammar by adding new productions.
wenzelm@46282
   955
  Priority information attached to chain productions is ignored; only
wenzelm@46282
   956
  the dummy value @{text "-1"} is displayed.
wenzelm@46282
   957
wenzelm@46282
   958
  \item @{text "print modes"} lists the alternative print modes
wenzelm@46282
   959
  provided by this grammar; see \secref{sec:print-modes}.
wenzelm@28777
   960
wenzelm@46282
   961
  \item @{text "parse_rules"} and @{text "print_rules"} relate to
wenzelm@46282
   962
  syntax translations (macros); see \secref{sec:syn-trans}.
wenzelm@46282
   963
wenzelm@46282
   964
  \item @{text "parse_ast_translation"} and @{text
wenzelm@46282
   965
  "print_ast_translation"} list sets of constants that invoke
wenzelm@46282
   966
  translation functions for abstract syntax trees, which are only
wenzelm@46282
   967
  required in very special situations; see \secref{sec:tr-funs}.
wenzelm@28777
   968
wenzelm@46282
   969
  \item @{text "parse_translation"} and @{text "print_translation"}
wenzelm@46282
   970
  list the sets of constants that invoke regular translation
wenzelm@46282
   971
  functions; see \secref{sec:tr-funs}.
wenzelm@29157
   972
wenzelm@46282
   973
  \end{description}
wenzelm@46282
   974
wenzelm@46282
   975
  \end{description}
wenzelm@28777
   976
*}
wenzelm@28774
   977
wenzelm@28770
   978
wenzelm@46291
   979
subsection {* Ambiguity of parsed expressions *}
wenzelm@46291
   980
wenzelm@46291
   981
text {*
wenzelm@46291
   982
  \begin{tabular}{rcll}
wenzelm@46512
   983
    @{attribute_def syntax_ambiguity_warning} & : & @{text attribute} & default @{text true} \\
wenzelm@46506
   984
    @{attribute_def syntax_ambiguity_limit} & : & @{text attribute} & default @{text 10} \\
wenzelm@46291
   985
  \end{tabular}
wenzelm@46291
   986
wenzelm@46291
   987
  Depending on the grammar and the given input, parsing may be
wenzelm@46291
   988
  ambiguous.  Isabelle lets the Earley parser enumerate all possible
wenzelm@46291
   989
  parse trees, and then tries to make the best out of the situation.
wenzelm@46291
   990
  Terms that cannot be type-checked are filtered out, which often
wenzelm@46291
   991
  leads to a unique result in the end.  Unlike regular type
wenzelm@46291
   992
  reconstruction, which is applied to the whole collection of input
wenzelm@46291
   993
  terms simultaneously, the filtering stage only treats each given
wenzelm@46291
   994
  term in isolation.  Filtering is also not attempted for individual
wenzelm@46291
   995
  types or raw ASTs (as required for @{command translations}).
wenzelm@46291
   996
wenzelm@46291
   997
  Certain warning or error messages are printed, depending on the
wenzelm@46291
   998
  situation and the given configuration options.  Parsing ultimately
wenzelm@46291
   999
  fails, if multiple results remain after the filtering phase.
wenzelm@46291
  1000
wenzelm@46291
  1001
  \begin{description}
wenzelm@46291
  1002
wenzelm@46512
  1003
  \item @{attribute syntax_ambiguity_warning} controls output of
wenzelm@46512
  1004
  explicit warning messages about syntax ambiguity.
wenzelm@46291
  1005
wenzelm@46506
  1006
  \item @{attribute syntax_ambiguity_limit} determines the number of
wenzelm@46291
  1007
  resulting parse trees that are shown as part of the printed message
wenzelm@46291
  1008
  in case of an ambiguity.
wenzelm@46291
  1009
wenzelm@46291
  1010
  \end{description}
wenzelm@46291
  1011
*}
wenzelm@46291
  1012
wenzelm@46291
  1013
wenzelm@46282
  1014
section {* Raw syntax and translations \label{sec:syn-trans} *}
wenzelm@28762
  1015
wenzelm@28762
  1016
text {*
wenzelm@28762
  1017
  \begin{matharray}{rcl}
wenzelm@41229
  1018
    @{command_def "nonterminal"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1019
    @{command_def "syntax"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1020
    @{command_def "no_syntax"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1021
    @{command_def "translations"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1022
    @{command_def "no_translations"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1023
  \end{matharray}
wenzelm@28762
  1024
wenzelm@46292
  1025
  Unlike mixfix notation for existing formal entities
wenzelm@46292
  1026
  (\secref{sec:notation}), raw syntax declarations provide full access
wenzelm@46292
  1027
  to the priority grammar of the inner syntax.  This includes
wenzelm@46292
  1028
  additional syntactic categories (via @{command nonterminal}) and
wenzelm@46292
  1029
  free-form grammar productions (via @{command syntax}).  Additional
wenzelm@46292
  1030
  syntax translations (or macros, via @{command translations}) are
wenzelm@46292
  1031
  required to turn resulting parse trees into proper representations
wenzelm@46292
  1032
  of formal entities again.
wenzelm@46292
  1033
wenzelm@42596
  1034
  @{rail "
wenzelm@42596
  1035
    @@{command nonterminal} (@{syntax name} + @'and')
wenzelm@28762
  1036
    ;
wenzelm@46494
  1037
    (@@{command syntax} | @@{command no_syntax}) @{syntax mode}? (constdecl +)
wenzelm@28762
  1038
    ;
wenzelm@42596
  1039
    (@@{command translations} | @@{command no_translations})
wenzelm@42596
  1040
      (transpat ('==' | '=>' | '<=' | '\<rightleftharpoons>' | '\<rightharpoonup>' | '\<leftharpoondown>') transpat +)
wenzelm@28762
  1041
    ;
wenzelm@28762
  1042
wenzelm@46494
  1043
    constdecl: @{syntax name} '::' @{syntax type} @{syntax mixfix}?
wenzelm@46494
  1044
    ;
wenzelm@42596
  1045
    mode: ('(' ( @{syntax name} | @'output' | @{syntax name} @'output' ) ')')
wenzelm@28762
  1046
    ;
wenzelm@42596
  1047
    transpat: ('(' @{syntax nameref} ')')? @{syntax string}
wenzelm@42596
  1048
  "}
wenzelm@28762
  1049
wenzelm@28762
  1050
  \begin{description}
wenzelm@46282
  1051
wenzelm@41229
  1052
  \item @{command "nonterminal"}~@{text c} declares a type
wenzelm@28762
  1053
  constructor @{text c} (without arguments) to act as purely syntactic
wenzelm@28762
  1054
  type: a nonterminal symbol of the inner syntax.
wenzelm@28762
  1055
wenzelm@46292
  1056
  \item @{command "syntax"}~@{text "(mode) c :: \<sigma> (mx)"} augments the
wenzelm@46292
  1057
  priority grammar and the pretty printer table for the given print
wenzelm@46292
  1058
  mode (default @{verbatim "\"\""}). An optional keyword @{keyword_ref
wenzelm@46292
  1059
  "output"} means that only the pretty printer table is affected.
wenzelm@46292
  1060
wenzelm@46292
  1061
  Following \secref{sec:mixfix}, the mixfix annotation @{text "mx =
wenzelm@46292
  1062
  template ps q"} together with type @{text "\<sigma> = \<tau>\<^sub>1 \<Rightarrow> \<dots> \<tau>\<^sub>n \<Rightarrow> \<tau>"}
wenzelm@46292
  1063
  specify a grammar production.  The @{text template} contains
wenzelm@46292
  1064
  delimiter tokens that surround @{text "n"} argument positions
wenzelm@46292
  1065
  (@{verbatim "_"}).  The latter correspond to nonterminal symbols
wenzelm@46292
  1066
  @{text "A\<^sub>i"} derived from the argument types @{text "\<tau>\<^sub>i"} as
wenzelm@46292
  1067
  follows:
wenzelm@46292
  1068
  \begin{itemize}
wenzelm@46292
  1069
wenzelm@46292
  1070
  \item @{text "prop"} if @{text "\<tau>\<^sub>i = prop"}
wenzelm@46292
  1071
wenzelm@46292
  1072
  \item @{text "logic"} if @{text "\<tau>\<^sub>i = (\<dots>)\<kappa>"} for logical type
wenzelm@46292
  1073
  constructor @{text "\<kappa> \<noteq> prop"}
wenzelm@46292
  1074
wenzelm@46292
  1075
  \item @{text any} if @{text "\<tau>\<^sub>i = \<alpha>"} for type variables
wenzelm@46292
  1076
wenzelm@46292
  1077
  \item @{text "\<kappa>"} if @{text "\<tau>\<^sub>i = \<kappa>"} for nonterminal @{text "\<kappa>"}
wenzelm@46292
  1078
  (syntactic type constructor)
wenzelm@46292
  1079
wenzelm@46292
  1080
  \end{itemize}
wenzelm@46292
  1081
wenzelm@46292
  1082
  Each @{text "A\<^sub>i"} is decorated by priority @{text "p\<^sub>i"} from the
wenzelm@46292
  1083
  given list @{text "ps"}; missing priorities default to 0.
wenzelm@46292
  1084
wenzelm@46292
  1085
  The resulting nonterminal of the production is determined similarly
wenzelm@46292
  1086
  from type @{text "\<tau>"}, with priority @{text "q"} and default 1000.
wenzelm@46292
  1087
wenzelm@46292
  1088
  \medskip Parsing via this production produces parse trees @{text
wenzelm@46292
  1089
  "t\<^sub>1, \<dots>, t\<^sub>n"} for the argument slots.  The resulting parse tree is
wenzelm@46292
  1090
  composed as @{text "c t\<^sub>1 \<dots> t\<^sub>n"}, by using the syntax constant @{text
wenzelm@46292
  1091
  "c"} of the syntax declaration.
wenzelm@46292
  1092
wenzelm@46292
  1093
  Such syntactic constants are invented on the spot, without formal
wenzelm@46292
  1094
  check wrt.\ existing declarations.  It is conventional to use plain
wenzelm@46292
  1095
  identifiers prefixed by a single underscore (e.g.\ @{text
wenzelm@46292
  1096
  "_foobar"}).  Names should be chosen with care, to avoid clashes
wenzelm@46292
  1097
  with unrelated syntax declarations.
wenzelm@46292
  1098
wenzelm@46292
  1099
  \medskip The special case of copy production is specified by @{text
wenzelm@46292
  1100
  "c = "}@{verbatim "\"\""} (empty string).  It means that the
wenzelm@46292
  1101
  resulting parse tree @{text "t"} is copied directly, without any
wenzelm@46292
  1102
  further decoration.
wenzelm@46282
  1103
wenzelm@28762
  1104
  \item @{command "no_syntax"}~@{text "(mode) decls"} removes grammar
wenzelm@28762
  1105
  declarations (and translations) resulting from @{text decls}, which
wenzelm@28762
  1106
  are interpreted in the same manner as for @{command "syntax"} above.
wenzelm@46282
  1107
wenzelm@28762
  1108
  \item @{command "translations"}~@{text rules} specifies syntactic
wenzelm@28762
  1109
  translation rules (i.e.\ macros): parse~/ print rules (@{text "\<rightleftharpoons>"}),
wenzelm@28762
  1110
  parse rules (@{text "\<rightharpoonup>"}), or print rules (@{text "\<leftharpoondown>"}).
wenzelm@28762
  1111
  Translation patterns may be prefixed by the syntactic category to be
wenzelm@28762
  1112
  used for parsing; the default is @{text logic}.
wenzelm@46282
  1113
wenzelm@28762
  1114
  \item @{command "no_translations"}~@{text rules} removes syntactic
wenzelm@28762
  1115
  translation rules, which are interpreted in the same manner as for
wenzelm@28762
  1116
  @{command "translations"} above.
wenzelm@28762
  1117
wenzelm@28762
  1118
  \end{description}
wenzelm@46293
  1119
wenzelm@46293
  1120
  Raw syntax and translations provide a slightly more low-level
wenzelm@46293
  1121
  access to the grammar and the form of resulting parse trees.  It is
wenzelm@46293
  1122
  often possible to avoid this untyped macro mechanism, and use
wenzelm@46293
  1123
  type-safe @{command abbreviation} or @{command notation} instead.
wenzelm@46293
  1124
  Some important situations where @{command syntax} and @{command
wenzelm@46293
  1125
  translations} are really needed are as follows:
wenzelm@46293
  1126
wenzelm@46293
  1127
  \begin{itemize}
wenzelm@46293
  1128
wenzelm@46293
  1129
  \item Iterated replacement via recursive @{command translations}.
wenzelm@46293
  1130
  For example, consider list enumeration @{term "[a, b, c, d]"} as
wenzelm@46293
  1131
  defined in theory @{theory List} in Isabelle/HOL.
wenzelm@46293
  1132
wenzelm@46293
  1133
  \item Change of binding status of variables: anything beyond the
wenzelm@46293
  1134
  built-in @{keyword "binder"} mixfix annotation requires explicit
wenzelm@46293
  1135
  syntax translations.  For example, consider list filter
wenzelm@46293
  1136
  comprehension @{term "[x \<leftarrow> xs . P]"} as defined in theory @{theory
wenzelm@46293
  1137
  List} in Isabelle/HOL.
wenzelm@46293
  1138
wenzelm@46293
  1139
  \end{itemize}
wenzelm@28762
  1140
*}
wenzelm@28762
  1141
wenzelm@28762
  1142
wenzelm@28779
  1143
section {* Syntax translation functions \label{sec:tr-funs} *}
wenzelm@28762
  1144
wenzelm@28762
  1145
text {*
wenzelm@28762
  1146
  \begin{matharray}{rcl}
wenzelm@28762
  1147
    @{command_def "parse_ast_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1148
    @{command_def "parse_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1149
    @{command_def "print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1150
    @{command_def "typed_print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1151
    @{command_def "print_ast_translation"} & : & @{text "theory \<rightarrow> theory"} \\
wenzelm@28762
  1152
  \end{matharray}
wenzelm@28762
  1153
wenzelm@42596
  1154
  @{rail "
wenzelm@42596
  1155
  ( @@{command parse_ast_translation} | @@{command parse_translation} |
wenzelm@42596
  1156
    @@{command print_translation} | @@{command typed_print_translation} |
wenzelm@42596
  1157
    @@{command print_ast_translation}) ('(' @'advanced' ')')? @{syntax text}
wenzelm@42596
  1158
  "}
wenzelm@28762
  1159
wenzelm@28762
  1160
  Syntax translation functions written in ML admit almost arbitrary
wenzelm@28762
  1161
  manipulations of Isabelle's inner syntax.  Each of the above commands
wenzelm@42596
  1162
  has a single @{syntax text} argument that refers to an ML
wenzelm@28762
  1163
  expression of appropriate type, which are as follows by default:
wenzelm@28762
  1164
wenzelm@28762
  1165
%FIXME proper antiquotations
wenzelm@28762
  1166
\begin{ttbox}
wenzelm@28762
  1167
val parse_ast_translation   : (string * (ast list -> ast)) list
wenzelm@28762
  1168
val parse_translation       : (string * (term list -> term)) list
wenzelm@28762
  1169
val print_translation       : (string * (term list -> term)) list
wenzelm@42247
  1170
val typed_print_translation : (string * (typ -> term list -> term)) list
wenzelm@28762
  1171
val print_ast_translation   : (string * (ast list -> ast)) list
wenzelm@28762
  1172
\end{ttbox}
wenzelm@28762
  1173
wenzelm@28762
  1174
  If the @{text "(advanced)"} option is given, the corresponding
wenzelm@28762
  1175
  translation functions may depend on the current theory or proof
wenzelm@28762
  1176
  context.  This allows implementing advanced syntax mechanisms, as
wenzelm@28762
  1177
  translation functions may refer to specific theory declarations or
wenzelm@28762
  1178
  auxiliary proof data.
wenzelm@28762
  1179
wenzelm@28762
  1180
%FIXME proper antiquotations
wenzelm@28762
  1181
\begin{ttbox}
wenzelm@28762
  1182
val parse_ast_translation:
wenzelm@28762
  1183
  (string * (Proof.context -> ast list -> ast)) list
wenzelm@28762
  1184
val parse_translation:
wenzelm@28762
  1185
  (string * (Proof.context -> term list -> term)) list
wenzelm@28762
  1186
val print_translation:
wenzelm@28762
  1187
  (string * (Proof.context -> term list -> term)) list
wenzelm@28762
  1188
val typed_print_translation:
wenzelm@42247
  1189
  (string * (Proof.context -> typ -> term list -> term)) list
wenzelm@28762
  1190
val print_ast_translation:
wenzelm@28762
  1191
  (string * (Proof.context -> ast list -> ast)) list
wenzelm@28762
  1192
\end{ttbox}
wenzelm@46294
  1193
wenzelm@46294
  1194
  \medskip See also the chapter on ``Syntax Transformations'' in old
wenzelm@46294
  1195
  \cite{isabelle-ref} for further details on translations on parse
wenzelm@46294
  1196
  trees.
wenzelm@28762
  1197
*}
wenzelm@28762
  1198
wenzelm@28762
  1199
end