src/Doc/Isar_Ref/Inner_Syntax.thy
changeset 58618 782f0b662cae
parent 58552 66fed99e874f
child 58724 e5f809f52f26
equal deleted inserted replaced
58617:4f169d2cf6f3 58618:782f0b662cae
     1 theory Inner_Syntax
     1 theory Inner_Syntax
     2 imports Base Main
     2 imports Base Main
     3 begin
     3 begin
     4 
     4 
     5 chapter {* Inner syntax --- the term language \label{ch:inner-syntax} *}
     5 chapter \<open>Inner syntax --- the term language \label{ch:inner-syntax}\<close>
     6 
     6 
     7 text {* The inner syntax of Isabelle provides concrete notation for
     7 text \<open>The inner syntax of Isabelle provides concrete notation for
     8   the main entities of the logical framework, notably @{text
     8   the main entities of the logical framework, notably @{text
     9   "\<lambda>"}-terms with types and type classes.  Applications may either
     9   "\<lambda>"}-terms with types and type classes.  Applications may either
    10   extend existing syntactic categories by additional notation, or
    10   extend existing syntactic categories by additional notation, or
    11   define new sub-languages that are linked to the standard term
    11   define new sub-languages that are linked to the standard term
    12   language via some explicit markers.  For example @{verbatim
    12   language via some explicit markers.  For example @{verbatim
    23 
    23 
    24   Further details of the syntax engine involves the classical
    24   Further details of the syntax engine involves the classical
    25   distinction of lexical language versus context-free grammar (see
    25   distinction of lexical language versus context-free grammar (see
    26   \secref{sec:pure-syntax}), and various mechanisms for \emph{syntax
    26   \secref{sec:pure-syntax}), and various mechanisms for \emph{syntax
    27   transformations} (see \secref{sec:syntax-transformations}).
    27   transformations} (see \secref{sec:syntax-transformations}).
    28 *}
    28 \<close>
    29 
    29 
    30 
    30 
    31 section {* Printing logical entities *}
    31 section \<open>Printing logical entities\<close>
    32 
    32 
    33 subsection {* Diagnostic commands \label{sec:print-diag} *}
    33 subsection \<open>Diagnostic commands \label{sec:print-diag}\<close>
    34 
    34 
    35 text {*
    35 text \<open>
    36   \begin{matharray}{rcl}
    36   \begin{matharray}{rcl}
    37     @{command_def "typ"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    37     @{command_def "typ"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    38     @{command_def "term"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    38     @{command_def "term"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    39     @{command_def "prop"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    39     @{command_def "prop"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    40     @{command_def "thm"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
    40     @{command_def "thm"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
   113   @{cite "isabelle-sys"}.
   113   @{cite "isabelle-sys"}.
   114 
   114 
   115   Note that antiquotations (cf.\ \secref{sec:antiq}) provide a more
   115   Note that antiquotations (cf.\ \secref{sec:antiq}) provide a more
   116   systematic way to include formal items into the printed text
   116   systematic way to include formal items into the printed text
   117   document.
   117   document.
   118 *}
   118 \<close>
   119 
   119 
   120 
   120 
   121 subsection {* Details of printed content *}
   121 subsection \<open>Details of printed content\<close>
   122 
   122 
   123 text {*
   123 text \<open>
   124   \begin{tabular}{rcll}
   124   \begin{tabular}{rcll}
   125     @{attribute_def show_markup} & : & @{text attribute} \\
   125     @{attribute_def show_markup} & : & @{text attribute} \\
   126     @{attribute_def show_types} & : & @{text attribute} & default @{text false} \\
   126     @{attribute_def show_types} & : & @{text attribute} & default @{text false} \\
   127     @{attribute_def show_sorts} & : & @{text attribute} & default @{text false} \\
   127     @{attribute_def show_sorts} & : & @{text attribute} & default @{text false} \\
   128     @{attribute_def show_consts} & : & @{text attribute} & default @{text false} \\
   128     @{attribute_def show_consts} & : & @{text attribute} & default @{text false} \\
   239   question mark is affected, the remaining text is unchanged
   239   question mark is affected, the remaining text is unchanged
   240   (including proper markup for schematic variables that might be
   240   (including proper markup for schematic variables that might be
   241   relevant for user interfaces).
   241   relevant for user interfaces).
   242 
   242 
   243   \end{description}
   243   \end{description}
   244 *}
   244 \<close>
   245 
   245 
   246 
   246 
   247 subsection {* Alternative print modes \label{sec:print-modes} *}
   247 subsection \<open>Alternative print modes \label{sec:print-modes}\<close>
   248 
   248 
   249 text {*
   249 text \<open>
   250   \begin{mldecls}
   250   \begin{mldecls}
   251     @{index_ML print_mode_value: "unit -> string list"} \\
   251     @{index_ML print_mode_value: "unit -> string list"} \\
   252     @{index_ML Print_Mode.with_modes: "string list -> ('a -> 'b) -> 'a -> 'b"} \\
   252     @{index_ML Print_Mode.with_modes: "string list -> ('a -> 'b) -> 'a -> 'b"} \\
   253   \end{mldecls}
   253   \end{mldecls}
   254 
   254 
   310   \item @{verbatim latex}: additional mode that is active in {\LaTeX}
   310   \item @{verbatim latex}: additional mode that is active in {\LaTeX}
   311   document preparation of Isabelle theory sources; allows to provide
   311   document preparation of Isabelle theory sources; allows to provide
   312   alternative output notation.
   312   alternative output notation.
   313 
   313 
   314   \end{itemize}
   314   \end{itemize}
   315 *}
   315 \<close>
   316 
   316 
   317 
   317 
   318 section {* Mixfix annotations \label{sec:mixfix} *}
   318 section \<open>Mixfix annotations \label{sec:mixfix}\<close>
   319 
   319 
   320 text {* Mixfix annotations specify concrete \emph{inner syntax} of
   320 text \<open>Mixfix annotations specify concrete \emph{inner syntax} of
   321   Isabelle types and terms.  Locally fixed parameters in toplevel
   321   Isabelle types and terms.  Locally fixed parameters in toplevel
   322   theorem statements, locale and class specifications also admit
   322   theorem statements, locale and class specifications also admit
   323   mixfix annotations in a fairly uniform manner.  A mixfix annotation
   323   mixfix annotations in a fairly uniform manner.  A mixfix annotation
   324   describes the concrete syntax, the translation to abstract
   324   describes the concrete syntax, the translation to abstract
   325   syntax, and the pretty printing.  Special case annotations provide a
   325   syntax, and the pretty printing.  Special case annotations provide a
   350   fixed variables may be declared as @{keyword "structure"}.
   350   fixed variables may be declared as @{keyword "structure"}.
   351 
   351 
   352   Infix and binder declarations provide common abbreviations for
   352   Infix and binder declarations provide common abbreviations for
   353   particular mixfix declarations.  So in practice, mixfix templates
   353   particular mixfix declarations.  So in practice, mixfix templates
   354   mostly degenerate to literal text for concrete syntax, such as
   354   mostly degenerate to literal text for concrete syntax, such as
   355   ``@{verbatim "++"}'' for an infix symbol.  *}
   355   ``@{verbatim "++"}'' for an infix symbol.\<close>
   356 
   356 
   357 
   357 
   358 subsection {* The general mixfix form *}
   358 subsection \<open>The general mixfix form\<close>
   359 
   359 
   360 text {* In full generality, mixfix declarations work as follows.
   360 text \<open>In full generality, mixfix declarations work as follows.
   361   Suppose a constant @{text "c :: \<tau>\<^sub>1 \<Rightarrow> \<dots> \<tau>\<^sub>n \<Rightarrow> \<tau>"} is annotated by
   361   Suppose a constant @{text "c :: \<tau>\<^sub>1 \<Rightarrow> \<dots> \<tau>\<^sub>n \<Rightarrow> \<tau>"} is annotated by
   362   @{text "(mixfix [p\<^sub>1, \<dots>, p\<^sub>n] p)"}, where @{text "mixfix"} is a string
   362   @{text "(mixfix [p\<^sub>1, \<dots>, p\<^sub>n] p)"}, where @{text "mixfix"} is a string
   363   @{text "d\<^sub>0 _ d\<^sub>1 _ \<dots> _ d\<^sub>n"} consisting of delimiters that surround
   363   @{text "d\<^sub>0 _ d\<^sub>1 _ \<dots> _ d\<^sub>n"} consisting of delimiters that surround
   364   argument positions as indicated by underscores.
   364   argument positions as indicated by underscores.
   365 
   365 
   434 
   434 
   435   \end{description}
   435   \end{description}
   436 
   436 
   437   The general idea of pretty printing with blocks and breaks is also
   437   The general idea of pretty printing with blocks and breaks is also
   438   described in @{cite "paulson-ml2"}; it goes back to @{cite "Oppen:1980"}.
   438   described in @{cite "paulson-ml2"}; it goes back to @{cite "Oppen:1980"}.
   439 *}
   439 \<close>
   440 
   440 
   441 
   441 
   442 subsection {* Infixes *}
   442 subsection \<open>Infixes\<close>
   443 
   443 
   444 text {* Infix operators are specified by convenient short forms that
   444 text \<open>Infix operators are specified by convenient short forms that
   445   abbreviate general mixfix annotations as follows:
   445   abbreviate general mixfix annotations as follows:
   446 
   446 
   447   \begin{center}
   447   \begin{center}
   448   \begin{tabular}{lll}
   448   \begin{tabular}{lll}
   449 
   449 
   466   pretty printing block.
   466   pretty printing block.
   467 
   467 
   468   The alternative notation @{verbatim "op"}~@{text sy} is introduced
   468   The alternative notation @{verbatim "op"}~@{text sy} is introduced
   469   in addition.  Thus any infix operator may be written in prefix form
   469   in addition.  Thus any infix operator may be written in prefix form
   470   (as in ML), independently of the number of arguments in the term.
   470   (as in ML), independently of the number of arguments in the term.
   471 *}
   471 \<close>
   472 
   472 
   473 
   473 
   474 subsection {* Binders *}
   474 subsection \<open>Binders\<close>
   475 
   475 
   476 text {* A \emph{binder} is a variable-binding construct such as a
   476 text \<open>A \emph{binder} is a variable-binding construct such as a
   477   quantifier.  The idea to formalize @{text "\<forall>x. b"} as @{text "All
   477   quantifier.  The idea to formalize @{text "\<forall>x. b"} as @{text "All
   478   (\<lambda>x. b)"} for @{text "All :: ('a \<Rightarrow> bool) \<Rightarrow> bool"} already goes back
   478   (\<lambda>x. b)"} for @{text "All :: ('a \<Rightarrow> bool) \<Rightarrow> bool"} already goes back
   479   to @{cite church40}.  Isabelle declarations of certain higher-order
   479   to @{cite church40}.  Isabelle declarations of certain higher-order
   480   operators may be annotated with @{keyword_def "binder"} annotations
   480   operators may be annotated with @{keyword_def "binder"} annotations
   481   as follows:
   481   as follows:
   507   syntax ends with a token that may be continued by an identifier
   507   syntax ends with a token that may be continued by an identifier
   508   token at the start of @{syntax (inner) idts}.
   508   token at the start of @{syntax (inner) idts}.
   509 
   509 
   510   Furthermore, a syntax translation to transforms @{text "c_binder x\<^sub>1
   510   Furthermore, a syntax translation to transforms @{text "c_binder x\<^sub>1
   511   \<dots> x\<^sub>n b"} into iterated application @{text "c (\<lambda>x\<^sub>1. \<dots> c (\<lambda>x\<^sub>n. b)\<dots>)"}.
   511   \<dots> x\<^sub>n b"} into iterated application @{text "c (\<lambda>x\<^sub>1. \<dots> c (\<lambda>x\<^sub>n. b)\<dots>)"}.
   512   This works in both directions, for parsing and printing.  *}
   512   This works in both directions, for parsing and printing.\<close>
   513 
   513 
   514 
   514 
   515 section {* Explicit notation \label{sec:notation} *}
   515 section \<open>Explicit notation \label{sec:notation}\<close>
   516 
   516 
   517 text {*
   517 text \<open>
   518   \begin{matharray}{rcll}
   518   \begin{matharray}{rcll}
   519     @{command_def "type_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   519     @{command_def "type_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   520     @{command_def "no_type_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   520     @{command_def "no_type_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   521     @{command_def "notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   521     @{command_def "notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   522     @{command_def "no_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   522     @{command_def "no_notation"} & : & @{text "local_theory \<rightarrow> local_theory"} \\
   560 
   560 
   561   \item @{command "write"} is similar to @{command "notation"}, but
   561   \item @{command "write"} is similar to @{command "notation"}, but
   562   works within an Isar proof body.
   562   works within an Isar proof body.
   563 
   563 
   564   \end{description}
   564   \end{description}
   565 *}
   565 \<close>
   566 
   566 
   567 
   567 
   568 section {* The Pure syntax \label{sec:pure-syntax} *}
   568 section \<open>The Pure syntax \label{sec:pure-syntax}\<close>
   569 
   569 
   570 subsection {* Lexical matters \label{sec:inner-lex} *}
   570 subsection \<open>Lexical matters \label{sec:inner-lex}\<close>
   571 
   571 
   572 text {* The inner lexical syntax vaguely resembles the outer one
   572 text \<open>The inner lexical syntax vaguely resembles the outer one
   573   (\secref{sec:outer-lex}), but some details are different.  There are
   573   (\secref{sec:outer-lex}), but some details are different.  There are
   574   two main categories of inner syntax tokens:
   574   two main categories of inner syntax tokens:
   575 
   575 
   576   \begin{enumerate}
   576   \begin{enumerate}
   577 
   577 
   615   "~~/src/HOL/Tools/string_syntax.ML"}).
   615   "~~/src/HOL/Tools/string_syntax.ML"}).
   616 
   616 
   617   The derived categories @{syntax_def (inner) num_const}, and @{syntax_def
   617   The derived categories @{syntax_def (inner) num_const}, and @{syntax_def
   618   (inner) float_const}, provide robust access to the respective tokens: the
   618   (inner) float_const}, provide robust access to the respective tokens: the
   619   syntax tree holds a syntactic constant instead of a free variable.
   619   syntax tree holds a syntactic constant instead of a free variable.
   620 *}
   620 \<close>
   621 
   621 
   622 
   622 
   623 subsection {* Priority grammars \label{sec:priority-grammar} *}
   623 subsection \<open>Priority grammars \label{sec:priority-grammar}\<close>
   624 
   624 
   625 text {* A context-free grammar consists of a set of \emph{terminal
   625 text \<open>A context-free grammar consists of a set of \emph{terminal
   626   symbols}, a set of \emph{nonterminal symbols} and a set of
   626   symbols}, a set of \emph{nonterminal symbols} and a set of
   627   \emph{productions}.  Productions have the form @{text "A = \<gamma>"},
   627   \emph{productions}.  Productions have the form @{text "A = \<gamma>"},
   628   where @{text A} is a nonterminal and @{text \<gamma>} is a string of
   628   where @{text A} is a nonterminal and @{text \<gamma>} is a string of
   629   terminals and nonterminals.  One designated nonterminal is called
   629   terminals and nonterminals.  One designated nonterminal is called
   630   the \emph{root symbol}.  The language defined by the grammar
   630   the \emph{root symbol}.  The language defined by the grammar
   690               & @{text "|"} & @{text A} @{verbatim "+"} @{text "A\<^sup>(\<^sup>1\<^sup>)"} & @{text "(0)"} \\
   690               & @{text "|"} & @{text A} @{verbatim "+"} @{text "A\<^sup>(\<^sup>1\<^sup>)"} & @{text "(0)"} \\
   691               & @{text "|"} & @{text "A\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "*"} @{text "A\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
   691               & @{text "|"} & @{text "A\<^sup>(\<^sup>3\<^sup>)"} @{verbatim "*"} @{text "A\<^sup>(\<^sup>2\<^sup>)"} & @{text "(2)"} \\
   692               & @{text "|"} & @{verbatim "-"} @{text "A\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
   692               & @{text "|"} & @{verbatim "-"} @{text "A\<^sup>(\<^sup>3\<^sup>)"} & @{text "(3)"} \\
   693   \end{tabular}
   693   \end{tabular}
   694   \end{center}
   694   \end{center}
   695 *}
   695 \<close>
   696 
   696 
   697 
   697 
   698 subsection {* The Pure grammar \label{sec:pure-grammar} *}
   698 subsection \<open>The Pure grammar \label{sec:pure-grammar}\<close>
   699 
   699 
   700 text {* The priority grammar of the @{text "Pure"} theory is defined
   700 text \<open>The priority grammar of the @{text "Pure"} theory is defined
   701   approximately like this:
   701   approximately like this:
   702 
   702 
   703   \begin{center}
   703   \begin{center}
   704   \begin{supertabular}{rclr}
   704   \begin{supertabular}{rclr}
   705 
   705 
   894   \item @{verbatim XCONST} is similar to @{verbatim CONST}, but
   894   \item @{verbatim XCONST} is similar to @{verbatim CONST}, but
   895   retains the constant name as given.  This is only relevant to
   895   retains the constant name as given.  This is only relevant to
   896   translation rules (\secref{sec:syn-trans}), notably on the LHS.
   896   translation rules (\secref{sec:syn-trans}), notably on the LHS.
   897 
   897 
   898   \end{itemize}
   898   \end{itemize}
   899 *}
   899 \<close>
   900 
   900 
   901 
   901 
   902 subsection {* Inspecting the syntax *}
   902 subsection \<open>Inspecting the syntax\<close>
   903 
   903 
   904 text {*
   904 text \<open>
   905   \begin{matharray}{rcl}
   905   \begin{matharray}{rcl}
   906     @{command_def "print_syntax"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
   906     @{command_def "print_syntax"}@{text "\<^sup>*"} & : & @{text "context \<rightarrow>"} \\
   907   \end{matharray}
   907   \end{matharray}
   908 
   908 
   909   \begin{description}
   909   \begin{description}
   954   functions; see \secref{sec:tr-funs}.
   954   functions; see \secref{sec:tr-funs}.
   955 
   955 
   956   \end{description}
   956   \end{description}
   957 
   957 
   958   \end{description}
   958   \end{description}
   959 *}
   959 \<close>
   960 
   960 
   961 
   961 
   962 subsection {* Ambiguity of parsed expressions *}
   962 subsection \<open>Ambiguity of parsed expressions\<close>
   963 
   963 
   964 text {*
   964 text \<open>
   965   \begin{tabular}{rcll}
   965   \begin{tabular}{rcll}
   966     @{attribute_def syntax_ambiguity_warning} & : & @{text attribute} & default @{text true} \\
   966     @{attribute_def syntax_ambiguity_warning} & : & @{text attribute} & default @{text true} \\
   967     @{attribute_def syntax_ambiguity_limit} & : & @{text attribute} & default @{text 10} \\
   967     @{attribute_def syntax_ambiguity_limit} & : & @{text attribute} & default @{text 10} \\
   968   \end{tabular}
   968   \end{tabular}
   969 
   969 
   989   \item @{attribute syntax_ambiguity_limit} determines the number of
   989   \item @{attribute syntax_ambiguity_limit} determines the number of
   990   resulting parse trees that are shown as part of the printed message
   990   resulting parse trees that are shown as part of the printed message
   991   in case of an ambiguity.
   991   in case of an ambiguity.
   992 
   992 
   993   \end{description}
   993   \end{description}
   994 *}
   994 \<close>
   995 
   995 
   996 
   996 
   997 section {* Syntax transformations \label{sec:syntax-transformations} *}
   997 section \<open>Syntax transformations \label{sec:syntax-transformations}\<close>
   998 
   998 
   999 text {* The inner syntax engine of Isabelle provides separate
   999 text \<open>The inner syntax engine of Isabelle provides separate
  1000   mechanisms to transform parse trees either via rewrite systems on
  1000   mechanisms to transform parse trees either via rewrite systems on
  1001   first-order ASTs (\secref{sec:syn-trans}), or ML functions on ASTs
  1001   first-order ASTs (\secref{sec:syn-trans}), or ML functions on ASTs
  1002   or syntactic @{text "\<lambda>"}-terms (\secref{sec:tr-funs}).  This works
  1002   or syntactic @{text "\<lambda>"}-terms (\secref{sec:tr-funs}).  This works
  1003   both for parsing and printing, as outlined in
  1003   both for parsing and printing, as outlined in
  1004   \figref{fig:parse-print}.
  1004   \figref{fig:parse-print}.
  1040 
  1040 
  1041   As a rule of thumb, anything that manipulates bindings of variables
  1041   As a rule of thumb, anything that manipulates bindings of variables
  1042   or constants needs to be implemented as syntax transformation (see
  1042   or constants needs to be implemented as syntax transformation (see
  1043   below).  Anything else is better done via check/uncheck: a prominent
  1043   below).  Anything else is better done via check/uncheck: a prominent
  1044   example application is the @{command abbreviation} concept of
  1044   example application is the @{command abbreviation} concept of
  1045   Isabelle/Pure. *}
  1045   Isabelle/Pure.\<close>
  1046 
  1046 
  1047 
  1047 
  1048 subsection {* Abstract syntax trees \label{sec:ast} *}
  1048 subsection \<open>Abstract syntax trees \label{sec:ast}\<close>
  1049 
  1049 
  1050 text {* The ML datatype @{ML_type Ast.ast} explicitly represents the
  1050 text \<open>The ML datatype @{ML_type Ast.ast} explicitly represents the
  1051   intermediate AST format that is used for syntax rewriting
  1051   intermediate AST format that is used for syntax rewriting
  1052   (\secref{sec:syn-trans}).  It is defined in ML as follows:
  1052   (\secref{sec:syn-trans}).  It is defined in ML as follows:
  1053   \begin{ttbox}
  1053   \begin{ttbox}
  1054   datatype ast =
  1054   datatype ast =
  1055     Constant of string |
  1055     Constant of string |
  1079   @{verbatim "\"_abs\""} does not bind the @{verbatim x} in any way.
  1079   @{verbatim "\"_abs\""} does not bind the @{verbatim x} in any way.
  1080   Proper bindings are introduced in later stages of the term syntax,
  1080   Proper bindings are introduced in later stages of the term syntax,
  1081   where @{verbatim "(\"_abs\" x t)"} becomes an @{ML Abs} node and
  1081   where @{verbatim "(\"_abs\" x t)"} becomes an @{ML Abs} node and
  1082   occurrences of @{verbatim x} in @{verbatim t} are replaced by bound
  1082   occurrences of @{verbatim x} in @{verbatim t} are replaced by bound
  1083   variables (represented as de-Bruijn indices).
  1083   variables (represented as de-Bruijn indices).
  1084 *}
  1084 \<close>
  1085 
  1085 
  1086 
  1086 
  1087 subsubsection {* AST constants versus variables *}
  1087 subsubsection \<open>AST constants versus variables\<close>
  1088 
  1088 
  1089 text {* Depending on the situation --- input syntax, output syntax,
  1089 text \<open>Depending on the situation --- input syntax, output syntax,
  1090   translation patterns --- the distinction of atomic ASTs as @{ML
  1090   translation patterns --- the distinction of atomic ASTs as @{ML
  1091   Ast.Constant} versus @{ML Ast.Variable} serves slightly different
  1091   Ast.Constant} versus @{ML Ast.Variable} serves slightly different
  1092   purposes.
  1092   purposes.
  1093 
  1093 
  1094   Input syntax of a term such as @{text "f a b = c"} does not yet
  1094   Input syntax of a term such as @{text "f a b = c"} does not yet
  1119   since the concrete syntax already distinguishes type variables from
  1119   since the concrete syntax already distinguishes type variables from
  1120   type constants (constructors).  So @{text "('a, 'b) foo"}
  1120   type constants (constructors).  So @{text "('a, 'b) foo"}
  1121   corresponds to an AST application of some constant for @{text foo}
  1121   corresponds to an AST application of some constant for @{text foo}
  1122   and variable arguments for @{text "'a"} and @{text "'b"}.  Note that
  1122   and variable arguments for @{text "'a"} and @{text "'b"}.  Note that
  1123   the postfix application is merely a feature of the concrete syntax,
  1123   the postfix application is merely a feature of the concrete syntax,
  1124   while in the AST the constructor occurs in head position.  *}
  1124   while in the AST the constructor occurs in head position.\<close>
  1125 
  1125 
  1126 
  1126 
  1127 subsubsection {* Authentic syntax names *}
  1127 subsubsection \<open>Authentic syntax names\<close>
  1128 
  1128 
  1129 text {* Naming constant entities within ASTs is another delicate
  1129 text \<open>Naming constant entities within ASTs is another delicate
  1130   issue.  Unqualified names are resolved in the name space tables in
  1130   issue.  Unqualified names are resolved in the name space tables in
  1131   the last stage of parsing, after all translations have been applied.
  1131   the last stage of parsing, after all translations have been applied.
  1132   Since syntax transformations do not know about this later name
  1132   Since syntax transformations do not know about this later name
  1133   resolution, there can be surprises in boundary cases.
  1133   resolution, there can be surprises in boundary cases.
  1134 
  1134 
  1158   \end{itemize}
  1158   \end{itemize}
  1159 
  1159 
  1160   In other words, syntax transformations that operate on input terms
  1160   In other words, syntax transformations that operate on input terms
  1161   written as prefix applications are difficult to make robust.
  1161   written as prefix applications are difficult to make robust.
  1162   Luckily, this case rarely occurs in practice, because syntax forms
  1162   Luckily, this case rarely occurs in practice, because syntax forms
  1163   to be translated usually correspond to some concrete notation. *}
  1163   to be translated usually correspond to some concrete notation.\<close>
  1164 
  1164 
  1165 
  1165 
  1166 subsection {* Raw syntax and translations \label{sec:syn-trans} *}
  1166 subsection \<open>Raw syntax and translations \label{sec:syn-trans}\<close>
  1167 
  1167 
  1168 text {*
  1168 text \<open>
  1169   \begin{tabular}{rcll}
  1169   \begin{tabular}{rcll}
  1170     @{command_def "nonterminal"} & : & @{text "theory \<rightarrow> theory"} \\
  1170     @{command_def "nonterminal"} & : & @{text "theory \<rightarrow> theory"} \\
  1171     @{command_def "syntax"} & : & @{text "theory \<rightarrow> theory"} \\
  1171     @{command_def "syntax"} & : & @{text "theory \<rightarrow> theory"} \\
  1172     @{command_def "no_syntax"} & : & @{text "theory \<rightarrow> theory"} \\
  1172     @{command_def "no_syntax"} & : & @{text "theory \<rightarrow> theory"} \\
  1173     @{command_def "translations"} & : & @{text "theory \<rightarrow> theory"} \\
  1173     @{command_def "translations"} & : & @{text "theory \<rightarrow> theory"} \\
  1335   syntax translations.  For example, consider list filter
  1335   syntax translations.  For example, consider list filter
  1336   comprehension @{term "[x \<leftarrow> xs . P]"} as defined in theory @{theory
  1336   comprehension @{term "[x \<leftarrow> xs . P]"} as defined in theory @{theory
  1337   List} in Isabelle/HOL.
  1337   List} in Isabelle/HOL.
  1338 
  1338 
  1339   \end{itemize}
  1339   \end{itemize}
  1340 *}
  1340 \<close>
  1341 
  1341 
  1342 subsubsection {* Applying translation rules *}
  1342 subsubsection \<open>Applying translation rules\<close>
  1343 
  1343 
  1344 text {* As a term is being parsed or printed, an AST is generated as
  1344 text \<open>As a term is being parsed or printed, an AST is generated as
  1345   an intermediate form according to \figref{fig:parse-print}.  The AST
  1345   an intermediate form according to \figref{fig:parse-print}.  The AST
  1346   is normalized by applying translation rules in the manner of a
  1346   is normalized by applying translation rules in the manner of a
  1347   first-order term rewriting system.  We first examine how a single
  1347   first-order term rewriting system.  We first examine how a single
  1348   rule is applied.
  1348   rule is applied.
  1349 
  1349 
  1408   to @{text "Ball A P"} and the standard print rule would fail to
  1408   to @{text "Ball A P"} and the standard print rule would fail to
  1409   apply.  This problem can be avoided by hand-written ML translation
  1409   apply.  This problem can be avoided by hand-written ML translation
  1410   functions (see also \secref{sec:tr-funs}), which is in fact the same
  1410   functions (see also \secref{sec:tr-funs}), which is in fact the same
  1411   mechanism used in built-in @{keyword "binder"} declarations.
  1411   mechanism used in built-in @{keyword "binder"} declarations.
  1412   \end{warn}
  1412   \end{warn}
  1413 *}
  1413 \<close>
  1414 
  1414 
  1415 
  1415 
  1416 subsection {* Syntax translation functions \label{sec:tr-funs} *}
  1416 subsection \<open>Syntax translation functions \label{sec:tr-funs}\<close>
  1417 
  1417 
  1418 text {*
  1418 text \<open>
  1419   \begin{matharray}{rcl}
  1419   \begin{matharray}{rcl}
  1420     @{command_def "parse_ast_translation"} & : & @{text "theory \<rightarrow> theory"} \\
  1420     @{command_def "parse_ast_translation"} & : & @{text "theory \<rightarrow> theory"} \\
  1421     @{command_def "parse_translation"} & : & @{text "theory \<rightarrow> theory"} \\
  1421     @{command_def "parse_translation"} & : & @{text "theory \<rightarrow> theory"} \\
  1422     @{command_def "print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
  1422     @{command_def "print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
  1423     @{command_def "typed_print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
  1423     @{command_def "typed_print_translation"} & : & @{text "theory \<rightarrow> theory"} \\
  1490   that the usual naming convention makes syntax constants start with
  1490   that the usual naming convention makes syntax constants start with
  1491   underscore, to reduce the chance of accidental clashes with other
  1491   underscore, to reduce the chance of accidental clashes with other
  1492   names occurring in parse trees (unqualified constants etc.).
  1492   names occurring in parse trees (unqualified constants etc.).
  1493 
  1493 
  1494   \end{description}
  1494   \end{description}
  1495 *}
  1495 \<close>
  1496 
  1496 
  1497 
  1497 
  1498 subsubsection {* The translation strategy *}
  1498 subsubsection \<open>The translation strategy\<close>
  1499 
  1499 
  1500 text {* The different kinds of translation functions are invoked during
  1500 text \<open>The different kinds of translation functions are invoked during
  1501   the transformations between parse trees, ASTs and syntactic terms
  1501   the transformations between parse trees, ASTs and syntactic terms
  1502   (cf.\ \figref{fig:parse-print}).  Whenever a combination of the form
  1502   (cf.\ \figref{fig:parse-print}).  Whenever a combination of the form
  1503   @{text "c x\<^sub>1 \<dots> x\<^sub>n"} is encountered, and a translation function
  1503   @{text "c x\<^sub>1 \<dots> x\<^sub>n"} is encountered, and a translation function
  1504   @{text "f"} of appropriate kind is declared for @{text "c"}, the
  1504   @{text "f"} of appropriate kind is declared for @{text "c"}, the
  1505   result is produced by evaluation of @{text "f [x\<^sub>1, \<dots>, x\<^sub>n]"} in ML.
  1505   result is produced by evaluation of @{text "f [x\<^sub>1, \<dots>, x\<^sub>n]"} in ML.
  1543   heads of concrete syntax, or syntactic constants introduced via
  1543   heads of concrete syntax, or syntactic constants introduced via
  1544   other translations.  For plain identifiers within the term language,
  1544   other translations.  For plain identifiers within the term language,
  1545   the status of constant versus variable is not yet know during
  1545   the status of constant versus variable is not yet know during
  1546   parsing.  This is in contrast to print translations, where constants
  1546   parsing.  This is in contrast to print translations, where constants
  1547   are explicitly known from the given term in its fully internal form.
  1547   are explicitly known from the given term in its fully internal form.
  1548 *}
  1548 \<close>
  1549 
  1549 
  1550 
  1550 
  1551 subsection {* Built-in syntax transformations *}
  1551 subsection \<open>Built-in syntax transformations\<close>
  1552 
  1552 
  1553 text {*
  1553 text \<open>
  1554   Here are some further details of the main syntax transformation
  1554   Here are some further details of the main syntax transformation
  1555   phases of \figref{fig:parse-print}.
  1555   phases of \figref{fig:parse-print}.
  1556 *}
  1556 \<close>
  1557 
  1557 
  1558 
  1558 
  1559 subsubsection {* Transforming parse trees to ASTs *}
  1559 subsubsection \<open>Transforming parse trees to ASTs\<close>
  1560 
  1560 
  1561 text {* The parse tree is the raw output of the parser.  It is
  1561 text \<open>The parse tree is the raw output of the parser.  It is
  1562   transformed into an AST according to some basic scheme that may be
  1562   transformed into an AST according to some basic scheme that may be
  1563   augmented by AST translation functions as explained in
  1563   augmented by AST translation functions as explained in
  1564   \secref{sec:tr-funs}.
  1564   \secref{sec:tr-funs}.
  1565 
  1565 
  1566   The parse tree is constructed by nesting the right-hand sides of the
  1566   The parse tree is constructed by nesting the right-hand sides of the
  1596 
  1596 
  1597   Note that type and sort constraints may occur in further places ---
  1597   Note that type and sort constraints may occur in further places ---
  1598   translations need to be ready to cope with them.  The built-in
  1598   translations need to be ready to cope with them.  The built-in
  1599   syntax transformation from parse trees to ASTs insert additional
  1599   syntax transformation from parse trees to ASTs insert additional
  1600   constraints that represent source positions.
  1600   constraints that represent source positions.
  1601 *}
  1601 \<close>
  1602 
  1602 
  1603 
  1603 
  1604 subsubsection {* Transforming ASTs to terms *}
  1604 subsubsection \<open>Transforming ASTs to terms\<close>
  1605 
  1605 
  1606 text {* After application of macros (\secref{sec:syn-trans}), the AST
  1606 text \<open>After application of macros (\secref{sec:syn-trans}), the AST
  1607   is transformed into a term.  This term still lacks proper type
  1607   is transformed into a term.  This term still lacks proper type
  1608   information, but it might contain some constraints consisting of
  1608   information, but it might contain some constraints consisting of
  1609   applications with head @{verbatim "_constrain"}, where the second
  1609   applications with head @{verbatim "_constrain"}, where the second
  1610   argument is a type encoded as a pre-term within the syntax.  Type
  1610   argument is a type encoded as a pre-term within the syntax.  Type
  1611   inference later introduces correct types, or indicates type errors
  1611   inference later introduces correct types, or indicates type errors
  1618 
  1618 
  1619   The outcome is still a first-order term.  Proper abstractions and
  1619   The outcome is still a first-order term.  Proper abstractions and
  1620   bound variables are introduced by parse translations associated with
  1620   bound variables are introduced by parse translations associated with
  1621   certain syntax constants.  Thus @{verbatim "(_abs x x)"} eventually
  1621   certain syntax constants.  Thus @{verbatim "(_abs x x)"} eventually
  1622   becomes a de-Bruijn term @{verbatim "Abs (\"x\", _, Bound 0)"}.
  1622   becomes a de-Bruijn term @{verbatim "Abs (\"x\", _, Bound 0)"}.
  1623 *}
  1623 \<close>
  1624 
  1624 
  1625 
  1625 
  1626 subsubsection {* Printing of terms *}
  1626 subsubsection \<open>Printing of terms\<close>
  1627 
  1627 
  1628 text {* The output phase is essentially the inverse of the input
  1628 text \<open>The output phase is essentially the inverse of the input
  1629   phase.  Terms are translated via abstract syntax trees into
  1629   phase.  Terms are translated via abstract syntax trees into
  1630   pretty-printed text.
  1630   pretty-printed text.
  1631 
  1631 
  1632   Ignoring print translations, the transformation maps term constants,
  1632   Ignoring print translations, the transformation maps term constants,
  1633   variables and applications to the corresponding constructs on ASTs.
  1633   variables and applications to the corresponding constructs on ASTs.
  1670   AST variable @{text "x"} is printed as @{text "x"} outright.
  1670   AST variable @{text "x"} is printed as @{text "x"} outright.
  1671 
  1671 
  1672   \medskip White space is \emph{not} inserted automatically.  If
  1672   \medskip White space is \emph{not} inserted automatically.  If
  1673   blanks (or breaks) are required to separate tokens, they need to be
  1673   blanks (or breaks) are required to separate tokens, they need to be
  1674   specified in the mixfix declaration (\secref{sec:mixfix}).
  1674   specified in the mixfix declaration (\secref{sec:mixfix}).
  1675 *}
  1675 \<close>
  1676 
  1676 
  1677 end
  1677 end