# HG changeset patch # User wenzelm # Date 1340136368 -7200 # Node ID e21f4d5b9636f637e38fc64620d5284a90050f24 # Parent fd773574cb81ff5296ca030e61e49faa117ef98d more on "Applying translation rules"; diff -r fd773574cb81 -r e21f4d5b9636 doc-src/IsarRef/Thy/Inner_Syntax.thy --- a/doc-src/IsarRef/Thy/Inner_Syntax.thy Tue Jun 19 20:51:15 2012 +0200 +++ b/doc-src/IsarRef/Thy/Inner_Syntax.thy Tue Jun 19 22:06:08 2012 +0200 @@ -1182,13 +1182,15 @@ subsection {* Raw syntax and translations \label{sec:syn-trans} *} text {* - \begin{matharray}{rcl} + \begin{tabular}{rcll} @{command_def "nonterminal"} & : & @{text "theory \ theory"} \\ @{command_def "syntax"} & : & @{text "theory \ theory"} \\ @{command_def "no_syntax"} & : & @{text "theory \ theory"} \\ @{command_def "translations"} & : & @{text "theory \ theory"} \\ @{command_def "no_translations"} & : & @{text "theory \ theory"} \\ - \end{matharray} + @{attribute_def syntax_ast_trace} & : & @{text attribute} & default @{text false} \\ + @{attribute_def syntax_ast_stats} & : & @{text attribute} & default @{text false} \\ + \end{tabular} Unlike mixfix notation for existing formal entities (\secref{sec:notation}), raw syntax declarations provide full access @@ -1324,6 +1326,11 @@ translation rules, which are interpreted in the same manner as for @{command "translations"} above. + \item @{attribute syntax_ast_trace} and @{attribute + syntax_ast_stats} control diagnostic output in the AST normalization + process, when translation rules are applied to concrete input or + output. + \end{description} Raw syntax and translations provides a slightly more low-level @@ -1348,6 +1355,79 @@ \end{itemize} *} +subsubsection {* Applying translation rules *} + +text {* As a term is being parsed or printed, an AST is generated as + an intermediate form according to \figref{fig:parse-print}. The AST + is normalized by applying translation rules in the manner of a + first-order term rewriting system. We first examine how a single + rule is applied. + + Let @{text "t"} be the abstract syntax tree to be normalized and + @{text "(lhs, rhs)"} some translation rule. A subtree @{text "u"} + of @{text "t"} is called \emph{redex} if it is an instance of @{text + "lhs"}; in this case the pattern @{text "lhs"} is said to match the + object @{text "u"}. A redex matched by @{text "lhs"} may be + replaced by the corresponding instance of @{text "rhs"}, thus + \emph{rewriting} the AST @{text "t"}. Matching requires some notion + of \emph{place-holders} in rule patterns: @{ML Ast.Variable} serves + this purpose. + + More precisely, the matching of the object @{text "u"} against the + pattern @{text "lhs"} is performed as follows: + + \begin{itemize} + + \item Objects of the form @{ML Ast.Variable}~@{text "x"} or @{ML + Ast.Constant}~@{text "x"} are matched by pattern @{ML + Ast.Constant}~@{text "x"}. Thus all atomic ASTs in the object are + treated as (potential) constants, and a successful match makes them + actual constants even before name space resolution (see also + \secref{sec:ast}). + + \item Object @{text "u"} is matched by pattern @{ML + Ast.Variable}~@{text "x"}, binding @{text "x"} to @{text "u"}. + + \item Object @{ML Ast.Appl}~@{text "us"} is matched by @{ML + Ast.Appl}~@{text "ts"} if @{text "us"} and @{text "ts"} have the + same length and each corresponding subtree matches. + + \item In every other case, matching fails. + + \end{itemize} + + A successful match yields a substitution that is applied to @{text + "rhs"}, generating the instance that replaces @{text "u"}. + + Normalizing an AST involves repeatedly applying translation rules + until none are applicable. This works yoyo-like: top-down, + bottom-up, top-down, etc. At each subtree position, rules are + chosen in order of appearance in the theory definitions. + + The configuration options @{attribute syntax_ast_trace} and + @{attribute syntax_ast_stats} might help understand this process + and diagnose problems. + + \begin{warn} + If syntax translation rules work incorrectly, the output of + @{command print_syntax} with its \emph{rules} sections reveals the + actual internal forms of AST pattern, without potentially confusing + concrete syntax. Recall that AST constants appear as quoted strings + and variables without quotes. + \end{warn} + + \begin{warn} + If @{attribute_ref eta_contract} is set to @{text "true"}, terms + will be @{text "\"}-contracted \emph{before} the AST rewriter sees + them. Thus some abstraction nodes needed for print rules to match + may vanish. For example, @{text "Ball A (\x. P x)"} would contract + to @{text "Ball A P"} and the standard print rule would fail to + apply. This problem can be avoided by hand-written ML translation + functions (see also \secref{sec:tr-funs}), which is in fact the same + mechanism used in built-in @{keyword "binder"} declarations. + \end{warn} +*} + subsection {* Syntax translation functions \label{sec:tr-funs} *} diff -r fd773574cb81 -r e21f4d5b9636 doc-src/IsarRef/Thy/document/Inner_Syntax.tex --- a/doc-src/IsarRef/Thy/document/Inner_Syntax.tex Tue Jun 19 20:51:15 2012 +0200 +++ b/doc-src/IsarRef/Thy/document/Inner_Syntax.tex Tue Jun 19 22:06:08 2012 +0200 @@ -1373,13 +1373,15 @@ \isamarkuptrue% % \begin{isamarkuptext}% -\begin{matharray}{rcl} +\begin{tabular}{rcll} \indexdef{}{command}{nonterminal}\hypertarget{command.nonterminal}{\hyperlink{command.nonterminal}{\mbox{\isa{\isacommand{nonterminal}}}}} & : & \isa{{\isaliteral{22}{\isachardoublequote}}theory\ {\isaliteral{5C3C72696768746172726F773E}{\isasymrightarrow}}\ theory{\isaliteral{22}{\isachardoublequote}}} \\ \indexdef{}{command}{syntax}\hypertarget{command.syntax}{\hyperlink{command.syntax}{\mbox{\isa{\isacommand{syntax}}}}} & : & \isa{{\isaliteral{22}{\isachardoublequote}}theory\ {\isaliteral{5C3C72696768746172726F773E}{\isasymrightarrow}}\ theory{\isaliteral{22}{\isachardoublequote}}} \\ \indexdef{}{command}{no\_syntax}\hypertarget{command.no-syntax}{\hyperlink{command.no-syntax}{\mbox{\isa{\isacommand{no{\isaliteral{5F}{\isacharunderscore}}syntax}}}}} & : & \isa{{\isaliteral{22}{\isachardoublequote}}theory\ {\isaliteral{5C3C72696768746172726F773E}{\isasymrightarrow}}\ theory{\isaliteral{22}{\isachardoublequote}}} \\ \indexdef{}{command}{translations}\hypertarget{command.translations}{\hyperlink{command.translations}{\mbox{\isa{\isacommand{translations}}}}} & : & \isa{{\isaliteral{22}{\isachardoublequote}}theory\ {\isaliteral{5C3C72696768746172726F773E}{\isasymrightarrow}}\ theory{\isaliteral{22}{\isachardoublequote}}} \\ \indexdef{}{command}{no\_translations}\hypertarget{command.no-translations}{\hyperlink{command.no-translations}{\mbox{\isa{\isacommand{no{\isaliteral{5F}{\isacharunderscore}}translations}}}}} & : & \isa{{\isaliteral{22}{\isachardoublequote}}theory\ {\isaliteral{5C3C72696768746172726F773E}{\isasymrightarrow}}\ theory{\isaliteral{22}{\isachardoublequote}}} \\ - \end{matharray} + \indexdef{}{attribute}{syntax\_ast\_trace}\hypertarget{attribute.syntax-ast-trace}{\hyperlink{attribute.syntax-ast-trace}{\mbox{\isa{syntax{\isaliteral{5F}{\isacharunderscore}}ast{\isaliteral{5F}{\isacharunderscore}}trace}}}} & : & \isa{attribute} & default \isa{false} \\ + \indexdef{}{attribute}{syntax\_ast\_stats}\hypertarget{attribute.syntax-ast-stats}{\hyperlink{attribute.syntax-ast-stats}{\mbox{\isa{syntax{\isaliteral{5F}{\isacharunderscore}}ast{\isaliteral{5F}{\isacharunderscore}}stats}}}} & : & \isa{attribute} & default \isa{false} \\ + \end{tabular} Unlike mixfix notation for existing formal entities (\secref{sec:notation}), raw syntax declarations provide full access @@ -1572,6 +1574,10 @@ translation rules, which are interpreted in the same manner as for \hyperlink{command.translations}{\mbox{\isa{\isacommand{translations}}}} above. + \item \hyperlink{attribute.syntax-ast-trace}{\mbox{\isa{syntax{\isaliteral{5F}{\isacharunderscore}}ast{\isaliteral{5F}{\isacharunderscore}}trace}}} and \hyperlink{attribute.syntax-ast-stats}{\mbox{\isa{syntax{\isaliteral{5F}{\isacharunderscore}}ast{\isaliteral{5F}{\isacharunderscore}}stats}}} control diagnostic output in the AST normalization + process, when translation rules are applied to concrete input or + output. + \end{description} Raw syntax and translations provides a slightly more low-level @@ -1595,6 +1601,77 @@ \end{isamarkuptext}% \isamarkuptrue% % +\isamarkupsubsubsection{Applying translation rules% +} +\isamarkuptrue% +% +\begin{isamarkuptext}% +As a term is being parsed or printed, an AST is generated as + an intermediate form according to \figref{fig:parse-print}. The AST + is normalized by applying translation rules in the manner of a + first-order term rewriting system. We first examine how a single + rule is applied. + + Let \isa{{\isaliteral{22}{\isachardoublequote}}t{\isaliteral{22}{\isachardoublequote}}} be the abstract syntax tree to be normalized and + \isa{{\isaliteral{22}{\isachardoublequote}}{\isaliteral{28}{\isacharparenleft}}lhs{\isaliteral{2C}{\isacharcomma}}\ rhs{\isaliteral{29}{\isacharparenright}}{\isaliteral{22}{\isachardoublequote}}} some translation rule. A subtree \isa{{\isaliteral{22}{\isachardoublequote}}u{\isaliteral{22}{\isachardoublequote}}} + of \isa{{\isaliteral{22}{\isachardoublequote}}t{\isaliteral{22}{\isachardoublequote}}} is called \emph{redex} if it is an instance of \isa{{\isaliteral{22}{\isachardoublequote}}lhs{\isaliteral{22}{\isachardoublequote}}}; in this case the pattern \isa{{\isaliteral{22}{\isachardoublequote}}lhs{\isaliteral{22}{\isachardoublequote}}} is said to match the + object \isa{{\isaliteral{22}{\isachardoublequote}}u{\isaliteral{22}{\isachardoublequote}}}. A redex matched by \isa{{\isaliteral{22}{\isachardoublequote}}lhs{\isaliteral{22}{\isachardoublequote}}} may be + replaced by the corresponding instance of \isa{{\isaliteral{22}{\isachardoublequote}}rhs{\isaliteral{22}{\isachardoublequote}}}, thus + \emph{rewriting} the AST \isa{{\isaliteral{22}{\isachardoublequote}}t{\isaliteral{22}{\isachardoublequote}}}. Matching requires some notion + of \emph{place-holders} in rule patterns: \verb|Ast.Variable| serves + this purpose. + + More precisely, the matching of the object \isa{{\isaliteral{22}{\isachardoublequote}}u{\isaliteral{22}{\isachardoublequote}}} against the + pattern \isa{{\isaliteral{22}{\isachardoublequote}}lhs{\isaliteral{22}{\isachardoublequote}}} is performed as follows: + + \begin{itemize} + + \item Objects of the form \verb|Ast.Variable|~\isa{{\isaliteral{22}{\isachardoublequote}}x{\isaliteral{22}{\isachardoublequote}}} or \verb|Ast.Constant|~\isa{{\isaliteral{22}{\isachardoublequote}}x{\isaliteral{22}{\isachardoublequote}}} are matched by pattern \verb|Ast.Constant|~\isa{{\isaliteral{22}{\isachardoublequote}}x{\isaliteral{22}{\isachardoublequote}}}. Thus all atomic ASTs in the object are + treated as (potential) constants, and a successful match makes them + actual constants even before name space resolution (see also + \secref{sec:ast}). + + \item Object \isa{{\isaliteral{22}{\isachardoublequote}}u{\isaliteral{22}{\isachardoublequote}}} is matched by pattern \verb|Ast.Variable|~\isa{{\isaliteral{22}{\isachardoublequote}}x{\isaliteral{22}{\isachardoublequote}}}, binding \isa{{\isaliteral{22}{\isachardoublequote}}x{\isaliteral{22}{\isachardoublequote}}} to \isa{{\isaliteral{22}{\isachardoublequote}}u{\isaliteral{22}{\isachardoublequote}}}. + + \item Object \verb|Ast.Appl|~\isa{{\isaliteral{22}{\isachardoublequote}}us{\isaliteral{22}{\isachardoublequote}}} is matched by \verb|Ast.Appl|~\isa{{\isaliteral{22}{\isachardoublequote}}ts{\isaliteral{22}{\isachardoublequote}}} if \isa{{\isaliteral{22}{\isachardoublequote}}us{\isaliteral{22}{\isachardoublequote}}} and \isa{{\isaliteral{22}{\isachardoublequote}}ts{\isaliteral{22}{\isachardoublequote}}} have the + same length and each corresponding subtree matches. + + \item In every other case, matching fails. + + \end{itemize} + + A successful match yields a substitution that is applied to \isa{{\isaliteral{22}{\isachardoublequote}}rhs{\isaliteral{22}{\isachardoublequote}}}, generating the instance that replaces \isa{{\isaliteral{22}{\isachardoublequote}}u{\isaliteral{22}{\isachardoublequote}}}. + + Normalizing an AST involves repeatedly applying translation rules + until none are applicable. This works yoyo-like: top-down, + bottom-up, top-down, etc. At each subtree position, rules are + chosen in order of appearance in the theory definitions. + + The configuration options \hyperlink{attribute.syntax-ast-trace}{\mbox{\isa{syntax{\isaliteral{5F}{\isacharunderscore}}ast{\isaliteral{5F}{\isacharunderscore}}trace}}} and + \hyperlink{attribute.syntax-ast-stats}{\mbox{\isa{syntax{\isaliteral{5F}{\isacharunderscore}}ast{\isaliteral{5F}{\isacharunderscore}}stats}}} might help understand this process + and diagnose problems. + + \begin{warn} + If syntax translation rules work incorrectly, the output of + \hyperlink{command.print-syntax}{\mbox{\isa{\isacommand{print{\isaliteral{5F}{\isacharunderscore}}syntax}}}} with its \emph{rules} sections reveals the + actual internal forms of AST pattern, without potentially confusing + concrete syntax. Recall that AST constants appear as quoted strings + and variables without quotes. + \end{warn} + + \begin{warn} + If \indexref{}{attribute}{eta\_contract}\hyperlink{attribute.eta-contract}{\mbox{\isa{eta{\isaliteral{5F}{\isacharunderscore}}contract}}} is set to \isa{{\isaliteral{22}{\isachardoublequote}}true{\isaliteral{22}{\isachardoublequote}}}, terms + will be \isa{{\isaliteral{22}{\isachardoublequote}}{\isaliteral{5C3C6574613E}{\isasymeta}}{\isaliteral{22}{\isachardoublequote}}}-contracted \emph{before} the AST rewriter sees + them. Thus some abstraction nodes needed for print rules to match + may vanish. For example, \isa{{\isaliteral{22}{\isachardoublequote}}Ball\ A\ {\isaliteral{28}{\isacharparenleft}}{\isaliteral{5C3C6C616D6264613E}{\isasymlambda}}x{\isaliteral{2E}{\isachardot}}\ P\ x{\isaliteral{29}{\isacharparenright}}{\isaliteral{22}{\isachardoublequote}}} would contract + to \isa{{\isaliteral{22}{\isachardoublequote}}Ball\ A\ P{\isaliteral{22}{\isachardoublequote}}} and the standard print rule would fail to + apply. This problem can be avoided by hand-written ML translation + functions (see also \secref{sec:tr-funs}), which is in fact the same + mechanism used in built-in \hyperlink{keyword.binder}{\mbox{\isa{\isakeyword{binder}}}} declarations. + \end{warn}% +\end{isamarkuptext}% +\isamarkuptrue% +% \isamarkupsubsection{Syntax translation functions \label{sec:tr-funs}% } \isamarkuptrue% diff -r fd773574cb81 -r e21f4d5b9636 doc-src/Ref/syntax.tex --- a/doc-src/Ref/syntax.tex Tue Jun 19 20:51:15 2012 +0200 +++ b/doc-src/Ref/syntax.tex Tue Jun 19 22:06:08 2012 +0200 @@ -235,98 +235,6 @@ \index{ASTs|)} - -\section{Macros: syntactic rewriting} \label{sec:macros} -\index{macros|(}\index{rewriting!syntactic|(} - -\subsection{Specifying macros} - -\begin{warn} -If a macro rule works incorrectly, inspect its internal form as -shown above, recalling that constants appear as quoted strings and -variables without quotes. -\end{warn} - -\begin{warn} -If \ttindex{eta_contract} is set to {\tt true}, terms will be -$\eta$-contracted {\em before\/} the \AST{} rewriter sees them. Thus some -abstraction nodes needed for print rules to match may vanish. For example, -\verb|Ball(A, %x. P(x))| contracts to {\tt Ball(A, P)}; the print rule does -not apply and the output will be {\tt Ball(A, P)}. This problem would not -occur if \ML{} translation functions were used instead of macros (as is -done for binder declarations). -\end{warn} - - -\begin{warn} -Another trap concerns type constraints. If \ttindex{show_types} is set to -{\tt true}, bound variables will be decorated by their meta types at the -binding place (but not at occurrences in the body). Matching with -\verb|Collect(A, %x. P)| binds {\tt x} to something like {\tt ("_constrain" y -"i")} rather than only {\tt y}. \AST{} rewriting will cause the constraint to -appear in the external form, say \verb|{y::i:A::i. P::o}|. - -To allow such constraints to be re-read, your syntax should specify bound -variables using the nonterminal~\ndx{idt}. This is the case in our -example. Choosing {\tt id} instead of {\tt idt} is a common error. -\end{warn} - - - -\subsection{Applying rules} -As a term is being parsed or printed, an \AST{} is generated as an -intermediate form (recall Fig.\ts\ref{fig:parse_print}). The \AST{} is -normalised by applying macro rules in the manner of a traditional term -rewriting system. We first examine how a single rule is applied. - -Let $t$ be the abstract syntax tree to be normalised and $(l, r)$ some -translation rule. A subtree~$u$ of $t$ is a {\bf redex} if it is an -instance of~$l$; in this case $l$ is said to {\bf match}~$u$. A redex -matched by $l$ may be replaced by the corresponding instance of~$r$, thus -{\bf rewriting} the \AST~$t$. Matching requires some notion of {\bf - place-holders} that may occur in rule patterns but not in ordinary -\AST{}s; {\tt Variable} atoms serve this purpose. - -The matching of the object~$u$ by the pattern~$l$ is performed as follows: -\begin{itemize} - \item Every constant matches itself. - - \item $\Variable x$ in the object matches $\Constant x$ in the pattern. - This point is discussed further below. - - \item Every \AST{} in the object matches $\Variable x$ in the pattern, - binding~$x$ to~$u$. - - \item One application matches another if they have the same number of - subtrees and corresponding subtrees match. - - \item In every other case, matching fails. In particular, {\tt - Constant}~$x$ can only match itself. -\end{itemize} -A successful match yields a substitution that is applied to~$r$, generating -the instance that replaces~$u$. - -The second case above may look odd. This is where {\tt Variable}s of -non-rule \AST{}s behave like {\tt Constant}s. Recall that \AST{}s are not -far removed from parse trees; at this level it is not yet known which -identifiers will become constants, bounds, frees, types or classes. As -\S\ref{sec:asts} describes, former parse tree heads appear in \AST{}s as -{\tt Constant}s, while the name tokens \ndx{id}, \ndx{var}, \ndx{tid}, -\ndx{tvar}, \ndx{num}, \ndx{xnum} and \ndx{xstr} become {\tt Variable}s. On the other -hand, when \AST{}s generated from terms for printing, all constants and type -constructors become {\tt Constant}s; see \S\ref{sec:asts}. Thus \AST{}s may -contain a messy mixture of {\tt Variable}s and {\tt Constant}s. This is -insignificant at macro level because matching treats them alike. - -Normalizing an \AST{} involves repeatedly applying macro rules until none are -applicable. Macro rules are chosen in order of appearance in the theory -definitions. You can watch the normalization of \AST{}s during parsing and -printing by setting \ttindex{Syntax.trace_ast} to {\tt true}.\index{tracing!of - macros} The information displayed when tracing includes the \AST{} before -normalization ({\tt pre}), redexes with results ({\tt rewrote}), the normal -form finally reached ({\tt post}) and some statistics ({\tt normalize}). - - \section{Translation functions} \label{sec:tr_funs} \index{translations|(} %