src/Doc/Ref/document/syntax.tex
changeset 52414 8429123bc58a
parent 52413 a59ba6de9687
child 52415 d9fed6e99a57
equal deleted inserted replaced
52413:a59ba6de9687 52414:8429123bc58a
     1 
       
     2 \chapter{Syntax Transformations} \label{chap:syntax}
       
     3 \newcommand\ttapp{\mathrel{\hbox{\tt\$}}}
       
     4 \newcommand\mtt[1]{\mbox{\tt #1}}
       
     5 \newcommand\ttfct[1]{\mathop{\mtt{#1}}\nolimits}
       
     6 \newcommand\Constant{\ttfct{Constant}}
       
     7 \newcommand\Variable{\ttfct{Variable}}
       
     8 \newcommand\Appl[1]{\ttfct{Appl}\,[#1]}
       
     9 \index{syntax!transformations|(}
       
    10 
       
    11 
       
    12 \section{Transforming parse trees to ASTs}\label{sec:astofpt}
       
    13 \index{ASTs!made from parse trees}
       
    14 \newcommand\astofpt[1]{\lbrakk#1\rbrakk}
       
    15 
       
    16 The parse tree is the raw output of the parser.  Translation functions,
       
    17 called {\bf parse AST translations}\indexbold{translations!parse AST},
       
    18 transform the parse tree into an abstract syntax tree.
       
    19 
       
    20 The parse tree is constructed by nesting the right-hand sides of the
       
    21 productions used to recognize the input.  Such parse trees are simply lists
       
    22 of tokens and constituent parse trees, the latter representing the
       
    23 nonterminals of the productions.  Let us refer to the actual productions in
       
    24 the form displayed by {\tt print_syntax} (see \S\ref{sec:inspct-thy} for an
       
    25 example).
       
    26 
       
    27 Ignoring parse \AST{} translations, parse trees are transformed to \AST{}s
       
    28 by stripping out delimiters and copy productions.  More precisely, the
       
    29 mapping $\astofpt{-}$ is derived from the productions as follows:
       
    30 \begin{itemize}
       
    31 \item Name tokens: $\astofpt{t} = \Variable s$, where $t$ is an \ndx{id},
       
    32   \ndx{var}, \ndx{tid}, \ndx{tvar}, \ndx{num}, \ndx{xnum} or \ndx{xstr} token,
       
    33   and $s$ its associated string.  Note that for {\tt xstr} this does not
       
    34   include the quotes.
       
    35 
       
    36 \item Copy productions:\index{productions!copy}
       
    37   $\astofpt{\ldots P \ldots} = \astofpt{P}$.  Here $\ldots$ stands for
       
    38   strings of delimiters, which are discarded.  $P$ stands for the single
       
    39   constituent that is not a delimiter; it is either a nonterminal symbol or
       
    40   a name token.
       
    41 
       
    42   \item 0-ary productions: $\astofpt{\ldots \mtt{=>} c} = \Constant c$.
       
    43     Here there are no constituents other than delimiters, which are
       
    44     discarded.
       
    45 
       
    46   \item $n$-ary productions, where $n \ge 1$: delimiters are discarded and
       
    47     the remaining constituents $P@1$, \ldots, $P@n$ are built into an
       
    48     application whose head constant is~$c$:
       
    49     \[ \astofpt{\ldots P@1 \ldots P@n \ldots \mtt{=>} c} =
       
    50        \Appl{\Constant c, \astofpt{P@1}, \ldots, \astofpt{P@n}}
       
    51     \]
       
    52 \end{itemize}
       
    53 Figure~\ref{fig:parse_ast} presents some simple examples, where {\tt ==},
       
    54 {\tt _appl}, {\tt _args}, and so forth name productions of the Pure syntax.
       
    55 These examples illustrate the need for further translations to make \AST{}s
       
    56 closer to the typed $\lambda$-calculus.  The Pure syntax provides
       
    57 predefined parse \AST{} translations\index{translations!parse AST} for
       
    58 ordinary applications, type applications, nested abstractions, meta
       
    59 implications and function types.  Figure~\ref{fig:parse_ast_tr} shows their
       
    60 effect on some representative input strings.
       
    61 
       
    62 
       
    63 \begin{figure}
       
    64 \begin{center}
       
    65 \tt\begin{tabular}{ll}
       
    66 \rm input string    & \rm \AST \\\hline
       
    67 "f"                 & f \\
       
    68 "'a"                & 'a \\
       
    69 "t == u"            & ("==" t u) \\
       
    70 "f(x)"              & ("_appl" f x) \\
       
    71 "f(x, y)"           & ("_appl" f ("_args" x y)) \\
       
    72 "f(x, y, z)"        & ("_appl" f ("_args" x ("_args" y z))) \\
       
    73 "\%x y.\ t"         & ("_lambda" ("_idts" x y) t) \\
       
    74 \end{tabular}
       
    75 \end{center}
       
    76 \caption{Parsing examples using the Pure syntax}\label{fig:parse_ast}
       
    77 \end{figure}
       
    78 
       
    79 \begin{figure}
       
    80 \begin{center}
       
    81 \tt\begin{tabular}{ll}
       
    82 \rm input string            & \rm \AST{} \\\hline
       
    83 "f(x, y, z)"                & (f x y z) \\
       
    84 "'a ty"                     & (ty 'a) \\
       
    85 "('a, 'b) ty"               & (ty 'a 'b) \\
       
    86 "\%x y z.\ t"               & ("_abs" x ("_abs" y ("_abs" z t))) \\
       
    87 "\%x ::\ 'a.\ t"            & ("_abs" ("_constrain" x 'a) t) \\
       
    88 "[| P; Q; R |] => S"        & ("==>" P ("==>" Q ("==>" R S))) \\
       
    89 "['a, 'b, 'c] => 'd"        & ("fun" 'a ("fun" 'b ("fun" 'c 'd)))
       
    90 \end{tabular}
       
    91 \end{center}
       
    92 \caption{Built-in parse \AST{} translations}\label{fig:parse_ast_tr}
       
    93 \end{figure}
       
    94 
       
    95 The names of constant heads in the \AST{} control the translation process.
       
    96 The list of constants invoking parse \AST{} translations appears in the
       
    97 output of {\tt print_syntax} under {\tt parse_ast_translation}.
       
    98 
       
    99 
       
   100 \section{Transforming ASTs to terms}\label{sec:termofast}
       
   101 \index{terms!made from ASTs}
       
   102 \newcommand\termofast[1]{\lbrakk#1\rbrakk}
       
   103 
       
   104 The \AST{}, after application of macros (see \S\ref{sec:macros}), is
       
   105 transformed into a term.  This term is probably ill-typed since type
       
   106 inference has not occurred yet.  The term may contain type constraints
       
   107 consisting of applications with head {\tt "_constrain"}; the second
       
   108 argument is a type encoded as a term.  Type inference later introduces
       
   109 correct types or rejects the input.
       
   110 
       
   111 Another set of translation functions, namely parse
       
   112 translations\index{translations!parse}, may affect this process.  If we
       
   113 ignore parse translations for the time being, then \AST{}s are transformed
       
   114 to terms by mapping \AST{} constants to constants, \AST{} variables to
       
   115 schematic or free variables and \AST{} applications to applications.
       
   116 
       
   117 More precisely, the mapping $\termofast{-}$ is defined by
       
   118 \begin{itemize}
       
   119 \item Constants: $\termofast{\Constant x} = \ttfct{Const} (x,
       
   120   \mtt{dummyT})$.
       
   121 
       
   122 \item Schematic variables: $\termofast{\Variable \mtt{"?}xi\mtt"} =
       
   123   \ttfct{Var} ((x, i), \mtt{dummyT})$, where $x$ is the base name and $i$
       
   124   the index extracted from~$xi$.
       
   125 
       
   126 \item Free variables: $\termofast{\Variable x} = \ttfct{Free} (x,
       
   127   \mtt{dummyT})$.
       
   128 
       
   129 \item Function applications with $n$ arguments:
       
   130     \[ \termofast{\Appl{f, x@1, \ldots, x@n}} =
       
   131        \termofast{f} \ttapp
       
   132          \termofast{x@1} \ttapp \ldots \ttapp \termofast{x@n}
       
   133     \]
       
   134 \end{itemize}
       
   135 Here \ttindex{Const}, \ttindex{Var}, \ttindex{Free} and
       
   136 \verb|$|\index{$@{\tt\$}} are constructors of the datatype \mltydx{term},
       
   137 while \ttindex{dummyT} stands for some dummy type that is ignored during
       
   138 type inference.
       
   139 
       
   140 So far the outcome is still a first-order term.  Abstractions and bound
       
   141 variables (constructors \ttindex{Abs} and \ttindex{Bound}) are introduced
       
   142 by parse translations.  Such translations are attached to {\tt "_abs"},
       
   143 {\tt "!!"} and user-defined binders.
       
   144 
       
   145 
       
   146 \section{Printing of terms}
       
   147 \newcommand\astofterm[1]{\lbrakk#1\rbrakk}\index{ASTs!made from terms}
       
   148 
       
   149 The output phase is essentially the inverse of the input phase.  Terms are
       
   150 translated via abstract syntax trees into strings.  Finally the strings are
       
   151 pretty printed.
       
   152 
       
   153 Print translations (\S\ref{sec:tr_funs}) may affect the transformation of
       
   154 terms into \AST{}s.  Ignoring those, the transformation maps
       
   155 term constants, variables and applications to the corresponding constructs
       
   156 on \AST{}s.  Abstractions are mapped to applications of the special
       
   157 constant {\tt _abs}.
       
   158 
       
   159 More precisely, the mapping $\astofterm{-}$ is defined as follows:
       
   160 \begin{itemize}
       
   161   \item $\astofterm{\ttfct{Const} (x, \tau)} = \Constant x$.
       
   162 
       
   163   \item $\astofterm{\ttfct{Free} (x, \tau)} = constrain (\Variable x,
       
   164     \tau)$.
       
   165 
       
   166   \item $\astofterm{\ttfct{Var} ((x, i), \tau)} = constrain (\Variable
       
   167     \mtt{"?}xi\mtt", \tau)$, where $\mtt?xi$ is the string representation of
       
   168     the {\tt indexname} $(x, i)$.
       
   169 
       
   170   \item For the abstraction $\lambda x::\tau.t$, let $x'$ be a variant
       
   171     of~$x$ renamed to differ from all names occurring in~$t$, and let $t'$
       
   172     be obtained from~$t$ by replacing all bound occurrences of~$x$ by
       
   173     the free variable $x'$.  This replaces corresponding occurrences of the
       
   174     constructor \ttindex{Bound} by the term $\ttfct{Free} (x',
       
   175     \mtt{dummyT})$:
       
   176    \[ \astofterm{\ttfct{Abs} (x, \tau, t)} =
       
   177       \Appl{\Constant \mtt{"_abs"},
       
   178         constrain(\Variable x', \tau), \astofterm{t'}}
       
   179     \]
       
   180 
       
   181   \item $\astofterm{\ttfct{Bound} i} = \Variable \mtt{"B.}i\mtt"$.
       
   182     The occurrence of constructor \ttindex{Bound} should never happen
       
   183     when printing well-typed terms; it indicates a de Bruijn index with no
       
   184     matching abstraction.
       
   185 
       
   186   \item Where $f$ is not an application,
       
   187     \[ \astofterm{f \ttapp x@1 \ttapp \ldots \ttapp x@n} =
       
   188        \Appl{\astofterm{f}, \astofterm{x@1}, \ldots,\astofterm{x@n}}
       
   189     \]
       
   190 \end{itemize}
       
   191 %
       
   192 Type constraints\index{type constraints} are inserted to allow the printing
       
   193 of types.  This is governed by the boolean variable \ttindex{show_types}:
       
   194 \begin{itemize}
       
   195   \item $constrain(x, \tau) = x$ \ if $\tau = \mtt{dummyT}$ \index{*dummyT} or
       
   196     \ttindex{show_types} is set to {\tt false}.
       
   197 
       
   198   \item $constrain(x, \tau) = \Appl{\Constant \mtt{"_constrain"}, x,
       
   199          \astofterm{\tau}}$ \ otherwise.
       
   200 
       
   201     Here, $\astofterm{\tau}$ is the \AST{} encoding of $\tau$: type
       
   202     constructors go to {\tt Constant}s; type identifiers go to {\tt
       
   203       Variable}s; type applications go to {\tt Appl}s with the type
       
   204     constructor as the first element.  If \ttindex{show_sorts} is set to
       
   205     {\tt true}, some type variables are decorated with an \AST{} encoding
       
   206     of their sort.
       
   207 \end{itemize}
       
   208 %
       
   209 The \AST{}, after application of macros (see \S\ref{sec:macros}), is
       
   210 transformed into the final output string.  The built-in {\bf print AST
       
   211   translations}\indexbold{translations!print AST} reverse the
       
   212 parse \AST{} translations of Fig.\ts\ref{fig:parse_ast_tr}.
       
   213 
       
   214 For the actual printing process, the names attached to productions
       
   215 of the form $\ldots A^{(p@1)}@1 \ldots A^{(p@n)}@n \ldots \mtt{=>} c$ play
       
   216 a vital role.  Each \AST{} with constant head $c$, namely $\mtt"c\mtt"$ or
       
   217 $(\mtt"c\mtt"~ x@1 \ldots x@n)$, is printed according to the production
       
   218 for~$c$.  Each argument~$x@i$ is converted to a string, and put in
       
   219 parentheses if its priority~$(p@i)$ requires this.  The resulting strings
       
   220 and their syntactic sugar (denoted by \dots{} above) are joined to make a
       
   221 single string.
       
   222 
       
   223 If an application $(\mtt"c\mtt"~ x@1 \ldots x@m)$ has more arguments
       
   224 than the corresponding production, it is first split into
       
   225 $((\mtt"c\mtt"~ x@1 \ldots x@n) ~ x@{n+1} \ldots x@m)$.  Applications
       
   226 with too few arguments or with non-constant head or without a
       
   227 corresponding production are printed as $f(x@1, \ldots, x@l)$ or
       
   228 $(\alpha@1, \ldots, \alpha@l) ty$.  Multiple productions associated
       
   229 with some name $c$ are tried in order of appearance.  An occurrence of
       
   230 $\Variable x$ is simply printed as~$x$.
       
   231 
       
   232 Blanks are {\em not\/} inserted automatically.  If blanks are required to
       
   233 separate tokens, specify them in the mixfix declaration, possibly preceded
       
   234 by a slash~({\tt/}) to allow a line break.
       
   235 \index{ASTs|)}
       
   236 
       
   237 %%% Local Variables: 
       
   238 %%% mode: latex
       
   239 %%% TeX-master: "ref"
       
   240 %%% End: