author wenzelm Tue Jul 31 19:26:35 2007 +0200 (2007-07-31) changeset 24089 070d539ba403 parent 24088 c2d8270e53a5 child 24090 ab6f04807005
tuned section "Style";
1.1 --- a/doc-src/IsarImplementation/Thy/ML.thy	Tue Jul 31 14:45:36 2007 +0200
1.2 +++ b/doc-src/IsarImplementation/Thy/ML.thy	Tue Jul 31 19:26:35 2007 +0200
1.3 @@ -4,17 +4,19 @@
1.5  chapter {* Aesthetics of ML programming *}
1.7 +section {* Style *}
1.8 +
1.9  text FIXME
1.11  text {* This style guide is loosely based on
1.12    \url{http://caml.inria.fr/resources/doc/guides/guidelines.en.html}.
1.13  %  FIMXE \url{http://www.cs.cornell.edu/Courses/cs312/2003sp/handouts/style.htm}
1.15 -  Like any style guide, it should not be interpreted dogmatically.
1.16 -  Instead, it forms a collection of recommendations which,
1.17 -  if obeyed, result in code that is not considered to be
1.18 -  obfuscated.  In certain cases, derivations are encouraged,
1.19 -  as far as you know what you are doing.
1.20 +  Like any style guide, it should not be interpreted dogmatically, but
1.21 +  with care and discernment.  Instead, it forms a collection of
1.22 +  recommendations which, if obeyed, result in code that is not
1.23 +  considered to be obfuscated.  In certain cases, derivations are
1.24 +  encouraged, as far as you know what you are doing.
1.26    \begin{description}
1.28 @@ -35,20 +37,25 @@
1.29        \begin{itemize}
1.31          \item The space bar is the easiest key to find on the keyboard,
1.32 -          press it as often as necessary. {\ttfamily 2 + 2} is better
1.33 -          than {\ttfamily 2+2}, likewise {\ttfamily f (x, y)}
1.34 -          better than {\ttfamily f(x,y)}.
1.35 +          press it as often as necessary. @{verbatim "2 + 2"} is better
1.36 +          than @{verbatim "2+2"}, likewise @{verbatim "f (x, y)"} is
1.37 +          better than @{verbatim "f(x,y)"}.
1.39 -        \item Restrict your lines to \emph{at most} 80 characters.
1.40 -          This will allow you to keep the beginning of a line
1.41 -          in view while watching its end.
1.42 +        \item Restrict your lines to 80 characters.  This will allow
1.43 +          you to keep the beginning of a line in view while watching
1.44 +          its end.\footnote{To acknowledge the lax practice of
1.45 +          text editing these days, we tolerate as much as 100
1.46 +          characters per line, but anything beyond 120 is not
1.47 +          considered proper source text.}
1.49 -        \item Ban tabs; they are a context-sensitive formatting
1.50 -          feature and likely to confuse anyone not using your
1.51 -          favourite editor.
1.52 +        \item Ban tabulators; they are a context-sensitive formatting
1.53 +          feature and likely to confuse anyone not using your favorite
1.54 +          editor.\footnote{Some modern programming language even
1.55 +          forbid tabulators altogether according to the formal syntax
1.56 +          definition.}
1.58          \item Get rid of trailing whitespace.  Instead, do not
1.59 -          surpess a trailing newline at the end of your files.
1.60 +          suppress a trailing newline at the end of your files.
1.62          \item Choose a generally accepted style of indentation,
1.63            then use it systematically throughout the whole
1.64 @@ -58,7 +65,7 @@
1.65        \end{itemize}
1.67      \item[cut-and-paste succeeds over copy-and-paste]
1.68 -      \emph{Never} copy-and-paste code when programming.  If you
1.69 +       \emph{Never} copy-and-paste code when programming.  If you
1.70          need the same piece of code twice, introduce a
1.71          reasonable auxiliary function (if there is no
1.72          such function, very likely you got something wrong).
1.73 @@ -72,21 +79,203 @@
1.74        over efforts to explain nasty code.
1.76      \item[functional programming is based on functions]
1.77 -      Avoid constructivisms'', e.g. pass a table lookup function,
1.78 -      rather than an actual table with lookup in body.  Accustom
1.79 -      your way of codeing to the level of expressiveness
1.80 -      a functional programming language is giving onto you.
1.81 +      Avoid constructivisms'', i.e.\ unnecessary concrete datatype
1.82 +      representations.  Instead model things as abstract as
1.83 +      appropriate.  For example, pass a table lookup function rather
1.84 +      than a concrete table with lookup performed in body.  Accustom
1.85 +      your way of coding to the level of expressiveness a functional
1.86 +      programming language is giving onto you.
1.88      \item[tuples]
1.89        are often in the way.  When there is no striking argument
1.90        to tuple function arguments, just write your function curried.
1.92      \item[telling names]
1.93 -      Any name should tell its purpose as exactly as possible,
1.94 -      while keeping its length to the absolutely neccessary minimum.
1.95 -      Always give the same name to function arguments which
1.96 -      have the same meaning. Separate words by underscores
1.97 -      (@{verbatim int_of_string}'', not @{verbatim intOfString}'')
1.98 +      Any name should tell its purpose as exactly as possible, while
1.99 +      keeping its length to the absolutely necessary minimum.  Always
1.100 +      give the same name to function arguments which have the same
1.101 +      meaning. Separate words by underscores (@{verbatim
1.102 +      int_of_string}, not @{verbatim intOfString}).\footnote{Some
1.103 +      recent tools for Emacs include special precautions to cope with
1.104 +      bumpy names in @{text "camelCase"}, e.g.\ for improved on-screen
1.105 +      readability.  It is easier to abstain from using such names in the
1.106 +      first place.}
1.108 +  \end{description}
1.109 +*}
1.112 +section {* Thread-safe programming *}
1.114 +text {*
1.115 +  Recent versions of Poly/ML (5.1 or later) support multithreaded
1.116 +  execution based on native operating system threads of the
1.117 +  underlying platform.  Thus threads will actually be executed in
1.118 +  parallel on multi-core systems.  A speedup-factor of approximately
1.119 +  2--4 can be expected for large well-structured Isabelle sessions,
1.120 +  where theories are organized as a graph with sufficiently many
1.121 +  independent nodes.
1.123 +  Threads lack the memory protection of separate processes, but
1.124 +  operate concurrently on shared heap memory.  This has the advantage
1.125 +  that results of independent computations are immediately available
1.126 +  to other threads, without requiring explicit communication,
1.127 +  reloading, or even recoding of data.
1.129 +  On the other hand, some programming guidelines need to be observed
1.130 +  in order to make unprotected parallelism work out smoothly.  While
1.131 +  the ML system implementation is responsible to maintain basic
1.132 +  integrity of the representation of ML values in memory, the
1.133 +  application programmer needs to ensure that multithreaded execution
1.134 +  does not break the intended semantics.
1.136 +  \medskip \paragraph{Critical shared resources.} Actually only those
1.137 +  parts outside the purely functional world of ML are critical.  In
1.138 +  particular, this covers
1.140 +  \begin{itemize}
1.142 +  \item global references (or arrays), i.e.\ those that persist over
1.143 +  several invocations of associated operations,\footnote{This is
1.144 +  independent of the visibility of such mutable values in the toplevel
1.145 +  scope.}
1.147 +  \item global ML bindings in the toplevel environment (@{verbatim
1.148 +  "type"}, @{verbatim val}, @{verbatim "structure"} etc.) due to
1.149 +  run-time invocation of the compiler,
1.151 +  \item direct I/O on shared channels, notably @{text "stdin"}, @{text
1.152 +  "stdout"}, @{text "stderr"}.
1.154 +  \end{itemize}
1.156 +  The majority of tools implemented within the Isabelle/Isar framework
1.157 +  will not require any of these critical elements: nothing special
1.158 +  needs to be observed when staying in the purely functional fragment
1.159 +  of ML.  Note that output via the official Isabelle channels does not
1.160 +  even count as direct I/O in the above sense, so the operations @{ML
1.161 +  "writeln"}, @{ML "warning"}, @{ML "tracing"} etc.\ are safe.
1.163 +  \paragraph{Multithreading in Isabelle/Isar.}  Our parallel execution
1.164 +  model is centered around the theory loader.  Whenever a given
1.165 +  subgraph of theories needs to be updated, the system schedules a
1.166 +  number of threads to process the sources as required, while
1.167 +  observing their dependencies.  Thus concurrency is limited to
1.168 +  independent nodes according to the theory import relation.
1.170 +  Any user-code that works relatively to the present background theory
1.171 +  is already safe.  Contextual data may be easily stored within the
1.172 +  theory or proof context, thanks to the generic context data concept
1.173 +  of Isabelle/Isar (see \secref{sec:context-data}).  This greatly
1.174 +  diminishes the demand for global state information in the first
1.175 +  place.
1.177 +  \medskip In rare situations where actual mutable content needs to be
1.178 +  manipulated, Isabelle provides a single \emph{critical section} that
1.179 +  may be entered while preventing any other thread from doing the
1.180 +  same.  Entering the critical section without contention is very
1.181 +  fast, and several basic system operations do so frequently.  This
1.182 +  also means that each thread should leave the critical section
1.183 +  quickly, otherwise parallel execution performance may degrade
1.184 +  significantly.
1.186 +  Despite this potential bottle-neck, we refrain from fine-grained
1.187 +  locking mechanisms: the restriction to a single lock prevents
1.188 +  deadlocks without demanding further considerations in user programs.
1.190 +  \paragraph{Good conduct of impure programs.} The following
1.191 +  guidelines enable non-functional programs to participate in
1.194 +  \begin{itemize}
1.196 +  \item Minimize global state information.  Using proper theory and
1.197 +  proof context data will actually return to functional update of
1.198 +  values, without any special precautions for multithreading.  Apart
1.199 +  from the fully general functors for theory and proof data (see
1.200 +  \secref{sec:context-data}) there are drop-in replacements that
1.201 +  emulate primitive references for the most basic cases of
1.202 +  \emph{configuration options} for type @{ML_type "bool"}/@{ML_type
1.203 +  "int"}/@{ML_type "string"} (see structure @{ML_struct ConfigOption})
1.204 +  and lists of theorems (see functor @{ML_functor NamedThmsFun}).
1.206 +  \item Keep components with local state information
1.207 +  \emph{re-entrant}.  Instead of poking initial values into (private)
1.208 +  global references, create a new state record on each invocation, and
1.209 +  pass that through any auxiliary functions of the component.  The
1.210 +  state record may well contain mutable references, without requiring
1.211 +  any special synchronizations, as long as each invocation sees its
1.212 +  own copy.  Occasionally, one might even return to plain functional
1.213 +  updates on non-mutable record values here.
1.215 +  \item Isolate process configuration flags.  The main legitimate
1.216 +  application of global references is to configure the whole process
1.217 +  in a certain way, essentially affecting all threads.  A typical
1.218 +  example is the @{ML show_types} flag, which tells the pretty printer
1.219 +  to output explicit type information for terms.  Such flags usually
1.220 +  do not affect the functionality of the core system, but only the
1.221 +  output being presented to the user.
1.223 +  Occasionally, such global process flags are treated like implicit
1.224 +  arguments to certain operations, by using the @{ML setmp} combinator
1.225 +  for safe temporary assignment.  Traditionally its main purpose was
1.226 +  to ensure proper recovery of the original value when exceptions are
1.227 +  raised in the body.  Now the functionality is extended to enter the
1.228 +  \emph{critical section}, with its usual potential of degrading
1.229 +  parallelism.
1.231 +  Note that recovery of plain value passing semantics via @{ML
1.232 +  setmp}~@{text "ref value"} assumes that this @{text "ref"} is
1.233 +  exclusively manipulated within the critical section.  In particular,
1.234 +  any persistent global assignment of @{text "ref := value"} needs to
1.235 +  be marked critical as well, to prevent intruding another threads
1.236 +  local view, and a lost-update in the global scope, too.
1.238 +  \item Minimize global ML bindings.  Processing theories occasionally
1.239 +  affects the global ML environment as well.  While each ML
1.240 +  compilation unit is safe, the order of scheduling of independent
1.241 +  declarations might cause problems when composing several modules
1.242 +  later on, due to hiding of previous ML names.
1.244 +  This cannot be helped in general, because the ML toplevel lacks the
1.245 +  graph structure of the Isabelle theory space.  Nevertheless, some
1.246 +  sound conventions of keeping global ML names essentially disjoint
1.247 +  (e.g.\ with the help of ML structures) prevents the problem to occur
1.248 +  in most practical situations.
1.250 +  \end{itemize}
1.252 +  Recall that in an open LCF-style'' system like Isabelle/Isar, the
1.253 +  user participates in constructing the overall environment.  This
1.254 +  means that state-based facilities offered by one component need to
1.255 +  be used with caution later on.  Minimizing critical elements, by
1.256 +  staying within the plain value-oriented view relative to theory or
1.257 +  proof contexts most of the time, will also reduce the chance of
1.258 +  mishaps occurring to end-users.
1.259 +*}
1.261 +text %mlref {*
1.262 +  \begin{mldecls}
1.263 +  @{index_ML NAMED_CRITICAL: "string -> (unit -> 'a) -> 'a"} \\
1.264 +  @{index_ML CRITICAL: "(unit -> 'a) -> 'a"} \\
1.265 +  @{index_ML setmp: "'a ref -> 'a -> ('b -> 'c) -> 'b -> 'c"} \\
1.266 +  \end{mldecls}
1.268 +  \begin{description}
1.270 +  \item @{ML NAMED_CRITICAL}~@{text "name f"} evaluates @{text "f ()"}
1.271 +  while staying within the critical section.  The @{text "name"}
1.272 +  argument serves for diagnostic output and might help to determine
1.273 +  sources of congestion.
1.275 +  \item @{ML CRITICAL} is the same as @{ML NAMED_CRITICAL} with empty
1.276 +  name argument.
1.278 +  \item @{ML setmp}~@{text "ref value f x"} evaluates @{text "f x"}
1.279 +  while staying within the critical section and having @{text "ref :=
1.280 +  value"} assigned temporarily.  This recovers a value-passing
1.281 +  semantics involving global references, regardless of exceptions or
1.282 +  concurrency.
1.284    \end{description}
1.285  *}
1.286 @@ -104,7 +293,7 @@
1.288  text %mlref {*
1.289    \begin{mldecls}
1.290 -  @{index_ML "(op |>)": "'a * ('a -> 'b) -> 'b"} \\
1.291 +  @{index_ML "op |> ": "'a * ('a -> 'b) -> 'b"} \\
1.292    @{index_ML fold: "('a -> 'b -> 'b) -> 'a list -> 'b -> 'b"} \\
1.293    @{index_ML fold_rev: "('a -> 'b -> 'b) -> 'a list -> 'b -> 'b"} \\
1.294    \end{mldecls}
1.295 @@ -167,10 +356,10 @@
1.297  text %mlref {*
1.298    \begin{mldecls}
1.299 -  @{index_ML "(op |->)": "('c * 'a) * ('c -> 'a -> 'b) -> 'b"} \\
1.300 -  @{index_ML "(op |>>)": "('a * 'c) * ('a -> 'b) -> 'b * 'c"} \\
1.301 -  @{index_ML "(op ||>)": "('c * 'a) * ('a -> 'b) -> 'c * 'b"} \\
1.302 -  @{index_ML "(op ||>>)": "('c * 'a) * ('a -> 'd * 'b) -> ('c * 'd) * 'b"} \\
1.303 +  @{index_ML "op |-> ": "('c * 'a) * ('c -> 'a -> 'b) -> 'b"} \\
1.304 +  @{index_ML "op |>> ": "('a * 'c) * ('a -> 'b) -> 'b * 'c"} \\
1.305 +  @{index_ML "op ||> ": "('c * 'a) * ('a -> 'b) -> 'c * 'b"} \\
1.306 +  @{index_ML "op ||>> ": "('c * 'a) * ('a -> 'd * 'b) -> ('c * 'd) * 'b"} \\
1.307    @{index_ML fold_map: "('a -> 'b -> 'c * 'b) -> 'a list -> 'b -> 'c list * 'b"} \\
1.308    \end{mldecls}
1.309  *}
1.310 @@ -181,11 +370,11 @@
1.312  text %mlref {*
1.313    \begin{mldecls}
1.314 -  @{index_ML "(op #>)": "('a -> 'b) * ('b -> 'c) -> 'a -> 'c"} \\
1.315 -  @{index_ML "(op #->)": "('a -> 'c * 'b) * ('c -> 'b -> 'd) -> 'a -> 'd"} \\
1.316 -  @{index_ML "(op #>>)": "('a -> 'c * 'b) * ('c -> 'd) -> 'a -> 'd * 'b"} \\
1.317 -  @{index_ML "(op ##>)": "('a -> 'c * 'b) * ('b -> 'd) -> 'a -> 'c * 'd"} \\
1.318 -  @{index_ML "(op ##>>)": "('a -> 'c * 'b) * ('b -> 'e * 'd) -> 'a -> ('c * 'e) * 'd"} \\
1.319 +  @{index_ML "op #> ": "('a -> 'b) * ('b -> 'c) -> 'a -> 'c"} \\
1.320 +  @{index_ML "op #-> ": "('a -> 'c * 'b) * ('c -> 'b -> 'd) -> 'a -> 'd"} \\
1.321 +  @{index_ML "op #>> ": "('a -> 'c * 'b) * ('c -> 'd) -> 'a -> 'd * 'b"} \\
1.322 +  @{index_ML "op ##> ": "('a -> 'c * 'b) * ('b -> 'd) -> 'a -> 'c * 'd"} \\
1.323 +  @{index_ML "op ##>> ": "('a -> 'c * 'b) * ('b -> 'e * 'd) -> 'a -> ('c * 'e) * 'd"} \\
1.324    \end{mldecls}
1.325  *}
1.327 @@ -197,7 +386,7 @@
1.329  text %mlref {*
1.330    \begin{mldecls}
1.331 -  @{index_ML "(op )": "('b -> 'a) -> 'b -> 'a * 'b"} \\
1.332 +  @{index_ML "op  ": "('b -> 'a) -> 'b -> 'a * 'b"} \\
1.333    @{index_ML tap: "('b -> 'a) -> 'b -> 'b"} \\
1.334    \end{mldecls}
1.335  *}
1.336 @@ -222,15 +411,16 @@
1.337  *}
1.339  text {*
1.340 -  Standard selector functions on @{text option}s are provided.
1.341 -  The @{ML try} and @{ML can} functions provide a convenient
1.342 -  interface for handling exceptions -- both take as arguments
1.343 -  a function @{text f} together with a parameter @{text x}
1.344 -  and catch any exception during the evaluation of the application
1.345 -  of @{text f} to @{text x}, either return a lifted result
1.346 -  (@{ML NONE} on failure) or a boolean value (@{ML false} on failure).
1.347 +  Standard selector functions on @{text option}s are provided.  The
1.348 +  @{ML try} and @{ML can} functions provide a convenient interface for
1.349 +  handling exceptions -- both take as arguments a function @{text f}
1.350 +  together with a parameter @{text x} and handle any exception during
1.351 +  the evaluation of the application of @{text f} to @{text x}, either
1.352 +  return a lifted result (@{ML NONE} on failure) or a boolean value
1.353 +  (@{ML false} on failure).
1.354  *}
1.357  section {* Common data structures *}
1.359  subsection {* Lists (as set-like data structures) *}
1.360 @@ -257,7 +447,7 @@
1.362    Functions are parametrized by an explicit equality function
1.363    to accomplish overloaded equality;  in most cases of monomorphic
1.364 -  equality, writing @{ML "(op =)"} should suffice.
1.365 +  equality, writing @{ML "op ="} should suffice.
1.366  *}
1.368  subsection {* Association lists *}
2.1 --- a/doc-src/IsarImplementation/Thy/document/ML.tex	Tue Jul 31 14:45:36 2007 +0200
2.2 +++ b/doc-src/IsarImplementation/Thy/document/ML.tex	Tue Jul 31 19:26:35 2007 +0200
2.3 @@ -22,6 +22,10 @@
2.4  }
2.5  \isamarkuptrue%
2.6  %
2.7 +\isamarkupsection{Style%
2.8 +}
2.9 +\isamarkuptrue%
2.10 +%
2.11  \begin{isamarkuptext}%
2.12  FIXME%
2.13  \end{isamarkuptext}%
2.14 @@ -32,11 +36,11 @@
2.15    \url{http://caml.inria.fr/resources/doc/guides/guidelines.en.html}.
2.16  %  FIMXE \url{http://www.cs.cornell.edu/Courses/cs312/2003sp/handouts/style.htm}
2.18 -  Like any style guide, it should not be interpreted dogmatically.
2.19 -  Instead, it forms a collection of recommendations which,
2.20 -  if obeyed, result in code that is not considered to be
2.21 -  obfuscated.  In certain cases, derivations are encouraged,
2.22 -  as far as you know what you are doing.
2.23 +  Like any style guide, it should not be interpreted dogmatically, but
2.24 +  with care and discernment.  Instead, it forms a collection of
2.25 +  recommendations which, if obeyed, result in code that is not
2.26 +  considered to be obfuscated.  In certain cases, derivations are
2.27 +  encouraged, as far as you know what you are doing.
2.29    \begin{description}
2.31 @@ -57,20 +61,25 @@
2.32        \begin{itemize}
2.34          \item The space bar is the easiest key to find on the keyboard,
2.35 -          press it as often as necessary. {\ttfamily 2 + 2} is better
2.36 -          than {\ttfamily 2+2}, likewise {\ttfamily f (x, y)}
2.37 -          better than {\ttfamily f(x,y)}.
2.38 +          press it as often as necessary. \verb|2 + 2| is better
2.39 +          than \verb|2+2|, likewise \verb|f (x, y)| is
2.40 +          better than \verb|f(x,y)|.
2.42 -        \item Restrict your lines to \emph{at most} 80 characters.
2.43 -          This will allow you to keep the beginning of a line
2.44 -          in view while watching its end.
2.45 +        \item Restrict your lines to 80 characters.  This will allow
2.46 +          you to keep the beginning of a line in view while watching
2.47 +          its end.\footnote{To acknowledge the lax practice of
2.48 +          text editing these days, we tolerate as much as 100
2.49 +          characters per line, but anything beyond 120 is not
2.50 +          considered proper source text.}
2.52 -        \item Ban tabs; they are a context-sensitive formatting
2.53 -          feature and likely to confuse anyone not using your
2.54 -          favourite editor.
2.55 +        \item Ban tabulators; they are a context-sensitive formatting
2.56 +          feature and likely to confuse anyone not using your favorite
2.57 +          editor.\footnote{Some modern programming language even
2.58 +          forbid tabulators altogether according to the formal syntax
2.59 +          definition.}
2.61          \item Get rid of trailing whitespace.  Instead, do not
2.62 -          surpess a trailing newline at the end of your files.
2.63 +          suppress a trailing newline at the end of your files.
2.65          \item Choose a generally accepted style of indentation,
2.66            then use it systematically throughout the whole
2.67 @@ -80,7 +89,7 @@
2.68        \end{itemize}
2.70      \item[cut-and-paste succeeds over copy-and-paste]
2.71 -      \emph{Never} copy-and-paste code when programming.  If you
2.72 +       \emph{Never} copy-and-paste code when programming.  If you
2.73          need the same piece of code twice, introduce a
2.74          reasonable auxiliary function (if there is no
2.75          such function, very likely you got something wrong).
2.76 @@ -94,26 +103,217 @@
2.77        over efforts to explain nasty code.
2.79      \item[functional programming is based on functions]
2.80 -      Avoid constructivisms'', e.g. pass a table lookup function,
2.81 -      rather than an actual table with lookup in body.  Accustom
2.82 -      your way of codeing to the level of expressiveness
2.83 -      a functional programming language is giving onto you.
2.84 +      Avoid constructivisms'', i.e.\ unnecessary concrete datatype
2.85 +      representations.  Instead model things as abstract as
2.86 +      appropriate.  For example, pass a table lookup function rather
2.87 +      than a concrete table with lookup performed in body.  Accustom
2.88 +      your way of coding to the level of expressiveness a functional
2.89 +      programming language is giving onto you.
2.91      \item[tuples]
2.92        are often in the way.  When there is no striking argument
2.93        to tuple function arguments, just write your function curried.
2.95      \item[telling names]
2.96 -      Any name should tell its purpose as exactly as possible,
2.97 -      while keeping its length to the absolutely neccessary minimum.
2.98 -      Always give the same name to function arguments which
2.99 -      have the same meaning. Separate words by underscores
2.100 -      (\verb|int_of_string|'', not \verb|intOfString|'')
2.101 +      Any name should tell its purpose as exactly as possible, while
2.102 +      keeping its length to the absolutely necessary minimum.  Always
2.103 +      give the same name to function arguments which have the same
2.104 +      meaning. Separate words by underscores (\verb|int_of_string|, not \verb|intOfString|).\footnote{Some
2.105 +      recent tools for Emacs include special precautions to cope with
2.106 +      bumpy names in \isa{camelCase}, e.g.\ for improved on-screen
2.107 +      readability.  It is easier to abstain from using such names in the
2.108 +      first place.}
2.110    \end{description}%
2.111  \end{isamarkuptext}%
2.112  \isamarkuptrue%
2.115 +}
2.116 +\isamarkuptrue%
2.117 +%
2.118 +\begin{isamarkuptext}%
2.119 +Recent versions of Poly/ML (5.1 or later) support multithreaded
2.120 +  execution based on native operating system threads of the
2.121 +  underlying platform.  Thus threads will actually be executed in
2.122 +  parallel on multi-core systems.  A speedup-factor of approximately
2.123 +  2--4 can be expected for large well-structured Isabelle sessions,
2.124 +  where theories are organized as a graph with sufficiently many
2.125 +  independent nodes.
2.127 +  Threads lack the memory protection of separate processes, but
2.128 +  operate concurrently on shared heap memory.  This has the advantage
2.129 +  that results of independent computations are immediately available
2.130 +  to other threads, without requiring explicit communication,
2.131 +  reloading, or even recoding of data.
2.133 +  On the other hand, some programming guidelines need to be observed
2.134 +  in order to make unprotected parallelism work out smoothly.  While
2.135 +  the ML system implementation is responsible to maintain basic
2.136 +  integrity of the representation of ML values in memory, the
2.137 +  application programmer needs to ensure that multithreaded execution
2.138 +  does not break the intended semantics.
2.140 +  \medskip \paragraph{Critical shared resources.} Actually only those
2.141 +  parts outside the purely functional world of ML are critical.  In
2.142 +  particular, this covers
2.144 +  \begin{itemize}
2.146 +  \item global references (or arrays), i.e.\ those that persist over
2.147 +  several invocations of associated operations,\footnote{This is
2.148 +  independent of the visibility of such mutable values in the toplevel
2.149 +  scope.}
2.151 +  \item global ML bindings in the toplevel environment (\verb|type|, \verb|val|, \verb|structure| etc.) due to
2.152 +  run-time invocation of the compiler,
2.154 +  \item direct I/O on shared channels, notably \isa{stdin}, \isa{stdout}, \isa{stderr}.
2.156 +  \end{itemize}
2.158 +  The majority of tools implemented within the Isabelle/Isar framework
2.159 +  will not require any of these critical elements: nothing special
2.160 +  needs to be observed when staying in the purely functional fragment
2.161 +  of ML.  Note that output via the official Isabelle channels does not
2.162 +  even count as direct I/O in the above sense, so the operations \verb|writeln|, \verb|warning|, \verb|tracing| etc.\ are safe.
2.164 +  \paragraph{Multithreading in Isabelle/Isar.}  Our parallel execution
2.165 +  model is centered around the theory loader.  Whenever a given
2.166 +  subgraph of theories needs to be updated, the system schedules a
2.167 +  number of threads to process the sources as required, while
2.168 +  observing their dependencies.  Thus concurrency is limited to
2.169 +  independent nodes according to the theory import relation.
2.171 +  Any user-code that works relatively to the present background theory
2.172 +  is already safe.  Contextual data may be easily stored within the
2.173 +  theory or proof context, thanks to the generic context data concept
2.174 +  of Isabelle/Isar (see \secref{sec:context-data}).  This greatly
2.175 +  diminishes the demand for global state information in the first
2.176 +  place.
2.178 +  \medskip In rare situations where actual mutable content needs to be
2.179 +  manipulated, Isabelle provides a single \emph{critical section} that
2.180 +  may be entered while preventing any other thread from doing the
2.181 +  same.  Entering the critical section without contention is very
2.182 +  fast, and several basic system operations do so frequently.  This
2.183 +  also means that each thread should leave the critical section
2.184 +  quickly, otherwise parallel execution performance may degrade
2.185 +  significantly.
2.187 +  Despite this potential bottle-neck, we refrain from fine-grained
2.188 +  locking mechanisms: the restriction to a single lock prevents
2.189 +  deadlocks without demanding further considerations in user programs.
2.191 +  \paragraph{Good conduct of impure programs.} The following
2.192 +  guidelines enable non-functional programs to participate in
2.195 +  \begin{itemize}
2.197 +  \item Minimize global state information.  Using proper theory and
2.198 +  proof context data will actually return to functional update of
2.199 +  values, without any special precautions for multithreading.  Apart
2.200 +  from the fully general functors for theory and proof data (see
2.201 +  \secref{sec:context-data}) there are drop-in replacements that
2.202 +  emulate primitive references for the most basic cases of
2.203 +  \emph{configuration options} for type \verb|bool|/\verb|int|/\verb|string| (see structure \verb|ConfigOption|)
2.204 +  and lists of theorems (see functor \verb|NamedThmsFun|).
2.206 +  \item Keep components with local state information
2.207 +  \emph{re-entrant}.  Instead of poking initial values into (private)
2.208 +  global references, create a new state record on each invocation, and
2.209 +  pass that through any auxiliary functions of the component.  The
2.210 +  state record may well contain mutable references, without requiring
2.211 +  any special synchronizations, as long as each invocation sees its
2.212 +  own copy.  Occasionally, one might even return to plain functional
2.213 +  updates on non-mutable record values here.
2.215 +  \item Isolate process configuration flags.  The main legitimate
2.216 +  application of global references is to configure the whole process
2.217 +  in a certain way, essentially affecting all threads.  A typical
2.218 +  example is the \verb|show_types| flag, which tells the pretty printer
2.219 +  to output explicit type information for terms.  Such flags usually
2.220 +  do not affect the functionality of the core system, but only the
2.221 +  output being presented to the user.
2.223 +  Occasionally, such global process flags are treated like implicit
2.224 +  arguments to certain operations, by using the \verb|setmp| combinator
2.225 +  for safe temporary assignment.  Traditionally its main purpose was
2.226 +  to ensure proper recovery of the original value when exceptions are
2.227 +  raised in the body.  Now the functionality is extended to enter the
2.228 +  \emph{critical section}, with its usual potential of degrading
2.229 +  parallelism.
2.231 +  Note that recovery of plain value passing semantics via \verb|setmp|~\isa{ref\ value} assumes that this \isa{ref} is
2.232 +  exclusively manipulated within the critical section.  In particular,
2.233 +  any persistent global assignment of \isa{ref\ {\isacharcolon}{\isacharequal}\ value} needs to
2.234 +  be marked critical as well, to prevent intruding another threads
2.235 +  local view, and a lost-update in the global scope, too.
2.237 +  \item Minimize global ML bindings.  Processing theories occasionally
2.238 +  affects the global ML environment as well.  While each ML
2.239 +  compilation unit is safe, the order of scheduling of independent
2.240 +  declarations might cause problems when composing several modules
2.241 +  later on, due to hiding of previous ML names.
2.243 +  This cannot be helped in general, because the ML toplevel lacks the
2.244 +  graph structure of the Isabelle theory space.  Nevertheless, some
2.245 +  sound conventions of keeping global ML names essentially disjoint
2.246 +  (e.g.\ with the help of ML structures) prevents the problem to occur
2.247 +  in most practical situations.
2.249 +  \end{itemize}
2.251 +  Recall that in an open LCF-style'' system like Isabelle/Isar, the
2.252 +  user participates in constructing the overall environment.  This
2.253 +  means that state-based facilities offered by one component need to
2.254 +  be used with caution later on.  Minimizing critical elements, by
2.255 +  staying within the plain value-oriented view relative to theory or
2.256 +  proof contexts most of the time, will also reduce the chance of
2.257 +  mishaps occurring to end-users.%
2.258 +\end{isamarkuptext}%
2.259 +\isamarkuptrue%
2.260 +%
2.262 +%
2.264 +%
2.265 +\isatagmlref
2.266 +%
2.267 +\begin{isamarkuptext}%
2.268 +\begin{mldecls}
2.269 +  \indexml{NAMED-CRITICAL}\verb|NAMED_CRITICAL: string -> (unit -> 'a) -> 'a| \\
2.270 +  \indexml{CRITICAL}\verb|CRITICAL: (unit -> 'a) -> 'a| \\
2.271 +  \indexml{setmp}\verb|setmp: 'a ref -> 'a -> ('b -> 'c) -> 'b -> 'c| \\
2.272 +  \end{mldecls}
2.274 +  \begin{description}
2.276 +  \item \verb|NAMED_CRITICAL|~\isa{name\ f} evaluates \isa{f\ {\isacharparenleft}{\isacharparenright}}
2.277 +  while staying within the critical section.  The \isa{name}
2.278 +  argument serves for diagnostic output and might help to determine
2.279 +  sources of congestion.
2.281 +  \item \verb|CRITICAL| is the same as \verb|NAMED_CRITICAL| with empty
2.282 +  name argument.
2.284 +  \item \verb|setmp|~\isa{ref\ value\ f\ x} evaluates \isa{f\ x}
2.285 +  while staying within the critical section and having \isa{ref\ {\isacharcolon}{\isacharequal}\ value} assigned temporarily.  This recovers a value-passing
2.286 +  semantics involving global references, regardless of exceptions or
2.287 +  concurrency.
2.289 +  \end{description}%
2.290 +\end{isamarkuptext}%
2.291 +\isamarkuptrue%
2.292 +%
2.293 +\endisatagmlref
2.294 +{\isafoldmlref}%
2.295 +%
2.297 +%
2.299 +%
2.300  \isamarkupchapter{Basic library functions%
2.302  \isamarkuptrue%
2.303 @@ -137,7 +337,7 @@
2.305  \begin{isamarkuptext}%
2.306  \begin{mldecls}
2.307 -  \indexml{(op |$>$)}\verb|(op |\verb,|,\verb|>): 'a * ('a -> 'b) -> 'b| \\
2.308 +  \indexml{op |$>$ }\verb|op |\verb,|,\verb|> : 'a * ('a -> 'b) -> 'b| \\
2.309    \indexml{fold}\verb|fold: ('a -> 'b -> 'b) -> 'a list -> 'b -> 'b| \\
2.310    \indexml{fold-rev}\verb|fold_rev: ('a -> 'b -> 'b) -> 'a list -> 'b -> 'b| \\
2.311    \end{mldecls}%
2.312 @@ -227,10 +427,10 @@
2.314  \begin{isamarkuptext}%
2.315  \begin{mldecls}
2.316 -  \indexml{(op |-$>$)}\verb|(op |\verb,|,\verb|->): ('c * 'a) * ('c -> 'a -> 'b) -> 'b| \\
2.317 -  \indexml{(op |$>$$>)}\verb|(op |\verb,|,\verb|>>): ('a * 'c) * ('a -> 'b) -> 'b * 'c| \\ 2.318 - \indexml{(op ||>)}\verb|(op |\verb,|,\verb||\verb,|,\verb|>): ('c * 'a) * ('a -> 'b) -> 'c * 'b| \\ 2.319 - \indexml{(op ||>$$>$)}\verb|(op |\verb,|,\verb||\verb,|,\verb|>>): ('c * 'a) * ('a -> 'd * 'b) -> ('c * 'd) * 'b| \\
2.320 +  \indexml{op |-$>$ }\verb|op |\verb,|,\verb|-> : ('c * 'a) * ('c -> 'a -> 'b) -> 'b| \\
2.321 +  \indexml{op |$>$$> }\verb|op |\verb,|,\verb|>> : ('a * 'c) * ('a -> 'b) -> 'b * 'c| \\ 2.322 + \indexml{op ||> }\verb|op |\verb,|,\verb||\verb,|,\verb|> : ('c * 'a) * ('a -> 'b) -> 'c * 'b| \\ 2.323 + \indexml{op ||>$$>$ }\verb|op |\verb,|,\verb||\verb,|,\verb|>> : ('c * 'a) * ('a -> 'd * 'b) -> ('c * 'd) * 'b| \\
2.324    \indexml{fold-map}\verb|fold_map: ('a -> 'b -> 'c * 'b) -> 'a list -> 'b -> 'c list * 'b| \\
2.325    \end{mldecls}%
2.326  \end{isamarkuptext}%
2.327 @@ -256,11 +456,11 @@
2.329  \begin{isamarkuptext}%
2.330  \begin{mldecls}
2.331 -  \indexml{(op \#$>$)}\verb|(op #>): ('a -> 'b) * ('b -> 'c) -> 'a -> 'c| \\
2.332 -  \indexml{(op \#-$>$)}\verb|(op #->): ('a -> 'c * 'b) * ('c -> 'b -> 'd) -> 'a -> 'd| \\
2.333 -  \indexml{(op \#$>$$>)}\verb|(op #>>): ('a -> 'c * 'b) * ('c -> 'd) -> 'a -> 'd * 'b| \\ 2.334 - \indexml{(op \#\#>)}\verb|(op ##>): ('a -> 'c * 'b) * ('b -> 'd) -> 'a -> 'c * 'd| \\ 2.335 - \indexml{(op \#\#>$$>$)}\verb|(op ##>>): ('a -> 'c * 'b) * ('b -> 'e * 'd) -> 'a -> ('c * 'e) * 'd| \\
2.336 +  \indexml{op \#$>$ }\verb|op #> : ('a -> 'b) * ('b -> 'c) -> 'a -> 'c| \\
2.337 +  \indexml{op \#-$>$ }\verb|op #-> : ('a -> 'c * 'b) * ('c -> 'b -> 'd) -> 'a -> 'd| \\
2.338 +  \indexml{op \#$>$$> }\verb|op #>> : ('a -> 'c * 'b) * ('c -> 'd) -> 'a -> 'd * 'b| \\ 2.339 + \indexml{op \#\#> }\verb|op ##> : ('a -> 'c * 'b) * ('b -> 'd) -> 'a -> 'c * 'd| \\ 2.340 + \indexml{op \#\#>$$>$ }\verb|op ##>> : ('a -> 'c * 'b) * ('b -> 'e * 'd) -> 'a -> ('c * 'e) * 'd| \\
2.341    \end{mldecls}%
2.342  \end{isamarkuptext}%
2.343  \isamarkuptrue%
2.344 @@ -287,7 +487,7 @@
2.346  \begin{isamarkuptext}%
2.347  \begin{mldecls}
2.348 -  \indexml{(op )}\verb|(op ): ('b -> 'a) -> 'b -> 'a * 'b| \\
2.349 +  \indexml{op  }\verb|op  : ('b -> 'a) -> 'b -> 'a * 'b| \\
2.350    \indexml{tap}\verb|tap: ('b -> 'a) -> 'b -> 'b| \\
2.351    \end{mldecls}%
2.352  \end{isamarkuptext}%
2.353 @@ -337,13 +537,13 @@
2.356  \begin{isamarkuptext}%
2.357 -Standard selector functions on \isa{option}s are provided.
2.358 -  The \verb|try| and \verb|can| functions provide a convenient
2.359 -  interface for handling exceptions -- both take as arguments
2.360 -  a function \isa{f} together with a parameter \isa{x}
2.361 -  and catch any exception during the evaluation of the application
2.362 -  of \isa{f} to \isa{x}, either return a lifted result
2.363 -  (\verb|NONE| on failure) or a boolean value (\verb|false| on failure).%
2.364 +Standard selector functions on \isa{option}s are provided.  The
2.365 +  \verb|try| and \verb|can| functions provide a convenient interface for
2.366 +  handling exceptions -- both take as arguments a function \isa{f}
2.367 +  together with a parameter \isa{x} and handle any exception during
2.368 +  the evaluation of the application of \isa{f} to \isa{x}, either
2.369 +  return a lifted result (\verb|NONE| on failure) or a boolean value
2.370 +  (\verb|false| on failure).%
2.371  \end{isamarkuptext}%
2.372  \isamarkuptrue%
2.374 @@ -378,7 +578,7 @@
2.376    Functions are parametrized by an explicit equality function
2.377    to accomplish overloaded equality;  in most cases of monomorphic
2.378 -  equality, writing \verb|(op =)| should suffice.%
2.379 +  equality, writing \verb|op =| should suffice.%
2.380  \end{isamarkuptext}%
2.381  \isamarkuptrue%