src/HOL/Unix/Unix.thy
author wenzelm
Wed Dec 29 17:34:41 2010 +0100 (2010-12-29)
changeset 41413 64cd30d6b0b8
parent 39224 39e0d3b86112
child 41579 4031fb078acc
permissions -rw-r--r--
explicit file specifications -- avoid secondary load path;
     1 (*  Title:      HOL/Unix/Unix.thy
     2     Author:     Markus Wenzel, TU Muenchen
     3 *)
     4 
     5 header {* Unix file-systems \label{sec:unix-file-system} *}
     6 
     7 theory Unix
     8 imports
     9   Main
    10   "~~/src/HOL/Library/Nested_Environment"
    11   "~~/src/HOL/Library/List_Prefix"
    12 begin
    13 
    14 text {*
    15   We give a simple mathematical model of the basic structures
    16   underlying the Unix file-system, together with a few fundamental
    17   operations that could be imagined to be performed internally by the
    18   Unix kernel.  This forms the basis for the set of Unix system-calls
    19   to be introduced later (see \secref{sec:unix-trans}), which are the
    20   actual interface offered to processes running in user-space.
    21 
    22   \medskip Basically, any Unix file is either a \emph{plain file} or a
    23   \emph{directory}, consisting of some \emph{content} plus
    24   \emph{attributes}.  The content of a plain file is plain text.  The
    25   content of a directory is a mapping from names to further
    26   files.\footnote{In fact, this is the only way that names get
    27   associated with files.  In Unix files do \emph{not} have a name in
    28   itself.  Even more, any number of names may be associated with the
    29   very same file due to \emph{hard links} (although this is excluded
    30   from our model).}  Attributes include information to control various
    31   ways to access the file (read, write etc.).
    32 
    33   Our model will be quite liberal in omitting excessive detail that is
    34   easily seen to be ``irrelevant'' for the aspects of Unix
    35   file-systems to be discussed here.  First of all, we ignore
    36   character and block special files, pipes, sockets, hard links,
    37   symbolic links, and mount points.
    38 *}
    39 
    40 
    41 subsection {* Names *}
    42 
    43 text {*
    44   User ids and file name components shall be represented by natural
    45   numbers (without loss of generality).  We do not bother about
    46   encoding of actual names (e.g.\ strings), nor a mapping between user
    47   names and user ids as would be present in a reality.
    48 *}
    49 
    50 types
    51   uid = nat
    52   name = nat
    53   path = "name list"
    54 
    55 
    56 subsection {* Attributes *}
    57 
    58 text {*
    59   Unix file attributes mainly consist of \emph{owner} information and
    60   a number of \emph{permission} bits which control access for
    61   ``user'', ``group'', and ``others'' (see the Unix man pages @{text
    62   "chmod(2)"} and @{text "stat(2)"} for more details).
    63 
    64   \medskip Our model of file permissions only considers the ``others''
    65   part.  The ``user'' field may be omitted without loss of overall
    66   generality, since the owner is usually able to change it anyway by
    67   performing @{text chmod}.\footnote{The inclined Unix expert may try
    68   to figure out some exotic arrangements of a real-world Unix
    69   file-system such that the owner of a file is unable to apply the
    70   @{text chmod} system call.}  We omit ``group'' permissions as a
    71   genuine simplification as we just do not intend to discuss a model
    72   of multiple groups and group membership, but pretend that everyone
    73   is member of a single global group.\footnote{A general HOL model of
    74   user group structures and related issues is given in
    75   \cite{Naraschewski:2001}.}
    76 *}
    77 
    78 datatype perm =
    79     Readable
    80   | Writable
    81   | Executable    -- "(ignored)"
    82 
    83 types perms = "perm set"
    84 
    85 record att =
    86   owner :: uid
    87   others :: perms
    88 
    89 text {*
    90   For plain files @{term Readable} and @{term Writable} specify read
    91   and write access to the actual content, i.e.\ the string of text
    92   stored here.  For directories @{term Readable} determines if the set
    93   of entry names may be accessed, and @{term Writable} controls the
    94   ability to create or delete any entries (both plain files or
    95   sub-directories).
    96 
    97   As another simplification, we ignore the @{term Executable}
    98   permission altogether.  In reality it would indicate executable
    99   plain files (also known as ``binaries''), or control actual lookup
   100   of directory entries (recall that mere directory browsing is
   101   controlled via @{term Readable}).  Note that the latter means that
   102   in order to perform any file-system operation whatsoever, all
   103   directories encountered on the path would have to grant @{term
   104   Executable}.  We ignore this detail and pretend that all directories
   105   give @{term Executable} permission to anybody.
   106 *}
   107 
   108 
   109 subsection {* Files *}
   110 
   111 text {*
   112   In order to model the general tree structure of a Unix file-system
   113   we use the arbitrarily branching datatype @{typ "('a, 'b, 'c) env"}
   114   from the standard library of Isabelle/HOL
   115   \cite{Bauer-et-al:2002:HOL-Library}.  This type provides
   116   constructors @{term Val} and @{term Env} as follows:
   117 
   118   \medskip
   119   {\def\isastyleminor{\isastyle}
   120   \begin{tabular}{l}
   121   @{term [source] "Val :: 'a \<Rightarrow> ('a, 'b, 'c) env"} \\
   122   @{term [source] "Env :: 'b \<Rightarrow> ('c \<Rightarrow> ('a, 'b, 'c) env option) \<Rightarrow> ('a, 'b, 'c) env"} \\
   123   \end{tabular}}
   124   \medskip
   125 
   126   Here the parameter @{typ 'a} refers to plain values occurring at
   127   leaf positions, parameter @{typ 'b} to information kept with inner
   128   branch nodes, and parameter @{typ 'c} to the branching type of the
   129   tree structure.  For our purpose we use the type instance with @{typ
   130   "att \<times> string"} (representing plain files), @{typ att} (for
   131   attributes of directory nodes), and @{typ name} (for the index type
   132   of directory nodes).
   133 *}
   134 
   135 types
   136   "file" = "(att \<times> string, att, name) env"
   137 
   138 text {*
   139   \medskip The HOL library also provides @{term lookup} and @{term
   140   update} operations for general tree structures with the subsequent
   141   primitive recursive characterizations.
   142 
   143   \medskip
   144   {\def\isastyleminor{\isastyle}
   145   \begin{tabular}{l}
   146   @{term [source] "lookup :: ('a, 'b, 'c) env \<Rightarrow> 'c list \<Rightarrow> ('a, 'b, 'c) env option"} \\
   147   @{term [source]
   148   "update :: 'c list \<Rightarrow> ('a, 'b, 'c) env option \<Rightarrow> ('a, 'b, 'c) env \<Rightarrow> ('a, 'b, 'c) env"} \\
   149   \end{tabular}}
   150 
   151   @{thm [display, indent = 2, eta_contract = false] lookup_eq [no_vars]}
   152   @{thm [display, indent = 2, eta_contract = false] update_eq [no_vars]}
   153 
   154   Several further properties of these operations are proven in
   155   \cite{Bauer-et-al:2002:HOL-Library}.  These will be routinely used
   156   later on without further notice.
   157 
   158   \bigskip Apparently, the elements of type @{typ "file"} contain an
   159   @{typ att} component in either case.  We now define a few auxiliary
   160   operations to manipulate this field uniformly, following the
   161   conventions for record types in Isabelle/HOL
   162   \cite{Nipkow-et-al:2000:HOL}.
   163 *}
   164 
   165 definition
   166   "attributes file =
   167     (case file of
   168       Val (att, text) \<Rightarrow> att
   169     | Env att dir \<Rightarrow> att)"
   170 
   171 definition
   172   "map_attributes f file =
   173     (case file of
   174       Val (att, text) \<Rightarrow> Val (f att, text)
   175     | Env att dir \<Rightarrow> Env (f att) dir)"
   176 
   177 lemma [simp]: "attributes (Val (att, text)) = att"
   178   by (simp add: attributes_def)
   179 
   180 lemma [simp]: "attributes (Env att dir) = att"
   181   by (simp add: attributes_def)
   182 
   183 lemma [simp]: "attributes (map_attributes f file) = f (attributes file)"
   184   by (cases "file") (simp_all add: attributes_def map_attributes_def
   185     split_tupled_all)
   186 
   187 lemma [simp]: "map_attributes f (Val (att, text)) = Val (f att, text)"
   188   by (simp add: map_attributes_def)
   189 
   190 lemma [simp]: "map_attributes f (Env att dir) = Env (f att) dir"
   191   by (simp add: map_attributes_def)
   192 
   193 
   194 subsection {* Initial file-systems *}
   195 
   196 text {*
   197   Given a set of \emph{known users} a file-system shall be initialized
   198   by providing an empty home directory for each user, with read-only
   199   access for everyone else.  (Note that we may directly use the user
   200   id as home directory name, since both types have been identified.)
   201   Certainly, the very root directory is owned by the super user (who
   202   has user id 0).
   203 *}
   204 
   205 definition
   206   "init users =
   207     Env \<lparr>owner = 0, others = {Readable}\<rparr>
   208       (\<lambda>u. if u \<in> users then Some (Env \<lparr>owner = u, others = {Readable}\<rparr> empty)
   209         else None)"
   210 
   211 
   212 subsection {* Accessing file-systems *}
   213 
   214 text {*
   215   The main internal file-system operation is access of a file by a
   216   user, requesting a certain set of permissions.  The resulting @{typ
   217   "file option"} indicates if a file had been present at the
   218   corresponding @{typ path} and if access was granted according to the
   219   permissions recorded within the file-system.
   220 
   221   Note that by the rules of Unix file-system security (e.g.\
   222   \cite{Tanenbaum:1992}) both the super-user and owner may always
   223   access a file unconditionally (in our simplified model).
   224 *}
   225 
   226 definition
   227   "access root path uid perms =
   228     (case lookup root path of
   229       None \<Rightarrow> None
   230     | Some file \<Rightarrow>
   231         if uid = 0
   232           \<or> uid = owner (attributes file)
   233           \<or> perms \<subseteq> others (attributes file)
   234         then Some file
   235         else None)"
   236 
   237 text {*
   238   \medskip Successful access to a certain file is the main
   239   prerequisite for system-calls to be applicable (cf.\
   240   \secref{sec:unix-trans}).  Any modification of the file-system is
   241   then performed using the basic @{term update} operation.
   242 
   243   \medskip We see that @{term access} is just a wrapper for the basic
   244   @{term lookup} function, with additional checking of
   245   attributes. Subsequently we establish a few auxiliary facts that
   246   stem from the primitive @{term lookup} used within @{term access}.
   247 *}
   248 
   249 lemma access_empty_lookup: "access root path uid {} = lookup root path"
   250   by (simp add: access_def split: option.splits)
   251 
   252 lemma access_some_lookup:
   253   "access root path uid perms = Some file \<Longrightarrow>
   254     lookup root path = Some file"
   255   by (simp add: access_def split: option.splits if_splits)
   256 
   257 lemma access_update_other:
   258   assumes parallel: "path' \<parallel> path"
   259   shows "access (update path' opt root) path uid perms = access root path uid perms"
   260 proof -
   261   from parallel obtain y z xs ys zs where
   262       "y \<noteq> z" and "path' = xs @ y # ys" and "path = xs @ z # zs"
   263     by (blast dest: parallel_decomp)
   264   then have "lookup (update path' opt root) path = lookup root path"
   265     by (blast intro: lookup_update_other)
   266   then show ?thesis by (simp only: access_def)
   267 qed
   268 
   269 
   270 section {* File-system transitions \label{sec:unix-trans} *}
   271 
   272 subsection {* Unix system calls \label{sec:unix-syscall} *}
   273 
   274 text {*
   275   According to established operating system design (cf.\
   276   \cite{Tanenbaum:1992}) user space processes may only initiate system
   277   operations by a fixed set of \emph{system-calls}.  This enables the
   278   kernel to enforce certain security policies in the first
   279   place.\footnote{Incidently, this is the very same principle employed
   280   by any ``LCF-style'' theorem proving system according to Milner's
   281   principle of ``correctness by construction'', such as Isabelle/HOL
   282   itself.}
   283 
   284   \medskip In our model of Unix we give a fixed datatype @{text
   285   operation} for the syntax of system-calls, together with an
   286   inductive definition of file-system state transitions of the form
   287   @{text "root \<midarrow>x\<rightarrow> root'"} for the operational semantics.
   288 *}
   289 
   290 datatype operation =
   291     Read uid string path
   292   | Write uid string path
   293   | Chmod uid perms path
   294   | Creat uid perms path
   295   | Unlink uid path
   296   | Mkdir uid perms path
   297   | Rmdir uid path
   298   | Readdir uid "name set" path
   299 
   300 text {*
   301   The @{typ uid} field of an operation corresponds to the
   302   \emph{effective user id} of the underlying process, although our
   303   model never mentions processes explicitly.  The other parameters are
   304   provided as arguments by the caller; the @{term path} one is common
   305   to all kinds of system-calls.
   306 *}
   307 
   308 primrec
   309   uid_of :: "operation \<Rightarrow> uid"
   310 where
   311     "uid_of (Read uid text path) = uid"
   312   | "uid_of (Write uid text path) = uid"
   313   | "uid_of (Chmod uid perms path) = uid"
   314   | "uid_of (Creat uid perms path) = uid"
   315   | "uid_of (Unlink uid path) = uid"
   316   | "uid_of (Mkdir uid path perms) = uid"
   317   | "uid_of (Rmdir uid path) = uid"
   318   | "uid_of (Readdir uid names path) = uid"
   319 
   320 primrec
   321   path_of :: "operation \<Rightarrow> path"
   322 where
   323     "path_of (Read uid text path) = path"
   324   | "path_of (Write uid text path) = path"
   325   | "path_of (Chmod uid perms path) = path"
   326   | "path_of (Creat uid perms path) = path"
   327   | "path_of (Unlink uid path) = path"
   328   | "path_of (Mkdir uid perms path) = path"
   329   | "path_of (Rmdir uid path) = path"
   330   | "path_of (Readdir uid names path) = path"
   331 
   332 text {*
   333   \medskip Note that we have omitted explicit @{text Open} and @{text
   334   Close} operations, pretending that @{term Read} and @{term Write}
   335   would already take care of this behind the scenes.  Thus we have
   336   basically treated actual sequences of real system-calls @{text
   337   "open"}--@{text read}/@{text write}--@{text close} as atomic.
   338 
   339   In principle, this could make big a difference in a model with
   340   explicit concurrent processes.  On the other hand, even on a real
   341   Unix system the exact scheduling of concurrent @{text "open"} and
   342   @{text close} operations does \emph{not} directly affect the success
   343   of corresponding @{text read} or @{text write}.  Unix allows several
   344   processes to have files opened at the same time, even for writing!
   345   Certainly, the result from reading the contents later may be hard to
   346   predict, but the system-calls involved here will succeed in any
   347   case.
   348 
   349   \bigskip The operational semantics of system calls is now specified
   350   via transitions of the file-system configuration.  This is expressed
   351   as an inductive relation (although there is no actual recursion
   352   involved here).
   353 *}
   354 
   355 inductive
   356   transition :: "file \<Rightarrow> operation \<Rightarrow> file \<Rightarrow> bool"
   357     ("_ \<midarrow>_\<rightarrow> _" [90, 1000, 90] 100)
   358 where
   359 
   360   read:
   361     "access root path uid {Readable} = Some (Val (att, text)) \<Longrightarrow>
   362       root \<midarrow>(Read uid text path)\<rightarrow> root" |
   363   "write":
   364     "access root path uid {Writable} = Some (Val (att, text')) \<Longrightarrow>
   365       root \<midarrow>(Write uid text path)\<rightarrow> update path (Some (Val (att, text))) root" |
   366 
   367   chmod:
   368     "access root path uid {} = Some file \<Longrightarrow>
   369       uid = 0 \<or> uid = owner (attributes file) \<Longrightarrow>
   370       root \<midarrow>(Chmod uid perms path)\<rightarrow> update path
   371         (Some (map_attributes (others_update (\<lambda>_. perms)) file)) root" |
   372 
   373   creat:
   374     "path = parent_path @ [name] \<Longrightarrow>
   375       access root parent_path uid {Writable} = Some (Env att parent) \<Longrightarrow>
   376       access root path uid {} = None \<Longrightarrow>
   377       root \<midarrow>(Creat uid perms path)\<rightarrow> update path
   378         (Some (Val (\<lparr>owner = uid, others = perms\<rparr>, []))) root" |
   379   unlink:
   380     "path = parent_path @ [name] \<Longrightarrow>
   381       access root parent_path uid {Writable} = Some (Env att parent) \<Longrightarrow>
   382       access root path uid {} = Some (Val plain) \<Longrightarrow>
   383       root \<midarrow>(Unlink uid path)\<rightarrow> update path None root" |
   384 
   385   mkdir:
   386     "path = parent_path @ [name] \<Longrightarrow>
   387       access root parent_path uid {Writable} = Some (Env att parent) \<Longrightarrow>
   388       access root path uid {} = None \<Longrightarrow>
   389       root \<midarrow>(Mkdir uid perms path)\<rightarrow> update path
   390         (Some (Env \<lparr>owner = uid, others = perms\<rparr> empty)) root" |
   391   rmdir:
   392     "path = parent_path @ [name] \<Longrightarrow>
   393       access root parent_path uid {Writable} = Some (Env att parent) \<Longrightarrow>
   394       access root path uid {} = Some (Env att' empty) \<Longrightarrow>
   395       root \<midarrow>(Rmdir uid path)\<rightarrow> update path None root" |
   396 
   397   readdir:
   398     "access root path uid {Readable} = Some (Env att dir) \<Longrightarrow>
   399       names = dom dir \<Longrightarrow>
   400       root \<midarrow>(Readdir uid names path)\<rightarrow> root"
   401 
   402 text {*
   403   \medskip Certainly, the above specification is central to the whole
   404   formal development.  Any of the results to be established later on
   405   are only meaningful to the outside world if this transition system
   406   provides an adequate model of real Unix systems.  This kind of
   407   ``reality-check'' of a formal model is the well-known problem of
   408   \emph{validation}.
   409 
   410   If in doubt, one may consider to compare our definition with the
   411   informal specifications given the corresponding Unix man pages, or
   412   even peek at an actual implementation such as
   413   \cite{Torvalds-et-al:Linux}.  Another common way to gain confidence
   414   into the formal model is to run simple simulations (see
   415   \secref{sec:unix-examples}), and check the results with that of
   416   experiments performed on a real Unix system.
   417 *}
   418 
   419 
   420 subsection {* Basic properties of single transitions \label{sec:unix-single-trans} *}
   421 
   422 text {*
   423   The transition system @{text "root \<midarrow>x\<rightarrow> root'"} defined above
   424   determines a unique result @{term root'} from given @{term root} and
   425   @{term x} (this is holds rather trivially, since there is even only
   426   one clause for each operation).  This uniqueness statement will
   427   simplify our subsequent development to some extent, since we only
   428   have to reason about a partial function rather than a general
   429   relation.
   430 *}
   431 
   432 theorem transition_uniq:
   433   assumes root': "root \<midarrow>x\<rightarrow> root'"
   434     and root'': "root \<midarrow>x\<rightarrow> root''"
   435   shows "root' = root''"
   436   using root''
   437 proof cases
   438   case read
   439   with root' show ?thesis by cases auto
   440 next
   441   case "write"
   442   with root' show ?thesis by cases auto
   443 next
   444   case chmod
   445   with root' show ?thesis by cases auto
   446 next
   447   case creat
   448   with root' show ?thesis by cases auto
   449 next
   450   case unlink
   451   with root' show ?thesis by cases auto
   452 next
   453   case mkdir
   454   with root' show ?thesis by cases auto
   455 next
   456   case rmdir
   457   with root' show ?thesis by cases auto
   458 next
   459   case readdir
   460   with root' show ?thesis by cases fastsimp+
   461 qed
   462 
   463 text {*
   464   \medskip Apparently, file-system transitions are \emph{type-safe} in
   465   the sense that the result of transforming an actual directory yields
   466   again a directory.
   467 *}
   468 
   469 theorem transition_type_safe:
   470   assumes tr: "root \<midarrow>x\<rightarrow> root'"
   471     and inv: "\<exists>att dir. root = Env att dir"
   472   shows "\<exists>att dir. root' = Env att dir"
   473 proof (cases "path_of x")
   474   case Nil
   475   with tr inv show ?thesis
   476     by cases (auto simp add: access_def split: if_splits)
   477 next
   478   case Cons
   479   from tr obtain opt where
   480       "root' = root \<or> root' = update (path_of x) opt root"
   481     by cases auto
   482   with inv Cons show ?thesis
   483     by (auto simp add: update_eq split: list.splits)
   484 qed
   485 
   486 text {*
   487   The previous result may be seen as the most basic invariant on the
   488   file-system state that is enforced by any proper kernel
   489   implementation.  So user processes --- being bound to the
   490   system-call interface --- may never mess up a file-system such that
   491   the root becomes a plain file instead of a directory, which would be
   492   a strange situation indeed.
   493 *}
   494 
   495 
   496 subsection {* Iterated transitions *}
   497 
   498 text {*
   499   Iterated system transitions via finite sequences of system
   500   operations are modeled inductively as follows.  In a sense, this
   501   relation describes the cumulative effect of the sequence of
   502   system-calls issued by a number of running processes over a finite
   503   amount of time.
   504 *}
   505 
   506 inductive
   507   transitions :: "file \<Rightarrow> operation list \<Rightarrow> file \<Rightarrow> bool"
   508     ("_ =_\<Rightarrow> _" [90, 1000, 90] 100)
   509 where
   510     nil: "root =[]\<Rightarrow> root"
   511   | cons: "root \<midarrow>x\<rightarrow> root' \<Longrightarrow> root' =xs\<Rightarrow> root'' \<Longrightarrow> root =(x # xs)\<Rightarrow> root''"
   512 
   513 text {*
   514   \medskip We establish a few basic facts relating iterated
   515   transitions with single ones, according to the recursive structure
   516   of lists.
   517 *}
   518 
   519 lemma transitions_nil_eq: "root =[]\<Rightarrow> root' = (root = root')"
   520 proof
   521   assume "root =[]\<Rightarrow> root'"
   522   then show "root = root'" by cases simp_all
   523 next
   524   assume "root = root'"
   525   then show "root =[]\<Rightarrow> root'" by (simp only: transitions.nil)
   526 qed
   527 
   528 lemma transitions_cons_eq:
   529   "root =(x # xs)\<Rightarrow> root'' = (\<exists>root'. root \<midarrow>x\<rightarrow> root' \<and> root' =xs\<Rightarrow> root'')"
   530 proof
   531   assume "root =(x # xs)\<Rightarrow> root''"
   532   then show "\<exists>root'. root \<midarrow>x\<rightarrow> root' \<and> root' =xs\<Rightarrow> root''"
   533     by cases auto
   534 next
   535   assume "\<exists>root'. root \<midarrow>x\<rightarrow> root' \<and> root' =xs\<Rightarrow> root''"
   536   then show "root =(x # xs)\<Rightarrow> root''"
   537     by (blast intro: transitions.cons)
   538 qed
   539 
   540 text {*
   541   The next two rules show how to ``destruct'' known transition
   542   sequences.  Note that the second one actually relies on the
   543   uniqueness property of the basic transition system (see
   544   \secref{sec:unix-single-trans}).
   545 *}
   546 
   547 lemma transitions_nilD: "root =[]\<Rightarrow> root' \<Longrightarrow> root' = root"
   548   by (simp add: transitions_nil_eq)
   549 
   550 lemma transitions_consD:
   551   assumes list: "root =(x # xs)\<Rightarrow> root''"
   552     and hd: "root \<midarrow>x\<rightarrow> root'"
   553   shows "root' =xs\<Rightarrow> root''"
   554 proof -
   555   from list obtain r' where r': "root \<midarrow>x\<rightarrow> r'" and root'': "r' =xs\<Rightarrow> root''"
   556     by cases simp_all
   557   from r' hd have "r' = root'" by (rule transition_uniq)
   558   with root'' show "root' =xs\<Rightarrow> root''" by simp
   559 qed
   560 
   561 text {*
   562   \medskip The following fact shows how an invariant @{term Q} of
   563   single transitions with property @{term P} may be transferred to
   564   iterated transitions.  The proof is rather obvious by rule induction
   565   over the definition of @{term "root =xs\<Rightarrow> root'"}.
   566 *}
   567 
   568 lemma transitions_invariant:
   569   assumes r: "\<And>r x r'. r \<midarrow>x\<rightarrow> r' \<Longrightarrow> Q r \<Longrightarrow>  P x \<Longrightarrow> Q r'"
   570     and trans: "root =xs\<Rightarrow> root'"
   571   shows "Q root \<Longrightarrow> \<forall>x \<in> set xs. P x \<Longrightarrow> Q root'"
   572   using trans
   573 proof induct
   574   case nil
   575   show ?case by (rule nil.prems)
   576 next
   577   case (cons root x root' xs root'')
   578   note P = `\<forall>x \<in> set (x # xs). P x`
   579   then have "P x" by simp
   580   with `root \<midarrow>x\<rightarrow> root'` and `Q root` have Q': "Q root'" by (rule r)
   581   from P have "\<forall>x \<in> set xs. P x" by simp
   582   with Q' show "Q root''" by (rule cons.hyps)
   583 qed
   584 
   585 text {*
   586   As an example of applying the previous result, we transfer the basic
   587   type-safety property (see \secref{sec:unix-single-trans}) from
   588   single transitions to iterated ones, which is a rather obvious
   589   result indeed.
   590 *}
   591 
   592 theorem transitions_type_safe:
   593   assumes "root =xs\<Rightarrow> root'"
   594     and "\<exists>att dir. root = Env att dir"
   595   shows "\<exists>att dir. root' = Env att dir"
   596   using transition_type_safe and assms
   597 proof (rule transitions_invariant)
   598   show "\<forall>x \<in> set xs. True" by blast
   599 qed
   600 
   601 
   602 section {* Executable sequences *}
   603 
   604 text {*
   605   An inductively defined relation such as the one of @{text "root \<midarrow>x\<rightarrow>
   606   root'"} (see \secref{sec:unix-syscall}) has two main aspects.  First
   607   of all, the resulting system admits a certain set of transition
   608   rules (introductions) as given in the specification.  Furthermore,
   609   there is an explicit least-fixed-point construction involved, which
   610   results in induction (and case-analysis) rules to eliminate known
   611   transitions in an exhaustive manner.
   612 
   613   \medskip Subsequently, we explore our transition system in an
   614   experimental style, mainly using the introduction rules with basic
   615   algebraic properties of the underlying structures.  The technique
   616   closely resembles that of Prolog combined with functional evaluation
   617   in a very simple manner.
   618 
   619   Just as the ``closed-world assumption'' is left implicit in Prolog,
   620   we do not refer to induction over the whole transition system here.
   621   So this is still purely positive reasoning about possible
   622   executions; exhaustive reasoning will be employed only later on (see
   623   \secref{sec:unix-bogosity}), when we shall demonstrate that certain
   624   behavior is \emph{not} possible.
   625 *}
   626 
   627 
   628 subsection {* Possible transitions *}
   629 
   630 text {*
   631   Rather obviously, a list of system operations can be executed within
   632   a certain state if there is a result state reached by an iterated
   633   transition.
   634 *}
   635 
   636 definition
   637   "can_exec root xs = (\<exists>root'. root =xs\<Rightarrow> root')"
   638 
   639 lemma can_exec_nil: "can_exec root []"
   640   unfolding can_exec_def by (blast intro: transitions.intros)
   641 
   642 lemma can_exec_cons:
   643     "root \<midarrow>x\<rightarrow> root' \<Longrightarrow> can_exec root' xs \<Longrightarrow> can_exec root (x # xs)"
   644   unfolding can_exec_def by (blast intro: transitions.intros)
   645 
   646 text {*
   647   \medskip In case that we already know that a certain sequence can be
   648   executed we may destruct it backwardly into individual transitions.
   649 *}
   650 
   651 lemma can_exec_snocD: "can_exec root (xs @ [y])
   652     \<Longrightarrow> \<exists>root' root''. root =xs\<Rightarrow> root' \<and> root' \<midarrow>y\<rightarrow> root''"
   653 proof (induct xs arbitrary: root)
   654   case Nil
   655   then show ?case
   656     by (simp add: can_exec_def transitions_nil_eq transitions_cons_eq)
   657 next
   658   case (Cons x xs)
   659   from `can_exec root ((x # xs) @ [y])` obtain r root'' where
   660       x: "root \<midarrow>x\<rightarrow> r" and
   661       xs_y: "r =(xs @ [y])\<Rightarrow> root''"
   662     by (auto simp add: can_exec_def transitions_nil_eq transitions_cons_eq)
   663   from xs_y Cons.hyps obtain root' r' where xs: "r =xs\<Rightarrow> root'" and y: "root' \<midarrow>y\<rightarrow> r'"
   664     unfolding can_exec_def by blast
   665   from x xs have "root =(x # xs)\<Rightarrow> root'"
   666     by (rule transitions.cons)
   667   with y show ?case by blast
   668 qed
   669 
   670 
   671 subsection {* Example executions \label{sec:unix-examples} *}
   672 
   673 text {*
   674   We are now ready to perform a few experiments within our formal
   675   model of Unix system-calls.  The common technique is to alternate
   676   introduction rules of the transition system (see
   677   \secref{sec:unix-trans}), and steps to solve any emerging side
   678   conditions using algebraic properties of the underlying file-system
   679   structures (see \secref{sec:unix-file-system}).
   680 *}
   681 
   682 lemmas eval = access_def init_def
   683 
   684 theorem "u \<in> users \<Longrightarrow> can_exec (init users)
   685     [Mkdir u perms [u, name]]"
   686   apply (rule can_exec_cons)
   687     -- {* back-chain @{term can_exec} (of @{term [source] Cons}) *}
   688   apply (rule mkdir)
   689     -- {* back-chain @{term Mkdir} *}
   690   apply (force simp add: eval)+
   691     -- {* solve preconditions of @{term Mkdir} *}
   692   apply (simp add: eval)
   693     -- {* peek at resulting dir (optional) *}
   694   txt {* @{subgoals [display]} *}
   695   apply (rule can_exec_nil)
   696     -- {* back-chain @{term can_exec} (of @{term [source] Nil}) *}
   697   done
   698 
   699 text {*
   700   By inspecting the result shown just before the final step above, we
   701   may gain confidence that our specification of Unix system-calls
   702   actually makes sense.  Further common errors are usually exhibited
   703   when preconditions of transition rules fail unexpectedly.
   704 
   705   \medskip Here are a few further experiments, using the same
   706   techniques as before.
   707 *}
   708 
   709 theorem "u \<in> users \<Longrightarrow> can_exec (init users)
   710    [Creat u perms [u, name],
   711     Unlink u [u, name]]"
   712   apply (rule can_exec_cons)
   713   apply (rule creat)
   714   apply (force simp add: eval)+
   715   apply (simp add: eval)
   716   apply (rule can_exec_cons)
   717   apply (rule unlink)
   718   apply (force simp add: eval)+
   719   apply (simp add: eval)
   720   txt {* peek at result: @{subgoals [display]} *}
   721   apply (rule can_exec_nil)
   722   done
   723 
   724 theorem "u \<in> users \<Longrightarrow> Writable \<in> perms\<^isub>1 \<Longrightarrow>
   725   Readable \<in> perms\<^isub>2 \<Longrightarrow> name\<^isub>3 \<noteq> name\<^isub>4 \<Longrightarrow>
   726   can_exec (init users)
   727    [Mkdir u perms\<^isub>1 [u, name\<^isub>1],
   728     Mkdir u' perms\<^isub>2 [u, name\<^isub>1, name\<^isub>2],
   729     Creat u' perms\<^isub>3 [u, name\<^isub>1, name\<^isub>2, name\<^isub>3],
   730     Creat u' perms\<^isub>3 [u, name\<^isub>1, name\<^isub>2, name\<^isub>4],
   731     Readdir u {name\<^isub>3, name\<^isub>4} [u, name\<^isub>1, name\<^isub>2]]"
   732   apply (rule can_exec_cons, rule transition.intros,
   733     (force simp add: eval)+, (simp add: eval)?)+
   734   txt {* peek at result: @{subgoals [display]} *}
   735   apply (rule can_exec_nil)
   736   done
   737 
   738 theorem "u \<in> users \<Longrightarrow> Writable \<in> perms\<^isub>1 \<Longrightarrow> Readable \<in> perms\<^isub>3 \<Longrightarrow>
   739   can_exec (init users)
   740    [Mkdir u perms\<^isub>1 [u, name\<^isub>1],
   741     Mkdir u' perms\<^isub>2 [u, name\<^isub>1, name\<^isub>2],
   742     Creat u' perms\<^isub>3 [u, name\<^isub>1, name\<^isub>2, name\<^isub>3],
   743     Write u' ''foo'' [u, name\<^isub>1, name\<^isub>2, name\<^isub>3],
   744     Read u ''foo'' [u, name\<^isub>1, name\<^isub>2, name\<^isub>3]]"
   745   apply (rule can_exec_cons, rule transition.intros,
   746     (force simp add: eval)+, (simp add: eval)?)+
   747   txt {* peek at result: @{subgoals [display]} *}
   748   apply (rule can_exec_nil)
   749   done
   750 
   751 
   752 section {* Odd effects --- treated formally \label{sec:unix-bogosity} *}
   753 
   754 text {*
   755   We are now ready to give a completely formal treatment of the
   756   slightly odd situation discussed in the introduction (see
   757   \secref{sec:unix-intro}): the file-system can easily reach a state
   758   where a user is unable to remove his very own directory, because it
   759   is still populated by items placed there by another user in an
   760   uncouth manner.
   761 *}
   762 
   763 subsection {* The general procedure \label{sec:unix-inv-procedure} *}
   764 
   765 text {*
   766   The following theorem expresses the general procedure we are
   767   following to achieve the main result.
   768 *}
   769 
   770 theorem general_procedure:
   771   assumes cannot_y: "\<And>r r'. Q r \<Longrightarrow> r \<midarrow>y\<rightarrow> r' \<Longrightarrow> False"
   772     and init_inv: "\<And>root. init users =bs\<Rightarrow> root \<Longrightarrow> Q root"
   773     and preserve_inv: "\<And>r x r'. r \<midarrow>x\<rightarrow> r' \<Longrightarrow> Q r \<Longrightarrow> P x \<Longrightarrow> Q r'"
   774     and init_result: "init users =bs\<Rightarrow> root"
   775   shows "\<not> (\<exists>xs. (\<forall>x \<in> set xs. P x) \<and> can_exec root (xs @ [y]))"
   776 proof -
   777   {
   778     fix xs
   779     assume Ps: "\<forall>x \<in> set xs. P x"
   780     assume can_exec: "can_exec root (xs @ [y])"
   781     then obtain root' root'' where
   782         xs: "root =xs\<Rightarrow> root'" and y: "root' \<midarrow>y\<rightarrow> root''"
   783       by (blast dest: can_exec_snocD)
   784     from init_result have "Q root" by (rule init_inv)
   785     from preserve_inv xs this Ps have "Q root'"
   786       by (rule transitions_invariant)
   787     from this y have False by (rule cannot_y)
   788   }
   789   then show ?thesis by blast
   790 qed
   791 
   792 text {*
   793   Here @{prop "P x"} refers to the restriction on file-system
   794   operations that are admitted after having reached the critical
   795   configuration; according to the problem specification this will
   796   become @{prop "uid_of x = user\<^isub>1"} later on.  Furthermore,
   797   @{term y} refers to the operations we claim to be impossible to
   798   perform afterwards, we will take @{term Rmdir} later.  Moreover
   799   @{term Q} is a suitable (auxiliary) invariant over the file-system;
   800   choosing @{term Q} adequately is very important to make the proof
   801   work (see \secref{sec:unix-inv-lemmas}).
   802 *}
   803 
   804 
   805 subsection {* The particular situation *}
   806 
   807 text {*
   808   We introduce a few global declarations and axioms to describe our
   809   particular situation considered here.  Thus we avoid excessive use
   810   of local parameters in the subsequent development.
   811 *}
   812 
   813 locale situation =
   814   fixes users :: "uid set"
   815     and user\<^isub>1 :: uid
   816     and user\<^isub>2 :: uid
   817     and name\<^isub>1 :: name
   818     and name\<^isub>2 :: name
   819     and name\<^isub>3 :: name
   820     and perms\<^isub>1 :: perms
   821     and perms\<^isub>2 :: perms
   822   assumes user\<^isub>1_known: "user\<^isub>1 \<in> users"
   823     and user\<^isub>1_not_root: "user\<^isub>1 \<noteq> 0"
   824     and users_neq: "user\<^isub>1 \<noteq> user\<^isub>2"
   825     and perms\<^isub>1_writable: "Writable \<in> perms\<^isub>1"
   826     and perms\<^isub>2_not_writable: "Writable \<notin> perms\<^isub>2"
   827   notes facts = user\<^isub>1_known user\<^isub>1_not_root users_neq
   828     perms\<^isub>1_writable perms\<^isub>2_not_writable
   829 begin
   830 
   831 definition
   832   "bogus =
   833      [Mkdir user\<^isub>1 perms\<^isub>1 [user\<^isub>1, name\<^isub>1],
   834       Mkdir user\<^isub>2 perms\<^isub>2 [user\<^isub>1, name\<^isub>1, name\<^isub>2],
   835       Creat user\<^isub>2 perms\<^isub>2 [user\<^isub>1, name\<^isub>1, name\<^isub>2, name\<^isub>3]]"
   836 definition
   837   "bogus_path = [user\<^isub>1, name\<^isub>1, name\<^isub>2]"
   838 
   839 text {*
   840   The @{term bogus} operations are the ones that lead into the uncouth
   841   situation; @{term bogus_path} is the key position within the
   842   file-system where things go awry.
   843 *}
   844 
   845 
   846 subsection {* Invariance lemmas \label{sec:unix-inv-lemmas} *}
   847 
   848 text {*
   849   The following invariant over the root file-system describes the
   850   bogus situation in an abstract manner: located at a certain @{term
   851   path} within the file-system is a non-empty directory that is
   852   neither owned and nor writable by @{term user\<^isub>1}.
   853 *}
   854 
   855 definition
   856   "invariant root path =
   857     (\<exists>att dir.
   858       access root path user\<^isub>1 {} = Some (Env att dir) \<and> dir \<noteq> empty \<and>
   859       user\<^isub>1 \<noteq> owner att \<and>
   860       access root path user\<^isub>1 {Writable} = None)"
   861 
   862 text {*
   863   Following the general procedure (see
   864   \secref{sec:unix-inv-procedure}) we will now establish the three key
   865   lemmas required to yield the final result.
   866 
   867   \begin{enumerate}
   868 
   869   \item The invariant is sufficiently strong to entail the
   870   pathological case that @{term user\<^isub>1} is unable to remove the
   871   (owned) directory at @{term "[user\<^isub>1, name\<^isub>1]"}.
   872 
   873   \item The invariant does hold after having executed the @{term
   874   bogus} list of operations (starting with an initial file-system
   875   configuration).
   876 
   877   \item The invariant is preserved by any file-system operation
   878   performed by @{term user\<^isub>1} alone, without any help by other
   879   users.
   880 
   881   \end{enumerate}
   882 
   883   As the invariant appears both as assumptions and conclusions in the
   884   course of proof, its formulation is rather critical for the whole
   885   development to work out properly.  In particular, the third step is
   886   very sensitive to the invariant being either too strong or too weak.
   887   Moreover the invariant has to be sufficiently abstract, lest the
   888   proof become cluttered by confusing detail.
   889 
   890   \medskip The first two lemmas are technically straight forward ---
   891   we just have to inspect rather special cases.
   892 *}
   893 
   894 lemma cannot_rmdir:
   895   assumes inv: "invariant root bogus_path"
   896     and rmdir: "root \<midarrow>(Rmdir user\<^isub>1 [user\<^isub>1, name\<^isub>1])\<rightarrow> root'"
   897   shows False
   898 proof -
   899   from inv obtain "file" where "access root bogus_path user\<^isub>1 {} = Some file"
   900     unfolding invariant_def by blast
   901   moreover
   902   from rmdir obtain att where
   903       "access root [user\<^isub>1, name\<^isub>1] user\<^isub>1 {} = Some (Env att empty)"
   904     by cases auto
   905   then have "access root ([user\<^isub>1, name\<^isub>1] @ [name\<^isub>2]) user\<^isub>1 {} = empty name\<^isub>2"
   906     by (simp only: access_empty_lookup lookup_append_some) simp
   907   ultimately show False by (simp add: bogus_path_def)
   908 qed
   909 
   910 lemma
   911   assumes init: "init users =bogus\<Rightarrow> root"
   912   notes eval = facts access_def init_def
   913   shows init_invariant: "invariant root bogus_path"
   914   using init
   915   apply (unfold bogus_def bogus_path_def)
   916   apply (drule transitions_consD, rule transition.intros,
   917       (force simp add: eval)+, (simp add: eval)?)+
   918     -- "evaluate all operations"
   919   apply (drule transitions_nilD)
   920     -- "reach final result"
   921   apply (simp add: invariant_def eval)
   922     -- "check the invariant"
   923   done
   924 
   925 text {*
   926   \medskip At last we are left with the main effort to show that the
   927   ``bogosity'' invariant is preserved by any file-system operation
   928   performed by @{term user\<^isub>1} alone.  Note that this holds for
   929   any @{term path} given, the particular @{term bogus_path} is not
   930   required here.
   931 *}
   932 
   933 lemma preserve_invariant:
   934   assumes tr: "root \<midarrow>x\<rightarrow> root'"
   935     and inv: "invariant root path"
   936     and uid: "uid_of x = user\<^isub>1"
   937   shows "invariant root' path"
   938 proof -
   939   from inv obtain att dir where
   940       inv1: "access root path user\<^isub>1 {} = Some (Env att dir)" and
   941       inv2: "dir \<noteq> empty" and
   942       inv3: "user\<^isub>1 \<noteq> owner att" and
   943       inv4: "access root path user\<^isub>1 {Writable} = None"
   944     by (auto simp add: invariant_def)
   945 
   946   from inv1 have lookup: "lookup root path = Some (Env att dir)"
   947     by (simp only: access_empty_lookup)
   948 
   949   from inv1 inv3 inv4 and user\<^isub>1_not_root
   950   have not_writable: "Writable \<notin> others att"
   951     by (auto simp add: access_def split: option.splits)
   952 
   953   show ?thesis
   954   proof cases
   955     assume "root' = root"
   956     with inv show "invariant root' path" by (simp only:)
   957   next
   958     assume changed: "root' \<noteq> root"
   959     with tr obtain opt where root': "root' = update (path_of x) opt root"
   960       by cases auto
   961     show ?thesis
   962     proof (rule prefix_cases)
   963       assume "path_of x \<parallel> path"
   964       with inv root'
   965       have "\<And>perms. access root' path user\<^isub>1 perms = access root path user\<^isub>1 perms"
   966         by (simp only: access_update_other)
   967       with inv show "invariant root' path"
   968         by (simp only: invariant_def)
   969     next
   970       assume "path_of x \<le> path"
   971       then obtain ys where path: "path = path_of x @ ys" ..
   972 
   973       show ?thesis
   974       proof (cases ys)
   975         assume "ys = []"
   976         with tr uid inv2 inv3 lookup changed path and user\<^isub>1_not_root
   977         have False
   978           by cases (auto simp add: access_empty_lookup dest: access_some_lookup)
   979         then show ?thesis ..
   980       next
   981         fix z zs assume ys: "ys = z # zs"
   982         have "lookup root' path = lookup root path"
   983         proof -
   984           from inv2 lookup path ys
   985           have look: "lookup root (path_of x @ z # zs) = Some (Env att dir)"
   986             by (simp only:)
   987           then obtain att' dir' file' where
   988               look': "lookup root (path_of x) = Some (Env att' dir')" and
   989               dir': "dir' z = Some file'" and
   990               file': "lookup file' zs = Some (Env att dir)"
   991             by (blast dest: lookup_some_upper)
   992 
   993           from tr uid changed look' dir' obtain att'' where
   994               look'': "lookup root' (path_of x) = Some (Env att'' dir')"
   995             by cases (auto simp add: access_empty_lookup lookup_update_some
   996               dest: access_some_lookup)
   997           with dir' file' have "lookup root' (path_of x @ z # zs) =
   998               Some (Env att dir)"
   999             by (simp add: lookup_append_some)
  1000           with look path ys show ?thesis
  1001             by simp
  1002         qed
  1003         with inv show "invariant root' path"
  1004           by (simp only: invariant_def access_def)
  1005       qed
  1006     next
  1007       assume "path < path_of x"
  1008       then obtain y ys where path: "path_of x = path @ y # ys" ..
  1009 
  1010       obtain dir' where
  1011         lookup': "lookup root' path = Some (Env att dir')" and
  1012         inv2': "dir' \<noteq> empty"
  1013       proof (cases ys)
  1014         assume "ys = []"
  1015         with path have parent: "path_of x = path @ [y]" by simp
  1016         with tr uid inv4 changed obtain "file" where
  1017             "root' = update (path_of x) (Some file) root"
  1018           by cases auto
  1019         with lookup parent have "lookup root' path = Some (Env att (dir(y\<mapsto>file)))"
  1020           by (simp only: update_append_some update_cons_nil_env)
  1021         moreover have "dir(y\<mapsto>file) \<noteq> empty" by simp
  1022         ultimately show ?thesis ..
  1023       next
  1024         fix z zs assume ys: "ys = z # zs"
  1025         with lookup root' path
  1026         have "lookup root' path = Some (update (y # ys) opt (Env att dir))"
  1027           by (simp only: update_append_some)
  1028         also obtain file' where
  1029           "update (y # ys) opt (Env att dir) = Env att (dir(y\<mapsto>file'))"
  1030         proof -
  1031           have "dir y \<noteq> None"
  1032           proof -
  1033             have "dir y = lookup (Env att dir) [y]"
  1034               by (simp split: option.splits)
  1035             also from lookup have "\<dots> = lookup root (path @ [y])"
  1036               by (simp only: lookup_append_some)
  1037             also have "\<dots> \<noteq> None"
  1038             proof -
  1039               from ys obtain us u where rev_ys: "ys = us @ [u]"
  1040                 by (cases ys rule: rev_cases) fastsimp+
  1041               with tr path
  1042               have "lookup root ((path @ [y]) @ (us @ [u])) \<noteq> None \<or>
  1043                   lookup root ((path @ [y]) @ us) \<noteq> None"
  1044                 by cases (auto dest: access_some_lookup)
  1045               then show ?thesis 
  1046                 by (simp, blast dest!: lookup_some_append)
  1047             qed
  1048             finally show ?thesis .
  1049           qed
  1050           with ys show ?thesis using that by auto
  1051         qed
  1052         also have "dir(y\<mapsto>file') \<noteq> empty" by simp
  1053         ultimately show ?thesis ..
  1054       qed
  1055 
  1056       from lookup' have inv1': "access root' path user\<^isub>1 {} = Some (Env att dir')"
  1057         by (simp only: access_empty_lookup)
  1058 
  1059       from inv3 lookup' and not_writable user\<^isub>1_not_root
  1060       have "access root' path user\<^isub>1 {Writable} = None"
  1061         by (simp add: access_def)
  1062       with inv1' inv2' inv3 show ?thesis unfolding invariant_def by blast
  1063     qed
  1064   qed
  1065 qed
  1066 
  1067 
  1068 subsection {* Putting it all together \label{sec:unix-main-result} *}
  1069 
  1070 text {*
  1071   The main result is now imminent, just by composing the three
  1072   invariance lemmas (see \secref{sec:unix-inv-lemmas}) according the the
  1073   overall procedure (see \secref{sec:unix-inv-procedure}).
  1074 *}
  1075 
  1076 corollary
  1077   assumes bogus: "init users =bogus\<Rightarrow> root"
  1078   shows "\<not> (\<exists>xs. (\<forall>x \<in> set xs. uid_of x = user\<^isub>1) \<and>
  1079     can_exec root (xs @ [Rmdir user\<^isub>1 [user\<^isub>1, name\<^isub>1]]))"
  1080 proof -
  1081   from cannot_rmdir init_invariant preserve_invariant
  1082     and bogus show ?thesis by (rule general_procedure)
  1083 qed
  1084 
  1085 end
  1086 
  1087 end