doc-src/TutorialI/Inductive/AB.thy
 changeset 10236 7626cb4e1407 parent 10225 b9fd52525b69 child 10237 875bf54b5d74
     1.1 --- a/doc-src/TutorialI/Inductive/AB.thy	Tue Oct 17 10:45:51 2000 +0200
1.2 +++ b/doc-src/TutorialI/Inductive/AB.thy	Tue Oct 17 13:28:57 2000 +0200
1.3 @@ -2,122 +2,307 @@
1.4
1.5  section{*A context free grammar*}
1.6
1.7 +text{*
1.8 +Grammars are nothing but shorthands for inductive definitions of nonterminals
1.9 +which represent sets of strings. For example, the production
1.10 +$A \to B c$ is short for
1.11 +$w \in B \Longrightarrow wc \in A$.
1.12 +This section demonstrates this idea with a standard example
1.13 +\cite[p.\ 81]{HopcroftUllman}, a grammar for generating all words with an
1.14 +equal number of $a$'s and $b$'s:
1.15 +\begin{eqnarray}
1.16 +S &\to& \epsilon \mid b A \mid a B \nonumber\\
1.17 +A &\to& a S \mid b A A \nonumber\\
1.18 +B &\to& b S \mid a B B \nonumber
1.19 +\end{eqnarray}
1.20 +At the end we say a few words about the relationship of the formalization
1.21 +and the text in the book~\cite[p.\ 81]{HopcroftUllman}.
1.22 +
1.23 +We start by fixing the alphabet, which consists only of @{term a}'s
1.24 +and @{term b}'s:
1.25 +*}
1.26 +
1.27  datatype alfa = a | b;
1.28
1.29 -lemma [simp]: "(x ~= a) = (x = b) & (x ~= b) = (x = a)";
1.30 +text{*\noindent
1.31 +For convenience we include the following easy lemmas as simplification rules:
1.32 +*}
1.33 +
1.34 +lemma [simp]: "(x \<noteq> a) = (x = b) \<and> (x \<noteq> b) = (x = a)";
1.35  apply(case_tac x);
1.36  by(auto);
1.37
1.38 +text{*\noindent
1.39 +Words over this alphabet are of type @{typ"alfa list"}, and
1.40 +the three nonterminals are declared as sets of such words:
1.41 +*}
1.42 +
1.43  consts S :: "alfa list set"
1.44         A :: "alfa list set"
1.45         B :: "alfa list set";
1.46
1.47 +text{*\noindent
1.48 +The above productions are recast as a \emph{simultaneous} inductive
1.49 +definition of @{term S}, @{term A} and @{term B}:
1.50 +*}
1.51 +
1.52  inductive S A B
1.53  intros
1.54 -"[] : S"
1.55 -"w : A ==> b#w : S"
1.56 -"w : B ==> a#w : S"
1.57 +  "[] \<in> S"
1.58 +  "w \<in> A \<Longrightarrow> b#w \<in> S"
1.59 +  "w \<in> B \<Longrightarrow> a#w \<in> S"
1.60
1.61 -"w : S ==> a#w : A"
1.62 -"[| v:A; w:A |] ==> b#v@w : A"
1.63 +  "w \<in> S        \<Longrightarrow> a#w   \<in> A"
1.64 +  "\<lbrakk> v\<in>A; w\<in>A \<rbrakk> \<Longrightarrow> b#v@w \<in> A"
1.65 +
1.66 +  "w \<in> S            \<Longrightarrow> b#w   \<in> B"
1.67 +  "\<lbrakk> v \<in> B; w \<in> B \<rbrakk> \<Longrightarrow> a#v@w \<in> B";
1.68
1.69 -"w : S ==> b#w : B"
1.70 -"[| v:B; w:B |] ==> a#v@w : B";
1.71 +text{*\noindent
1.72 +First we show that all words in @{term S} contain the same number of @{term
1.73 +a}'s and @{term b}'s. Since the definition of @{term S} is by simultaneous
1.74 +induction, so is this proof: we show at the same time that all words in
1.75 +@{term A} contain one more @{term a} than @{term b} and all words in @{term
1.76 +B} contain one more @{term b} than @{term a}.
1.77 +*}
1.78
1.79 -thm S_A_B.induct[no_vars];
1.80 +lemma correctness:
1.81 +  "(w \<in> S \<longrightarrow> size[x\<in>w. x=a] = size[x\<in>w. x=b])     \<and>
1.82 +    (w \<in> A \<longrightarrow> size[x\<in>w. x=a] = size[x\<in>w. x=b] + 1) \<and>
1.83 +    (w \<in> B \<longrightarrow> size[x\<in>w. x=b] = size[x\<in>w. x=a] + 1)"
1.84
1.85 -lemma "(w : S --> size[x:w. x=a] = size[x:w. x=b]) &
1.86 -       (w : A --> size[x:w. x=a] = size[x:w. x=b] + 1) &
1.87 -       (w : B --> size[x:w. x=b] = size[x:w. x=a] + 1)";
1.88 +txt{*\noindent
1.89 +These propositions are expressed with the help of the predefined @{term
1.90 +filter} function on lists, which has the convenient syntax @{term"[x\<in>xs. P
1.91 +x]"}, the list of all elements @{term x} in @{term xs} such that @{prop"P x"}
1.92 +holds. The length of a list is usually written @{term length}, and @{term
1.93 +size} is merely a shorthand.
1.94 +
1.95 +The proof itself is by rule induction and afterwards automatic:
1.96 +*}
1.97 +
1.98  apply(rule S_A_B.induct);
1.99  by(auto);
1.100
1.101 -lemma intermed_val[rule_format(no_asm)]:
1.102 - "(!i<n. abs(f(i+1) - f i) <= #1) -->
1.103 -  f 0 <= k & k <= f n --> (? i <= n. f i = (k::int))";
1.104 -apply(induct n);
1.105 - apply(simp);
1.106 - apply(arith);
1.107 -apply(rule);
1.108 -apply(erule impE);
1.109 - apply(simp);
1.110 -apply(rule);
1.111 -apply(erule_tac x = n in allE);
1.112 -apply(simp);
1.113 -apply(case_tac "k = f(n+1)");
1.114 - apply(force);
1.115 -apply(erule impE);
1.117 - apply(arith);
1.118 -by(blast intro:le_SucI);
1.119 +text{*\noindent
1.120 +This may seem surprising at first, and is indeed an indication of the power
1.121 +of inductive definitions. But it is also quite straightforward. For example,
1.122 +consider the production $A \to b A A$: if $v,w \in A$ and the elements of $A$
1.123 +contain one more $a$ than $b$, then $bvw$ must again contain one more $a$
1.124 +than $b$.
1.125 +
1.126 +As usual, the correctness of syntactic descriptions is easy, but completeness
1.127 +is hard: does @{term S} contain \emph{all} words with an equal number of
1.128 +@{term a}'s and @{term b}'s? It turns out that this proof requires the
1.129 +following little lemma: every string with two more @{term a}'s than @{term
1.130 +b}'s can be cut somewhere such that each half has one more @{term a} than
1.131 +@{term b}. This is best seen by imagining counting the difference between the
1.132 +number of @{term a}'s and @{term b}'s starting at the left end of the
1.133 +word. We start at 0 and end (at the right end) with 2. Since each move to the
1.134 +right increases or decreases the difference by 1, we must have passed through
1.135 +1 on our way from 0 to 2. Formally, we appeal to the following discrete
1.136 +intermediate value theorem @{thm[source]nat0_intermed_int_val}
1.137 +@{thm[display]nat0_intermed_int_val[no_vars]}
1.138 +where @{term f} is of type @{typ"nat \<Rightarrow> int"}, @{typ int} are the integers,
1.139 +@{term abs} is the absolute value function, and @{term"#1::int"} is the
1.140 +integer 1 (see \S\ref{sec:int}).
1.141
1.142 -lemma step1: "ALL i < size w.
1.143 -  abs((int(size[x:take (i+1) w . P x]) - int(size[x:take (i+1) w . ~P x])) -
1.144 -      (int(size[x:take i w . P x]) - int(size[x:take i w . ~P x]))) <= #1";
1.145 +First we show that our specific function, the difference between the
1.146 +numbers of @{term a}'s and @{term b}'s, does indeed only change by 1 in every
1.147 +move to the right. At this point we also start generalizing from @{term a}'s
1.148 +and @{term b}'s to an arbitrary property @{term P}. Otherwise we would have
1.149 +to prove the desired lemma twice, once as stated above and once with the
1.150 +roles of @{term a}'s and @{term b}'s interchanged.
1.151 +*}
1.152 +
1.153 +lemma step1: "\<forall>i < size w.
1.154 +  abs((int(size[x\<in>take (i+1) w.  P x]) -
1.155 +       int(size[x\<in>take (i+1) w. \<not>P x]))
1.156 +      -
1.157 +      (int(size[x\<in>take i w.  P x]) -
1.158 +       int(size[x\<in>take i w. \<not>P x]))) <= #1";
1.159 +
1.160 +txt{*\noindent
1.161 +The lemma is a bit hard to read because of the coercion function
1.162 +@{term[source]"int::nat \<Rightarrow> int"}. It is required because @{term size} returns
1.163 +a natural number, but @{text"-"} on @{typ nat} will do the wrong thing.
1.164 +Function @{term take} is predefined and @{term"take i xs"} is the prefix of
1.165 +length @{term i} of @{term xs}; below we also need @{term"drop i xs"}, which
1.166 +is what remains after that prefix has been dropped from @{term xs}.
1.167 +
1.168 +The proof is by induction on @{term w}, with a trivial base case, and a not
1.169 +so trivial induction step. Since it is essentially just arithmetic, we do not
1.170 +discuss it.
1.171 +*}
1.172 +
1.173  apply(induct w);
1.174   apply(simp);
1.176 -apply(clarsimp);
1.177 -apply(rule conjI);
1.178 - apply(clarify);
1.179 - apply(erule allE);
1.180 - apply(erule impE);
1.181 -  apply(assumption);
1.182 - apply(arith);
1.183 -apply(clarify);
1.184 -apply(erule allE);
1.185 -apply(erule impE);
1.186 - apply(assumption);
1.187 -by(arith);
1.188 +by(force simp add:zabs_def take_Cons split:nat.split if_splits);
1.189
1.190 +text{*
1.191 +Finally we come to the above mentioned lemma about cutting a word with two
1.192 +more elements of one sort than of the other sort into two halves:
1.193 +*}
1.194
1.195  lemma part1:
1.196 - "size[x:w. P x] = Suc(Suc(size[x:w. ~P x])) ==>
1.197 -  EX i<=size w. size[x:take i w. P x] = size[x:take i w. ~P x]+1";
1.198 -apply(insert intermed_val[OF step1, of "P" "w" "#1"]);
1.199 -apply(simp);
1.200 -apply(erule exE);
1.201 -apply(rule_tac x=i in exI);
1.202 -apply(simp);
1.203 -apply(rule int_int_eq[THEN iffD1]);
1.204 -apply(simp);
1.205 -by(arith);
1.206 + "size[x\<in>w. P x] = size[x\<in>w. \<not>P x]+2 \<Longrightarrow>
1.207 +  \<exists>i\<le>size w. size[x\<in>take i w. P x] = size[x\<in>take i w. \<not>P x]+1";
1.208 +
1.209 +txt{*\noindent
1.210 +This is proved with the help of the intermediate value theorem, instantiated
1.211 +appropriately and with its first premise disposed of by lemma
1.212 +@{thm[source]step1}.
1.213 +*}
1.214 +
1.215 +apply(insert nat0_intermed_int_val[OF step1, of "P" "w" "#1"]);
1.216 +apply simp;
1.217 +by(simp del:int_Suc add:zdiff_eq_eq sym[OF int_Suc]);
1.218 +
1.219 +text{*\noindent
1.220 +The additional lemmas are needed to mediate between @{typ nat} and @{typ int}.
1.221 +
1.222 +Lemma @{thm[source]part1} tells us only about the prefix @{term"take i w"}.
1.223 +The suffix @{term"drop i w"} is dealt with in the following easy lemma:
1.224 +*}
1.225 +
1.226
1.227  lemma part2:
1.228 -"[| size[x:take i xs @ drop i xs. P x] = size[x:take i xs @ drop i xs. ~P x]+2;
1.229 -    size[x:take i xs. P x] = size[x:take i xs. ~P x]+1 |]
1.230 - ==> size[x:drop i xs. P x] = size[x:drop i xs. ~P x]+1";
1.231 +  "\<lbrakk>size[x\<in>take i w @ drop i w. P x] =
1.232 +    size[x\<in>take i w @ drop i w. \<not>P x]+2;
1.233 +    size[x\<in>take i w. P x] = size[x\<in>take i w. \<not>P x]+1\<rbrakk>
1.234 +   \<Longrightarrow> size[x\<in>drop i w. P x] = size[x\<in>drop i w. \<not>P x]+1";
1.235  by(simp del:append_take_drop_id);
1.236
1.237 -lemmas [simp] = S_A_B.intros;
1.238 +text{*\noindent
1.239 +Lemma @{thm[source]append_take_drop_id}, @{thm append_take_drop_id[no_vars]},
1.240 +which is generally useful, needs to be disabled for once.
1.241 +
1.242 +To dispose of trivial cases automatically, the rules of the inductive
1.243 +definition are declared simplification rules:
1.244 +*}
1.245 +
1.246 +declare S_A_B.intros[simp];
1.247 +
1.248 +text{*\noindent
1.249 +This could have been done earlier but was not necessary so far.
1.250 +
1.251 +The completeness theorem tells us that if a word has the same number of
1.252 +@{term a}'s and @{term b}'s, then it is in @{term S}, and similarly and
1.253 +simultaneously for @{term A} and @{term B}:
1.254 +*}
1.255 +
1.256 +theorem completeness:
1.257 +  "(size[x\<in>w. x=a] = size[x\<in>w. x=b]     \<longrightarrow> w \<in> S) \<and>
1.258 +    (size[x\<in>w. x=a] = size[x\<in>w. x=b] + 1 \<longrightarrow> w \<in> A) \<and>
1.259 +    (size[x\<in>w. x=b] = size[x\<in>w. x=a] + 1 \<longrightarrow> w \<in> B)";
1.260 +
1.261 +txt{*\noindent
1.262 +The proof is by induction on @{term w}. Structural induction would fail here
1.263 +because, as we can see from the grammar, we need to make bigger steps than
1.264 +merely appending a single letter at the front. Hence we induct on the length
1.265 +of @{term w}, using the induction rule @{thm[source]length_induct}:
1.266 +*}
1.267
1.268 -lemma "(size[x:w. x=a] = size[x:w. x=b] --> w : S) &
1.269 -       (size[x:w. x=a] = size[x:w. x=b] + 1 --> w : A) &
1.270 -       (size[x:w. x=b] = size[x:w. x=a] + 1 --> w : B)";
1.271  apply(induct_tac w rule: length_induct);
1.272 -apply(case_tac "xs");
1.273 - apply(simp);
1.274 -apply(simp);
1.275 +(*<*)apply(rename_tac w)(*>*)
1.276 +
1.277 +txt{*\noindent
1.278 +The @{text rule} parameter tells @{text induct_tac} explicitly which induction
1.279 +rule to use. For details see \S\ref{sec:complete-ind} below.
1.280 +In this case the result is that we may assume the lemma already
1.281 +holds for all words shorter than @{term w}.
1.282 +
1.283 +The proof continues with a case distinction on @{term w},
1.284 +i.e.\ if @{term w} is empty or not.
1.285 +*}
1.286 +
1.287 +apply(case_tac w);
1.288 + apply(simp_all);
1.289 +(*<*)apply(rename_tac x v)(*>*)
1.290 +
1.291 +txt{*\noindent
1.292 +Simplification disposes of the base case and leaves only two step
1.293 +cases to be proved:
1.294 +if @{prop"w = a#v"} and @{prop"size[x\<in>v. x=a] = size[x\<in>v. x=b]+2"} then
1.295 +@{prop"b#v \<in> A"}, and similarly for @{prop"w = b#v"}.
1.296 +We only consider the first case in detail.
1.297 +
1.298 +After breaking the conjunction up into two cases, we can apply
1.299 +@{thm[source]part1} to the assumption that @{term w} contains two more @{term
1.300 +a}'s than @{term b}'s.
1.301 +*}
1.302 +
1.303  apply(rule conjI);
1.304   apply(clarify);
1.305 - apply(frule part1[of "%x. x=a", simplified]);
1.306 + apply(frule part1[of "\<lambda>x. x=a", simplified]);
1.307   apply(erule exE);
1.308   apply(erule conjE);
1.309 - apply(drule part2[of "%x. x=a", simplified]);
1.310 +
1.311 +txt{*\noindent
1.312 +This yields an index @{prop"i \<le> length v"} such that
1.313 +@{prop"length [x\<in>take i v . x = a] = length [x\<in>take i v . x = b] + 1"}.
1.314 +With the help of @{thm[source]part2} it follows that
1.315 +@{prop"length [x\<in>drop i v . x = a] = length [x\<in>drop i v . x = b] + 1"}.
1.316 +*}
1.317 +
1.318 + apply(drule part2[of "\<lambda>x. x=a", simplified]);
1.319    apply(assumption);
1.320 - apply(rule_tac n1=i and t=list in subst[OF append_take_drop_id]);
1.321 +
1.322 +txt{*\noindent
1.323 +Now it is time to decompose @{term v} in the conclusion @{prop"b#v \<in> A"}
1.324 +into @{term"take i v @ drop i v"},
1.325 +after which the appropriate rule of the grammar reduces the goal
1.326 +to the two subgoals @{prop"take i v \<in> A"} and @{prop"drop i v \<in> A"}:
1.327 +*}
1.328 +
1.329 + apply(rule_tac n1=i and t=v in subst[OF append_take_drop_id]);
1.330   apply(rule S_A_B.intros);
1.332 +
1.333 +txt{*\noindent
1.334 +Both subgoals follow from the induction hypothesis because both @{term"take i
1.335 +v"} and @{term"drop i v"} are shorter than @{term w}:
1.336 +*}
1.337 +
1.338 +  apply(force simp add: min_less_iff_disj);
1.340 +
1.341 +txt{*\noindent
1.342 +Note that the variables @{term n1} and @{term t} referred to in the
1.343 +substitution step above come from the derived theorem @{text"subst[OF
1.344 +append_take_drop_id]"}.
1.345 +
1.346 +The case @{prop"w = b#v"} is proved completely analogously:
1.347 +*}
1.348 +
1.349  apply(clarify);
1.350 -apply(frule part1[of "%x. x=b", simplified]);
1.351 +apply(frule part1[of "\<lambda>x. x=b", simplified]);
1.352  apply(erule exE);
1.353  apply(erule conjE);
1.354 -apply(drule part2[of "%x. x=b", simplified]);
1.355 +apply(drule part2[of "\<lambda>x. x=b", simplified]);
1.356   apply(assumption);
1.357 -apply(rule_tac n1=i and t=list in subst[OF append_take_drop_id]);
1.358 +apply(rule_tac n1=i and t=v in subst[OF append_take_drop_id]);
1.359  apply(rule S_A_B.intros);
1.362
1.363 +text{*
1.364 +We conclude this section with a comparison of the above proof and the one
1.365 +in the textbook \cite[p.\ 81]{HopcroftUllman}. For a start, the textbook
1.366 +grammar, for no good reason, excludes the empty word, which complicates
1.367 +matters just a little bit because we now have 8 instead of our 7 productions.
1.368 +
1.369 +More importantly, the proof itself is different: rather than separating the
1.370 +two directions, they perform one induction on the length of a word. This
1.371 +deprives them of the beauty of rule induction and in the easy direction
1.372 +(correctness) their reasoning is more detailed than our @{text auto}. For the
1.373 +hard part (completeness), they consider just one of the cases that our @{text
1.374 +simp_all} disposes of automatically. Then they conclude the proof by saying
1.375 +about the remaining cases: ``We do this in a manner similar to our method of
1.376 +proof for part (1); this part is left to the reader''. But this is precisely
1.377 +the part that requires the intermediate value theorem and thus is not at all
1.378 +similar to the other cases (which are automatic in Isabelle). We conclude