src/HOL/Number_Theory/Eratosthenes.thy
author wenzelm
Sat Jul 18 22:58:50 2015 +0200 (2015-07-18)
changeset 60758 d8d85a8172b5
parent 60583 a645a0e6d790
child 61166 5976fe402824
permissions -rw-r--r--
isabelle update_cartouches;
     1 (*  Title:      HOL/Number_Theory/Eratosthenes.thy
     2     Author:     Florian Haftmann, TU Muenchen
     3 *)
     4 
     5 section \<open>The sieve of Eratosthenes\<close>
     6 
     7 theory Eratosthenes
     8 imports Main Primes
     9 begin
    10 
    11 
    12 subsection \<open>Preliminary: strict divisibility\<close>
    13 
    14 context dvd
    15 begin
    16 
    17 abbreviation dvd_strict :: "'a \<Rightarrow> 'a \<Rightarrow> bool" (infixl "dvd'_strict" 50)
    18 where
    19   "b dvd_strict a \<equiv> b dvd a \<and> \<not> a dvd b"
    20 
    21 end
    22 
    23 
    24 subsection \<open>Main corpus\<close>
    25 
    26 text \<open>The sieve is modelled as a list of booleans, where @{const False} means \emph{marked out}.\<close>
    27 
    28 type_synonym marks = "bool list"
    29 
    30 definition numbers_of_marks :: "nat \<Rightarrow> marks \<Rightarrow> nat set"
    31 where
    32   "numbers_of_marks n bs = fst ` {x \<in> set (enumerate n bs). snd x}"
    33 
    34 lemma numbers_of_marks_simps [simp, code]:
    35   "numbers_of_marks n [] = {}"
    36   "numbers_of_marks n (True # bs) = insert n (numbers_of_marks (Suc n) bs)"
    37   "numbers_of_marks n (False # bs) = numbers_of_marks (Suc n) bs"
    38   by (auto simp add: numbers_of_marks_def intro!: image_eqI)
    39 
    40 lemma numbers_of_marks_Suc:
    41   "numbers_of_marks (Suc n) bs = Suc ` numbers_of_marks n bs"
    42   by (auto simp add: numbers_of_marks_def enumerate_Suc_eq image_iff Bex_def)
    43 
    44 lemma numbers_of_marks_replicate_False [simp]:
    45   "numbers_of_marks n (replicate m False) = {}"
    46   by (auto simp add: numbers_of_marks_def enumerate_replicate_eq)
    47 
    48 lemma numbers_of_marks_replicate_True [simp]:
    49   "numbers_of_marks n (replicate m True) = {n..<n+m}"
    50   by (auto simp add: numbers_of_marks_def enumerate_replicate_eq image_def)
    51 
    52 lemma in_numbers_of_marks_eq:
    53   "m \<in> numbers_of_marks n bs \<longleftrightarrow> m \<in> {n..<n + length bs} \<and> bs ! (m - n)"
    54   by (simp add: numbers_of_marks_def in_set_enumerate_eq image_iff add.commute)
    55 
    56 lemma sorted_list_of_set_numbers_of_marks:
    57   "sorted_list_of_set (numbers_of_marks n bs) = map fst (filter snd (enumerate n bs))"
    58   by (auto simp add: numbers_of_marks_def distinct_map
    59     intro!: sorted_filter distinct_filter inj_onI sorted_distinct_set_unique)
    60 
    61 
    62 text \<open>Marking out multiples in a sieve\<close>
    63 
    64 definition mark_out :: "nat \<Rightarrow> marks \<Rightarrow> marks"
    65 where
    66   "mark_out n bs = map (\<lambda>(q, b). b \<and> \<not> Suc n dvd Suc (Suc q)) (enumerate n bs)"
    67 
    68 lemma mark_out_Nil [simp]: "mark_out n [] = []"
    69   by (simp add: mark_out_def)
    70 
    71 lemma length_mark_out [simp]: "length (mark_out n bs) = length bs"
    72   by (simp add: mark_out_def)
    73 
    74 lemma numbers_of_marks_mark_out:
    75     "numbers_of_marks n (mark_out m bs) = {q \<in> numbers_of_marks n bs. \<not> Suc m dvd Suc q - n}"
    76   by (auto simp add: numbers_of_marks_def mark_out_def in_set_enumerate_eq image_iff
    77     nth_enumerate_eq less_eq_dvd_minus)
    78 
    79 
    80 text \<open>Auxiliary operation for efficient implementation\<close>
    81 
    82 definition mark_out_aux :: "nat \<Rightarrow> nat \<Rightarrow> marks \<Rightarrow> marks"
    83 where
    84   "mark_out_aux n m bs =
    85     map (\<lambda>(q, b). b \<and> (q < m + n \<or> \<not> Suc n dvd Suc (Suc q) + (n - m mod Suc n))) (enumerate n bs)"
    86 
    87 lemma mark_out_code [code]: "mark_out n bs = mark_out_aux n n bs"
    88 proof -
    89   have aux: False
    90     if A: "Suc n dvd Suc (Suc a)"
    91     and B: "a < n + n"
    92     and C: "n \<le> a"
    93     for a
    94   proof (cases "n = 0")
    95     case True
    96     with A B C show ?thesis by simp
    97   next
    98     case False
    99     def m \<equiv> "Suc n"
   100     then have "m > 0" by simp
   101     from False have "n > 0" by simp
   102     from A obtain q where q: "Suc (Suc a) = Suc n * q" by (rule dvdE)
   103     have "q > 0"
   104     proof (rule ccontr)
   105       assume "\<not> q > 0"
   106       with q show False by simp
   107     qed
   108     with \<open>n > 0\<close> have "Suc n * q \<ge> 2" by (auto simp add: gr0_conv_Suc)
   109     with q have a: "a = Suc n * q - 2" by simp
   110     with B have "q + n * q < n + n + 2" by auto
   111     then have "m * q < m * 2" by (simp add: m_def)
   112     with \<open>m > 0\<close> have "q < 2" by simp
   113     with \<open>q > 0\<close> have "q = 1" by simp
   114     with a have "a = n - 1" by simp
   115     with \<open>n > 0\<close> C show False by simp
   116   qed
   117   show ?thesis
   118     by (auto simp add: mark_out_def mark_out_aux_def in_set_enumerate_eq intro: aux)
   119 qed
   120 
   121 lemma mark_out_aux_simps [simp, code]:
   122   "mark_out_aux n m [] = []"
   123   "mark_out_aux n 0 (b # bs) = False # mark_out_aux n n bs"
   124   "mark_out_aux n (Suc m) (b # bs) = b # mark_out_aux n m bs"
   125 proof goals
   126   case 1
   127   show ?case
   128     by (simp add: mark_out_aux_def)
   129 next
   130   case 2
   131   show ?case
   132     by (auto simp add: mark_out_code [symmetric] mark_out_aux_def mark_out_def
   133       enumerate_Suc_eq in_set_enumerate_eq less_eq_dvd_minus)
   134 next
   135   case 3
   136   { def v \<equiv> "Suc m" and w \<equiv> "Suc n"
   137     fix q
   138     assume "m + n \<le> q"
   139     then obtain r where q: "q = m + n + r" by (auto simp add: le_iff_add)
   140     { fix u
   141       from w_def have "u mod w < w" by simp
   142       then have "u + (w - u mod w) = w + (u - u mod w)"
   143         by simp
   144       then have "u + (w - u mod w) = w + u div w * w"
   145         by (simp add: div_mod_equality' [symmetric])
   146     }
   147     then have "w dvd v + w + r + (w - v mod w) \<longleftrightarrow> w dvd m + w + r + (w - m mod w)"
   148       by (simp add: add.assoc add.left_commute [of m] add.left_commute [of v]
   149         dvd_add_left_iff dvd_add_right_iff)
   150     moreover from q have "Suc q = m + w + r" by (simp add: w_def)
   151     moreover from q have "Suc (Suc q) = v + w + r" by (simp add: v_def w_def)
   152     ultimately have "w dvd Suc (Suc (q + (w - v mod w))) \<longleftrightarrow> w dvd Suc (q + (w - m mod w))"
   153       by (simp only: add_Suc [symmetric])
   154     then have "Suc n dvd Suc (Suc (Suc (q + n) - Suc m mod Suc n)) \<longleftrightarrow>
   155       Suc n dvd Suc (Suc (q + n - m mod Suc n))"
   156       by (simp add: v_def w_def Suc_diff_le trans_le_add2)
   157   }
   158   then show ?case
   159     by (auto simp add: mark_out_aux_def
   160       enumerate_Suc_eq in_set_enumerate_eq not_less)
   161 qed
   162 
   163 
   164 text \<open>Main entry point to sieve\<close>
   165 
   166 fun sieve :: "nat \<Rightarrow> marks \<Rightarrow> marks"
   167 where
   168   "sieve n [] = []"
   169 | "sieve n (False # bs) = False # sieve (Suc n) bs"
   170 | "sieve n (True # bs) = True # sieve (Suc n) (mark_out n bs)"
   171 
   172 text \<open>
   173   There are the following possible optimisations here:
   174 
   175   \begin{itemize}
   176 
   177     \item @{const sieve} can abort as soon as @{term n} is too big to let
   178       @{const mark_out} have any effect.
   179 
   180     \item Search for further primes can be given up as soon as the search
   181       position exceeds the square root of the maximum candidate.
   182 
   183   \end{itemize}
   184 
   185   This is left as an constructive exercise to the reader.
   186 \<close>
   187 
   188 lemma numbers_of_marks_sieve:
   189   "numbers_of_marks (Suc n) (sieve n bs) =
   190     {q \<in> numbers_of_marks (Suc n) bs. \<forall>m \<in> numbers_of_marks (Suc n) bs. \<not> m dvd_strict q}"
   191 proof (induct n bs rule: sieve.induct)
   192   case 1
   193   show ?case by simp
   194 next
   195   case 2
   196   then show ?case by simp
   197 next
   198   case (3 n bs)
   199   have aux: "n \<in> Suc ` M \<longleftrightarrow> n > 0 \<and> n - 1 \<in> M" (is "?lhs \<longleftrightarrow> ?rhs") for M n
   200   proof
   201     show ?rhs if ?lhs using that by auto
   202     show ?lhs if ?rhs
   203     proof -
   204       from that have "n > 0" and "n - 1 \<in> M" by auto
   205       then have "Suc (n - 1) \<in> Suc ` M" by blast
   206       with \<open>n > 0\<close> show "n \<in> Suc ` M" by simp
   207     qed
   208   qed
   209   have aux1: False if "Suc (Suc n) \<le> m" and "m dvd Suc n" for m :: nat
   210   proof -
   211     from \<open>m dvd Suc n\<close> obtain q where "Suc n = m * q" ..
   212     with \<open>Suc (Suc n) \<le> m\<close> have "Suc (m * q) \<le> m" by simp
   213     then have "m * q < m" by arith
   214     then have "q = 0" by simp
   215     with \<open>Suc n = m * q\<close> show ?thesis by simp
   216   qed
   217   have aux2: "m dvd q"
   218     if 1: "\<forall>q>0. 1 < q \<longrightarrow> Suc n < q \<longrightarrow> q \<le> Suc (n + length bs) \<longrightarrow>
   219       bs ! (q - Suc (Suc n)) \<longrightarrow> \<not> Suc n dvd q \<longrightarrow> q dvd m \<longrightarrow> m dvd q"
   220     and 2: "\<not> Suc n dvd m" "q dvd m"
   221     and 3: "Suc n < q" "q \<le> Suc (n + length bs)" "bs ! (q - Suc (Suc n))"
   222     for m q :: nat
   223   proof -
   224     from 1 have *: "\<And>q. Suc n < q \<Longrightarrow> q \<le> Suc (n + length bs) \<Longrightarrow>
   225       bs ! (q - Suc (Suc n)) \<Longrightarrow> \<not> Suc n dvd q \<Longrightarrow> q dvd m \<Longrightarrow> m dvd q"
   226       by auto
   227     from 2 have "\<not> Suc n dvd q" by (auto elim: dvdE)
   228     moreover note 3
   229     moreover note \<open>q dvd m\<close>
   230     ultimately show ?thesis by (auto intro: *)
   231   qed
   232   from 3 show ?case
   233     apply (simp_all add: numbers_of_marks_mark_out numbers_of_marks_Suc Compr_image_eq
   234       inj_image_eq_iff in_numbers_of_marks_eq Ball_def imp_conjL aux)
   235     apply safe
   236     apply (simp_all add: less_diff_conv2 le_diff_conv2 dvd_minus_self not_less)
   237     apply (clarsimp dest!: aux1)
   238     apply (simp add: Suc_le_eq less_Suc_eq_le)
   239     apply (rule aux2)
   240     apply (clarsimp dest!: aux1)+
   241     done
   242 qed
   243 
   244 
   245 text \<open>Relation of the sieve algorithm to actual primes\<close>
   246 
   247 definition primes_upto :: "nat \<Rightarrow> nat list"
   248 where
   249   "primes_upto n = sorted_list_of_set {m. m \<le> n \<and> prime m}"
   250 
   251 lemma set_primes_upto: "set (primes_upto n) = {m. m \<le> n \<and> prime m}"
   252   by (simp add: primes_upto_def)
   253 
   254 lemma sorted_primes_upto [iff]: "sorted (primes_upto n)"
   255   by (simp add: primes_upto_def)
   256 
   257 lemma distinct_primes_upto [iff]: "distinct (primes_upto n)"
   258   by (simp add: primes_upto_def)
   259 
   260 lemma set_primes_upto_sieve:
   261   "set (primes_upto n) = numbers_of_marks 2 (sieve 1 (replicate (n - 1) True))"
   262 proof -
   263   consider "n = 0 \<or> n = 1" | "n > 1" by arith
   264   then show ?thesis
   265   proof cases
   266     case 1
   267     then show ?thesis
   268       by (auto simp add: numbers_of_marks_sieve numeral_2_eq_2 set_primes_upto
   269         dest: prime_gt_Suc_0_nat)
   270   next
   271     case 2
   272     {
   273       fix m q
   274       assume "Suc (Suc 0) \<le> q"
   275         and "q < Suc n"
   276         and "m dvd q"
   277       then have "m < Suc n" by (auto dest: dvd_imp_le)
   278       assume *: "\<forall>m\<in>{Suc (Suc 0)..<Suc n}. m dvd q \<longrightarrow> q dvd m"
   279         and "m dvd q" and "m \<noteq> 1"
   280       have "m = q"
   281       proof (cases "m = 0")
   282         case True with \<open>m dvd q\<close> show ?thesis by simp
   283       next
   284         case False with \<open>m \<noteq> 1\<close> have "Suc (Suc 0) \<le> m" by arith
   285         with \<open>m < Suc n\<close> * \<open>m dvd q\<close> have "q dvd m" by simp
   286         with \<open>m dvd q\<close> show ?thesis by (simp add: dvd.eq_iff)
   287       qed
   288     }
   289     then have aux: "\<And>m q. Suc (Suc 0) \<le> q \<Longrightarrow>
   290       q < Suc n \<Longrightarrow>
   291       m dvd q \<Longrightarrow>
   292       \<forall>m\<in>{Suc (Suc 0)..<Suc n}. m dvd q \<longrightarrow> q dvd m \<Longrightarrow>
   293       m dvd q \<Longrightarrow> m \<noteq> q \<Longrightarrow> m = 1" by auto
   294     from 2 show ?thesis
   295       apply (auto simp add: numbers_of_marks_sieve numeral_2_eq_2 set_primes_upto
   296         dest: prime_gt_Suc_0_nat)
   297       apply (metis One_nat_def Suc_le_eq less_not_refl prime_nat_def)
   298       apply (metis One_nat_def Suc_le_eq aux prime_nat_def)
   299       done
   300   qed
   301 qed
   302 
   303 lemma primes_upto_sieve [code]:
   304   "primes_upto n = map fst (filter snd (enumerate 2 (sieve 1 (replicate (n - 1) True))))"
   305 proof -
   306   have "primes_upto n = sorted_list_of_set (numbers_of_marks 2 (sieve 1 (replicate (n - 1) True)))"
   307     apply (rule sorted_distinct_set_unique)
   308     apply (simp_all only: set_primes_upto_sieve numbers_of_marks_def)
   309     apply auto
   310     done
   311   then show ?thesis
   312     by (simp add: sorted_list_of_set_numbers_of_marks)
   313 qed
   314 
   315 lemma prime_in_primes_upto: "prime n \<longleftrightarrow> n \<in> set (primes_upto n)"
   316   by (simp add: set_primes_upto)
   317 
   318 
   319 subsection \<open>Application: smallest prime beyond a certain number\<close>
   320 
   321 definition smallest_prime_beyond :: "nat \<Rightarrow> nat"
   322 where
   323   "smallest_prime_beyond n = (LEAST p. prime p \<and> p \<ge> n)"
   324 
   325 lemma prime_smallest_prime_beyond [iff]: "prime (smallest_prime_beyond n)" (is ?P)
   326   and smallest_prime_beyond_le [iff]: "smallest_prime_beyond n \<ge> n" (is ?Q)
   327 proof -
   328   let ?least = "LEAST p. prime p \<and> p \<ge> n"
   329   from primes_infinite obtain q where "prime q \<and> q \<ge> n"
   330     by (metis finite_nat_set_iff_bounded_le mem_Collect_eq nat_le_linear)
   331   then have "prime ?least \<and> ?least \<ge> n"
   332     by (rule LeastI)
   333   then show ?P and ?Q
   334     by (simp_all add: smallest_prime_beyond_def)
   335 qed
   336 
   337 lemma smallest_prime_beyond_smallest: "prime p \<Longrightarrow> p \<ge> n \<Longrightarrow> smallest_prime_beyond n \<le> p"
   338   by (simp only: smallest_prime_beyond_def) (auto intro: Least_le)
   339 
   340 lemma smallest_prime_beyond_eq:
   341   "prime p \<Longrightarrow> p \<ge> n \<Longrightarrow> (\<And>q. prime q \<Longrightarrow> q \<ge> n \<Longrightarrow> q \<ge> p) \<Longrightarrow> smallest_prime_beyond n = p"
   342   by (simp only: smallest_prime_beyond_def) (auto intro: Least_equality)
   343 
   344 definition smallest_prime_between :: "nat \<Rightarrow> nat \<Rightarrow> nat option"
   345 where
   346   "smallest_prime_between m n =
   347     (if (\<exists>p. prime p \<and> m \<le> p \<and> p \<le> n) then Some (smallest_prime_beyond m) else None)"
   348 
   349 lemma smallest_prime_between_None:
   350   "smallest_prime_between m n = None \<longleftrightarrow> (\<forall>q. m \<le> q \<and> q \<le> n \<longrightarrow> \<not> prime q)"
   351   by (auto simp add: smallest_prime_between_def)
   352 
   353 lemma smallest_prime_betwen_Some:
   354   "smallest_prime_between m n = Some p \<longleftrightarrow> smallest_prime_beyond m = p \<and> p \<le> n"
   355   by (auto simp add: smallest_prime_between_def dest: smallest_prime_beyond_smallest [of _ m])
   356 
   357 lemma [code]: "smallest_prime_between m n = List.find (\<lambda>p. p \<ge> m) (primes_upto n)"
   358 proof -
   359   have "List.find (\<lambda>p. p \<ge> m) (primes_upto n) = Some (smallest_prime_beyond m)"
   360     if assms: "m \<le> p" "prime p" "p \<le> n" for p
   361   proof -
   362     def A \<equiv> "{p. p \<le> n \<and> prime p \<and> m \<le> p}"
   363     from assms have "smallest_prime_beyond m \<le> p"
   364       by (auto intro: smallest_prime_beyond_smallest)
   365     from this \<open>p \<le> n\<close> have *: "smallest_prime_beyond m \<le> n"
   366       by (rule order_trans)
   367     from assms have ex: "\<exists>p\<le>n. prime p \<and> m \<le> p"
   368       by auto
   369     then have "finite A"
   370       by (auto simp add: A_def)
   371     with * have "Min A = smallest_prime_beyond m"
   372       by (auto simp add: A_def intro: Min_eqI smallest_prime_beyond_smallest)
   373     with ex sorted_primes_upto show ?thesis
   374       by (auto simp add: set_primes_upto sorted_find_Min A_def)
   375   qed
   376   then show ?thesis
   377     by (auto simp add: smallest_prime_between_def find_None_iff set_primes_upto
   378       intro!: sym [of _ None])
   379 qed
   380 
   381 definition smallest_prime_beyond_aux :: "nat \<Rightarrow> nat \<Rightarrow> nat"
   382 where
   383   "smallest_prime_beyond_aux k n = smallest_prime_beyond n"
   384 
   385 lemma [code]:
   386   "smallest_prime_beyond_aux k n =
   387     (case smallest_prime_between n (k * n) of
   388       Some p \<Rightarrow> p
   389     | None \<Rightarrow> smallest_prime_beyond_aux (Suc k) n)"
   390   by (simp add: smallest_prime_beyond_aux_def smallest_prime_betwen_Some split: option.split)
   391 
   392 lemma [code]: "smallest_prime_beyond n = smallest_prime_beyond_aux 2 n"
   393   by (simp add: smallest_prime_beyond_aux_def)
   394 
   395 end