added first proof reconstruction test for Sledgehammer
authordesharna
Wed, 20 Sep 2023 17:40:09 +0200
changeset 78676 a98e0a816d28
parent 78675 f0a4ad78c0f2
child 78677 1b9e0f74addb
added first proof reconstruction test for Sledgehammer
src/HOL/Metis_Examples/Sledgehammer_Isar_Proofs.certs
src/HOL/Metis_Examples/Sledgehammer_Isar_Proofs.thy
src/HOL/ROOT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Metis_Examples/Sledgehammer_Isar_Proofs.certs	Wed Sep 20 17:40:09 2023 +0200
@@ -0,0 +1,236 @@
+35b253bd6d27f6991af73e29ed1030cb74b50ab2 11 0
+unsat
+(assume conjecture0 (not (! (= (! (- c$ b$) :named @p_2) a$) :named @p_1)))
+(assume hypothesis1 (! (= c$ (+ a$ b$)) :named @p_6))
+(step t3 (cl (or @p_1 (! (not (! (<= @p_2 a$) :named @p_5)) :named @p_3) (! (not (! (<= a$ @p_2) :named @p_7)) :named @p_4))) :rule la_disequality)
+(step t4 (cl @p_1 @p_3 @p_4) :rule or :premises (t3))
+(step t5 (cl @p_3 @p_4) :rule resolution :premises (t4 conjecture0))
+(step t6 (cl @p_5 (! (not @p_6) :named @p_8)) :rule la_generic :args (1 (- 1)))
+(step t7 (cl @p_5) :rule resolution :premises (t6 hypothesis1))
+(step t8 (cl @p_4) :rule resolution :premises (t5 t7))
+(step t9 (cl @p_7 @p_8) :rule la_generic :args (1 1))
+(step t10 (cl) :rule resolution :premises (t9 hypothesis1 t8))
+882414839a017a484227974e42bc9854cb971d88 49 0
+unsat
+((set-logic AUFLIA)
+(proof
+(let ((?x49 (* (- 1) a$)))
+(let ((?x36 (* (- 1) b$)))
+(let ((?x50 (+ c$ ?x36 ?x49)))
+(let (($x92 (= ?x50 0)))
+(let (($x114 (not $x92)))
+(let ((@x128 (rewrite (= (not true) false))))
+(let (($x55 (>= ?x50 0)))
+(let (($x89 (not $x55)))
+(let (($x51 (<= ?x50 0)))
+(let (($x86 (not $x51)))
+(let (($x101 (or $x92 $x86 $x89)))
+(let (($x57 (and $x51 $x55)))
+(let ((@x59 (monotonicity (rewrite (= (<= (+ c$ ?x36) a$) $x51)) (rewrite (= (<= a$ (+ c$ ?x36)) $x55)) (= (and (<= (+ c$ ?x36) a$) (<= a$ (+ c$ ?x36))) $x57))))
+(let ((?x29 (- c$ b$)))
+(let (($x32 (<= a$ ?x29)))
+(let (($x31 (<= ?x29 a$)))
+(let (($x33 (and $x31 $x32)))
+(let ((@x39 (rewrite (= ?x29 (+ c$ ?x36)))))
+(let ((@x48 (monotonicity (monotonicity @x39 (= $x31 (<= (+ c$ ?x36) a$))) (monotonicity @x39 (= $x32 (<= a$ (+ c$ ?x36)))) (= $x33 (and (<= (+ c$ ?x36) a$) (<= a$ (+ c$ ?x36)))))))
+(let ((@x64 (and-elim (mp (asserted $x33) (trans @x48 @x59 (= $x33 $x57)) $x57) $x55)))
+(let ((@x132 (monotonicity (iff-true @x64 (= $x55 true)) (= $x89 (not true)))))
+(let ((@x63 (and-elim (mp (asserted $x33) (trans @x48 @x59 (= $x33 $x57)) $x57) $x51)))
+(let ((@x126 (monotonicity (iff-true @x63 (= $x51 true)) (= $x86 (not true)))))
+(let ((@x137 (monotonicity (trans @x126 @x128 (= $x86 false)) (trans @x132 @x128 (= $x89 false)) (= $x101 (or $x92 false false)))))
+(let ((@x141 (trans @x137 (rewrite (= (or $x92 false false) $x92)) (= $x101 $x92))))
+(let ((?x37 (+ c$ ?x36)))
+(let (($x43 (<= a$ ?x37)))
+(let (($x77 (not $x43)))
+(let (($x40 (<= ?x37 a$)))
+(let (($x74 (not $x40)))
+(let (($x80 (or $x74 $x77)))
+(let (($x71 (= ?x37 a$)))
+(let (($x83 (or $x71 $x80)))
+(let ((@x97 (monotonicity (monotonicity (rewrite (= $x40 $x51)) (= $x74 $x86)) (monotonicity (rewrite (= $x43 $x55)) (= $x77 $x89)) (= $x80 (or $x86 $x89)))))
+(let ((@x100 (monotonicity (rewrite (= $x71 $x92)) @x97 (= $x83 (or $x92 (or $x86 $x89))))))
+(let ((@x105 (trans @x100 (rewrite (= (or $x92 (or $x86 $x89)) $x101)) (= $x83 $x101))))
+(let ((@x79 (monotonicity (monotonicity @x39 (= $x32 $x43)) (= (not $x32) $x77))))
+(let ((@x76 (monotonicity (monotonicity @x39 (= $x31 $x40)) (= (not $x31) $x74))))
+(let ((@x85 (monotonicity (monotonicity @x39 (= (= ?x29 a$) $x71)) (monotonicity @x76 @x79 (= (or (not $x31) (not $x32)) $x80)) (= (or (= ?x29 a$) (or (not $x31) (not $x32))) $x83))))
+(let ((@x107 (trans @x85 @x105 (= (or (= ?x29 a$) (or (not $x31) (not $x32))) $x101))))
+(let ((@x108 (mp (asserted (or (= ?x29 a$) (or (not $x31) (not $x32)))) @x107 $x101)))
+(let ((@x146 (monotonicity (iff-true (mp @x108 @x141 $x92) (= $x92 true)) (= $x114 (not true)))))
+(let ((@x113 (monotonicity (monotonicity @x39 (= (= ?x29 a$) $x71)) (= (not (= ?x29 a$)) (not $x71)))))
+(let ((@x118 (trans @x113 (monotonicity (rewrite (= $x71 $x92)) (= (not $x71) $x114)) (= (not (= ?x29 a$)) $x114))))
+(mp (mp (asserted (not (= ?x29 a$))) @x118 $x114) (trans @x146 @x128 (= $x114 false)) false)))))))))))))))))))))))))))))))))))))))))))))))
+
+f2541774823d4fd4ada0de04dcf1ccc81465be15 38 0
+unsat
+((set-logic AUFLIA)
+(proof
+(let ((?x69 (* (- 1) a$)))
+(let ((?x41 (* (- 1) b$)))
+(let ((?x70 (+ c$ ?x41 ?x69)))
+(let (($x78 (>= ?x70 0)))
+(let (($x80 (not $x78)))
+(let (($x71 (<= ?x70 0)))
+(let (($x74 (not $x71)))
+(let (($x83 (= ?x70 0)))
+(let (($x92 (or $x83 $x74 $x80)))
+(let (($x97 (not $x92)))
+(let (($x37 (or (= (- c$ b$) a$) (or (not (<= (- c$ b$) a$)) (not (<= a$ (- c$ b$)))))))
+(let (($x38 (not $x37)))
+(let ((?x42 (+ c$ ?x41)))
+(let (($x54 (<= a$ ?x42)))
+(let (($x57 (not $x54)))
+(let (($x48 (<= ?x42 a$)))
+(let (($x51 (not $x48)))
+(let (($x60 (or $x51 $x57)))
+(let (($x45 (= ?x42 a$)))
+(let (($x63 (or $x45 $x60)))
+(let ((@x88 (monotonicity (monotonicity (rewrite (= $x48 $x71)) (= $x51 $x74)) (monotonicity (rewrite (= $x54 $x78)) (= $x57 $x80)) (= $x60 (or $x74 $x80)))))
+(let ((@x91 (monotonicity (rewrite (= $x45 $x83)) @x88 (= $x63 (or $x83 (or $x74 $x80))))))
+(let ((@x96 (trans @x91 (rewrite (= (or $x83 (or $x74 $x80)) $x92)) (= $x63 $x92))))
+(let (($x61 (= (or (not (<= (- c$ b$) a$)) (not (<= a$ (- c$ b$)))) $x60)))
+(let ((@x44 (rewrite (= (- c$ b$) ?x42))))
+(let ((@x59 (monotonicity (monotonicity @x44 (= (<= a$ (- c$ b$)) $x54)) (= (not (<= a$ (- c$ b$))) $x57))))
+(let ((@x53 (monotonicity (monotonicity @x44 (= (<= (- c$ b$) a$) $x48)) (= (not (<= (- c$ b$) a$)) $x51))))
+(let ((@x65 (monotonicity (monotonicity @x44 (= (= (- c$ b$) a$) $x45)) (monotonicity @x53 @x59 $x61) (= $x37 $x63))))
+(let ((@x101 (trans (monotonicity @x65 (= $x38 (not $x63))) (monotonicity @x96 (= (not $x63) $x97)) (= $x38 $x97))))
+(let ((@x102 (mp (asserted $x38) @x101 $x97)))
+(let ((@x106 (not-or-elim @x102 $x78)))
+(let ((@x104 (not-or-elim @x102 (not $x83))))
+(let ((@x105 (not-or-elim @x102 $x71)))
+(unit-resolution ((_ th-lemma arith triangle-eq) $x92) @x105 @x104 @x106 false))))))))))))))))))))))))))))))))))))
+
+38c94308bfb0b4491339a6c117e29fba807f290a 27 0
+unsat
+((set-logic AUFLIA)
+(proof
+(let ((?x41 (* (- 1) c$)))
+(let ((?x42 (+ a$ ?x41 b$)))
+(let (($x43 (<= ?x42 0)))
+(let (($x55 (>= ?x42 0)))
+(let (($x69 (and $x55 $x43)))
+(let (($x72 (not $x69)))
+(let ((@x40 (monotonicity (rewrite (= (- c$ b$) (+ c$ (* (- 1) b$)))) (= (<= a$ (- c$ b$)) (<= a$ (+ c$ (* (- 1) b$)))))))
+(let ((@x47 (trans @x40 (rewrite (= (<= a$ (+ c$ (* (- 1) b$))) $x43)) (= (<= a$ (- c$ b$)) $x43))))
+(let ((@x79 (iff-true (mp (asserted (<= a$ (- c$ b$))) @x47 $x43) (= $x43 true))))
+(let ((@x53 (monotonicity (rewrite (= (- c$ b$) (+ c$ (* (- 1) b$)))) (= (<= (- c$ b$) a$) (<= (+ c$ (* (- 1) b$)) a$)))))
+(let ((@x58 (trans @x53 (rewrite (= (<= (+ c$ (* (- 1) b$)) a$) $x55)) (= (<= (- c$ b$) a$) $x55))))
+(let ((@x81 (iff-true (mp (asserted (<= (- c$ b$) a$)) @x58 $x55) (= $x55 true))))
+(let ((@x88 (trans (monotonicity @x81 @x79 (= $x69 (and true true))) (rewrite (= (and true true) true)) (= $x69 true))))
+(let ((@x95 (trans (monotonicity @x88 (= $x72 (not true))) (rewrite (= (not true) false)) (= $x72 false))))
+(let (($x61 (not (and (<= (- c$ b$) a$) (<= a$ (- c$ b$))))))
+(let ((?x35 (+ c$ (* (- 1) b$))))
+(let (($x38 (<= a$ ?x35)))
+(let (($x51 (<= ?x35 a$)))
+(let (($x63 (and $x51 $x38)))
+(let ((@x71 (monotonicity (rewrite (= $x51 $x55)) (rewrite (= $x38 $x43)) (= $x63 $x69))))
+(let ((@x65 (monotonicity @x53 @x40 (= (and (<= (- c$ b$) a$) (<= a$ (- c$ b$))) $x63))))
+(let ((@x76 (trans (monotonicity @x65 (= $x61 (not $x63))) (monotonicity @x71 (= (not $x63) $x72)) (= $x61 $x72))))
+(mp (mp (asserted $x61) @x76 $x72) @x95 false)))))))))))))))))))))))))
+
+d40f5f06cb71bf04976dfbd768e98729d8a02410 29 0
+unsat
+((set-logic AUFLIA)
+(proof
+(let ((?x36 (* (- 1) c$)))
+(let ((?x37 (+ a$ b$ ?x36)))
+(let (($x56 (<= ?x37 0)))
+(let (($x38 (= ?x37 0)))
+(let ((@x41 (mp (asserted (= (+ a$ b$) c$)) (rewrite (= (= (+ a$ b$) c$) $x38)) $x38)))
+(let ((@x85 (monotonicity (iff-true @x41 (= $x38 true)) (= (not $x38) (not true)))))
+(let ((@x89 (trans @x85 (rewrite (= (not true) false)) (= (not $x38) false))))
+(let ((@x96 (trans (monotonicity @x89 (= (or $x56 (not $x38)) (or $x56 false))) (rewrite (= (or $x56 false) $x56)) (= (or $x56 (not $x38)) $x56))))
+(let (($x61 (not $x38)))
+(let (($x64 (or $x56 $x61)))
+(let (($x43 (not (= c$ (+ a$ b$)))))
+(let (($x34 (<= a$ (- c$ b$))))
+(let (($x44 (or $x34 $x43)))
+(let ((@x63 (monotonicity (rewrite (= (= c$ (+ a$ b$)) $x38)) (= $x43 $x61))))
+(let ((@x66 (monotonicity (rewrite (= (<= a$ (+ (* (- 1) b$) c$)) $x56)) @x63 (= (or (<= a$ (+ (* (- 1) b$) c$)) $x43) $x64))))
+(let ((@x52 (monotonicity (rewrite (= (- c$ b$) (+ (* (- 1) b$) c$))) (= $x34 (<= a$ (+ (* (- 1) b$) c$))))))
+(let ((@x55 (monotonicity @x52 (= $x44 (or (<= a$ (+ (* (- 1) b$) c$)) $x43)))))
+(let ((@x97 (mp (mp (asserted $x44) (trans @x55 @x66 (= $x44 $x64)) $x64) @x96 $x56)))
+(let ((@x101 (monotonicity (iff-true @x97 (= $x56 true)) (= (not $x56) (not true)))))
+(let ((@x103 (trans @x101 (rewrite (= (not true) false)) (= (not $x56) false))))
+(let (($x75 (not $x56)))
+(let ((@x77 (monotonicity (rewrite (= (<= a$ (+ (* (- 1) b$) c$)) $x56)) (= (not (<= a$ (+ (* (- 1) b$) c$))) $x75))))
+(let ((@x74 (monotonicity @x52 (= (not $x34) (not (<= a$ (+ (* (- 1) b$) c$)))))))
+(let ((@x80 (mp (asserted (not $x34)) (trans @x74 @x77 (= (not $x34) $x75)) $x75)))
+(mp @x80 @x103 false)))))))))))))))))))))))))))
+
+589302410c98bf9494c2b8bba0d198d5d6fb8f30 22 0
+unsat
+((set-logic AUFLIA)
+(proof
+(let (($x58 (= (+ a$ (* (- 1) c$) b$) 0)))
+(let (($x66 (not (or (<= (+ a$ (* (- 1) c$) b$) 0) (not $x58)))))
+(let (($x36 (not (or (<= a$ (- c$ b$)) (not (= c$ (+ a$ b$)))))))
+(let (($x34 (not (= c$ (+ a$ b$)))))
+(let (($x43 (<= a$ (+ c$ (* (- 1) b$)))))
+(let (($x46 (or $x43 $x34)))
+(let ((@x62 (monotonicity (rewrite (= (= c$ (+ a$ b$)) $x58)) (= $x34 (not $x58)))))
+(let ((@x65 (monotonicity (rewrite (= $x43 (<= (+ a$ (* (- 1) c$) b$) 0))) @x62 (= $x46 (or (<= (+ a$ (* (- 1) c$) b$) 0) (not $x58))))))
+(let ((@x45 (monotonicity (rewrite (= (- c$ b$) (+ c$ (* (- 1) b$)))) (= (<= a$ (- c$ b$)) $x43))))
+(let ((@x51 (monotonicity (monotonicity @x45 (= (or (<= a$ (- c$ b$)) $x34) $x46)) (= $x36 (not $x46)))))
+(let ((@x70 (trans @x51 (monotonicity @x65 (= (not $x46) $x66)) (= $x36 $x66))))
+(let ((@x75 (monotonicity (not-or-elim (mp (asserted $x36) @x70 $x66) $x58) (= (<= (+ a$ (* (- 1) c$) b$) 0) (<= 0 0)))))
+(let ((@x81 (trans @x75 (rewrite (= (<= 0 0) true)) (= (<= (+ a$ (* (- 1) c$) b$) 0) true))))
+(let ((@x84 (monotonicity @x81 (= (not (<= (+ a$ (* (- 1) c$) b$) 0)) (not true)))))
+(let ((@x88 (trans @x84 (rewrite (= (not true) false)) (= (not (<= (+ a$ (* (- 1) c$) b$) 0)) false))))
+(let (($x54 (<= (+ a$ (* (- 1) c$) b$) 0)))
+(let (($x72 (not $x54)))
+(mp (not-or-elim (mp (asserted $x36) @x70 $x66) $x72) @x88 false))))))))))))))))))))
+
+a19eb65c252ea63e4442473afd12fc3c9abc7d26 29 0
+unsat
+((set-logic AUFLIA)
+(proof
+(let ((?x36 (* (- 1) c$)))
+(let ((?x37 (+ a$ b$ ?x36)))
+(let (($x57 (>= ?x37 0)))
+(let (($x38 (= ?x37 0)))
+(let ((@x41 (mp (asserted (= (+ a$ b$) c$)) (rewrite (= (= (+ a$ b$) c$) $x38)) $x38)))
+(let ((@x85 (monotonicity (iff-true @x41 (= $x38 true)) (= (not $x38) (not true)))))
+(let ((@x89 (trans @x85 (rewrite (= (not true) false)) (= (not $x38) false))))
+(let ((@x96 (trans (monotonicity @x89 (= (or $x57 (not $x38)) (or $x57 false))) (rewrite (= (or $x57 false) $x57)) (= (or $x57 (not $x38)) $x57))))
+(let (($x61 (not $x38)))
+(let (($x64 (or $x57 $x61)))
+(let (($x43 (not (= c$ (+ a$ b$)))))
+(let (($x34 (<= (- c$ b$) a$)))
+(let (($x44 (or $x34 $x43)))
+(let ((@x63 (monotonicity (rewrite (= (= c$ (+ a$ b$)) $x38)) (= $x43 $x61))))
+(let ((@x66 (monotonicity (rewrite (= (<= (+ (* (- 1) b$) c$) a$) $x57)) @x63 (= (or (<= (+ (* (- 1) b$) c$) a$) $x43) $x64))))
+(let ((@x52 (monotonicity (rewrite (= (- c$ b$) (+ (* (- 1) b$) c$))) (= $x34 (<= (+ (* (- 1) b$) c$) a$)))))
+(let ((@x55 (monotonicity @x52 (= $x44 (or (<= (+ (* (- 1) b$) c$) a$) $x43)))))
+(let ((@x97 (mp (mp (asserted $x44) (trans @x55 @x66 (= $x44 $x64)) $x64) @x96 $x57)))
+(let ((@x101 (monotonicity (iff-true @x97 (= $x57 true)) (= (not $x57) (not true)))))
+(let ((@x103 (trans @x101 (rewrite (= (not true) false)) (= (not $x57) false))))
+(let (($x75 (not $x57)))
+(let ((@x77 (monotonicity (rewrite (= (<= (+ (* (- 1) b$) c$) a$) $x57)) (= (not (<= (+ (* (- 1) b$) c$) a$)) $x75))))
+(let ((@x74 (monotonicity @x52 (= (not $x34) (not (<= (+ (* (- 1) b$) c$) a$))))))
+(let ((@x80 (mp (asserted (not $x34)) (trans @x74 @x77 (= (not $x34) $x75)) $x75)))
+(mp @x80 @x103 false)))))))))))))))))))))))))))
+
+4935c5af5b588fd6e477bea2bed6285c3e17b0c6 23 0
+unsat
+((set-logic AUFLIA)
+(proof
+(let (($x63 (<= (+ c$ (* (- 1) b$) (* (- 1) a$)) 0)))
+(let (($x81 (not $x63)))
+(let (($x66 (= (+ c$ (* (- 1) b$) (* (- 1) a$)) 0)))
+(let (($x75 (not (or $x63 (not $x66)))))
+(let (($x36 (not (or (<= (- c$ b$) a$) (not (= c$ (+ a$ b$)))))))
+(let (($x49 (= c$ (+ b$ a$))))
+(let (($x52 (not $x49)))
+(let (($x43 (<= (+ c$ (* (- 1) b$)) a$)))
+(let (($x55 (or $x43 $x52)))
+(let ((@x74 (monotonicity (rewrite (= $x43 $x63)) (monotonicity (rewrite (= $x49 $x66)) (= $x52 (not $x66))) (= $x55 (or $x63 (not $x66))))))
+(let (($x56 (= (or (<= (- c$ b$) a$) (not (= c$ (+ a$ b$)))) $x55)))
+(let ((@x51 (monotonicity (rewrite (= (+ a$ b$) (+ b$ a$))) (= (= c$ (+ a$ b$)) $x49))))
+(let ((@x45 (monotonicity (rewrite (= (- c$ b$) (+ c$ (* (- 1) b$)))) (= (<= (- c$ b$) a$) $x43))))
+(let ((@x57 (monotonicity @x45 (monotonicity @x51 (= (not (= c$ (+ a$ b$))) $x52)) $x56)))
+(let ((@x79 (trans (monotonicity @x57 (= $x36 (not $x55))) (monotonicity @x74 (= (not $x55) $x75)) (= $x36 $x75))))
+(let ((@x84 (monotonicity (not-or-elim (mp (asserted $x36) @x79 $x75) $x66) (= $x63 (<= 0 0)))))
+(let ((@x90 (trans @x84 (rewrite (= (<= 0 0) true)) (= $x63 true))))
+(let ((@x97 (trans (monotonicity @x90 (= $x81 (not true))) (rewrite (= (not true) false)) (= $x81 false))))
+(mp (not-or-elim (mp (asserted $x36) @x79 $x75) $x81) @x97 false)))))))))))))))))))))
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Metis_Examples/Sledgehammer_Isar_Proofs.thy	Wed Sep 20 17:40:09 2023 +0200
@@ -0,0 +1,38 @@
+(*  Title:      src/HOL/Metis_Example/Sledgehammer_Isar_Proofs.thy
+    Author:     Martin Desharnais, MPI-INF Saarbruecken
+
+Tests of proof reconstruction (a.k.a. preplay) in Sledgehammer.
+*)
+theory Sledgehammer_Isar_Proofs
+  imports Main
+begin
+
+external_file \<open>Sledgehammer_Isar_Proofs.certs\<close>
+declare [[smt_certificates = "Sledgehammer_Isar_Proofs.certs"]]
+declare [[smt_read_only_certificates = true]]
+
+sledgehammer_params [verit, isar_proof, minimize = false, slices = 1, compress = 1]
+
+lemma
+  fixes a b c :: int
+  shows "a + b = c \<Longrightarrow> c - b = a"
+  sledgehammer [expect = some_preplayed] ()
+proof -
+  assume a1: "a + b = c"
+  have "c - b \<le> a \<or> c \<noteq> a + b"
+    by force
+  then have f2: "c - b \<le> a"
+    using a1 by force
+  have "a \<le> c - b \<or> c \<noteq> a + b"
+    by force
+  then have "a \<le> c - b"
+    using a1 by force
+  then have f3: "c - b \<le> a \<and> a \<le> c - b"
+    using f2 by fastforce
+  have "c - b = a \<or> \<not> c - b \<le> a \<or> \<not> a \<le> c - b"
+    by auto
+  then show ?thesis
+    using f3 by meson
+qed
+
+end
--- a/src/HOL/ROOT	Tue Sep 19 20:24:39 2023 +0200
+++ b/src/HOL/ROOT	Wed Sep 20 17:40:09 2023 +0200
@@ -394,9 +394,10 @@
     Clausification
     Message
     Proxies
+    Sets
+    Sledgehammer_Isar_Proofs
     Tarski
     Trans_Closure
-    Sets
 
 session "HOL-Nitpick_Examples" in Nitpick_Examples = HOL +
   description "