src/HOL/Tools/sat_funcs.ML
 author huffman Fri Mar 30 12:32:35 2012 +0200 (2012-03-30) changeset 47220 52426c62b5d0 parent 45740 132a3e1c0fe5 child 51550 cec08df2c030 permissions -rw-r--r--
replace lemmas eval_nat_numeral with a simpler reformulation
1 (*  Title:      HOL/Tools/sat_funcs.ML
2     Author:     Stephan Merz and Alwen Tiu, QSL Team, LORIA (http://qsl.loria.fr)
3     Author:     Tjark Weber, TU Muenchen
5 Proof reconstruction from SAT solvers.
7   Description:
8     This file defines several tactics to invoke a proof-producing
9     SAT solver on a propositional goal in clausal form.
11     We use a sequent presentation of clauses to speed up resolution
12     proof reconstruction.
13     We call such clauses "raw clauses", which are of the form
14           [x1, ..., xn, P] |- False
15     (note the use of |- instead of ==>, i.e. of Isabelle's (meta-)hyps here),
16     where each xi is a literal (see also comments in cnf_funcs.ML).
18     This does not work for goals containing schematic variables!
20       The tactic produces a clause representation of the given goal
21       in DIMACS format and invokes a SAT solver, which should return
22       a proof consisting of a sequence of resolution steps, indicating
23       the two input clauses, and resulting in new clauses, leading to
24       the empty clause (i.e. "False").  The tactic replays this proof
25       in Isabelle and thus solves the overall goal.
27   There are three SAT tactics available.  They differ in the CNF transformation
28   used. "sat_tac" uses naive CNF transformation to transform the theorem to be
29   proved before giving it to the SAT solver.  The naive transformation in the
30   worst case can lead to an exponential blow up in formula size.  Another
31   tactic, "satx_tac", uses "definitional CNF transformation" which attempts to
32   produce a formula of linear size increase compared to the input formula, at
33   the cost of possibly introducing new variables.  See cnf_funcs.ML for more
34   comments on the CNF transformation.  "rawsat_tac" should be used with
35   caution: no CNF transformation is performed, and the tactic's behavior is
36   undefined if the subgoal is not already given as [| C1; ...; Cn |] ==> False,
37   where each Ci is a disjunction.
39   The SAT solver to be used can be set via the "solver" reference.  See
40   sat_solvers.ML for possible values, and etc/settings for required (solver-
41   dependent) configuration settings.  To replay SAT proofs in Isabelle, you
42   must of course use a proof-producing SAT solver in the first place.
44   Proofs are replayed only if "!quick_and_dirty" is false.  If
45   "!quick_and_dirty" is true, the theorem (in case the SAT solver claims its
46   negation to be unsatisfiable) is proved via an oracle.
47 *)
49 signature SAT =
50 sig
51   val trace_sat: bool Unsynchronized.ref    (* input: print trace messages *)
52   val solver: string Unsynchronized.ref  (* input: name of SAT solver to be used *)
53   val counter: int Unsynchronized.ref     (* output: number of resolution steps during last proof replay *)
54   val rawsat_thm: Proof.context -> cterm list -> thm
55   val rawsat_tac: Proof.context -> int -> tactic
56   val sat_tac: Proof.context -> int -> tactic
57   val satx_tac: Proof.context -> int -> tactic
58 end
60 functor SATFunc(cnf : CNF) : SAT =
61 struct
63 val trace_sat = Unsynchronized.ref false;
65 val solver = Unsynchronized.ref "zchaff_with_proofs";
66   (*see HOL/Tools/sat_solver.ML for possible values*)
68 val counter = Unsynchronized.ref 0;
70 val resolution_thm =
71   @{lemma "(P ==> False) ==> (~ P ==> False) ==> False" by (rule case_split)}
73 val cP = cterm_of @{theory} (Var (("P", 0), HOLogic.boolT));
75 (* ------------------------------------------------------------------------- *)
76 (* lit_ord: an order on integers that considers their absolute values only,  *)
77 (*      thereby treating integers that represent the same atom (positively   *)
78 (*      or negatively) as equal                                              *)
79 (* ------------------------------------------------------------------------- *)
81 fun lit_ord (i, j) = int_ord (abs i, abs j);
83 (* ------------------------------------------------------------------------- *)
84 (* CLAUSE: during proof reconstruction, three kinds of clauses are           *)
85 (*      distinguished:                                                       *)
86 (*      1. NO_CLAUSE: clause not proved (yet)                                *)
87 (*      2. ORIG_CLAUSE: a clause as it occurs in the original problem        *)
88 (*      3. RAW_CLAUSE: a raw clause, with additional precomputed information *)
89 (*         (a mapping from int's to its literals) for faster proof           *)
90 (*         reconstruction                                                    *)
91 (* ------------------------------------------------------------------------- *)
93 datatype CLAUSE =
94     NO_CLAUSE
95   | ORIG_CLAUSE of thm
96   | RAW_CLAUSE of thm * (int * cterm) list;
98 (* ------------------------------------------------------------------------- *)
99 (* resolve_raw_clauses: given a non-empty list of raw clauses, we fold       *)
100 (*      resolution over the list (starting with its head), i.e. with two raw *)
101 (*      clauses                                                              *)
102 (*        [P, x1, ..., a, ..., xn] |- False                                  *)
103 (*      and                                                                  *)
104 (*        [Q, y1, ..., a', ..., ym] |- False                                 *)
105 (*      (where a and a' are dual to each other), we convert the first clause *)
106 (*      to                                                                   *)
107 (*        [P, x1, ..., xn] |- a ==> False ,                                  *)
108 (*      the second clause to                                                 *)
109 (*        [Q, y1, ..., ym] |- a' ==> False                                   *)
110 (*      and then perform resolution with                                     *)
111 (*        [| ?P ==> False; ~?P ==> False |] ==> False                        *)
112 (*      to produce                                                           *)
113 (*        [P, Q, x1, ..., xn, y1, ..., ym] |- False                          *)
114 (*      Each clause is accompanied with an association list mapping integers *)
115 (*      (positive for positive literals, negative for negative literals, and *)
116 (*      the same absolute value for dual literals) to the actual literals as *)
117 (*      cterms.                                                              *)
118 (* ------------------------------------------------------------------------- *)
120 fun resolve_raw_clauses [] =
121       raise THM ("Proof reconstruction failed (empty list of resolvents)!", 0, [])
122   | resolve_raw_clauses (c::cs) =
123       let
124         (* merges two sorted lists wrt. 'lit_ord', suppressing duplicates *)
125         fun merge xs [] = xs
126           | merge [] ys = ys
127           | merge (x :: xs) (y :: ys) =
128               (case (lit_ord o pairself fst) (x, y) of
129                 LESS => x :: merge xs (y :: ys)
130               | EQUAL => x :: merge xs ys
131               | GREATER => y :: merge (x :: xs) ys)
133         (* find out which two hyps are used in the resolution *)
134         fun find_res_hyps ([], _) _ =
135               raise THM ("Proof reconstruction failed (no literal for resolution)!", 0, [])
136           | find_res_hyps (_, []) _ =
137               raise THM ("Proof reconstruction failed (no literal for resolution)!", 0, [])
138           | find_res_hyps (h1 :: hyps1, h2 :: hyps2) acc =
139               (case (lit_ord o pairself fst) (h1, h2) of
140                 LESS  => find_res_hyps (hyps1, h2 :: hyps2) (h1 :: acc)
141               | EQUAL =>
142                   let
143                     val (i1, chyp1) = h1
144                     val (i2, chyp2) = h2
145                   in
146                     if i1 = ~ i2 then
147                       (i1 < 0, chyp1, chyp2, rev acc @ merge hyps1 hyps2)
148                     else (* i1 = i2 *)
149                       find_res_hyps (hyps1, hyps2) (h1 :: acc)
150                   end
151           | GREATER => find_res_hyps (h1 :: hyps1, hyps2) (h2 :: acc))
153         fun resolution (c1, hyps1) (c2, hyps2) =
154           let
155             val _ =
156               if ! trace_sat then  (* FIXME !? *)
157                 tracing ("Resolving clause: " ^ Display.string_of_thm_without_context c1 ^
158                   " (hyps: " ^ ML_Syntax.print_list
159                     (Syntax.string_of_term_global (theory_of_thm c1)) (Thm.hyps_of c1)
160                   ^ ")\nwith clause: " ^ Display.string_of_thm_without_context c2 ^
161                   " (hyps: " ^ ML_Syntax.print_list
162                     (Syntax.string_of_term_global (theory_of_thm c2)) (Thm.hyps_of c2) ^ ")")
163               else ()
165             (* the two literals used for resolution *)
166             val (hyp1_is_neg, hyp1, hyp2, new_hyps) = find_res_hyps (hyps1, hyps2) []
168             val c1' = Thm.implies_intr hyp1 c1  (* Gamma1 |- hyp1 ==> False *)
169             val c2' = Thm.implies_intr hyp2 c2  (* Gamma2 |- hyp2 ==> False *)
171             val res_thm =  (* |- (lit ==> False) ==> (~lit ==> False) ==> False *)
172               let
173                 val cLit =
174                   snd (Thm.dest_comb (if hyp1_is_neg then hyp2 else hyp1))  (* strip Trueprop *)
175               in
176                 Thm.instantiate ([], [(cP, cLit)]) resolution_thm
177               end
179             val _ =
180               if !trace_sat then
181                 tracing ("Resolution theorem: " ^ Display.string_of_thm_without_context res_thm)
182               else ()
184             (* Gamma1, Gamma2 |- False *)
185             val c_new =
186               Thm.implies_elim
187                 (Thm.implies_elim res_thm (if hyp1_is_neg then c2' else c1'))
188                 (if hyp1_is_neg then c1' else c2')
190             val _ =
191               if !trace_sat then
192                 tracing ("Resulting clause: " ^ Display.string_of_thm_without_context c_new ^
193                   " (hyps: " ^ ML_Syntax.print_list
194                       (Syntax.string_of_term_global (theory_of_thm c_new)) (Thm.hyps_of c_new) ^ ")")
195               else ()
196             val _ = Unsynchronized.inc counter
197           in
198             (c_new, new_hyps)
199           end
200         in
201           fold resolution cs c
202         end;
204 (* ------------------------------------------------------------------------- *)
205 (* replay_proof: replays the resolution proof returned by the SAT solver;    *)
206 (*      cf. SatSolver.proof for details of the proof format.  Updates the    *)
207 (*      'clauses' array with derived clauses, and returns the derived clause *)
208 (*      at index 'empty_id' (which should just be "False" if proof           *)
209 (*      reconstruction was successful, with the used clauses as hyps).       *)
210 (*      'atom_table' must contain an injective mapping from all atoms that   *)
211 (*      occur (as part of a literal) in 'clauses' to positive integers.      *)
212 (* ------------------------------------------------------------------------- *)
214 fun replay_proof atom_table clauses (clause_table, empty_id) =
215   let
216     fun index_of_literal chyp =
217       (case (HOLogic.dest_Trueprop o Thm.term_of) chyp of
218         (Const (@{const_name Not}, _) \$ atom) =>
219           SOME (~ (the (Termtab.lookup atom_table atom)))
220       | atom => SOME (the (Termtab.lookup atom_table atom)))
221       handle TERM _ => NONE;  (* 'chyp' is not a literal *)
223     fun prove_clause id =
224       (case Array.sub (clauses, id) of
225         RAW_CLAUSE clause => clause
226       | ORIG_CLAUSE thm =>
227           (* convert the original clause *)
228           let
229             val _ =
230               if !trace_sat then tracing ("Using original clause #" ^ string_of_int id) else ()
231             val raw = cnf.clause2raw_thm thm
232             val hyps = sort (lit_ord o pairself fst) (map_filter (fn chyp =>
233               Option.map (rpair chyp) (index_of_literal chyp)) (#hyps (Thm.crep_thm raw)))
234             val clause = (raw, hyps)
235             val _ = Array.update (clauses, id, RAW_CLAUSE clause)
236           in
237             clause
238           end
239       | NO_CLAUSE =>
240           (* prove the clause, using information from 'clause_table' *)
241           let
242             val _ =
243               if !trace_sat then tracing ("Proving clause #" ^ string_of_int id ^ " ...") else ()
244             val ids = the (Inttab.lookup clause_table id)
245             val clause = resolve_raw_clauses (map prove_clause ids)
246             val _ = Array.update (clauses, id, RAW_CLAUSE clause)
247             val _ =
248               if !trace_sat then
249                 tracing ("Replay chain successful; clause stored at #" ^ string_of_int id)
250               else ()
251           in
252             clause
253           end)
255     val _ = counter := 0
256     val empty_clause = fst (prove_clause empty_id)
257     val _ =
258       if !trace_sat then
259         tracing ("Proof reconstruction successful; " ^
260           string_of_int (!counter) ^ " resolution step(s) total.")
261       else ()
262   in
263     empty_clause
264   end;
266 (* ------------------------------------------------------------------------- *)
267 (* string_of_prop_formula: return a human-readable string representation of  *)
268 (*      a 'prop_formula' (just for tracing)                                  *)
269 (* ------------------------------------------------------------------------- *)
271 fun string_of_prop_formula Prop_Logic.True = "True"
272   | string_of_prop_formula Prop_Logic.False = "False"
273   | string_of_prop_formula (Prop_Logic.BoolVar i) = "x" ^ string_of_int i
274   | string_of_prop_formula (Prop_Logic.Not fm) = "~" ^ string_of_prop_formula fm
275   | string_of_prop_formula (Prop_Logic.Or (fm1, fm2)) =
276       "(" ^ string_of_prop_formula fm1 ^ " v " ^ string_of_prop_formula fm2 ^ ")"
277   | string_of_prop_formula (Prop_Logic.And (fm1, fm2)) =
278       "(" ^ string_of_prop_formula fm1 ^ " & " ^ string_of_prop_formula fm2 ^ ")";
280 (* ------------------------------------------------------------------------- *)
281 (* rawsat_thm: run external SAT solver with the given clauses.  Reconstructs *)
282 (*      a proof from the resulting proof trace of the SAT solver.  The       *)
283 (*      theorem returned is just "False" (with some of the given clauses as  *)
284 (*      hyps).                                                               *)
285 (* ------------------------------------------------------------------------- *)
287 fun rawsat_thm ctxt clauses =
288   let
289     (* remove premises that equal "True" *)
290     val clauses' = filter (fn clause =>
291       (not_equal @{term True} o HOLogic.dest_Trueprop o Thm.term_of) clause
292         handle TERM ("dest_Trueprop", _) => true) clauses
293     (* remove non-clausal premises -- of course this shouldn't actually   *)
294     (* remove anything as long as 'rawsat_tac' is only called after the   *)
295     (* premises have been converted to clauses                            *)
296     val clauses'' = filter (fn clause =>
297       ((cnf.is_clause o HOLogic.dest_Trueprop o Thm.term_of) clause
298         handle TERM ("dest_Trueprop", _) => false)
299       orelse (
300         warning ("Ignoring non-clausal premise " ^ Syntax.string_of_term ctxt (Thm.term_of clause));
301         false)) clauses'
302     (* remove trivial clauses -- this is necessary because zChaff removes *)
303     (* trivial clauses during preprocessing, and otherwise our clause     *)
304     (* numbering would be off                                             *)
305     val nontrivial_clauses =
306       filter (not o cnf.clause_is_trivial o HOLogic.dest_Trueprop o Thm.term_of) clauses''
307     (* sort clauses according to the term order -- an optimization,       *)
308     (* useful because forming the union of hypotheses, as done by         *)
309     (* Conjunction.intr_balanced and fold Thm.weaken below, is quadratic for *)
310     (* terms sorted in descending order, while only linear for terms      *)
311     (* sorted in ascending order                                          *)
312     val sorted_clauses = sort (Term_Ord.fast_term_ord o pairself Thm.term_of) nontrivial_clauses
313     val _ =
314       if !trace_sat then
315         tracing ("Sorted non-trivial clauses:\n" ^
316           cat_lines (map (Syntax.string_of_term ctxt o Thm.term_of) sorted_clauses))
317       else ()
318     (* translate clauses from HOL terms to Prop_Logic.prop_formula *)
319     val (fms, atom_table) =
320       fold_map (Prop_Logic.prop_formula_of_term o HOLogic.dest_Trueprop o Thm.term_of)
321         sorted_clauses Termtab.empty
322     val _ =
323       if !trace_sat then
324         tracing ("Invoking SAT solver on clauses:\n" ^ cat_lines (map string_of_prop_formula fms))
325       else ()
326     val fm = Prop_Logic.all fms
327     (* unit -> Thm.thm *)
328     fun make_quick_and_dirty_thm () =
329       let
330         val _ =
331           if !trace_sat then
332             tracing "'quick_and_dirty' is set: proof reconstruction skipped, using oracle instead."
333           else ()
334         val False_thm = Skip_Proof.make_thm @{theory} (HOLogic.Trueprop \$ @{term False})
335       in
336         (* 'fold Thm.weaken (rev sorted_clauses)' is linear, while 'fold    *)
337         (* Thm.weaken sorted_clauses' would be quadratic, since we sorted   *)
338         (* clauses in ascending order (which is linear for                  *)
339         (* 'Conjunction.intr_balanced', used below)                         *)
340         fold Thm.weaken (rev sorted_clauses) False_thm
341       end
342   in
343     case
344       let val the_solver = ! solver
345       in (tracing ("Invoking solver " ^ the_solver); SatSolver.invoke_solver the_solver fm) end
346     of
347       SatSolver.UNSATISFIABLE (SOME (clause_table, empty_id)) =>
348        (if !trace_sat then
349           tracing ("Proof trace from SAT solver:\n" ^
350             "clauses: " ^ ML_Syntax.print_list
351               (ML_Syntax.print_pair ML_Syntax.print_int (ML_Syntax.print_list ML_Syntax.print_int))
352               (Inttab.dest clause_table) ^ "\n" ^
353             "empty clause: " ^ string_of_int empty_id)
354         else ();
355         if !quick_and_dirty then
356           make_quick_and_dirty_thm ()
357         else
358           let
359             (* optimization: convert the given clauses to "[c_1 && ... && c_n] |- c_i";  *)
360             (*               this avoids accumulation of hypotheses during resolution    *)
361             (* [c_1, ..., c_n] |- c_1 && ... && c_n *)
362             val clauses_thm = Conjunction.intr_balanced (map Thm.assume sorted_clauses)
363             (* [c_1 && ... && c_n] |- c_1 && ... && c_n *)
364             val cnf_cterm = cprop_of clauses_thm
365             val cnf_thm = Thm.assume cnf_cterm
366             (* [[c_1 && ... && c_n] |- c_1, ..., [c_1 && ... && c_n] |- c_n] *)
367             val cnf_clauses = Conjunction.elim_balanced (length sorted_clauses) cnf_thm
368             (* initialize the clause array with the given clauses *)
369             val max_idx = the (Inttab.max_key clause_table)
370             val clause_arr = Array.array (max_idx + 1, NO_CLAUSE)
371             val _ =
372               fold (fn thm => fn idx => (Array.update (clause_arr, idx, ORIG_CLAUSE thm); idx+1))
373                 cnf_clauses 0
374             (* replay the proof to derive the empty clause *)
375             (* [c_1 && ... && c_n] |- False *)
376             val raw_thm = replay_proof atom_table clause_arr (clause_table, empty_id)
377           in
378             (* [c_1, ..., c_n] |- False *)
379             Thm.implies_elim (Thm.implies_intr cnf_cterm raw_thm) clauses_thm
380           end)
381     | SatSolver.UNSATISFIABLE NONE =>
382         if !quick_and_dirty then
383          (warning "SAT solver claims the formula to be unsatisfiable, but did not provide a proof";
384           make_quick_and_dirty_thm ())
385         else
386           raise THM ("SAT solver claims the formula to be unsatisfiable, but did not provide a proof", 0, [])
387     | SatSolver.SATISFIABLE assignment =>
388         let
389           val msg =
390             "SAT solver found a countermodel:\n" ^
391             (commas o map (fn (term, idx) =>
392                 Syntax.string_of_term_global @{theory} term ^ ": " ^
393                   (case assignment idx of NONE => "arbitrary"
394                   | SOME true => "true" | SOME false => "false")))
395               (Termtab.dest atom_table)
396         in
397           raise THM (msg, 0, [])
398         end
399     | SatSolver.UNKNOWN =>
400         raise THM ("SAT solver failed to decide the formula", 0, [])
401   end;
403 (* ------------------------------------------------------------------------- *)
404 (* Tactics                                                                   *)
405 (* ------------------------------------------------------------------------- *)
407 (* ------------------------------------------------------------------------- *)
408 (* rawsat_tac: solves the i-th subgoal of the proof state; this subgoal      *)
409 (*      should be of the form                                                *)
410 (*        [| c1; c2; ...; ck |] ==> False                                    *)
411 (*      where each cj is a non-empty clause (i.e. a disjunction of literals) *)
412 (*      or "True"                                                            *)
413 (* ------------------------------------------------------------------------- *)
415 fun rawsat_tac ctxt i =
416   Subgoal.FOCUS (fn {context = ctxt', prems, ...} =>
417     rtac (rawsat_thm ctxt' (map cprop_of prems)) 1) ctxt i;
419 (* ------------------------------------------------------------------------- *)
420 (* pre_cnf_tac: converts the i-th subgoal                                    *)
421 (*        [| A1 ; ... ; An |] ==> B                                          *)
422 (*      to                                                                   *)
423 (*        [| A1; ... ; An ; ~B |] ==> False                                  *)
424 (*      (handling meta-logical connectives in B properly before negating),   *)
425 (*      then replaces meta-logical connectives in the premises (i.e. "==>",  *)
426 (*      "!!" and "==") by connectives of the HOL object-logic (i.e. by       *)
427 (*      "-->", "!", and "="), then performs beta-eta-normalization on the    *)
428 (*      subgoal                                                              *)
429 (* ------------------------------------------------------------------------- *)
431 val pre_cnf_tac =
432   rtac ccontr THEN'
433   Object_Logic.atomize_prems_tac THEN'
434   CONVERSION Drule.beta_eta_conversion;
436 (* ------------------------------------------------------------------------- *)
437 (* cnfsat_tac: checks if the empty clause "False" occurs among the premises; *)
438 (*      if not, eliminates conjunctions (i.e. each clause of the CNF formula *)
439 (*      becomes a separate premise), then applies 'rawsat_tac' to solve the  *)
440 (*      subgoal                                                              *)
441 (* ------------------------------------------------------------------------- *)
443 fun cnfsat_tac ctxt i =
444   (etac FalseE i) ORELSE (REPEAT_DETERM (etac conjE i) THEN rawsat_tac ctxt i);
446 (* ------------------------------------------------------------------------- *)
447 (* cnfxsat_tac: checks if the empty clause "False" occurs among the          *)
448 (*      premises; if not, eliminates conjunctions (i.e. each clause of the   *)
449 (*      CNF formula becomes a separate premise) and existential quantifiers, *)
450 (*      then applies 'rawsat_tac' to solve the subgoal                       *)
451 (* ------------------------------------------------------------------------- *)
453 fun cnfxsat_tac ctxt i =
454   (etac FalseE i) ORELSE
455     (REPEAT_DETERM (etac conjE i ORELSE etac exE i) THEN rawsat_tac ctxt i);
457 (* ------------------------------------------------------------------------- *)
458 (* sat_tac: tactic for calling an external SAT solver, taking as input an    *)
459 (*      arbitrary formula.  The input is translated to CNF, possibly causing *)
460 (*      an exponential blowup.                                               *)
461 (* ------------------------------------------------------------------------- *)
463 fun sat_tac ctxt i =
464   pre_cnf_tac i THEN cnf.cnf_rewrite_tac ctxt i THEN cnfsat_tac ctxt i;
466 (* ------------------------------------------------------------------------- *)
467 (* satx_tac: tactic for calling an external SAT solver, taking as input an   *)
468 (*      arbitrary formula.  The input is translated to CNF, possibly         *)
469 (*      introducing new literals.                                            *)
470 (* ------------------------------------------------------------------------- *)
472 fun satx_tac ctxt i =
473   pre_cnf_tac i THEN cnf.cnfx_rewrite_tac ctxt i THEN cnfxsat_tac ctxt i;
475 end;