hashing to eliminate the output of duplicate clauses
authorpaulson
Fri, 16 Dec 2005 12:15:54 +0100
changeset 18420 9470061ab283
parent 18419 30f4b1eda7cd
child 18421 464c93701351
hashing to eliminate the output of duplicate clauses
src/HOL/IsaMakefile
src/HOL/ROOT.ML
src/HOL/Tools/ATP/res_clasimpset.ML
src/HOL/Tools/res_clause.ML
--- a/src/HOL/IsaMakefile	Fri Dec 16 11:51:24 2005 +0100
+++ b/src/HOL/IsaMakefile	Fri Dec 16 12:15:54 2005 +0100
@@ -90,6 +90,7 @@
   Lattice_Locales.thy List.ML List.thy Main.ML Main.thy Map.thy			\
   Nat.ML Nat.thy NatArith.thy OrderedGroup.ML OrderedGroup.thy			\
   Orderings.ML Orderings.thy Power.thy PreList.thy Product_Type.thy		\
+  $(SRC)/Pure/General/hashtable.ML \
   ROOT.ML Recdef.thy Reconstruction.thy Record.thy Refute.thy			\
   Relation.ML Relation.thy Relation_Power.thy Ring_and_Field.thy SAT.thy Set.ML		\
   Set.thy SetInterval.thy Sum_Type.thy Tools/ATP/AtpCommunication.ML		\
--- a/src/HOL/ROOT.ML	Fri Dec 16 11:51:24 2005 +0100
+++ b/src/HOL/ROOT.ML	Fri Dec 16 12:15:54 2005 +0100
@@ -11,6 +11,7 @@
 
 use "hologic.ML";
 
+use "~~/src/Pure/General/hashtable.ML";
 use "~~/src/Provers/splitter.ML";
 use "~~/src/Provers/hypsubst.ML";
 use "~~/src/Provers/induct_method.ML";
--- a/src/HOL/Tools/ATP/res_clasimpset.ML	Fri Dec 16 11:51:24 2005 +0100
+++ b/src/HOL/Tools/ATP/res_clasimpset.ML	Fri Dec 16 12:15:54 2005 +0100
@@ -141,36 +141,56 @@
 	multi (clause, theorem) num_of_cls []
     end;
     
-fun get_simpset_thms ctxt goal clas =
-  let val ctab = fold_rev Symtab.update clas Symtab.empty
-      fun unused (s,_) = not (Symtab.defined ctab s)
-  in  ResAxioms.clausify_rules_pairs 
-	(filter unused
-	  (map put_name_pair 
-	    (ReduceAxiomsN.relevant_filter (!relevant) goal
-	      (ResAxioms.simpset_rules_of_ctxt ctxt))))
-  end;
-		  
+(*Hashing to detect duplicate and variant clauses, e.g. from the [iff] attribute
+Some primes from http://primes.utm.edu/:
+   1823   1831   1847   1861   1867   1871   1873   1877   1879   1889 
+   1901   1907   1913   1931   1933   1949   1951   1973   1979   1987 
+   1993   1997   1999   2003   2011   2017   2027   2029   2039   2053 
+   2063   2069   2081   2083   2087   2089   2099   2111   2113   2129 
+*)
+
+exception HASH_CLAUSE;
+
+(*Create a hash table for clauses, of the given size*)
+fun mk_clause_table size =
+      Hashtable.create{hash = ResClause.hash1_clause, 
+		       exn = HASH_CLAUSE,
+		       == = ResClause.clause_eq, 
+		       size = size};
+
+(*Insert x only if fresh*)
+fun insert_new ht (x,y) = ignore (Hashtable.lookup ht x)
+            handle HASH_CLAUSE => Hashtable.insert ht (x,y); 
+
+(*Use a hash table to eliminate duplicates from xs*)
+fun make_unique ht xs = (app (insert_new ht) xs;  Hashtable.map Library.I ht);
 
 (*Write out the claset and simpset rules of the supplied theory.
   FIXME: argument "goal" is a hack to allow relevance filtering.
   To reduce the number of clauses produced, set ResClasimp.relevant:=1*)
 fun get_clasimp_lemmas ctxt goal = 
-    let val claset_thms =
-		map put_name_pair
-		  (ReduceAxiomsN.relevant_filter (!relevant) goal 
-		    (ResAxioms.claset_rules_of_ctxt ctxt))
-	val claset_cls_thms = ResAxioms.clausify_rules_pairs claset_thms
-	val simpset_cls_thms = 
-	      if !use_simpset then get_simpset_thms ctxt goal claset_thms
-	      else []
-	val cls_thms_list = List.concat (claset_cls_thms@simpset_cls_thms)
-	(* Identify the set of clauses to be written out *)
-	val clauses = map #1(cls_thms_list);
-	val cls_nums = map ResClause.num_of_clauses clauses;
-        val whole_list = List.concat 
-              (map clause_numbering (ListPair.zip (cls_thms_list, cls_nums)));
- 	
+  let val claset_thms =
+	    map put_name_pair
+	      (ReduceAxiomsN.relevant_filter (!relevant) goal 
+		(ResAxioms.claset_rules_of_ctxt ctxt))
+      val claset_cls_thms = ResAxioms.clausify_rules_pairs claset_thms
+      val simpset_cls_thms = 
+	    if !use_simpset then 
+	       ResAxioms.clausify_rules_pairs 
+		  (map put_name_pair 
+		    (ReduceAxiomsN.relevant_filter (!relevant) goal
+		      (ResAxioms.simpset_rules_of_ctxt ctxt)))
+	    else []
+      val cls_thms_list = make_unique (mk_clause_table 2129) 
+                                      (List.concat (simpset_cls_thms@claset_cls_thms))
+      (* Identify the set of clauses to be written out *)
+      val clauses = map #1(cls_thms_list);
+      val cls_nums = map ResClause.num_of_clauses clauses;
+      (*Note: in every case, cls_num = 1.  I think that only conjecture clauses
+	can have any other value.*)
+      val whole_list = List.concat 
+	    (map clause_numbering (ListPair.zip (cls_thms_list, cls_nums)));
+      
   in  (* create array of put clausename, number pairs into it *)
       (Array.fromList whole_list, clauses)
   end;
--- a/src/HOL/Tools/res_clause.ML	Fri Dec 16 11:51:24 2005 +0100
+++ b/src/HOL/Tools/res_clause.ML	Fri Dec 16 12:15:54 2005 +0100
@@ -69,9 +69,13 @@
   val gen_tptp_type_cls : int * string * string * string * int -> string
   val tptp_of_typeLit : type_literal -> string
 
+  (*for hashing*)
+  val clause_eq : clause * clause -> bool
+  val hash1_clause : clause -> word
+
   end;
 
-structure ResClause: RES_CLAUSE =
+structure ResClause : RES_CLAUSE =
 struct
 
 (* Added for typed equality *)
@@ -412,7 +416,6 @@
 	   (union_all (ts1::ts2), 
 	    union_all(funcs::funcs')))
       end
-  | term_of t = raise CLAUSE("Function Not First Order 4", t)
 and terms_of ts =  
       let val (args, ts_funcs) = ListPair.unzip (map term_of ts)
       in
@@ -505,21 +508,16 @@
   | term_ord (UVar(_,_),_) = LESS
   | term_ord (Fun(_,_,_),UVar(_)) = GREATER
   | term_ord (Fun(f1,tps1,tms1),Fun(f2,tps2,tms2)) = 
-    let val fn_ord = string_ord (f1,f2)
-    in
-	case fn_ord of EQUAL => 
-		       let val tms_ord = terms_ord (tms1,tms2)
-		       in
-			   case tms_ord of EQUAL => types_ord (tps1,tps2)
-					 | _ => tms_ord
-		       end
-		     | _ => fn_ord
-    end
+     (case string_ord (f1,f2) of
+         EQUAL => 
+	   (case terms_ord (tms1,tms2) of EQUAL => types_ord (tps1,tps2)
+	      | tms_ord => tms_ord)
+       | fn_ord => fn_ord)
 
 and
 
-terms_ord ([],[]) = EQUAL
-  | terms_ord (tms1,tms2) = list_ord term_ord (tms1,tms2);
+  terms_ord ([],[]) = EQUAL
+    | terms_ord (tms1,tms2) = list_ord term_ord (tms1,tms2);
 
 
 
@@ -541,6 +539,7 @@
 
 fun sort_lits lits = sort literal_ord lits;
 
+
 (********** clause equivalence ******************)
 
 fun check_var_pairs (x,y) [] = 0 
@@ -550,7 +549,6 @@
 	if (x = u) orelse (y = v) then 2 (*conflict*)
 	else check_var_pairs (x,y) w;
 
-
 fun type_eq (AtomV(v1),AtomV(v2)) (vars,tvars) =
     (case check_var_pairs (v1,v2) tvars of 0 => (true,(vars,(v1,v2)::tvars))
 					 | 1 => (true,(vars,tvars))
@@ -559,24 +557,24 @@
   | type_eq (AtomF(f1),AtomF(f2)) vtvars = (f1=f2,vtvars)
   | type_eq (AtomF(_),_) vtvars = (false,vtvars)
   | type_eq (Comp(con1,args1),Comp(con2,args2)) vtvars =
-    let val (eq1,vtvars1) = 
-	    if (con1 = con2) then types_eq (args1,args2) vtvars
-	    else (false,vtvars)
-    in
-	(eq1,vtvars1)
-    end
+      let val (eq1,vtvars1) = 
+	      if con1 = con2 then types_eq (args1,args2) vtvars
+	      else (false,vtvars)
+      in
+	  (eq1,vtvars1)
+      end
   | type_eq (Comp(_,_),_) vtvars = (false,vtvars)
 
 and
 
-types_eq ([],[]) vtvars = (true,vtvars)
-| types_eq (tp1::tps1,tp2::tps2) vtvars =
-  let val (eq1,vtvars1) = type_eq (tp1,tp2) vtvars
-      val (eq2,vtvars2) = if eq1 then types_eq (tps1,tps2) vtvars1
-			  else (eq1,vtvars1)
-  in
-      (eq2,vtvars2)
-  end;
+    types_eq ([],[]) vtvars = (true,vtvars)
+  | types_eq (tp1::tps1,tp2::tps2) vtvars =
+      let val (eq1,vtvars1) = type_eq (tp1,tp2) vtvars
+	  val (eq2,vtvars2) = if eq1 then types_eq (tps1,tps2) vtvars1
+			      else (eq1,vtvars1)
+      in
+	  (eq2,vtvars2)
+      end;
 
 
 fun term_eq (UVar(v1,tp1),UVar(v2,tp2)) (vars,tvars) =
@@ -585,27 +583,27 @@
 					| 2 => (false,(vars,tvars)))
   | term_eq (UVar(_,_),_) vtvars = (false,vtvars)
   | term_eq (Fun(f1,tps1,tms1),Fun(f2,tps2,tms2)) vtvars =
-    let val (eq1,vtvars1) = 
-	    if (f1 = f2) then terms_eq (tms1,tms2) vtvars
-	    else (false,vtvars)
-	val (eq2,vtvars2) =
-	    if eq1 then types_eq (tps1,tps2) vtvars1
-	    else (eq1,vtvars1)
-    in
-	(eq2,vtvars2)
-    end
+      let val (eq1,vtvars1) = 
+	      if f1 = f2 then terms_eq (tms1,tms2) vtvars
+	      else (false,vtvars)
+	  val (eq2,vtvars2) =
+	      if eq1 then types_eq (tps1,tps2) vtvars1
+	      else (eq1,vtvars1)
+      in
+	  (eq2,vtvars2)
+      end
   | term_eq (Fun(_,_,_),_) vtvars = (false,vtvars)
 
 and
 
-terms_eq ([],[]) vtvars = (true,vtvars)
-| terms_eq (tm1::tms1,tm2::tms2) vtvars =
-  let val (eq1,vtvars1) = term_eq (tm1,tm2) vtvars
-      val (eq2,vtvars2) = if eq1 then terms_eq (tms1,tms2) vtvars1
-				 else (eq1,vtvars1)
-  in
-      (eq2,vtvars2)
-  end;
+    terms_eq ([],[]) vtvars = (true,vtvars)
+  | terms_eq (tm1::tms1,tm2::tms2) vtvars =
+      let val (eq1,vtvars1) = term_eq (tm1,tm2) vtvars
+	  val (eq2,vtvars2) = if eq1 then terms_eq (tms1,tms2) vtvars1
+				     else (eq1,vtvars1)
+      in
+	  (eq2,vtvars2)
+      end;
 					     
 
 fun pred_eq (Predicate(predname1,tps1,tms1),Predicate(predname2,tps2,tms2)) vtvars =
@@ -634,14 +632,30 @@
     end;
 
 
-fun cls_eq (cls1,cls2) =
+(*Equality of two clauses up to variable renaming*)
+fun clause_eq (cls1,cls2) =
     let val lits1 = get_literals cls1
 	val lits2 = get_literals cls2
     in
-	(length lits1 = length lits2) andalso (fst (lits_eq (lits1,lits2) ([],[])))
+	length lits1 = length lits2 andalso #1 (lits_eq (lits1,lits2) ([],[]))
     end;
 
 
+(*** Hash function for clauses ***)
+
+val xor_words = List.foldl Word.xorb 0w0;
+
+fun hash_term (UVar(_,_), w) = w
+  | hash_term (Fun(f,tps,args), w) = 
+      List.foldl hash_term (Hashtable.hash_string (f,w)) args;
+  
+fun hash_pred (Predicate(pn,_,args), w) = 
+    List.foldl hash_term (Hashtable.hash_string (pn,w)) args;
+    
+fun hash1_literal (Literal(true,pred,_)) = hash_pred (pred, 0w0)
+  | hash1_literal (Literal(false,pred,_)) = Word.notb (hash_pred (pred, 0w0));
+  
+fun hash1_clause clause = xor_words (map hash1_literal (get_literals clause));
 
 
 (* FIX: not sure what to do with these funcs *)
@@ -808,7 +822,8 @@
                    premLits = map make_TVarLit false_tvars_srts'}
    end;
     
-(*The number of clauses generated from cls, including type clauses*)
+(*The number of clauses generated from cls, including type clauses. It's always 1
+  except for conjecture clauses.*)
 fun num_of_clauses (Clause cls) =
     let val num_tfree_lits = 
 	      if !keep_types then length (#tfree_type_literals cls)
@@ -886,17 +901,14 @@
   | string_of_term (Fun (name,typs,terms)) = 
       let val terms_as_strings = map string_of_term terms
 	  val typs' = if !keep_types then map string_of_fol_type typs else []
-      in  name ^ (paren_pack (terms_as_strings @ typs'))  end
-  | string_of_term _ = error "string_of_term";      
+      in  name ^ (paren_pack (terms_as_strings @ typs'))  end;
 
 (* before output the string of the predicate, check if the predicate corresponds to an equality or not. *)
 fun string_of_predicate (Predicate("equal",[typ],terms)) = string_of_equality(typ,terms)
   | string_of_predicate (Predicate(name,typs,terms)) = 
       let val terms_as_strings = map string_of_term terms
 	  val typs' = if !keep_types then map string_of_fol_type typs else []
-      in  name ^ (paren_pack (terms_as_strings @ typs'))  end
-  | string_of_predicate _ = error "string_of_predicate";      
-
+      in  name ^ (paren_pack (terms_as_strings @ typs'))  end;
 
 fun string_of_clausename (cls_id,ax_name) = 
     clause_prefix ^ ascii_of ax_name ^ "_" ^ Int.toString cls_id;