hashing to eliminate the output of duplicate clauses
authorpaulson
Fri Dec 16 12:15:54 2005 +0100 (2005-12-16)
changeset 184209470061ab283
parent 18419 30f4b1eda7cd
child 18421 464c93701351
hashing to eliminate the output of duplicate clauses
src/HOL/IsaMakefile
src/HOL/ROOT.ML
src/HOL/Tools/ATP/res_clasimpset.ML
src/HOL/Tools/res_clause.ML
     1.1 --- a/src/HOL/IsaMakefile	Fri Dec 16 11:51:24 2005 +0100
     1.2 +++ b/src/HOL/IsaMakefile	Fri Dec 16 12:15:54 2005 +0100
     1.3 @@ -90,6 +90,7 @@
     1.4    Lattice_Locales.thy List.ML List.thy Main.ML Main.thy Map.thy			\
     1.5    Nat.ML Nat.thy NatArith.thy OrderedGroup.ML OrderedGroup.thy			\
     1.6    Orderings.ML Orderings.thy Power.thy PreList.thy Product_Type.thy		\
     1.7 +  $(SRC)/Pure/General/hashtable.ML \
     1.8    ROOT.ML Recdef.thy Reconstruction.thy Record.thy Refute.thy			\
     1.9    Relation.ML Relation.thy Relation_Power.thy Ring_and_Field.thy SAT.thy Set.ML		\
    1.10    Set.thy SetInterval.thy Sum_Type.thy Tools/ATP/AtpCommunication.ML		\
     2.1 --- a/src/HOL/ROOT.ML	Fri Dec 16 11:51:24 2005 +0100
     2.2 +++ b/src/HOL/ROOT.ML	Fri Dec 16 12:15:54 2005 +0100
     2.3 @@ -11,6 +11,7 @@
     2.4  
     2.5  use "hologic.ML";
     2.6  
     2.7 +use "~~/src/Pure/General/hashtable.ML";
     2.8  use "~~/src/Provers/splitter.ML";
     2.9  use "~~/src/Provers/hypsubst.ML";
    2.10  use "~~/src/Provers/induct_method.ML";
     3.1 --- a/src/HOL/Tools/ATP/res_clasimpset.ML	Fri Dec 16 11:51:24 2005 +0100
     3.2 +++ b/src/HOL/Tools/ATP/res_clasimpset.ML	Fri Dec 16 12:15:54 2005 +0100
     3.3 @@ -141,36 +141,56 @@
     3.4  	multi (clause, theorem) num_of_cls []
     3.5      end;
     3.6      
     3.7 -fun get_simpset_thms ctxt goal clas =
     3.8 -  let val ctab = fold_rev Symtab.update clas Symtab.empty
     3.9 -      fun unused (s,_) = not (Symtab.defined ctab s)
    3.10 -  in  ResAxioms.clausify_rules_pairs 
    3.11 -	(filter unused
    3.12 -	  (map put_name_pair 
    3.13 -	    (ReduceAxiomsN.relevant_filter (!relevant) goal
    3.14 -	      (ResAxioms.simpset_rules_of_ctxt ctxt))))
    3.15 -  end;
    3.16 -		  
    3.17 +(*Hashing to detect duplicate and variant clauses, e.g. from the [iff] attribute
    3.18 +Some primes from http://primes.utm.edu/:
    3.19 +   1823   1831   1847   1861   1867   1871   1873   1877   1879   1889 
    3.20 +   1901   1907   1913   1931   1933   1949   1951   1973   1979   1987 
    3.21 +   1993   1997   1999   2003   2011   2017   2027   2029   2039   2053 
    3.22 +   2063   2069   2081   2083   2087   2089   2099   2111   2113   2129 
    3.23 +*)
    3.24 +
    3.25 +exception HASH_CLAUSE;
    3.26 +
    3.27 +(*Create a hash table for clauses, of the given size*)
    3.28 +fun mk_clause_table size =
    3.29 +      Hashtable.create{hash = ResClause.hash1_clause, 
    3.30 +		       exn = HASH_CLAUSE,
    3.31 +		       == = ResClause.clause_eq, 
    3.32 +		       size = size};
    3.33 +
    3.34 +(*Insert x only if fresh*)
    3.35 +fun insert_new ht (x,y) = ignore (Hashtable.lookup ht x)
    3.36 +            handle HASH_CLAUSE => Hashtable.insert ht (x,y); 
    3.37 +
    3.38 +(*Use a hash table to eliminate duplicates from xs*)
    3.39 +fun make_unique ht xs = (app (insert_new ht) xs;  Hashtable.map Library.I ht);
    3.40  
    3.41  (*Write out the claset and simpset rules of the supplied theory.
    3.42    FIXME: argument "goal" is a hack to allow relevance filtering.
    3.43    To reduce the number of clauses produced, set ResClasimp.relevant:=1*)
    3.44  fun get_clasimp_lemmas ctxt goal = 
    3.45 -    let val claset_thms =
    3.46 -		map put_name_pair
    3.47 -		  (ReduceAxiomsN.relevant_filter (!relevant) goal 
    3.48 -		    (ResAxioms.claset_rules_of_ctxt ctxt))
    3.49 -	val claset_cls_thms = ResAxioms.clausify_rules_pairs claset_thms
    3.50 -	val simpset_cls_thms = 
    3.51 -	      if !use_simpset then get_simpset_thms ctxt goal claset_thms
    3.52 -	      else []
    3.53 -	val cls_thms_list = List.concat (claset_cls_thms@simpset_cls_thms)
    3.54 -	(* Identify the set of clauses to be written out *)
    3.55 -	val clauses = map #1(cls_thms_list);
    3.56 -	val cls_nums = map ResClause.num_of_clauses clauses;
    3.57 -        val whole_list = List.concat 
    3.58 -              (map clause_numbering (ListPair.zip (cls_thms_list, cls_nums)));
    3.59 - 	
    3.60 +  let val claset_thms =
    3.61 +	    map put_name_pair
    3.62 +	      (ReduceAxiomsN.relevant_filter (!relevant) goal 
    3.63 +		(ResAxioms.claset_rules_of_ctxt ctxt))
    3.64 +      val claset_cls_thms = ResAxioms.clausify_rules_pairs claset_thms
    3.65 +      val simpset_cls_thms = 
    3.66 +	    if !use_simpset then 
    3.67 +	       ResAxioms.clausify_rules_pairs 
    3.68 +		  (map put_name_pair 
    3.69 +		    (ReduceAxiomsN.relevant_filter (!relevant) goal
    3.70 +		      (ResAxioms.simpset_rules_of_ctxt ctxt)))
    3.71 +	    else []
    3.72 +      val cls_thms_list = make_unique (mk_clause_table 2129) 
    3.73 +                                      (List.concat (simpset_cls_thms@claset_cls_thms))
    3.74 +      (* Identify the set of clauses to be written out *)
    3.75 +      val clauses = map #1(cls_thms_list);
    3.76 +      val cls_nums = map ResClause.num_of_clauses clauses;
    3.77 +      (*Note: in every case, cls_num = 1.  I think that only conjecture clauses
    3.78 +	can have any other value.*)
    3.79 +      val whole_list = List.concat 
    3.80 +	    (map clause_numbering (ListPair.zip (cls_thms_list, cls_nums)));
    3.81 +      
    3.82    in  (* create array of put clausename, number pairs into it *)
    3.83        (Array.fromList whole_list, clauses)
    3.84    end;
     4.1 --- a/src/HOL/Tools/res_clause.ML	Fri Dec 16 11:51:24 2005 +0100
     4.2 +++ b/src/HOL/Tools/res_clause.ML	Fri Dec 16 12:15:54 2005 +0100
     4.3 @@ -69,9 +69,13 @@
     4.4    val gen_tptp_type_cls : int * string * string * string * int -> string
     4.5    val tptp_of_typeLit : type_literal -> string
     4.6  
     4.7 +  (*for hashing*)
     4.8 +  val clause_eq : clause * clause -> bool
     4.9 +  val hash1_clause : clause -> word
    4.10 +
    4.11    end;
    4.12  
    4.13 -structure ResClause: RES_CLAUSE =
    4.14 +structure ResClause : RES_CLAUSE =
    4.15  struct
    4.16  
    4.17  (* Added for typed equality *)
    4.18 @@ -412,7 +416,6 @@
    4.19  	   (union_all (ts1::ts2), 
    4.20  	    union_all(funcs::funcs')))
    4.21        end
    4.22 -  | term_of t = raise CLAUSE("Function Not First Order 4", t)
    4.23  and terms_of ts =  
    4.24        let val (args, ts_funcs) = ListPair.unzip (map term_of ts)
    4.25        in
    4.26 @@ -505,21 +508,16 @@
    4.27    | term_ord (UVar(_,_),_) = LESS
    4.28    | term_ord (Fun(_,_,_),UVar(_)) = GREATER
    4.29    | term_ord (Fun(f1,tps1,tms1),Fun(f2,tps2,tms2)) = 
    4.30 -    let val fn_ord = string_ord (f1,f2)
    4.31 -    in
    4.32 -	case fn_ord of EQUAL => 
    4.33 -		       let val tms_ord = terms_ord (tms1,tms2)
    4.34 -		       in
    4.35 -			   case tms_ord of EQUAL => types_ord (tps1,tps2)
    4.36 -					 | _ => tms_ord
    4.37 -		       end
    4.38 -		     | _ => fn_ord
    4.39 -    end
    4.40 +     (case string_ord (f1,f2) of
    4.41 +         EQUAL => 
    4.42 +	   (case terms_ord (tms1,tms2) of EQUAL => types_ord (tps1,tps2)
    4.43 +	      | tms_ord => tms_ord)
    4.44 +       | fn_ord => fn_ord)
    4.45  
    4.46  and
    4.47  
    4.48 -terms_ord ([],[]) = EQUAL
    4.49 -  | terms_ord (tms1,tms2) = list_ord term_ord (tms1,tms2);
    4.50 +  terms_ord ([],[]) = EQUAL
    4.51 +    | terms_ord (tms1,tms2) = list_ord term_ord (tms1,tms2);
    4.52  
    4.53  
    4.54  
    4.55 @@ -541,6 +539,7 @@
    4.56  
    4.57  fun sort_lits lits = sort literal_ord lits;
    4.58  
    4.59 +
    4.60  (********** clause equivalence ******************)
    4.61  
    4.62  fun check_var_pairs (x,y) [] = 0 
    4.63 @@ -550,7 +549,6 @@
    4.64  	if (x = u) orelse (y = v) then 2 (*conflict*)
    4.65  	else check_var_pairs (x,y) w;
    4.66  
    4.67 -
    4.68  fun type_eq (AtomV(v1),AtomV(v2)) (vars,tvars) =
    4.69      (case check_var_pairs (v1,v2) tvars of 0 => (true,(vars,(v1,v2)::tvars))
    4.70  					 | 1 => (true,(vars,tvars))
    4.71 @@ -559,24 +557,24 @@
    4.72    | type_eq (AtomF(f1),AtomF(f2)) vtvars = (f1=f2,vtvars)
    4.73    | type_eq (AtomF(_),_) vtvars = (false,vtvars)
    4.74    | type_eq (Comp(con1,args1),Comp(con2,args2)) vtvars =
    4.75 -    let val (eq1,vtvars1) = 
    4.76 -	    if (con1 = con2) then types_eq (args1,args2) vtvars
    4.77 -	    else (false,vtvars)
    4.78 -    in
    4.79 -	(eq1,vtvars1)
    4.80 -    end
    4.81 +      let val (eq1,vtvars1) = 
    4.82 +	      if con1 = con2 then types_eq (args1,args2) vtvars
    4.83 +	      else (false,vtvars)
    4.84 +      in
    4.85 +	  (eq1,vtvars1)
    4.86 +      end
    4.87    | type_eq (Comp(_,_),_) vtvars = (false,vtvars)
    4.88  
    4.89  and
    4.90  
    4.91 -types_eq ([],[]) vtvars = (true,vtvars)
    4.92 -| types_eq (tp1::tps1,tp2::tps2) vtvars =
    4.93 -  let val (eq1,vtvars1) = type_eq (tp1,tp2) vtvars
    4.94 -      val (eq2,vtvars2) = if eq1 then types_eq (tps1,tps2) vtvars1
    4.95 -			  else (eq1,vtvars1)
    4.96 -  in
    4.97 -      (eq2,vtvars2)
    4.98 -  end;
    4.99 +    types_eq ([],[]) vtvars = (true,vtvars)
   4.100 +  | types_eq (tp1::tps1,tp2::tps2) vtvars =
   4.101 +      let val (eq1,vtvars1) = type_eq (tp1,tp2) vtvars
   4.102 +	  val (eq2,vtvars2) = if eq1 then types_eq (tps1,tps2) vtvars1
   4.103 +			      else (eq1,vtvars1)
   4.104 +      in
   4.105 +	  (eq2,vtvars2)
   4.106 +      end;
   4.107  
   4.108  
   4.109  fun term_eq (UVar(v1,tp1),UVar(v2,tp2)) (vars,tvars) =
   4.110 @@ -585,27 +583,27 @@
   4.111  					| 2 => (false,(vars,tvars)))
   4.112    | term_eq (UVar(_,_),_) vtvars = (false,vtvars)
   4.113    | term_eq (Fun(f1,tps1,tms1),Fun(f2,tps2,tms2)) vtvars =
   4.114 -    let val (eq1,vtvars1) = 
   4.115 -	    if (f1 = f2) then terms_eq (tms1,tms2) vtvars
   4.116 -	    else (false,vtvars)
   4.117 -	val (eq2,vtvars2) =
   4.118 -	    if eq1 then types_eq (tps1,tps2) vtvars1
   4.119 -	    else (eq1,vtvars1)
   4.120 -    in
   4.121 -	(eq2,vtvars2)
   4.122 -    end
   4.123 +      let val (eq1,vtvars1) = 
   4.124 +	      if f1 = f2 then terms_eq (tms1,tms2) vtvars
   4.125 +	      else (false,vtvars)
   4.126 +	  val (eq2,vtvars2) =
   4.127 +	      if eq1 then types_eq (tps1,tps2) vtvars1
   4.128 +	      else (eq1,vtvars1)
   4.129 +      in
   4.130 +	  (eq2,vtvars2)
   4.131 +      end
   4.132    | term_eq (Fun(_,_,_),_) vtvars = (false,vtvars)
   4.133  
   4.134  and
   4.135  
   4.136 -terms_eq ([],[]) vtvars = (true,vtvars)
   4.137 -| terms_eq (tm1::tms1,tm2::tms2) vtvars =
   4.138 -  let val (eq1,vtvars1) = term_eq (tm1,tm2) vtvars
   4.139 -      val (eq2,vtvars2) = if eq1 then terms_eq (tms1,tms2) vtvars1
   4.140 -				 else (eq1,vtvars1)
   4.141 -  in
   4.142 -      (eq2,vtvars2)
   4.143 -  end;
   4.144 +    terms_eq ([],[]) vtvars = (true,vtvars)
   4.145 +  | terms_eq (tm1::tms1,tm2::tms2) vtvars =
   4.146 +      let val (eq1,vtvars1) = term_eq (tm1,tm2) vtvars
   4.147 +	  val (eq2,vtvars2) = if eq1 then terms_eq (tms1,tms2) vtvars1
   4.148 +				     else (eq1,vtvars1)
   4.149 +      in
   4.150 +	  (eq2,vtvars2)
   4.151 +      end;
   4.152  					     
   4.153  
   4.154  fun pred_eq (Predicate(predname1,tps1,tms1),Predicate(predname2,tps2,tms2)) vtvars =
   4.155 @@ -634,14 +632,30 @@
   4.156      end;
   4.157  
   4.158  
   4.159 -fun cls_eq (cls1,cls2) =
   4.160 +(*Equality of two clauses up to variable renaming*)
   4.161 +fun clause_eq (cls1,cls2) =
   4.162      let val lits1 = get_literals cls1
   4.163  	val lits2 = get_literals cls2
   4.164      in
   4.165 -	(length lits1 = length lits2) andalso (fst (lits_eq (lits1,lits2) ([],[])))
   4.166 +	length lits1 = length lits2 andalso #1 (lits_eq (lits1,lits2) ([],[]))
   4.167      end;
   4.168  
   4.169  
   4.170 +(*** Hash function for clauses ***)
   4.171 +
   4.172 +val xor_words = List.foldl Word.xorb 0w0;
   4.173 +
   4.174 +fun hash_term (UVar(_,_), w) = w
   4.175 +  | hash_term (Fun(f,tps,args), w) = 
   4.176 +      List.foldl hash_term (Hashtable.hash_string (f,w)) args;
   4.177 +  
   4.178 +fun hash_pred (Predicate(pn,_,args), w) = 
   4.179 +    List.foldl hash_term (Hashtable.hash_string (pn,w)) args;
   4.180 +    
   4.181 +fun hash1_literal (Literal(true,pred,_)) = hash_pred (pred, 0w0)
   4.182 +  | hash1_literal (Literal(false,pred,_)) = Word.notb (hash_pred (pred, 0w0));
   4.183 +  
   4.184 +fun hash1_clause clause = xor_words (map hash1_literal (get_literals clause));
   4.185  
   4.186  
   4.187  (* FIX: not sure what to do with these funcs *)
   4.188 @@ -808,7 +822,8 @@
   4.189                     premLits = map make_TVarLit false_tvars_srts'}
   4.190     end;
   4.191      
   4.192 -(*The number of clauses generated from cls, including type clauses*)
   4.193 +(*The number of clauses generated from cls, including type clauses. It's always 1
   4.194 +  except for conjecture clauses.*)
   4.195  fun num_of_clauses (Clause cls) =
   4.196      let val num_tfree_lits = 
   4.197  	      if !keep_types then length (#tfree_type_literals cls)
   4.198 @@ -886,17 +901,14 @@
   4.199    | string_of_term (Fun (name,typs,terms)) = 
   4.200        let val terms_as_strings = map string_of_term terms
   4.201  	  val typs' = if !keep_types then map string_of_fol_type typs else []
   4.202 -      in  name ^ (paren_pack (terms_as_strings @ typs'))  end
   4.203 -  | string_of_term _ = error "string_of_term";      
   4.204 +      in  name ^ (paren_pack (terms_as_strings @ typs'))  end;
   4.205  
   4.206  (* before output the string of the predicate, check if the predicate corresponds to an equality or not. *)
   4.207  fun string_of_predicate (Predicate("equal",[typ],terms)) = string_of_equality(typ,terms)
   4.208    | string_of_predicate (Predicate(name,typs,terms)) = 
   4.209        let val terms_as_strings = map string_of_term terms
   4.210  	  val typs' = if !keep_types then map string_of_fol_type typs else []
   4.211 -      in  name ^ (paren_pack (terms_as_strings @ typs'))  end
   4.212 -  | string_of_predicate _ = error "string_of_predicate";      
   4.213 -
   4.214 +      in  name ^ (paren_pack (terms_as_strings @ typs'))  end;
   4.215  
   4.216  fun string_of_clausename (cls_id,ax_name) = 
   4.217      clause_prefix ^ ascii_of ax_name ^ "_" ^ Int.toString cls_id;