# HG changeset patch # User blanchet # Date 1270110426 -7200 # Node ID 1493b43204e92bb51bf336ee6f22e372b7a0a95e # Parent ca6610908ae93f1ce54c1e48d1708c963a1d0cdc# Parent 3fc077c4780a67d33495d089d5bcb1ee2dce1107 merged diff -r ca6610908ae9 -r 1493b43204e9 src/HOL/HOL.thy --- a/src/HOL/HOL.thy Wed Mar 31 16:44:41 2010 +0200 +++ b/src/HOL/HOL.thy Thu Apr 01 10:27:06 2010 +0200 @@ -30,6 +30,7 @@ ("~~/src/Tools/induct_tacs.ML") ("Tools/recfun_codegen.ML") "~~/src/Tools/more_conv.ML" + "~~/src/HOL/Tools/Sledgehammer/named_thm_set.ML" begin setup {* Intuitionistic.method_setup @{binding iprover} *} @@ -800,10 +801,10 @@ *} ML {* -structure No_ATPs = Named_Thms +structure No_ATPs = Named_Thm_Set ( val name = "no_atp" - val description = "theorems that should be avoided by Sledgehammer" + val description = "theorems that should be filtered out by Sledgehammer" ) *} diff -r ca6610908ae9 -r 1493b43204e9 src/HOL/IsaMakefile --- a/src/HOL/IsaMakefile Wed Mar 31 16:44:41 2010 +0200 +++ b/src/HOL/IsaMakefile Thu Apr 01 10:27:06 2010 +0200 @@ -99,7 +99,8 @@ $(OUT)/Pure: Pure -BASE_DEPENDENCIES = $(OUT)/Pure \ +BASE_DEPENDENCIES = $(SRC)/HOL/Tools/Sledgehammer/named_thm_set.ML \ + $(OUT)/Pure \ $(SRC)/Provers/blast.ML \ $(SRC)/Provers/clasimp.ML \ $(SRC)/Provers/classical.ML \ @@ -295,7 +296,6 @@ Tools/numeral.ML \ Tools/numeral_simprocs.ML \ Tools/numeral_syntax.ML \ - Tools/polyhash.ML \ Tools/Predicate_Compile/predicate_compile_aux.ML \ Tools/Predicate_Compile/predicate_compile_compilations.ML \ Tools/Predicate_Compile/predicate_compile_core.ML \ diff -r ca6610908ae9 -r 1493b43204e9 src/HOL/Sledgehammer.thy --- a/src/HOL/Sledgehammer.thy Wed Mar 31 16:44:41 2010 +0200 +++ b/src/HOL/Sledgehammer.thy Thu Apr 01 10:27:06 2010 +0200 @@ -10,7 +10,6 @@ theory Sledgehammer imports Plain Hilbert_Choice uses - "Tools/polyhash.ML" "~~/src/Tools/Metis/metis.ML" "Tools/Sledgehammer/sledgehammer_util.ML" ("Tools/Sledgehammer/sledgehammer_fol_clause.ML") @@ -100,7 +99,6 @@ setup Sledgehammer_Fact_Preprocessor.setup use "Tools/Sledgehammer/sledgehammer_hol_clause.ML" use "Tools/Sledgehammer/sledgehammer_proof_reconstruct.ML" -setup Sledgehammer_Proof_Reconstruct.setup use "Tools/Sledgehammer/sledgehammer_fact_filter.ML" use "Tools/ATP_Manager/atp_manager.ML" use "Tools/ATP_Manager/atp_wrapper.ML" diff -r ca6610908ae9 -r 1493b43204e9 src/HOL/Tools/ATP_Manager/atp_manager.ML --- a/src/HOL/Tools/ATP_Manager/atp_manager.ML Wed Mar 31 16:44:41 2010 +0200 +++ b/src/HOL/Tools/ATP_Manager/atp_manager.ML Thu Apr 01 10:27:06 2010 +0200 @@ -14,11 +14,15 @@ verbose: bool, atps: string list, full_types: bool, + respect_no_atp: bool, relevance_threshold: real, + convergence: real, + theory_const: bool option, higher_order: bool option, - respect_no_atp: bool, follow_defs: bool, isar_proof: bool, + modulus: int, + sorts: bool, timeout: Time.time, minimize_timeout: Time.time} type problem = @@ -52,21 +56,25 @@ structure ATP_Manager : ATP_MANAGER = struct -type relevance_override = Sledgehammer_Fact_Filter.relevance_override +open Sledgehammer_Fact_Filter +open Sledgehammer_Proof_Reconstruct (** parameters, problems, results, and provers **) -(* TODO: "theory_const", "blacklist_filter", "convergence" *) type params = {debug: bool, verbose: bool, atps: string list, full_types: bool, + respect_no_atp: bool, relevance_threshold: real, + convergence: real, + theory_const: bool option, higher_order: bool option, - respect_no_atp: bool, follow_defs: bool, isar_proof: bool, + modulus: int, + sorts: bool, timeout: Time.time, minimize_timeout: 
Time.time} @@ -319,6 +327,7 @@ | SOME prover => let val {context = ctxt, facts, goal} = Proof.goal proof_state; + val n = Logic.count_prems (prop_of goal) val desc = "ATP " ^ quote name ^ " for subgoal " ^ string_of_int i ^ ":\n" ^ Syntax.string_of_term ctxt (Thm.term_of (Thm.cprem_of goal i)); @@ -331,9 +340,8 @@ relevance_override = relevance_override, axiom_clauses = NONE, filtered_clauses = NONE} val message = #message (prover params timeout problem) - handle Sledgehammer_HOL_Clause.TRIVIAL => (* FIXME !? *) - "Try this command: " ^ - Markup.markup Markup.sendback "by metis" ^ "." + handle Sledgehammer_HOL_Clause.TRIVIAL => + metis_line i n [] | ERROR msg => ("Error: " ^ msg); val _ = unregister message (Thread.self ()); in () end); diff -r ca6610908ae9 -r 1493b43204e9 src/HOL/Tools/ATP_Manager/atp_minimal.ML --- a/src/HOL/Tools/ATP_Manager/atp_minimal.ML Wed Mar 31 16:44:41 2010 +0200 +++ b/src/HOL/Tools/ATP_Manager/atp_minimal.ML Thu Apr 01 10:27:06 2010 +0200 @@ -14,7 +14,7 @@ val linear_minimize : 'a minimize_fun val binary_minimize : 'a minimize_fun val minimize_theorems : - params -> (string * thm list) minimize_fun -> prover -> string + params -> (string * thm list) minimize_fun -> prover -> string -> int -> Proof.state -> (string * thm list) list -> (string * thm list) list option * string end; @@ -23,6 +23,7 @@ struct open Sledgehammer_Fact_Preprocessor +open Sledgehammer_Proof_Reconstruct open ATP_Manager type 'a minimize_fun = ('a list -> bool) -> 'a list -> 'a list @@ -111,7 +112,7 @@ (* wrapper for calling external prover *) fun sledgehammer_test_theorems (params as {full_types, ...} : params) prover - timeout subgoalno state filtered name_thms_pairs = + timeout subgoal state filtered name_thms_pairs = let val _ = priority ("Testing " ^ string_of_int (length name_thms_pairs) ^ " theorems... 
") @@ -119,7 +120,7 @@ val axclauses = cnf_rules_pairs (Proof.theory_of state) name_thm_pairs val {context = ctxt, facts, goal} = Proof.goal state val problem = - {subgoal = subgoalno, goal = (ctxt, (facts, goal)), + {subgoal = subgoal, goal = (ctxt, (facts, goal)), relevance_override = {add = [], del = [], only = false}, axiom_clauses = SOME axclauses, filtered_clauses = filtered} val (result, proof) = produce_answer (prover params timeout problem) @@ -130,7 +131,7 @@ (* minimalization of thms *) fun minimize_theorems (params as {minimize_timeout, ...}) gen_min prover - prover_name state name_thms_pairs = + prover_name i state name_thms_pairs = let val msecs = Time.toMilliseconds minimize_timeout val _ = @@ -138,9 +139,10 @@ " theorems, ATP: " ^ prover_name ^ ", time limit: " ^ string_of_int msecs ^ " ms") val test_thms_fun = - sledgehammer_test_theorems params prover minimize_timeout 1 state + sledgehammer_test_theorems params prover minimize_timeout i state fun test_thms filtered thms = case test_thms_fun filtered thms of (Success _, _) => true | _ => false + val n = state |> Proof.raw_goal |> #goal |> prop_of |> Logic.count_prems in (* try prove first to check result and get used theorems *) (case test_thms_fun NONE name_thms_pairs of @@ -157,13 +159,7 @@ val min_names = sort_distinct string_ord (map fst min_thms) val _ = priority (cat_lines ["Minimal " ^ string_of_int (length min_thms) ^ " theorems"]) - in - (SOME min_thms, - "Try this command: " ^ - Markup.markup Markup.sendback - ("by (metis " ^ space_implode " " min_names ^ ")") - ^ ".") - end + in (SOME min_thms, metis_line i n min_names) end | (Timeout, _) => (NONE, "Timeout: You may need to increase the time limit of " ^ string_of_int msecs ^ " ms. Invoke \"atp_minimize [time=...]\".") @@ -172,8 +168,7 @@ | (Failure, _) => (NONE, "Failure: No proof with the theorems supplied")) handle Sledgehammer_HOL_Clause.TRIVIAL => - (SOME [], "Trivial: Try this command: " ^ - Markup.markup Markup.sendback "by metis" ^ ".") + (SOME [], metis_line i n []) | ERROR msg => (NONE, "Error: " ^ msg) end diff -r ca6610908ae9 -r 1493b43204e9 src/HOL/Tools/ATP_Manager/atp_wrapper.ML --- a/src/HOL/Tools/ATP_Manager/atp_wrapper.ML Wed Mar 31 16:44:41 2010 +0200 +++ b/src/HOL/Tools/ATP_Manager/atp_wrapper.ML Thu Apr 01 10:27:06 2010 +0200 @@ -47,7 +47,7 @@ arguments: Time.time -> string, failure_strs: string list, max_new_clauses: int, - add_theory_const: bool, + prefers_theory_const: bool, supports_isar_proofs: bool}; @@ -158,16 +158,21 @@ fun generic_tptp_prover (name, {command, arguments, failure_strs, max_new_clauses, - add_theory_const, supports_isar_proofs}) - (params as {relevance_threshold, higher_order, follow_defs, isar_proof, - ...}) timeout = + prefers_theory_const, supports_isar_proofs}) + (params as {respect_no_atp, relevance_threshold, convergence, + theory_const, higher_order, follow_defs, isar_proof, + modulus, sorts, ...}) + timeout = generic_prover - (get_relevant_facts relevance_threshold higher_order follow_defs - max_new_clauses add_theory_const) + (get_relevant_facts respect_no_atp relevance_threshold convergence + higher_order follow_defs max_new_clauses + (the_default prefers_theory_const theory_const)) (prepare_clauses higher_order false) write_tptp_file command (arguments timeout) failure_strs - (if supports_isar_proofs andalso isar_proof then structured_isar_proof - else metis_lemma_list false) name params; + (if supports_isar_proofs andalso isar_proof then + structured_isar_proof modulus sorts + else + metis_lemma_list false) 
name params; fun tptp_prover name p = (name, generic_tptp_prover (name, p)); @@ -185,7 +190,7 @@ failure_strs = ["Satisfiability detected", "Refutation not found", "CANNOT PROVE"], max_new_clauses = 60, - add_theory_const = false, + prefers_theory_const = false, supports_isar_proofs = true} val vampire = tptp_prover "vampire" vampire_config @@ -202,7 +207,7 @@ "SZS status: ResourceOut", "SZS status ResourceOut", "# Cannot determine problem status"], max_new_clauses = 100, - add_theory_const = false, + prefers_theory_const = false, supports_isar_proofs = true} val e = tptp_prover "e" e_config @@ -211,18 +216,20 @@ fun generic_dfg_prover (name, ({command, arguments, failure_strs, max_new_clauses, - add_theory_const, ...} : prover_config)) - (params as {relevance_threshold, higher_order, follow_defs, ...}) + prefers_theory_const, ...} : prover_config)) + (params as {respect_no_atp, relevance_threshold, convergence, + theory_const, higher_order, follow_defs, ...}) timeout = generic_prover - (get_relevant_facts relevance_threshold higher_order follow_defs - max_new_clauses add_theory_const) + (get_relevant_facts respect_no_atp relevance_threshold convergence + higher_order follow_defs max_new_clauses + (the_default prefers_theory_const theory_const)) (prepare_clauses higher_order true) write_dfg_file command (arguments timeout) failure_strs (metis_lemma_list true) name params; fun dfg_prover (name, p) = (name, generic_dfg_prover (name, p)); -fun spass_config_for add_theory_const : prover_config = +val spass_config : prover_config = {command = Path.explode "$SPASS_HOME/SPASS", arguments = (fn timeout => "-Auto -SOS=1 -PGiven=0 -PProblem=0 -Splits=0" ^ " -FullRed=0 -DocProof -TimeLimit=" ^ @@ -231,15 +238,10 @@ ["SPASS beiseite: Completion found.", "SPASS beiseite: Ran out of time.", "SPASS beiseite: Maximal number of loops exceeded."], max_new_clauses = 40, - add_theory_const = add_theory_const, - supports_isar_proofs = false}; - -val spass_config = spass_config_for true + prefers_theory_const = true, + supports_isar_proofs = false} val spass = dfg_prover ("spass", spass_config) -val spass_no_tc_config = spass_config_for false -val spass_no_tc = dfg_prover ("spass_no_tc", spass_no_tc_config) - (* remote prover invocation via SystemOnTPTP *) @@ -269,34 +271,29 @@ val remote_failure_strs = ["Remote-script could not extract proof"]; -fun remote_prover_config prover_prefix args max_new_clauses add_theory_const - : prover_config = +fun remote_prover_config prover_prefix args + ({failure_strs, max_new_clauses, prefers_theory_const, ...} + : prover_config) : prover_config = {command = Path.explode "$ISABELLE_ATP_MANAGER/SystemOnTPTP", arguments = (fn timeout => args ^ " -t " ^ string_of_int (Time.toSeconds timeout) ^ " -s " ^ the_system prover_prefix), - failure_strs = remote_failure_strs, + failure_strs = remote_failure_strs @ failure_strs, max_new_clauses = max_new_clauses, - add_theory_const = add_theory_const, + prefers_theory_const = prefers_theory_const, supports_isar_proofs = false} val remote_vampire = tptp_prover "remote_vampire" - (remote_prover_config "Vampire---9" "" - (#max_new_clauses vampire_config) (#add_theory_const vampire_config)) + (remote_prover_config "Vampire---9" "" vampire_config) val remote_e = - tptp_prover "remote_e" - (remote_prover_config "EP---" "" - (#max_new_clauses e_config) (#add_theory_const e_config)) + tptp_prover "remote_e" (remote_prover_config "EP---" "" e_config) val remote_spass = - tptp_prover "remote_spass" - (remote_prover_config "SPASS---" "-x" - 
(#max_new_clauses spass_config) (#add_theory_const spass_config)) + tptp_prover "remote_spass" (remote_prover_config "SPASS---" "-x" spass_config) -val provers = - [spass, vampire, e, spass_no_tc, remote_vampire, remote_spass, remote_e] +val provers = [spass, vampire, e, remote_vampire, remote_spass, remote_e] val prover_setup = fold add_prover provers val setup = diff -r ca6610908ae9 -r 1493b43204e9 src/HOL/Tools/Nitpick/nitpick_hol.ML --- a/src/HOL/Tools/Nitpick/nitpick_hol.ML Wed Mar 31 16:44:41 2010 +0200 +++ b/src/HOL/Tools/Nitpick/nitpick_hol.ML Thu Apr 01 10:27:06 2010 +0200 @@ -217,6 +217,7 @@ structure Nitpick_HOL : NITPICK_HOL = struct +open Sledgehammer_Util open Nitpick_Util type const_table = term list Symtab.table @@ -1229,8 +1230,8 @@ | is_ground_term _ = false (* term -> word -> word *) -fun hashw_term (t1 $ t2) = Polyhash.hashw (hashw_term t1, hashw_term t2) - | hashw_term (Const (s, _)) = Polyhash.hashw_string (s, 0w0) +fun hashw_term (t1 $ t2) = hashw (hashw_term t1, hashw_term t2) + | hashw_term (Const (s, _)) = hashw_string (s, 0w0) | hashw_term _ = 0w0 (* term -> int *) val hash_term = Word.toInt o hashw_term diff -r ca6610908ae9 -r 1493b43204e9 src/HOL/Tools/Sledgehammer/named_thm_set.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Tools/Sledgehammer/named_thm_set.ML Thu Apr 01 10:27:06 2010 +0200 @@ -0,0 +1,45 @@ +(* Title: HOL/Tools/Sledgehammer/named_thm_set.ML + Author: Makarius + Author: Jasmin Blanchette, TU Muenchen + +Named sets of theorems. +*) + +signature NAMED_THM_SET = +sig + val member: Proof.context -> thm -> bool + val add_thm: thm -> Context.generic -> Context.generic + val del_thm: thm -> Context.generic -> Context.generic + val add: attribute + val del: attribute + val setup: theory -> theory +end; + +functor Named_Thm_Set(val name: string val description: string) + : NAMED_THM_SET = +struct + +structure Data = Generic_Data +( + type T = thm Termtab.table; + val empty = Termtab.empty; + val extend = I; + fun merge tabs = Termtab.merge (K true) tabs; +); + +fun member ctxt th = + Termtab.defined (Data.get (Context.Proof ctxt)) (Thm.full_prop_of th); + +fun add_thm th = Data.map (Termtab.update (Thm.full_prop_of th, th)); +fun del_thm th = Data.map (Termtab.delete_safe (Thm.full_prop_of th)); + +val add = Thm.declaration_attribute add_thm; +val del = Thm.declaration_attribute del_thm; + +val setup = + Attrib.setup (Binding.name name) (Attrib.add_del add del) + ("declaration of " ^ description) #> + PureThy.add_thms_dynamic (Binding.name name, + map #2 o Termtab.dest o Data.get); + +end; diff -r ca6610908ae9 -r 1493b43204e9 src/HOL/Tools/Sledgehammer/sledgehammer_fact_filter.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_fact_filter.ML Wed Mar 31 16:44:41 2010 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_fact_filter.ML Thu Apr 01 10:27:06 2010 +0200 @@ -18,8 +18,8 @@ val tfree_classes_of_terms : term list -> string list val type_consts_of_terms : theory -> term list -> string list val get_relevant_facts : - real -> bool option -> bool -> int -> bool -> relevance_override - -> Proof.context * (thm list * 'a) -> thm list + bool -> real -> real -> bool option -> bool -> int -> bool + -> relevance_override -> Proof.context * (thm list * 'a) -> thm list -> (thm * (string * int)) list val prepare_clauses : bool option -> bool -> thm list -> thm list -> (thm * (axiom_name * hol_clause_id)) list -> @@ -41,17 +41,6 @@ del: Facts.ref list, only: bool} -(********************************************************************) -(* some 
settings for both background automatic ATP calling procedure*) -(* and also explicit ATP invocation methods *) -(********************************************************************) - -(*** background linkup ***) -val run_blacklist_filter = true; - -(*** relevance filter parameters ***) -val convergence = 3.2; (*Higher numbers allow longer inference chains*) - (***************************************************************) (* Relevance Filtering *) (***************************************************************) @@ -138,8 +127,8 @@ (*Inserts a dummy "constant" referring to the theory name, so that relevance takes the given theory into account.*) -fun const_prop_of add_theory_const th = - if add_theory_const then +fun const_prop_of theory_const th = + if theory_const then let val name = Context.theory_name (theory_of_thm th) val t = Const (name ^ ". 1", HOLogic.boolT) in t $ prop_of th end @@ -168,7 +157,7 @@ structure CTtab = Table(type key = const_typ list val ord = dict_ord const_typ_ord); -fun count_axiom_consts add_theory_const thy ((thm,_), tab) = +fun count_axiom_consts theory_const thy ((thm,_), tab) = let fun count_const (a, T, tab) = let val (c, cts) = const_with_typ thy (a,T) in (*Two-dimensional table update. Constant maps to types maps to count.*) @@ -181,7 +170,7 @@ count_term_consts (t, count_term_consts (u, tab)) | count_term_consts (Abs(_,_,t), tab) = count_term_consts (t, tab) | count_term_consts (_, tab) = tab - in count_term_consts (const_prop_of add_theory_const thm, tab) end; + in count_term_consts (const_prop_of theory_const thm, tab) end; (**** Actual Filtering Code ****) @@ -213,8 +202,8 @@ let val tab = add_term_consts_typs_rm thy (t, null_const_tab) in Symtab.fold add_expand_pairs tab [] end; -fun pair_consts_typs_axiom add_theory_const thy (p as (thm, _)) = - (p, (consts_typs_of_term thy (const_prop_of add_theory_const thm))); +fun pair_consts_typs_axiom theory_const thy (p as (thm, _)) = + (p, (consts_typs_of_term thy (const_prop_of theory_const thm))); exception ConstFree; fun dest_ConstFree (Const aT) = aT @@ -263,7 +252,7 @@ end end; -fun relevant_clauses ctxt follow_defs max_new +fun relevant_clauses ctxt convergence follow_defs max_new (relevance_override as {add, del, only}) thy ctab p rel_consts = let val add_thms = maps (ProofContext.get_fact ctxt) add @@ -277,8 +266,9 @@ in trace_msg (fn () => "relevant this iteration: " ^ Int.toString (length newrels)); map #1 newrels @ - relevant_clauses ctxt follow_defs max_new relevance_override thy ctab - newp rel_consts' (more_rejects @ rejects) + relevant_clauses ctxt convergence follow_defs max_new + relevance_override thy ctab newp rel_consts' + (more_rejects @ rejects) end | relevant (newrels, rejects) ((ax as (clsthm as (thm, (name, n)), consts_typs)) :: axs) = @@ -298,21 +288,20 @@ relevant ([],[]) end; -fun relevance_filter ctxt relevance_threshold follow_defs max_new - add_theory_const relevance_override thy axioms goals = +fun relevance_filter ctxt relevance_threshold convergence follow_defs max_new + theory_const relevance_override thy axioms goals = if relevance_threshold > 0.0 then let - val const_tab = List.foldl (count_axiom_consts add_theory_const thy) + val const_tab = List.foldl (count_axiom_consts theory_const thy) Symtab.empty axioms val goal_const_tab = get_goal_consts_typs thy goals val _ = trace_msg (fn () => "Initial constants: " ^ commas (Symtab.keys goal_const_tab)) val relevant = - relevant_clauses ctxt follow_defs max_new relevance_override thy - const_tab relevance_threshold 
goal_const_tab - (map (pair_consts_typs_axiom add_theory_const thy) - axioms) + relevant_clauses ctxt convergence follow_defs max_new relevance_override + thy const_tab relevance_threshold goal_const_tab + (map (pair_consts_typs_axiom theory_const thy) axioms) in trace_msg (fn () => "Total relevant: " ^ Int.toString (length relevant)); relevant @@ -340,51 +329,18 @@ String.isSuffix "_def" a orelse String.isSuffix "_defs" a end; -(** a hash function from Term.term to int, and also a hash table **) -val xor_words = List.foldl Word.xorb 0w0; +fun mk_clause_table xs = + fold (Termtab.update o `(prop_of o fst)) xs Termtab.empty -fun hashw_term ((Const(c,_)), w) = Polyhash.hashw_string (c,w) - | hashw_term ((Free(a,_)), w) = Polyhash.hashw_string (a,w) - | hashw_term ((Var(_,_)), w) = w - | hashw_term ((Bound i), w) = Polyhash.hashw_int(i,w) - | hashw_term ((Abs(_,_,t)), w) = hashw_term (t, w) - | hashw_term ((P$Q), w) = hashw_term (Q, (hashw_term (P, w))); - -fun hash_literal (@{const Not} $ P) = Word.notb(hashw_term(P,0w0)) - | hash_literal P = hashw_term(P,0w0); - -fun hash_term t = Word.toIntX (xor_words (map hash_literal (HOLogic.disjuncts (strip_Trueprop t)))); - -fun equal_thm (thm1,thm2) = Term.aconv(prop_of thm1, prop_of thm2); - -exception HASH_CLAUSE; +fun make_unique xs = + Termtab.fold (cons o snd) (mk_clause_table xs) [] -(*Create a hash table for clauses, of the given size*) -fun mk_clause_table n = - Polyhash.mkTable (hash_term o prop_of, equal_thm) - (n, HASH_CLAUSE); +(* Remove existing axiom clauses from the conjecture clauses, as this can + dramatically boost an ATP's performance (for some reason). *) +fun subtract_cls ax_clauses = + filter_out (Termtab.defined (mk_clause_table ax_clauses) o prop_of) -(*Use a hash table to eliminate duplicates from xs. Argument is a list of - (thm * (string * int)) tuples. The theorems are hashed into the table. *) -fun make_unique xs = - let val ht = mk_clause_table 7000 - in - trace_msg (fn () => ("make_unique gets " ^ Int.toString (length xs) ^ " clauses")); - app (ignore o Polyhash.peekInsert ht) xs; - Polyhash.listItems ht - end; - -(*Remove existing axiom clauses from the conjecture clauses, as this can dramatically - boost an ATP's performance (for some reason)*) -fun subtract_cls c_clauses ax_clauses = - let val ht = mk_clause_table 2200 - fun known x = is_some (Polyhash.peek ht x) - in - app (ignore o Polyhash.peekInsert ht) ax_clauses; - filter (not o known) c_clauses - end; - -fun all_valid_thms ctxt = +fun all_valid_thms respect_no_atp ctxt = let val global_facts = PureThy.facts_of (ProofContext.theory_of ctxt); val local_facts = ProofContext.facts_of ctxt; @@ -404,7 +360,7 @@ val ths = filter_out bad_for_atp ths0; in if Facts.is_concealed facts name orelse null ths orelse - run_blacklist_filter andalso is_package_def name then I + respect_no_atp andalso is_package_def name then I else (case find_first check_thms [name1, name2, name] of NONE => I @@ -427,31 +383,23 @@ fun is_multi (a, ths) = length ths > 1 orelse String.isSuffix ".axioms" a; (*The single theorems go BEFORE the multiple ones. 
Blacklist is applied to all.*) -fun name_thm_pairs ctxt = +fun name_thm_pairs respect_no_atp ctxt = let - val (mults, singles) = List.partition is_multi (all_valid_thms ctxt) + val (mults, singles) = + List.partition is_multi (all_valid_thms respect_no_atp ctxt) val ps = [] |> fold add_multi_names mults |> fold add_single_names singles - in - if run_blacklist_filter then - let - val blacklist = No_ATPs.get ctxt - |> map (`Thm.full_prop_of) |> Termtab.make - val is_blacklisted = Termtab.defined blacklist o Thm.full_prop_of o snd - in ps |> filter_out is_blacklisted end - else - ps - end; + in ps |> respect_no_atp ? filter_out (No_ATPs.member ctxt o snd) end; fun check_named ("", th) = (warning ("No name for theorem " ^ Display.string_of_thm_without_context th); false) | check_named _ = true; -fun get_all_lemmas ctxt = +fun get_all_lemmas respect_no_atp ctxt = let val included_thms = tap (fn ths => trace_msg (fn () => ("Including all " ^ Int.toString (length ths) ^ " theorems"))) - (name_thm_pairs ctxt) + (name_thm_pairs respect_no_atp ctxt) in filter check_named included_thms end; @@ -548,19 +496,19 @@ NONE => forall (Meson.is_fol_term thy) (map prop_of goal_cls) | SOME b => not b -fun get_relevant_facts relevance_threshold higher_order follow_defs max_new - add_theory_const relevance_override - (ctxt, (chain_ths, th)) goal_cls = +fun get_relevant_facts respect_no_atp relevance_threshold convergence + higher_order follow_defs max_new theory_const + relevance_override (ctxt, (chain_ths, th)) goal_cls = let val thy = ProofContext.theory_of ctxt val is_FO = is_first_order thy higher_order goal_cls - val included_cls = get_all_lemmas ctxt + val included_cls = get_all_lemmas respect_no_atp ctxt |> cnf_rules_pairs thy |> make_unique |> restrict_to_logic thy is_FO |> remove_unwanted_clauses in - relevance_filter ctxt relevance_threshold follow_defs max_new - add_theory_const relevance_override thy included_cls + relevance_filter ctxt relevance_threshold convergence follow_defs max_new + theory_const relevance_override thy included_cls (map prop_of goal_cls) end; @@ -574,7 +522,7 @@ val axcls = chain_cls @ axcls val extra_cls = chain_cls @ extra_cls val is_FO = is_first_order thy higher_order goal_cls - val ccls = subtract_cls goal_cls extra_cls + val ccls = subtract_cls extra_cls goal_cls val _ = app (fn th => trace_msg (fn _ => Display.string_of_thm_global thy th)) ccls val ccltms = map prop_of ccls and axtms = map (prop_of o #1) extra_cls diff -r ca6610908ae9 -r 1493b43204e9 src/HOL/Tools/Sledgehammer/sledgehammer_fol_clause.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_fol_clause.ML Wed Mar 31 16:44:41 2010 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_fol_clause.ML Thu Apr 01 10:27:06 2010 +0200 @@ -80,6 +80,8 @@ structure Sledgehammer_FOL_Clause : SLEDGEHAMMER_FOL_CLAUSE = struct +open Sledgehammer_Util + val schematic_var_prefix = "V_"; val fixed_var_prefix = "v_"; @@ -180,12 +182,11 @@ tvar_prefix ^ (ascii_of_indexname (trim_type_var x,i)); fun make_fixed_type_var x = tfree_prefix ^ (ascii_of (trim_type_var x)); -(*HACK because SPASS truncates identifiers to 63 characters :-(( *) -(*32-bit hash,so we expect no collisions unless there are around 65536 long identifiers...*) -fun controlled_length dfg_format s = - if size s > 60 andalso dfg_format - then Word.toString (Polyhash.hashw_string(s,0w0)) - else s; +(* HACK because SPASS 3.0 truncates identifiers to 63 characters. (This is + solved in 3.7 and perhaps in earlier versions too.) *) +(* 32-bit hash, so we expect no collisions. 
*) +fun controlled_length dfg s = + if dfg andalso size s > 60 then Word.toString (hashw_string (s, 0w0)) else s; fun lookup_const dfg c = case Symtab.lookup const_trans_table c of @@ -197,8 +198,9 @@ SOME c' => c' | NONE => controlled_length dfg (ascii_of c); -fun make_fixed_const _ "op =" = "equal" (*MUST BE "equal" because it's built-in to ATPs*) - | make_fixed_const dfg c = const_prefix ^ lookup_const dfg c; +(* "op =" MUST BE "equal" because it's built into ATPs. *) +fun make_fixed_const _ (@{const_name "op ="}) = "equal" + | make_fixed_const dfg c = const_prefix ^ lookup_const dfg c; fun make_fixed_type_const dfg c = tconst_prefix ^ lookup_type_const dfg c; diff -r ca6610908ae9 -r 1493b43204e9 src/HOL/Tools/Sledgehammer/sledgehammer_isar.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_isar.ML Wed Mar 31 16:44:41 2010 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_isar.ML Thu Apr 01 10:27:06 2010 +0200 @@ -40,21 +40,27 @@ val default_default_params = [("debug", "false"), ("verbose", "false"), + ("respect_no_atp", "true"), ("relevance_threshold", "50"), + ("convergence", "320"), + ("theory_const", "smart"), ("higher_order", "smart"), - ("respect_no_atp", "true"), ("follow_defs", "false"), ("isar_proof", "false"), + ("modulus", "1"), + ("sorts", "false"), ("minimize_timeout", "5 s")] val negated_params = [("no_debug", "debug"), ("quiet", "verbose"), + ("ignore_no_atp", "respect_no_atp"), ("partial_types", "full_types"), + ("no_theory_const", "theory_const"), ("first_order", "higher_order"), - ("ignore_no_atp", "respect_no_atp"), ("dont_follow_defs", "follow_defs"), - ("metis_proof", "isar_proof")] + ("metis_proof", "isar_proof"), + ("no_sorts", "sorts")] val property_dependent_params = ["atps", "full_types", "timeout"] @@ -119,26 +125,31 @@ val verbose = debug orelse lookup_bool "verbose" val atps = lookup_string "atps" |> space_explode " " val full_types = lookup_bool "full_types" + val respect_no_atp = lookup_bool "respect_no_atp" val relevance_threshold = 0.01 * Real.fromInt (lookup_int "relevance_threshold") + val convergence = 0.01 * Real.fromInt (lookup_int "convergence") + val theory_const = lookup_bool_option "theory_const" val higher_order = lookup_bool_option "higher_order" - val respect_no_atp = lookup_bool "respect_no_atp" val follow_defs = lookup_bool "follow_defs" val isar_proof = lookup_bool "isar_proof" + val modulus = Int.max (1, lookup_int "modulus") + val sorts = lookup_bool "sorts" val timeout = lookup_time "timeout" val minimize_timeout = lookup_time "minimize_timeout" in {debug = debug, verbose = verbose, atps = atps, full_types = full_types, - relevance_threshold = relevance_threshold, higher_order = higher_order, - respect_no_atp = respect_no_atp, follow_defs = follow_defs, - isar_proof = isar_proof, timeout = timeout, - minimize_timeout = minimize_timeout} + respect_no_atp = respect_no_atp, relevance_threshold = relevance_threshold, + convergence = convergence, theory_const = theory_const, + higher_order = higher_order, follow_defs = follow_defs, + isar_proof = isar_proof, modulus = modulus, sorts = sorts, + timeout = timeout, minimize_timeout = minimize_timeout} end fun get_params thy = extract_params thy (default_raw_params thy) fun default_params thy = get_params thy o map (apsnd single) -fun atp_minimize_command override_params old_style_args fact_refs state = +fun atp_minimize_command override_params old_style_args i fact_refs state = let val thy = Proof.theory_of state val ctxt = Proof.context_of state @@ -170,7 +181,7 @@ | NONE => error ("Unknown 
ATP: " ^ quote atp)) val name_thms_pairs = theorems_from_refs ctxt fact_refs in - writeln (#2 (minimize_theorems params linear_minimize prover atp state + writeln (#2 (minimize_theorems params linear_minimize prover atp i state name_thms_pairs)) end @@ -196,7 +207,8 @@ sledgehammer (get_params thy override_params) (the_default 1 opt_i) relevance_override state else if subcommand = minimizeN then - atp_minimize_command override_params [] (#add relevance_override) state + atp_minimize_command override_params [] (the_default 1 opt_i) + (#add relevance_override) state else if subcommand = messagesN then messages opt_i else if subcommand = available_atpsN then @@ -281,7 +293,7 @@ "minimize theorem list with external prover" K.diag (parse_minimize_args -- parse_fact_refs >> (fn (args, fact_refs) => Toplevel.no_timing o Toplevel.unknown_proof o - Toplevel.keep (atp_minimize_command [] args fact_refs + Toplevel.keep (atp_minimize_command [] args 1 fact_refs o Toplevel.proof_of))) val _ = diff -r ca6610908ae9 -r 1493b43204e9 src/HOL/Tools/Sledgehammer/sledgehammer_proof_reconstruct.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_proof_reconstruct.ML Wed Mar 31 16:44:41 2010 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_proof_reconstruct.ML Thu Apr 01 10:27:06 2010 +0200 @@ -12,11 +12,11 @@ val num_typargs: theory -> string -> int val make_tvar: string -> typ val strip_prefix: string -> string -> string option - val setup: theory -> theory val is_proof_well_formed: string -> bool + val metis_line: int -> int -> string list -> string val metis_lemma_list: bool -> string -> string * string vector * (int * int) * Proof.context * thm * int -> string * string list - val structured_isar_proof: string -> + val structured_isar_proof: int -> bool -> string -> string * string vector * (int * int) * Proof.context * thm * int -> string * string list end; @@ -33,14 +33,6 @@ fun string_of_thm ctxt = PrintMode.setmp [] (Display.string_of_thm ctxt); -(*For generating structured proofs: keep every nth proof line*) -val (modulus, modulus_setup) = Attrib.config_int "sledgehammer_modulus" (K 1); - -(*Indicates whether to include sort information in generated proofs*) -val (recon_sorts, recon_sorts_setup) = Attrib.config_bool "sledgehammer_sorts" (K true); - -val setup = modulus_setup #> recon_sorts_setup; - (**** PARSING OF TSTP FORMAT ****) (*Syntax trees, either termlist or formulae*) @@ -335,10 +327,13 @@ (*ctms is a list of conjecture clauses as yielded by Isabelle. Those returned by the ATP may have their literals reordered.*) -fun isar_proof_body ctxt ctms = +fun isar_proof_body ctxt sorts ctms = let val _ = trace_proof_msg (K "\n\nisar_proof_body: start\n") - val string_of_term = PrintMode.setmp [] (Syntax.string_of_term ctxt) + val string_of_term = + PrintMode.setmp (filter (curry (op =) Symbol.xsymbolsN) + (print_mode_value ())) + (Syntax.string_of_term ctxt) fun have_or_show "show" _ = " show \"" | have_or_show have lname = " " ^ have ^ " " ^ lname ^ ": \"" fun do_line _ (lname, t, []) = @@ -354,7 +349,7 @@ fun do_lines [(lname, t, deps)] = [do_line "show" (lname, t, deps)] | do_lines ((lname, t, deps) :: lines) = do_line "have" (lname, t, deps) :: do_lines lines - in setmp_CRITICAL show_sorts (Config.get ctxt recon_sorts) do_lines end; + in setmp_CRITICAL show_sorts sorts do_lines end; fun unequal t (_, t', _) = not (t aconv t'); @@ -404,13 +399,13 @@ counts the number of proof lines processed so far. Deleted lines are replaced by their own dependencies. 
Note that the "add_nonnull_prfline" phase may delete some dependencies, hence this phase comes later.*) -fun add_wanted_prfline ctxt ((lno, t, []), (nlines, lines)) = +fun add_wanted_prfline ctxt _ ((lno, t, []), (nlines, lines)) = (nlines, (lno, t, []) :: lines) (*conjecture clauses must be kept*) - | add_wanted_prfline ctxt ((lno, t, deps), (nlines, lines)) = + | add_wanted_prfline ctxt modulus ((lno, t, deps), (nlines, lines)) = if eq_types t orelse not (null (Term.add_tvars t [])) orelse exists_subterm bad_free t orelse (not (null lines) andalso (*final line can't be deleted for these reasons*) - (length deps < 2 orelse nlines mod (Config.get ctxt modulus) <> 0)) + (length deps < 2 orelse nlines mod modulus <> 0)) then (nlines+1, map (replace_deps (lno, deps)) lines) (*Delete line*) else (nlines+1, (lno, t, deps) :: lines); @@ -427,12 +422,15 @@ stringify_deps thm_names ((lno,lname)::deps_map) lines end; -val proofstart = "proof (neg_clausify)\n"; +fun isar_proof_start i = + (if i = 1 then "" else "prefer " ^ string_of_int i ^ "\n") ^ + "proof (neg_clausify)\n"; +fun isar_fixes [] = "" + | isar_fixes ts = " fix " ^ space_implode " " ts ^ "\n"; +fun isar_proof_end 1 = "qed" + | isar_proof_end _ = "next" -fun isar_header [] = proofstart - | isar_header ts = proofstart ^ "fix " ^ space_implode " " ts ^ "\n"; - -fun isar_proof_from_tstp_file cnfs ctxt th sgno thm_names = +fun isar_proof_from_tstp_file cnfs modulus sorts ctxt goal i thm_names = let val _ = trace_proof_msg (K "\nisar_proof_from_tstp_file: start\n") val tuples = map (dest_tstp o tstp_line o explode) cnfs @@ -445,19 +443,23 @@ val nonnull_lines = List.foldr add_nonnull_prfline [] raw_lines val _ = trace_proof_msg (fn () => Int.toString (length nonnull_lines) ^ " nonnull_lines extracted\n") - val (_, lines) = List.foldr (add_wanted_prfline ctxt) (0,[]) nonnull_lines + val (_, lines) = List.foldr (add_wanted_prfline ctxt modulus) (0,[]) nonnull_lines val _ = trace_proof_msg (fn () => Int.toString (length lines) ^ " lines extracted\n") - val (ccls, fixes) = neg_conjecture_clauses ctxt th sgno + val (ccls, fixes) = neg_conjecture_clauses ctxt goal i val _ = trace_proof_msg (fn () => Int.toString (length ccls) ^ " conjecture clauses\n") val ccls = map forall_intr_vars ccls val _ = app (fn th => trace_proof_msg (fn () => "\nccl: " ^ string_of_thm ctxt th)) ccls - val body = isar_proof_body ctxt (map prop_of ccls) + val body = isar_proof_body ctxt sorts (map prop_of ccls) (stringify_deps thm_names [] lines) + val n = Logic.count_prems (prop_of goal) val _ = trace_proof_msg (K "\nisar_proof_from_tstp_file: finishing\n") - in isar_header (map #1 fixes) ^ implode body ^ "qed\n" end + in + isar_proof_start i ^ isar_fixes (map #1 fixes) ^ implode body ^ + isar_proof_end n ^ "\n" + end handle STREE _ => error "Could not extract proof (ATP output malformed?)"; @@ -534,22 +536,30 @@ val chained_hint = "CHAINED"; val kill_chained = filter_out (curry (op =) chained_hint) -(* metis-command *) -fun metis_line [] = "by metis" - | metis_line xs = "by (metis " ^ space_implode " " xs ^ ")" - +fun apply_command _ 1 = "by " + | apply_command 1 _ = "apply " + | apply_command i _ = "prefer " ^ string_of_int i ^ " apply " +fun metis_command i n [] = + apply_command i n ^ "metis" + | metis_command i n xs = + apply_command i n ^ "(metis " ^ space_implode " " xs ^ ")" +fun metis_line i n xs = + "Try this command: " ^ + Markup.markup Markup.sendback (metis_command i n xs) ^ ".\n" fun minimize_line _ [] = "" - | minimize_line name lemmas = - "To minimize the 
number of lemmas, try this command:\n" ^ + | minimize_line name xs = + "To minimize the number of lemmas, try this command: " ^ Markup.markup Markup.sendback ("sledgehammer minimize [atps = " ^ name ^ "] (" ^ - space_implode " " (kill_chained lemmas) ^ ")") ^ "." + space_implode " " xs ^ ")") ^ ".\n" -fun metis_lemma_list dfg name result = - let val (lemmas, used_conj) = extract_lemmas (get_step_nums dfg) result in - ("Try this command: " ^ - Markup.markup Markup.sendback (metis_line (kill_chained lemmas)) ^ ".\n" ^ - minimize_line name lemmas ^ +fun metis_lemma_list dfg name (result as (_, _, _, _, goal, i)) = + let + val (lemmas, used_conj) = extract_lemmas (get_step_nums dfg) result + val n = Logic.count_prems (prop_of goal) + val xs = kill_chained lemmas + in + (metis_line i n xs ^ minimize_line name xs ^ (if used_conj then "" else @@ -557,10 +567,10 @@ kill_chained lemmas) end; -fun structured_isar_proof name (result as (proof, thm_names, conj_count, ctxt, - goal, subgoalno)) = +fun structured_isar_proof modulus sorts name + (result as (proof, thm_names, conj_count, ctxt, goal, i)) = let - (* Could use "split_lines", but it can return blank lines *) + (* We could use "split_lines", but it can return blank lines. *) val lines = String.tokens (equal #"\n"); val kill_spaces = String.translate (fn c => if Char.isSpace c then "" else str c) @@ -570,13 +580,12 @@ val tokens = String.tokens (fn c => c = #" ") one_line_proof val isar_proof = if member (op =) tokens chained_hint then "" - else isar_proof_from_tstp_file cnfs ctxt goal subgoalno thm_names + else isar_proof_from_tstp_file cnfs modulus sorts ctxt goal i thm_names in - (* Hack: The " \n" shouldn't be part of the markup. This is a workaround - for a strange ProofGeneral bug, whereby the first two letters of the word - "proof" are not highlighted. *) - (one_line_proof ^ "\n\nStructured proof:" ^ - Markup.markup Markup.sendback (" \n" ^ isar_proof), lemma_names) + (one_line_proof ^ + (if isar_proof = "" then "" + else "\nStructured proof:\n" ^ Markup.markup Markup.sendback isar_proof), + lemma_names) end end; diff -r ca6610908ae9 -r 1493b43204e9 src/HOL/Tools/Sledgehammer/sledgehammer_util.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_util.ML Wed Mar 31 16:44:41 2010 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_util.ML Thu Apr 01 10:27:06 2010 +0200 @@ -1,4 +1,4 @@ -(* Title: HOL/Sledgehammer/sledgehammer_util.ML +(* Title: HOL/Tools/Sledgehammer/sledgehammer_util.ML Author: Jasmin Blanchette, TU Muenchen General-purpose functions used by the Sledgehammer modules. @@ -9,11 +9,21 @@ val serial_commas : string -> string list -> string list val parse_bool_option : bool -> string -> string -> bool option val parse_time_option : string -> string -> Time.time option + val hashw : word * word -> word + val hashw_char : Char.char * word -> word + val hashw_string : string * word -> word end; structure Sledgehammer_Util : SLEDGEHAMMER_UTIL = struct +(* This hash function is recommended in Compilers: Principles, Techniques, and + Tools, by Aho, Sethi and Ullman. The hashpjw function, which they + particularly recommend, triggers a bug in versions of Poly/ML up to 4.2.0. 
*) +fun hashw (u, w) = Word.+ (u, Word.* (0w65599, w)) +fun hashw_char (c, w) = hashw (Word.fromInt (Char.ord c), w) +fun hashw_string (s:string, w) = CharVector.foldl hashw_char w s + fun serial_commas _ [] = ["??"] | serial_commas _ [s] = [s] | serial_commas conj [s1, s2] = [s1, conj, s2] diff -r ca6610908ae9 -r 1493b43204e9 src/HOL/Tools/polyhash.ML --- a/src/HOL/Tools/polyhash.ML Wed Mar 31 16:44:41 2010 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,406 +0,0 @@ -(* Modified for Poly/ML from SML/NJ Library version 0.2 - * - * COPYRIGHT (c) 1992 by AT&T Bell Laboratories. - * See file mosml/copyrght/copyrght.att for details. - * - * Original author: John Reppy, AT&T Bell Laboratories, Murray Hill, NJ 07974 - * Current version from the Moscow ML distribution, copied by permission. - *) - -(* Polyhash -- polymorphic hashtables as in the SML/NJ Library *) - -signature Polyhash = -sig - -type ('key, 'data) hash_table - -val mkTable : ('_key -> int) * ('_key * '_key -> bool) -> int * exn - -> ('_key, '_data) hash_table -val numItems : ('key, 'data) hash_table -> int -val insert : ('_key, '_data) hash_table -> '_key * '_data -> unit -val peekInsert : ('_key, '_data) hash_table -> '_key * '_data - -> '_data option -val find : ('key, 'data) hash_table -> 'key -> 'data -val peek : ('key, 'data) hash_table -> 'key -> 'data option -val remove : ('key, 'data) hash_table -> 'key -> 'data -val listItems : ('key, 'data) hash_table -> ('key * 'data) list -val apply : ('key * 'data -> unit) -> ('key, 'data) hash_table -> unit -val map : ('_key * 'data -> '_res) -> ('_key, 'data) hash_table - -> ('_key, '_res) hash_table -val filter : ('key * 'data -> bool) -> ('key, 'data) hash_table -> unit -val transform : ('data -> '_res) -> ('_key, 'data) hash_table - -> ('_key, '_res) hash_table -val copy : ('_key, '_data) hash_table -> ('_key, '_data) hash_table -val bucketSizes : ('key, 'data) hash_table -> int list - -(*Additions due to L. C. Paulson and Jia Meng*) -val hashw : word * word -> word -val hashw_char : Char.char * word -> word -val hashw_int : int * word -> word -val hashw_vector : word Vector.vector * word -> word -val hashw_string : string * word -> word -val hashw_strings : string list * word -> word -val hash_string : string -> int - -(* - [('key, 'data) hash_table] is the type of hashtables with keys of type - 'key and data values of type 'data. - - [mkTable (hashVal, sameKey) (sz, exc)] returns a new hashtable, - using hash function hashVal and equality predicate sameKey. The sz - is a size hint, and exc is the exception raised by function find. - It must be the case that sameKey(k1, k2) implies hashVal(k1) = - hashVal(k2) for all k1,k2. - - [numItems htbl] is the number of items in the hash table. - - [insert htbl (k, d)] inserts data d for key k. If k already had an - item associated with it, then the old item is overwritten. - - [find htbl k] returns d, where d is the data item associated with key k, - or raises the exception (given at creation of htbl) if there is no such d. - - [peek htbl k] returns SOME d, where d is the data item associated with - key k, or NONE if there is no such d. - - [peekInsert htbl (k, d)] inserts data d for key k, if k is not - already in the table, returning NONE. If k is already in the - table, and the associated data value is d', then returns SOME d' - and leaves the table unmodified. - - [remove htbl k] returns d, where d is the data item associated with key k, - removing d from the table; or raises the exception if there is no such d. 
- - [listItems htbl] returns a list of the (key, data) pairs in the hashtable. - - [apply f htbl] applies function f to all (key, data) pairs in the - hashtable, in some order. - - [map f htbl] returns a new hashtable, whose data items have been - obtained by applying f to the (key, data) pairs in htbl. The new - tables have the same keys, hash function, equality predicate, and - exception, as htbl. - - [filter p htbl] deletes from htbl all data items which do not - satisfy predicate p. - - [transform f htbl] as map, but only the (old) data values are used - when computing the new data values. - - [copy htbl] returns a complete copy of htbl. - - [bucketSizes htbl] returns a list of the sizes of the buckets. - This is to allow users to gauge the quality of their hashing - function. -*) - -end - - -structure Polyhash :> Polyhash = -struct - -datatype ('key, 'data) bucket_t - = NIL - | B of int * 'key * 'data * ('key, 'data) bucket_t - -datatype ('key, 'data) hash_table = - HT of {hashVal : 'key -> int, - sameKey : 'key * 'key -> bool, - not_found : exn, - table : ('key, 'data) bucket_t Array.array Unsynchronized.ref, - n_items : int Unsynchronized.ref} - -local -(* - prim_val andb_ : int -> int -> int = 2 "and"; - prim_val lshift_ : int -> int -> int = 2 "shift_left"; -*) - fun andb_ x y = Word.toInt (Word.andb (Word.fromInt x, Word.fromInt y)); - fun lshift_ x y = Word.toInt (Word.<< (Word.fromInt x, Word.fromInt y)); -in - fun index (i, sz) = andb_ i (sz-1) - - (* find smallest power of 2 (>= 32) that is >= n *) - fun roundUp n = - let fun f i = if (i >= n) then i else f (lshift_ i 1) - in f 32 end -end; - - (* Create a new table; the int is a size hint and the exception - * is to be raised by find. - *) - fun mkTable (hashVal, sameKey) (sizeHint, notFound) = HT{ - hashVal=hashVal, - sameKey=sameKey, - not_found = notFound, - table = Unsynchronized.ref (Array.array(roundUp sizeHint, NIL)), - n_items = Unsynchronized.ref 0 - }; - - (* conditionally grow a table *) - fun growTable (HT{table, n_items, ...}) = let - val arr = !table - val sz = Array.length arr - in - if (!n_items >= sz) - then let - val newSz = sz+sz - val newArr = Array.array (newSz, NIL) - fun copy NIL = () - | copy (B(h, key, v, rest)) = let - val indx = index (h, newSz) - in - Array.update (newArr, indx, - B(h, key, v, Array.sub(newArr, indx))); - copy rest - end - fun bucket n = (copy (Array.sub(arr, n)); bucket (n+1)) - in - (bucket 0) handle _ => (); (* FIXME avoid handle _ *) - table := newArr - end - else () - end (* growTable *); - - (* Insert an item. If the key already has an item associated with it, - * then the old item is discarded. - *) - fun insert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) = - let - val arr = !table - val sz = Array.length arr - val hash = hashVal key - val indx = index (hash, sz) - fun look NIL = ( - Array.update(arr, indx, B(hash, key, item, Array.sub(arr, indx))); - Unsynchronized.inc n_items; - growTable tbl; - NIL) - | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) - then B(hash, key, item, r) - else (case (look r) - of NIL => NIL - | rest => B(h, k, v, rest) - (* end case *)) - in - case (look (Array.sub (arr, indx))) - of NIL => () - | b => Array.update(arr, indx, b) - end; - - (* Insert an item if not there already; if it is there already, - then return the old data value and leave the table unmodified.. 
- *) - fun peekInsert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) = - let val arr = !table - val sz = Array.length arr - val hash = hashVal key - val indx = index (hash, sz) - fun look NIL = - (Array.update(arr, indx, B(hash, key, item, - Array.sub(arr, indx))); - Unsynchronized.inc n_items; - growTable tbl; - NONE) - | look (B(h, k, v, r)) = - if hash = h andalso sameKey(key, k) then SOME v - else look r - in - look (Array.sub (arr, indx)) - end; - - (* find an item, the table's exception is raised if the item doesn't exist *) - fun find (HT{hashVal, sameKey, table, not_found, ...}) key = let - val arr = !table - val sz = Array.length arr - val hash = hashVal key - val indx = index (hash, sz) - fun look NIL = raise not_found - | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) - then v - else look r - in - look (Array.sub (arr, indx)) - end; - - (* look for an item, return NONE if the item doesn't exist *) - fun peek (HT{hashVal, sameKey, table, ...}) key = let - val arr = !table - val sz = Array.length arr - val hash = hashVal key - val indx = index (hash, sz) - fun look NIL = NONE - | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) - then SOME v - else look r - in - look (Array.sub (arr, indx)) - end; - - (* Remove an item. The table's exception is raised if - * the item doesn't exist. - *) - fun remove (HT{hashVal, sameKey, not_found, table, n_items}) key = let - val arr = !table - val sz = Array.length arr - val hash = hashVal key - val indx = index (hash, sz) - fun look NIL = raise not_found - | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) - then (v, r) - else let val (item, r') = look r in (item, B(h, k, v, r')) end - val (item, bucket) = look (Array.sub (arr, indx)) - in - Array.update (arr, indx, bucket); - n_items := !n_items - 1; - item - end (* remove *); - - (* Return the number of items in the table *) - fun numItems (HT{n_items, ...}) = !n_items - - (* return a list of the items in the table *) - fun listItems (HT{table = Unsynchronized.ref arr, n_items, ...}) = let - fun f (_, l, 0) = l - | f (~1, l, _) = l - | f (i, l, n) = let - fun g (NIL, l, n) = f (i-1, l, n) - | g (B(_, k, v, r), l, n) = g(r, (k, v)::l, n-1) - in - g (Array.sub(arr, i), l, n) - end - in - f ((Array.length arr) - 1, [], !n_items) - end (* listItems *); - - (* Apply a function to the entries of the table *) - fun apply f (HT{table, ...}) = let - fun appF NIL = () - | appF (B(_, key, item, rest)) = ( - f (key, item); - appF rest) - val arr = !table - val sz = Array.length arr - fun appToTbl i = if (i < sz) - then (appF (Array.sub (arr, i)); appToTbl(i+1)) - else () - in - appToTbl 0 - end (* apply *); - - (* Map a table to a new table that has the same keys and exception *) - fun map f (HT{hashVal, sameKey, table, n_items, not_found}) = let - fun mapF NIL = NIL - | mapF (B(hash, key, item, rest)) = - B(hash, key, f (key, item), mapF rest) - val arr = !table - val sz = Array.length arr - val newArr = Array.array (sz, NIL) - fun mapTbl i = if (i < sz) - then ( - Array.update(newArr, i, mapF (Array.sub(arr, i))); - mapTbl (i+1)) - else () - in - mapTbl 0; - HT{hashVal=hashVal, - sameKey=sameKey, - table = Unsynchronized.ref newArr, - n_items = Unsynchronized.ref(!n_items), - not_found = not_found} - end (* transform *); - - (* remove any hash table items that do not satisfy the given - * predicate. 
- *) - fun filter pred (HT{table, n_items, not_found, ...}) = let - fun filterP NIL = NIL - | filterP (B(hash, key, item, rest)) = if (pred(key, item)) - then B(hash, key, item, filterP rest) - else filterP rest - val arr = !table - val sz = Array.length arr - fun filterTbl i = if (i < sz) - then ( - Array.update (arr, i, filterP (Array.sub (arr, i))); - filterTbl (i+1)) - else () - in - filterTbl 0 - end (* filter *); - - (* Map a table to a new table that has the same keys, exception, - hash function, and equality function *) - - fun transform f (HT{hashVal, sameKey, table, n_items, not_found}) = let - fun mapF NIL = NIL - | mapF (B(hash, key, item, rest)) = B(hash, key, f item, mapF rest) - val arr = !table - val sz = Array.length arr - val newArr = Array.array (sz, NIL) - fun mapTbl i = if (i < sz) - then ( - Array.update(newArr, i, mapF (Array.sub(arr, i))); - mapTbl (i+1)) - else () - in - mapTbl 0; - HT{hashVal=hashVal, - sameKey=sameKey, - table = Unsynchronized.ref newArr, - n_items = Unsynchronized.ref(!n_items), - not_found = not_found} - end (* transform *); - - (* Create a copy of a hash table *) - fun copy (HT{hashVal, sameKey, table, n_items, not_found}) = let - val arr = !table - val sz = Array.length arr - val newArr = Array.array (sz, NIL) - fun mapTbl i = ( - Array.update (newArr, i, Array.sub(arr, i)); - mapTbl (i+1)) - in - (mapTbl 0) handle _ => (); (* FIXME avoid handle _ *) - HT{hashVal=hashVal, - sameKey=sameKey, - table = Unsynchronized.ref newArr, - n_items = Unsynchronized.ref(!n_items), - not_found = not_found} - end (* copy *); - - (* returns a list of the sizes of the various buckets. This is to - * allow users to gauge the quality of their hashing function. - *) - fun bucketSizes (HT{table = Unsynchronized.ref arr, ...}) = let - fun len (NIL, n) = n - | len (B(_, _, _, r), n) = len(r, n+1) - fun f (~1, l) = l - | f (i, l) = f (i-1, len (Array.sub (arr, i), 0) :: l) - in - f ((Array.length arr)-1, []) - end - - (*Added by lcp. - This is essentially the described in Compilers: - Principles, Techniques, and Tools, by Aho, Sethi and Ullman.*) - - (*This hash function is recommended in Compilers: Principles, Techniques, and - Tools, by Aho, Sethi and Ullman. The hashpjw function, which they particularly - recommend, triggers a bug in versions of Poly/ML up to 4.2.0.*) - fun hashw (u,w) = Word.+ (u, Word.*(0w65599,w)) - - fun hashw_char (c,w) = hashw (Word.fromInt (Char.ord c), w); - - fun hashw_int (i,w) = hashw (Word.fromInt i, w); - - fun hashw_vector (v,w) = Vector.foldl hashw w v; - - fun hashw_string (s:string, w) = CharVector.foldl hashw_char w s; - - fun hashw_strings (ss, w) = List.foldl hashw_string w ss; - - fun hash_string s = Word.toIntX (hashw_string(s,0w0)); - -end
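The Named_Thm_Set functor introduced by this patch (and instantiated in HOL.thy as No_ATPs with the attribute name "no_atp") replaces the Polyhash-based bookkeeping with a Termtab-backed set of theorems keyed on their full propositions. A minimal usage sketch follows, assuming an Isabelle/ML context in which named_thm_set.ML has been loaded; the structure name My_Facts, the attribute name "my_facts", and the helper filter_out_my_facts are illustrative only and do not appear in the patch:

(* Instantiate the functor, mirroring the No_ATPs instantiation in HOL.thy. *)
structure My_Facts = Named_Thm_Set
(
  val name = "my_facts"
  val description = "facts tracked by the example set"
)

(* Install the "my_facts" add/del attribute and the dynamic fact collection. *)
val setup = My_Facts.setup

(* Membership test over (name, thm) pairs, analogous to how
   sledgehammer_fact_filter.ML drops "no_atp" facts in name_thm_pairs. *)
fun filter_out_my_facts ctxt = filter_out (My_Facts.member ctxt o snd)

After the setup is run as part of a theory, theorems can be added to or removed from the set with the attribute syntax [my_facts] and [my_facts del], just as the relevance filter above relies on [no_atp] declarations.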