src/HOL/Tools/Sledgehammer/sledgehammer_filter_mash.ML
author blanchet
Wed, 18 Jul 2012 08:44:04 +0200
changeset 48313 0faafdffa662
parent 48312 b40722a81ac9
child 48314 ee33ba3c0e05
permissions -rw-r--r--
mesh facts by taking into consideration whether a fact is known to MeSh

(*  Title:      HOL/Tools/Sledgehammer/sledgehammer_filter_mash.ML
    Author:     Jasmin Blanchette, TU Muenchen

Sledgehammer's machine-learning-based relevance filter (MaSh).
*)

signature SLEDGEHAMMER_FILTER_MASH =
sig
  type status = ATP_Problem_Generate.status
  type stature = ATP_Problem_Generate.stature
  type fact = Sledgehammer_Fact.fact
  type fact_override = Sledgehammer_Fact.fact_override
  type params = Sledgehammer_Provers.params
  type relevance_fudge = Sledgehammer_Provers.relevance_fudge
  type prover_result = Sledgehammer_Provers.prover_result

  val trace : bool Config.T
  val escape_meta : string -> string
  val escape_metas : string list -> string
  val unescape_meta : string -> string
  val unescape_metas : string -> string list
  val extract_query : string -> string * string list
  val suggested_facts : string list -> fact list -> fact list
  val mesh_facts : int -> (fact list * int option) list -> fact list
  val all_non_tautological_facts_of :
    theory -> status Termtab.table -> fact list
  val theory_ord : theory * theory -> order
  val thm_ord : thm * thm -> order
  val thy_facts_from_thms : fact list -> string list Symtab.table
  val has_thy : theory -> thm -> bool
  val parent_facts : theory -> string list Symtab.table -> string list
  val features_of : theory -> status -> term list -> string list
  val isabelle_dependencies_of : string list -> thm -> string list
  val goal_of_thm : theory -> thm -> thm
  val run_prover : Proof.context -> params -> fact list -> thm -> prover_result
  val thy_name_of_fact : string -> string
  val mash_RESET : Proof.context -> unit
  val mash_ADD :
    Proof.context -> (string * string list * string list * string list) list
    -> unit
  val mash_DEL : Proof.context -> string list -> string list -> unit
  val mash_QUERY : Proof.context -> string list * string list -> string list
  val mash_reset : Proof.context -> unit
  val mash_can_suggest_facts : Proof.context -> bool
  val mash_suggest_facts :
    Proof.context -> params -> string -> term list -> term -> fact list
    -> fact list
  val mash_can_learn_thy : Proof.context -> theory -> bool
  val mash_learn_thy : Proof.context -> theory -> real -> unit
  val mash_learn_proof : Proof.context -> theory -> term -> thm list -> unit
  val relevant_facts :
    Proof.context -> params -> string -> int -> fact_override -> term list
    -> term -> fact list -> fact list
end;

structure Sledgehammer_Filter_MaSh : SLEDGEHAMMER_FILTER_MASH =
struct

open ATP_Util
open ATP_Problem_Generate
open Sledgehammer_Util
open Sledgehammer_Fact
open Sledgehammer_Filter_Iter
open Sledgehammer_Provers

val trace =
  Attrib.setup_config_bool @{binding sledgehammer_filter_mash_trace} (K false)
fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()

fun mash_dir () =
  getenv "ISABELLE_HOME_USER" ^ "/mash"
  |> tap (fn dir => Isabelle_System.mkdir (Path.explode dir))
fun mash_state_path () = mash_dir () ^ "/state" |> Path.explode

(*** Isabelle helpers ***)

fun meta_char c =
  if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse
     c = #")" orelse c = #"," then
    String.str c
  else
    (* fixed width, in case more digits follow *)
    "\\" ^ stringN_of_int 3 (Char.ord c)

fun unmeta_chars accum [] = String.implode (rev accum)
  | unmeta_chars accum (#"\\" :: d1 :: d2 :: d3 :: cs) =
    (case Int.fromString (String.implode [d1, d2, d3]) of
       SOME n => unmeta_chars (Char.chr n :: accum) cs
     | NONE => "" (* error *))
  | unmeta_chars _ (#"\\" :: _) = "" (* error *)
  | unmeta_chars accum (c :: cs) = unmeta_chars (c :: accum) cs

val escape_meta = String.translate meta_char
val escape_metas = map escape_meta #> space_implode " "
val unescape_meta = unmeta_chars [] o String.explode
val unescape_metas = map unescape_meta o space_explode " "

val explode_suggs =
  space_explode " " #> filter_out (curry (op =) "") #> map unescape_meta
fun extract_query line =
  case space_explode ":" line of
    [goal_name, suggs] => (unescape_meta goal_name, explode_suggs suggs)
  | _ => ("", [])

fun find_suggested facts sugg =
  find_first (fn (_, th) => Thm.get_name_hint th = sugg) facts
fun suggested_facts suggs facts = map_filter (find_suggested facts) suggs

fun sum_avg n xs =
  fold (Integer.add o Integer.mult n) xs 0 div (length xs)

fun mesh_facts max_facts mess =
  let
    val n = length mess
    val fact_eq = Thm.eq_thm o pairself snd
    fun score_in fact (facts, def) =
      case find_index (curry fact_eq fact) facts of
        ~1 => def
      | j => SOME j
    fun score_of fact = mess |> map_filter (score_in fact) |> sum_avg n
    val facts = fold (union fact_eq o take max_facts o fst) mess []
  in
    facts |> map (`score_of) |> sort (int_ord o pairself fst) |> map snd
          |> take max_facts
  end

val thy_feature_prefix = "y_"

val thy_feature_name_of = prefix thy_feature_prefix
val const_name_of = prefix const_prefix
val type_name_of = prefix type_const_prefix
val class_name_of = prefix class_prefix

local

fun has_bool @{typ bool} = true
  | has_bool (Type (_, Ts)) = exists has_bool Ts
  | has_bool _ = false

fun has_fun (Type (@{type_name fun}, _)) = true
  | has_fun (Type (_, Ts)) = exists has_fun Ts
  | has_fun _ = false

val is_conn = member (op =)
  [@{const_name Trueprop}, @{const_name HOL.conj}, @{const_name HOL.disj},
   @{const_name HOL.implies}, @{const_name Not},
   @{const_name All}, @{const_name Ex}, @{const_name Ball}, @{const_name Bex},
   @{const_name HOL.eq}]

val has_bool_arg_const =
  exists_Const (fn (c, T) =>
                   not (is_conn c) andalso exists has_bool (binder_types T))

fun higher_inst_const thy (c, T) =
  case binder_types T of
    [] => false
  | Ts => length (binder_types (Sign.the_const_type thy c)) <> length Ts

val binders = [@{const_name All}, @{const_name Ex}]

in

fun is_fo_term thy t =
  let
    val t =
      t |> Envir.beta_eta_contract
        |> transform_elim_prop
        |> Object_Logic.atomize_term thy
  in
    Term.is_first_order binders t andalso
    not (exists_subterm (fn Var (_, T) => has_bool T orelse has_fun T
                          | _ => false) t orelse
         has_bool_arg_const t orelse exists_Const (higher_inst_const thy) t)
  end

end

fun interesting_terms_types_and_classes term_max_depth type_max_depth ts =
  let
    val bad_types = [@{type_name prop}, @{type_name bool}, @{type_name fun}]
    val bad_consts = atp_widely_irrelevant_consts
    fun add_classes @{sort type} = I
      | add_classes S = union (op =) (map class_name_of S)
    fun do_add_type (Type (s, Ts)) =
        (not (member (op =) bad_types s) ? insert (op =) (type_name_of s))
        #> fold do_add_type Ts
      | do_add_type (TFree (_, S)) = add_classes S
      | do_add_type (TVar (_, S)) = add_classes S
    fun add_type T = type_max_depth >= 0 ? do_add_type T
    fun mk_app s args =
      if member (op <>) args "" then s ^ "(" ^ space_implode "," args ^ ")"
      else s
    fun patternify ~1 _ = ""
      | patternify depth t =
        case strip_comb t of
          (Const (s, _), args) =>
          mk_app (const_name_of s) (map (patternify (depth - 1)) args)
        | _ => ""
    fun add_term_patterns ~1 _ = I
      | add_term_patterns depth t =
        insert (op =) (patternify depth t)
        #> add_term_patterns (depth - 1) t
    val add_term = add_term_patterns term_max_depth
    fun add_patterns t =
      let val (head, args) = strip_comb t in
        (case head of
           Const (s, T) =>
           not (member (op =) bad_consts s) ? (add_term t #> add_type T)
         | Free (_, T) => add_type T
         | Var (_, T) => add_type T
         | Abs (_, T, body) => add_type T #> add_patterns body
         | _ => I)
        #> fold add_patterns args
      end
  in [] |> fold add_patterns ts |> sort string_ord end

fun is_likely_tautology th =
  null (interesting_terms_types_and_classes 0 ~1 [prop_of th]) andalso
  not (Thm.eq_thm_prop (@{thm ext}, th))

fun is_too_meta thy th =
  fastype_of (Object_Logic.atomize_term thy (prop_of th)) <> @{typ bool}

fun all_non_tautological_facts_of thy css_table =
  all_facts_of thy css_table
  |> filter_out ((is_likely_tautology orf is_too_meta thy) o snd)

fun theory_ord p =
  if Theory.eq_thy p then EQUAL
  else if Theory.subthy p then LESS
  else if Theory.subthy (swap p) then GREATER
  else EQUAL

val thm_ord = theory_ord o pairself theory_of_thm

(* ### FIXME: optimize *)
fun thy_facts_from_thms ths =
  ths |> map (snd #> `(theory_of_thm #> Context.theory_name))
      |> AList.group (op =)
      |> sort (int_ord o pairself (length o Theory.ancestors_of o theory_of_thm
                                   o hd o snd))
      |> map (apsnd (sort (rev_order o thm_ord) #> map Thm.get_name_hint))
      |> Symtab.make

fun has_thy thy th =
  Context.theory_name thy = Context.theory_name (theory_of_thm th)

fun parent_facts thy thy_facts =
  let
    fun add_last thy =
      case Symtab.lookup thy_facts (Context.theory_name thy) of
        SOME (last_fact :: _) => insert (op =) last_fact
      | _ => add_parent thy
    and add_parent thy = fold add_last (Theory.parents_of thy)
  in add_parent thy [] end

fun is_exists (s, _) = (s = @{const_name Ex} orelse s = @{const_name Ex1})

val term_max_depth = 1
val type_max_depth = 1

(* TODO: Generate type classes for types? *)
fun features_of thy status ts =
  thy_feature_name_of (Context.theory_name thy) ::
  interesting_terms_types_and_classes term_max_depth type_max_depth ts
  |> exists (not o is_lambda_free) ts ? cons "lambdas"
  |> exists (exists_Const is_exists) ts ? cons "skolems"
  |> exists (not o is_fo_term thy) ts ? cons "ho"
  |> (case status of
        General => I
      | Induction => cons "induction"
      | Intro => cons "intro"
      | Inductive => cons "inductive"
      | Elim => cons "elim"
      | Simp => cons "simp"
      | Def => cons "def")

fun isabelle_dependencies_of all_facts =
  thms_in_proof (SOME all_facts) #> sort string_ord

val freezeT = Type.legacy_freeze_type

fun freeze (t $ u) = freeze t $ freeze u
  | freeze (Abs (s, T, t)) = Abs (s, freezeT T, freeze t)
  | freeze (Var ((s, _), T)) = Free (s, freezeT T)
  | freeze (Const (s, T)) = Const (s, freezeT T)
  | freeze (Free (s, T)) = Free (s, freezeT T)
  | freeze t = t

fun goal_of_thm thy = prop_of #> freeze #> cterm_of thy #> Goal.init

fun run_prover ctxt (params as {provers, ...}) facts goal =
  let
    val problem =
      {state = Proof.init ctxt, goal = goal, subgoal = 1, subgoal_count = 1,
       facts = facts |> map (apfst (apfst (fn name => name ())))
                     |> map Sledgehammer_Provers.Untranslated_Fact}
    val prover =
      Sledgehammer_Minimize.get_minimizing_prover ctxt
          Sledgehammer_Provers.Normal (hd provers)
  in prover params (K (K (K ""))) problem end

fun accessibility_of thy thy_facts =
  case Symtab.lookup thy_facts (Context.theory_name thy) of
    SOME (fact :: _) => [fact]
  | _ => parent_facts thy thy_facts

val thy_name_of_fact = hd o Long_Name.explode


(*** Low-level communication with MaSh ***)

fun run_mash ctxt save write_cmds read_preds =
  let
    val temp_dir = getenv "ISABELLE_TMP"
    val serial = serial_string ()
    val cmd_file = temp_dir ^ "/mash_commands." ^ serial
    val cmd_path = Path.explode cmd_file
    val pred_file = temp_dir ^ "/mash_preds." ^ serial
    val log_file = temp_dir ^ "/mash_log." ^ serial
    val command =
      getenv "MASH_HOME" ^ "/mash.py --inputFile " ^ cmd_file ^
      " --outputDir " ^ mash_dir () ^ " --predictions " ^ pred_file ^
      " --log " ^ log_file ^ " --numberOfPredictions 1000" ^
      (if save then " --saveModel" else "") ^ " > /dev/null"
    val _ = File.write cmd_path ""
    val _ = write_cmds (File.append cmd_path)
    val _ = trace_msg ctxt (fn () => "  running " ^ command)
    val _ = Isabelle_System.bash command
  in read_preds (fn () => File.read_lines (Path.explode pred_file)) end

fun str_of_update (fact, access, feats, deps) =
  "! " ^ escape_meta fact ^ ": " ^ escape_metas access ^ "; " ^
  escape_metas feats ^ "; " ^ escape_metas deps ^ "\n"

fun str_of_query (access, feats) =
  "? " ^ escape_metas access ^ "; " ^ escape_metas feats

fun mash_RESET ctxt =
  let val path = mash_dir () |> Path.explode in
    trace_msg ctxt (K "MaSh RESET");
    File.fold_dir (fn file => fn () =>
                      File.rm (Path.append path (Path.basic file)))
                  path ()
  end

fun mash_ADD _ [] = ()
  | mash_ADD ctxt upds =
    (trace_msg ctxt (fn () => "MaSh ADD " ^ space_implode " " (map #1 upds));
     run_mash ctxt true (fn append => List.app (append o str_of_update) upds)
              (K ()))

fun mash_DEL ctxt facts feats =
  trace_msg ctxt (fn () =>
      "MaSh DEL " ^ escape_metas facts ^ "; " ^ escape_metas feats)

fun mash_QUERY ctxt (query as (_, feats)) =
  (trace_msg ctxt (fn () => "MaSh SUGGEST " ^ space_implode " " feats);
   run_mash ctxt false (fn append => append (str_of_query query))
                 (fn preds => snd (extract_query (List.last (preds ()))))
   handle List.Empty => [])


(*** High-level communication with MaSh ***)

type mash_state =
  {dirty_thys : unit Symtab.table,
   thy_facts : string list Symtab.table}

val empty_state =
  {dirty_thys = Symtab.empty,
   thy_facts = Symtab.empty}

local

fun mash_load (state as (true, _)) = state
  | mash_load _ =
    let val path = mash_state_path () in
      (true,
       case try File.read_lines path of
         SOME (dirty_line :: facts_lines) =>
         let
           fun dirty_thys_of_line line =
             Symtab.make (line |> unescape_metas |> map (rpair ()))
           fun add_facts_line line =
             case unescape_metas line of
               thy :: facts => Symtab.update_new (thy, facts)
             | _ => I (* shouldn't happen *)
         in
           {dirty_thys = dirty_thys_of_line dirty_line,
            thy_facts = fold add_facts_line facts_lines Symtab.empty}
         end
       | _ => empty_state)
    end

fun mash_save ({dirty_thys, thy_facts} : mash_state) =
  let
    val path = mash_state_path ()
    val dirty_line = (escape_metas (Symtab.keys dirty_thys)) ^ "\n"
    fun fact_line_for (thy, facts) = escape_metas (thy :: facts) ^ "\n"
  in
    File.write path dirty_line;
    Symtab.fold (fn thy_fact => fn () =>
                    File.append path (fact_line_for thy_fact)) thy_facts ()
  end

val global_state =
  Synchronized.var "Sledgehammer_Filter_MaSh.global_state" (false, empty_state)

in

fun mash_map f =
  Synchronized.change global_state (mash_load ##> (f #> tap mash_save))

fun mash_get () = Synchronized.change_result global_state (mash_load #> `snd)

fun mash_reset ctxt =
  Synchronized.change global_state (fn _ =>
      (mash_RESET ctxt; File.write (mash_state_path ()) "";
       (true, empty_state)))

end

fun mash_can_suggest_facts (_ : Proof.context) =
  not (Symtab.is_empty (#thy_facts (mash_get ())))

fun mash_suggest_facts ctxt params prover hyp_ts concl_t facts =
  let
    val thy = Proof_Context.theory_of ctxt
    val thy_facts = #thy_facts (mash_get ())
    val access = accessibility_of thy thy_facts
    val feats = features_of thy General (concl_t :: hyp_ts)
    val suggs = mash_QUERY ctxt (access, feats)
  in suggested_facts suggs facts end

fun mash_can_learn_thy (_ : Proof.context) thy =
  not (Symtab.defined (#dirty_thys (mash_get ())) (Context.theory_name thy))

fun is_fact_in_thy_facts thy_facts fact =
  case Symtab.lookup thy_facts (thy_name_of_fact fact) of
    SOME facts => member (op =) facts fact
  | NONE => false

fun zip_facts news [] = news
  | zip_facts [] olds = olds
  | zip_facts (new :: news) (old :: olds) =
    if new = old then
      new :: zip_facts news olds
    else if member (op =) news old then
      old :: zip_facts (filter_out (curry (op =) old) news) olds
    else if member (op =) olds new then
      new :: zip_facts news (filter_out (curry (op =) new) olds)
    else
      new :: old :: zip_facts news olds

fun add_thy_facts_from_thys new old =
  let
    fun add_thy (thy, new_facts) =
      case Symtab.lookup old thy of
        SOME old_facts => Symtab.update (thy, zip_facts old_facts new_facts)
      | NONE => Symtab.update_new (thy, new_facts)
  in old |> Symtab.fold add_thy new end

fun mash_learn_thy ctxt thy timeout =
  let
    val css_table = Sledgehammer_Fact.clasimpset_rule_table_of ctxt
    val facts = all_non_tautological_facts_of thy css_table
    val {thy_facts, ...} = mash_get ()
    fun is_old (_, th) = is_fact_in_thy_facts thy_facts (Thm.get_name_hint th)
    val (old_facts, new_facts) =
      facts |> List.partition is_old ||> sort (thm_ord o pairself snd)
  in
    if null new_facts then
      ()
    else
      let
        val ths = facts |> map snd
        val all_names = ths |> map Thm.get_name_hint
        fun do_fact ((_, (_, status)), th) (prevs, upds) =
          let
            val name = Thm.get_name_hint th
            val feats = features_of thy status [prop_of th]
            val deps = isabelle_dependencies_of all_names th
            val upd = (name, prevs, feats, deps)
          in ([name], upd :: upds) end
        val parents = parent_facts thy thy_facts
        val (_, upds) = (parents, []) |> fold do_fact new_facts
        val new_thy_facts = new_facts |> thy_facts_from_thms
        fun trans {dirty_thys, thy_facts} =
          (mash_ADD ctxt (rev upds);
           {dirty_thys = dirty_thys,
            thy_facts = thy_facts |> add_thy_facts_from_thys new_thy_facts})
      in mash_map trans end
  end

fun mash_learn_proof ctxt thy t ths =
  mash_map (fn state as {dirty_thys, thy_facts} =>
    let val deps = ths |> map Thm.get_name_hint in
      if forall (is_fact_in_thy_facts thy_facts) deps then
        let
          val fact = ATP_Util.timestamp () (* should be fairly fresh *)
          val access = accessibility_of thy thy_facts
          val feats = features_of thy General [t]
        in
          mash_ADD ctxt [(fact, access, feats, deps)];
          {dirty_thys = dirty_thys, thy_facts = thy_facts}
        end
      else
        state
    end)

fun relevant_facts ctxt params prover max_facts
        ({add, only, ...} : fact_override) hyp_ts concl_t facts =
  if only then
    facts
  else if max_facts <= 0 then
    []
  else
    let
      val add_ths = Attrib.eval_thms ctxt add
      fun prepend_facts ths accepts =
        ((facts |> filter (member Thm.eq_thm_prop ths o snd)) @
         (accepts |> filter_out (member Thm.eq_thm_prop ths o snd)))
        |> take max_facts
      val iter_facts =
        iterative_relevant_facts ctxt params prover max_facts NONE hyp_ts
                                 concl_t facts
      val mash_facts =
        facts |> mash_suggest_facts ctxt params prover hyp_ts concl_t
      val mess = [(iter_facts, SOME (length iter_facts)), (mash_facts, NONE)]
    in
      mesh_facts max_facts mess
      |> not (null add_ths) ? prepend_facts add_ths
    end

end;