src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
author blanchet
Thu, 26 Jun 2014 13:33:27 +0200
changeset 57355 a9e0f9d35125
parent 57354 ded92100ffd7
child 57356 9816f692b0ca
permissions -rw-r--r--
tuning
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
48380
d4b7c7be3116 renamed ML files
blanchet
parents: 48379
diff changeset
     1
(*  Title:      HOL/Tools/Sledgehammer/sledgehammer_mash.ML
48248
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
     2
    Author:     Jasmin Blanchette, TU Muenchen
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
     3
    Author:     Cezary Kaliszyk, University of Innsbruck
48248
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
     4
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
     5
Sledgehammer's machine-learning-based relevance filter (MaSh).
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
     6
*)
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
     7
48381
1b7d798460bb renamed ML structures
blanchet
parents: 48380
diff changeset
     8
signature SLEDGEHAMMER_MASH =
48248
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
     9
sig
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
    10
  type stature = ATP_Problem_Generate.stature
51004
5f2788c38127 distinguish raw and non-raw facts, using raw for 10 000s of facts and non-raw after selection of some hundreds
blanchet
parents: 51003
diff changeset
    11
  type raw_fact = Sledgehammer_Fact.raw_fact
48296
e7f01b7e244e gracefully handle the case of empty theories when going up the accessibility chain
blanchet
parents: 48293
diff changeset
    12
  type fact = Sledgehammer_Fact.fact
e7f01b7e244e gracefully handle the case of empty theories when going up the accessibility chain
blanchet
parents: 48293
diff changeset
    13
  type fact_override = Sledgehammer_Fact.fact_override
55201
1ee776da8da7 renamed ML file
blanchet
parents: 54816
diff changeset
    14
  type params = Sledgehammer_Prover.params
1ee776da8da7 renamed ML file
blanchet
parents: 54816
diff changeset
    15
  type prover_result = Sledgehammer_Prover.prover_result
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
    16
48308
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
    17
  val trace : bool Config.T
57150
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
    18
  val duplicates : bool Config.T
51008
e096c0dc538b more precise output of selected facts
blanchet
parents: 51007
diff changeset
    19
  val MePoN : string
48319
340187063d84 use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents: 48318
diff changeset
    20
  val MaShN : string
51008
e096c0dc538b more precise output of selected facts
blanchet
parents: 51007
diff changeset
    21
  val MeShN : string
48379
2b5ad61e2ccc renamed "iter" fact filter to "MePo" (Meng--Paulson)
blanchet
parents: 48378
diff changeset
    22
  val mepoN : string
2b5ad61e2ccc renamed "iter" fact filter to "MePo" (Meng--Paulson)
blanchet
parents: 48378
diff changeset
    23
  val mashN : string
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
    24
  val meshN : string
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
    25
  val unlearnN : string
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
    26
  val learn_isarN : string
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
    27
  val learn_proverN : string
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
    28
  val relearn_isarN : string
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
    29
  val relearn_proverN : string
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
    30
  val fact_filters : string list
50826
18ace05656cf start using MaSh hints
blanchet
parents: 50825
diff changeset
    31
  val encode_str : string -> string
18ace05656cf start using MaSh hints
blanchet
parents: 50825
diff changeset
    32
  val encode_strs : string list -> string
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
    33
  val decode_str : string -> string
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
    34
  val decode_strs : string -> string list
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
    35
  val encode_features : (string * real) list -> string
57125
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
    36
  val extract_suggestions : string -> string * (string * real) list
50632
12c097ff3241 slightly more elegant naming convention (to keep low-level and high-level APIs separated)
blanchet
parents: 50631
diff changeset
    37
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
    38
  datatype mash_engine =
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
    39
    MaSh_Py
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
    40
  | MaSh_SML_kNN
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
    41
  | MaSh_SML_kNN_Cpp
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
    42
  | MaSh_SML_NB of bool * bool
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
    43
  | MaSh_SML_NB_Cpp
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
    44
  | MaSh_SML_NB_Py
57106
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    45
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57108
diff changeset
    46
  val is_mash_enabled : unit -> bool
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57108
diff changeset
    47
  val the_mash_engine : unit -> mash_engine
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57108
diff changeset
    48
57106
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    49
  structure MaSh_Py :
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    50
  sig
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    51
    val unlearn : Proof.context -> bool -> unit
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    52
    val learn : Proof.context -> bool -> bool ->
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    53
      (string * string list * string list * string list) list -> unit
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    54
    val relearn : Proof.context -> bool -> bool -> (string * string list) list -> unit
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    55
    val query : Proof.context -> bool -> int ->
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    56
      (string * string list * string list * string list) list * string list * string list
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    57
        * (string * real) list ->
57125
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
    58
      (string * real) list
57106
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    59
  end
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    60
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    61
  structure MaSh_SML :
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    62
  sig
57124
e4c2c792226f make SML naive Bayes closer to Python version
blanchet
parents: 57122
diff changeset
    63
    val k_nearest_neighbors : int -> int -> (int -> int list) -> (int -> (int * real) list) ->
e4c2c792226f make SML naive Bayes closer to Python version
blanchet
parents: 57122
diff changeset
    64
      int -> (int * real) list -> (int * real) list
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
    65
    val naive_bayes : (bool * bool) -> int -> int -> (int -> int list) -> (int -> int list) ->
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
    66
      int -> int -> (int * real) list -> (int * real) list
57125
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
    67
    val naive_bayes_py : Proof.context -> bool -> int -> int -> (int -> int list) ->
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
    68
      (int -> int list) -> int -> int -> (int * real) list -> (int * real) list
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
    69
    val query : Proof.context -> bool -> mash_engine -> string list -> int ->
57106
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    70
      (string * (string * real) list * string list) list * string list * (string * real) list ->
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    71
      string list
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    72
  end
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    73
53148
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
    74
  val mash_unlearn : Proof.context -> params -> unit
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50623
diff changeset
    75
  val nickname_of_thm : thm -> string
57006
blanchet
parents: 57005
diff changeset
    76
  val find_suggested_facts : Proof.context -> ('b * thm) list -> string list -> ('b * thm) list
blanchet
parents: 57005
diff changeset
    77
  val mesh_facts : ('a * 'a -> bool) -> int -> (real * (('a * real) list * 'a list)) list -> 'a list
51135
e32114b25551 tuned code
blanchet
parents: 51134
diff changeset
    78
  val crude_thm_ord : thm * thm -> order
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
    79
  val thm_less : thm * thm -> bool
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
    80
  val goal_of_thm : theory -> thm -> thm
57006
blanchet
parents: 57005
diff changeset
    81
  val run_prover_for_mash : Proof.context -> params -> string -> string -> fact list -> thm ->
blanchet
parents: 57005
diff changeset
    82
    prover_result
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
    83
  val features_of : Proof.context -> theory -> int -> int Symtab.table -> stature -> term list ->
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
    84
    (string * real) list
51177
e8c9755fd14e tuned code: factored out parent computation
blanchet
parents: 51176
diff changeset
    85
  val trim_dependencies : string list -> string list option
57306
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57305
diff changeset
    86
  val isar_dependencies_of : string Symtab.table * string Symtab.table -> thm -> string list option
57006
blanchet
parents: 57005
diff changeset
    87
  val prover_dependencies_of : Proof.context -> params -> string -> int -> raw_fact list ->
blanchet
parents: 57005
diff changeset
    88
    string Symtab.table * string Symtab.table -> thm -> bool * string list
blanchet
parents: 57005
diff changeset
    89
  val attach_parents_to_facts : ('a * thm) list -> ('a * thm) list ->
blanchet
parents: 57005
diff changeset
    90
    (string list * ('a * thm)) list
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
    91
  val num_extra_feature_facts : int
53141
d27e99a6a679 take chained and proximate facts into consideration when computing MaSh features
blanchet
parents: 53140
diff changeset
    92
  val extra_feature_factor : real
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
    93
  val weight_facts_smoothly : 'a list -> ('a * real) list
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
    94
  val weight_facts_steeply : 'a list -> ('a * real) list
57006
blanchet
parents: 57005
diff changeset
    95
  val find_mash_suggestions : Proof.context -> int -> string list -> ('b * thm) list ->
blanchet
parents: 57005
diff changeset
    96
    ('b * thm) list -> ('b * thm) list -> ('b * thm) list * ('b * thm) list
53127
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
    97
  val add_const_counts : term -> int Symtab.table -> int Symtab.table
57006
blanchet
parents: 57005
diff changeset
    98
  val mash_suggested_facts : Proof.context -> params -> int -> term list -> term -> raw_fact list ->
blanchet
parents: 57005
diff changeset
    99
    fact list * fact list
54503
blanchet
parents: 54432
diff changeset
   100
  val mash_learn_proof : Proof.context -> params -> term -> ('a * thm) list -> thm list -> unit
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57108
diff changeset
   101
  val mash_learn_facts : Proof.context -> params -> string -> bool -> int -> bool -> Time.time ->
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57108
diff changeset
   102
    raw_fact list -> string
57006
blanchet
parents: 57005
diff changeset
   103
  val mash_learn : Proof.context -> params -> fact_override -> thm list -> bool -> unit
53819
e55d641d0a70 honor MaSh's zero-overhead policy -- no learning if the tool is disabled
blanchet
parents: 53790
diff changeset
   104
53148
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
   105
  val mash_can_suggest_facts : Proof.context -> bool -> bool
57108
dc0b4f50e288 more generous max number of suggestions, for more safety
blanchet
parents: 57107
diff changeset
   106
  val generous_max_suggestions : int -> int
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50755
diff changeset
   107
  val mepo_weight : real
4247cbd78aaf export MeSh data as well
blanchet
parents: 50755
diff changeset
   108
  val mash_weight : real
57006
blanchet
parents: 57005
diff changeset
   109
  val relevant_facts : Proof.context -> params -> string -> int -> fact_override -> term list ->
blanchet
parents: 57005
diff changeset
   110
    term -> raw_fact list -> (string * fact list) list
53148
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
   111
  val kill_learners : Proof.context -> params -> unit
48319
340187063d84 use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents: 48318
diff changeset
   112
  val running_learners : unit -> unit
48248
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
   113
end;
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
   114
48381
1b7d798460bb renamed ML structures
blanchet
parents: 48380
diff changeset
   115
structure Sledgehammer_MaSh : SLEDGEHAMMER_MASH =
48248
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
   116
struct
48249
2bd242c56c90 dummy implementation
blanchet
parents: 48248
diff changeset
   117
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   118
open ATP_Util
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   119
open ATP_Problem_Generate
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   120
open Sledgehammer_Util
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   121
open Sledgehammer_Fact
55201
1ee776da8da7 renamed ML file
blanchet
parents: 54816
diff changeset
   122
open Sledgehammer_Prover
55202
824c48a539c9 renamed many Sledgehammer ML files to clarify structure
blanchet
parents: 55201
diff changeset
   123
open Sledgehammer_Prover_Minimize
48381
1b7d798460bb renamed ML structures
blanchet
parents: 48380
diff changeset
   124
open Sledgehammer_MePo
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   125
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   126
val trace = Attrib.setup_config_bool @{binding sledgehammer_mash_trace} (K false)
57150
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
   127
val duplicates = Attrib.setup_config_bool @{binding sledgehammer_fact_duplicates} (K false)
51032
69da236d7838 added option to use SNoW as machine learning algo
blanchet
parents: 51029
diff changeset
   128
48308
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
   129
fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
   130
57150
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
   131
fun gen_eq_thm ctxt = if Config.get ctxt duplicates then Thm.eq_thm_strict else Thm.eq_thm_prop
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
   132
51008
e096c0dc538b more precise output of selected facts
blanchet
parents: 51007
diff changeset
   133
val MePoN = "MePo"
48319
340187063d84 use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents: 48318
diff changeset
   134
val MaShN = "MaSh"
51008
e096c0dc538b more precise output of selected facts
blanchet
parents: 51007
diff changeset
   135
val MeShN = "MeSh"
48319
340187063d84 use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents: 48318
diff changeset
   136
48379
2b5ad61e2ccc renamed "iter" fact filter to "MePo" (Meng--Paulson)
blanchet
parents: 48378
diff changeset
   137
val mepoN = "mepo"
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
   138
val mashN = "mash"
48379
2b5ad61e2ccc renamed "iter" fact filter to "MePo" (Meng--Paulson)
blanchet
parents: 48378
diff changeset
   139
val meshN = "mesh"
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
   140
48379
2b5ad61e2ccc renamed "iter" fact filter to "MePo" (Meng--Paulson)
blanchet
parents: 48378
diff changeset
   141
val fact_filters = [meshN, mepoN, mashN]
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
   142
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   143
val unlearnN = "unlearn"
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   144
val learn_isarN = "learn_isar"
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
   145
val learn_proverN = "learn_prover"
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   146
val relearn_isarN = "relearn_isar"
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
   147
val relearn_proverN = "relearn_prover"
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   148
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   149
fun mash_state_dir () = Path.explode "$ISABELLE_HOME_USER/mash" |> tap Isabelle_System.mkdir
50310
b00eeb8e352e proper quoting of paths in MaSh
blanchet
parents: 50229
diff changeset
   150
fun mash_state_file () = Path.append (mash_state_dir ()) (Path.explode "state")
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   151
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   152
fun wipe_out_mash_state_dir () =
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   153
  let val path = mash_state_dir () in
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   154
    try (File.fold_dir (fn file => fn _ => try File.rm (Path.append path (Path.basic file))) path)
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   155
      NONE;
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   156
    ()
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   157
  end
48330
192444b53e86 speed up MaSh queries
blanchet
parents: 48329
diff changeset
   158
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   159
datatype mash_engine =
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   160
  MaSh_Py
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   161
| MaSh_SML_kNN
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   162
| MaSh_SML_kNN_Cpp
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   163
| MaSh_SML_NB of bool * bool
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   164
| MaSh_SML_NB_Cpp
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   165
| MaSh_SML_NB_Py
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   166
57281
bb671e6b740d changed default MaSh parameters based on (in vitro) evaluation
blanchet
parents: 57278
diff changeset
   167
val default_MaSh_SML_NB = MaSh_SML_NB (false, true)
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
   168
57028
e5466055e94f added Isabelle system option 'mash'
blanchet
parents: 57018
diff changeset
   169
fun mash_engine () =
57089
353652f47974 renamed 'MaSh' option
blanchet
parents: 57076
diff changeset
   170
  let val flag1 = Options.default_string @{system_option MaSh} in
57028
e5466055e94f added Isabelle system option 'mash'
blanchet
parents: 57018
diff changeset
   171
    (case if flag1 <> "none" (* default *) then flag1 else getenv "MASH" of
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   172
      "yes" => SOME default_MaSh_SML_NB
57028
e5466055e94f added Isabelle system option 'mash'
blanchet
parents: 57018
diff changeset
   173
    | "py" => SOME MaSh_Py
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   174
    | "sml" => SOME default_MaSh_SML_NB
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   175
    | "sml_knn" => SOME MaSh_SML_kNN
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   176
    | "sml_knn_cpp" => SOME MaSh_SML_kNN_Cpp
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   177
    | "sml_nb" => SOME default_MaSh_SML_NB
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   178
    | "sml_nbCC" => SOME (MaSh_SML_NB (false, false))
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   179
    | "sml_nbCD" => SOME (MaSh_SML_NB (false, true))
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   180
    | "sml_nbDC" => SOME (MaSh_SML_NB (true, false))
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   181
    | "sml_nbDD" => SOME (MaSh_SML_NB (true, true))
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   182
    | "sml_nb_cpp" => SOME MaSh_SML_NB_Cpp
57125
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   183
    | "sml_nb_py" => SOME MaSh_SML_NB_Py
57028
e5466055e94f added Isabelle system option 'mash'
blanchet
parents: 57018
diff changeset
   184
    | _ => NONE)
e5466055e94f added Isabelle system option 'mash'
blanchet
parents: 57018
diff changeset
   185
  end
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
   186
57028
e5466055e94f added Isabelle system option 'mash'
blanchet
parents: 57018
diff changeset
   187
val is_mash_enabled = is_some o mash_engine
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   188
val the_mash_engine = the_default default_MaSh_SML_NB o mash_engine
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
   189
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   190
57107
2d502370ee99 changed MaSh to use SML version instead of Python version of naive Bayes by default (i.e. if MASH=yes in the settings, or 'fact_filter=mash' with no other explicit setting)
blanchet
parents: 57106
diff changeset
   191
(*** Low-level communication with the Python version of MaSh ***)
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   192
53117
2381bdf47ba5 use new MaSh command-line arguments
blanchet
parents: 53116
diff changeset
   193
val save_models_arg = "--saveModels"
2381bdf47ba5 use new MaSh command-line arguments
blanchet
parents: 53116
diff changeset
   194
val shutdown_server_arg = "--shutdownServer"
2381bdf47ba5 use new MaSh command-line arguments
blanchet
parents: 53116
diff changeset
   195
57122
5f69b8c3af5a more work on exporter
blanchet
parents: 57120
diff changeset
   196
fun wipe_out_file file = ignore (try (File.rm o Path.explode) file)
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   197
50335
b17b05c8d4a4 proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents: 50319
diff changeset
   198
fun write_file banner (xs, f) path =
b17b05c8d4a4 proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents: 50319
diff changeset
   199
  (case banner of SOME s => File.write path s | NONE => ();
53094
e33d77814a92 allow MaSh query to do some learning as well
blanchet
parents: 53090
diff changeset
   200
   xs |> chunk_list 500 |> List.app (File.append path o implode o map f))
50319
dddcaeb92e11 robust writing of MaSh state -- better drop learning data than cause other problems in Sledgehammer
blanchet
parents: 50311
diff changeset
   201
  handle IO.Io _ => ()
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   202
53152
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
   203
fun run_mash_tool ctxt overlord extra_args background write_cmds read_suggs =
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   204
  let
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   205
    val (temp_dir, serial) =
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   206
      if overlord then (getenv "ISABELLE_HOME_USER", "")
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   207
      else (getenv "ISABELLE_TMP", serial_string ())
53129
f92b24047fc7 improved support for MaSh server
blanchet
parents: 53128
diff changeset
   208
    val log_file = temp_dir ^ "/mash_log" ^ serial
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   209
    val err_file = temp_dir ^ "/mash_err" ^ serial
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   210
    val sugg_file = temp_dir ^ "/mash_suggs" ^ serial
50335
b17b05c8d4a4 proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents: 50319
diff changeset
   211
    val sugg_path = Path.explode sugg_file
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   212
    val cmd_file = temp_dir ^ "/mash_commands" ^ serial
50335
b17b05c8d4a4 proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents: 50319
diff changeset
   213
    val cmd_path = Path.explode cmd_file
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   214
    val model_dir = File.shell_path (mash_state_dir ())
57130
f810d15ae625 added sleep to give time for the server to shut down -- this is a hack, but it's only in experimental code that will hopefully soon go away
blanchet
parents: 57125
diff changeset
   215
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   216
    val command =
53556
347f743e8336 invoke Python with "no bytecode" option to avoid litering Isabelle source directory with ".pyc" files (which can be problematic for a number of reasons)
blanchet
parents: 53201
diff changeset
   217
      "cd \"$ISABELLE_SLEDGEHAMMER_MASH\"/src; \
53790
298774dbdde0 provide a way to override MaSh's port from configuration file
blanchet
parents: 53757
diff changeset
   218
      \PYTHONDONTWRITEBYTECODE=y ./mash.py\
298774dbdde0 provide a way to override MaSh's port from configuration file
blanchet
parents: 53757
diff changeset
   219
      \ --quiet\
298774dbdde0 provide a way to override MaSh's port from configuration file
blanchet
parents: 53757
diff changeset
   220
      \ --port=$MASH_PORT\
53556
347f743e8336 invoke Python with "no bytecode" option to avoid litering Isabelle source directory with ".pyc" files (which can be problematic for a number of reasons)
blanchet
parents: 53201
diff changeset
   221
      \ --outputDir " ^ model_dir ^
347f743e8336 invoke Python with "no bytecode" option to avoid litering Isabelle source directory with ".pyc" files (which can be problematic for a number of reasons)
blanchet
parents: 53201
diff changeset
   222
      " --modelFile=" ^ model_dir ^ "/model.pickle\
347f743e8336 invoke Python with "no bytecode" option to avoid litering Isabelle source directory with ".pyc" files (which can be problematic for a number of reasons)
blanchet
parents: 53201
diff changeset
   223
      \ --dictsFile=" ^ model_dir ^ "/dict.pickle\
53790
298774dbdde0 provide a way to override MaSh's port from configuration file
blanchet
parents: 53757
diff changeset
   224
      \ --log " ^ log_file ^
298774dbdde0 provide a way to override MaSh's port from configuration file
blanchet
parents: 53757
diff changeset
   225
      " --inputFile " ^ cmd_file ^
298774dbdde0 provide a way to override MaSh's port from configuration file
blanchet
parents: 53757
diff changeset
   226
      " --predictions " ^ sugg_file ^
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   227
      (if extra_args = [] then "" else " " ^ space_implode " " extra_args) ^ " >& " ^ err_file ^
53152
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
   228
      (if background then " &" else "")
57130
f810d15ae625 added sleep to give time for the server to shut down -- this is a hack, but it's only in experimental code that will hopefully soon go away
blanchet
parents: 57125
diff changeset
   229
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   230
    fun run_on () =
50750
518f0b5ef3d9 tap after, not before command invocation
blanchet
parents: 50735
diff changeset
   231
      (Isabelle_System.bash command
54100
6fefbaeccb63 more prominent MaSh errors
blanchet
parents: 54096
diff changeset
   232
       |> tap (fn _ =>
57006
blanchet
parents: 57005
diff changeset
   233
         (case try File.read (Path.explode err_file) |> the_default "" of
blanchet
parents: 57005
diff changeset
   234
           "" => trace_msg ctxt (K "Done")
blanchet
parents: 57005
diff changeset
   235
         | s => warning ("MaSh error: " ^ elide_string 1000 s)));
50335
b17b05c8d4a4 proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents: 50319
diff changeset
   236
       read_suggs (fn () => try File.read_lines sugg_path |> these))
57130
f810d15ae625 added sleep to give time for the server to shut down -- this is a hack, but it's only in experimental code that will hopefully soon go away
blanchet
parents: 57125
diff changeset
   237
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   238
    fun clean_up () =
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   239
      if overlord then () else List.app wipe_out_file [err_file, sugg_file, cmd_file]
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   240
  in
50335
b17b05c8d4a4 proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents: 50319
diff changeset
   241
    write_file (SOME "") ([], K "") sugg_path;
b17b05c8d4a4 proper quoting of paths in MaSh shell script, take 2 (cf. b00eeb8e352e)
blanchet
parents: 50319
diff changeset
   242
    write_file (SOME "") write_cmds cmd_path;
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   243
    trace_msg ctxt (fn () => "Running " ^ command);
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   244
    with_cleanup clean_up run_on ()
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   245
  end
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   246
48308
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
   247
fun meta_char c =
57006
blanchet
parents: 57005
diff changeset
   248
  if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse c = #")" orelse
blanchet
parents: 57005
diff changeset
   249
     c = #"," then
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   250
    String.str c
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   251
  else
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   252
    (* fixed width, in case more digits follow *)
48395
85a7fb65507a learning should honor the fact override and the chained facts
blanchet
parents: 48394
diff changeset
   253
    "%" ^ stringN_of_int 3 (Char.ord c)
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   254
48308
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
   255
fun unmeta_chars accum [] = String.implode (rev accum)
48395
85a7fb65507a learning should honor the fact override and the chained facts
blanchet
parents: 48394
diff changeset
   256
  | unmeta_chars accum (#"%" :: d1 :: d2 :: d3 :: cs) =
48308
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
   257
    (case Int.fromString (String.implode [d1, d2, d3]) of
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   258
      SOME n => unmeta_chars (Char.chr n :: accum) cs
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   259
    | NONE => "" (* error *))
48395
85a7fb65507a learning should honor the fact override and the chained facts
blanchet
parents: 48394
diff changeset
   260
  | unmeta_chars _ (#"%" :: _) = "" (* error *)
48308
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
   261
  | unmeta_chars accum (c :: cs) = unmeta_chars (c :: accum) cs
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
   262
50826
18ace05656cf start using MaSh hints
blanchet
parents: 50825
diff changeset
   263
val encode_str = String.translate meta_char
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   264
val decode_str = String.explode #> unmeta_chars []
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   265
50826
18ace05656cf start using MaSh hints
blanchet
parents: 50825
diff changeset
   266
val encode_strs = map encode_str #> space_implode " "
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   267
val decode_strs = space_explode " " #> filter_out (curry (op =) "") #> map decode_str
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   268
53558
f9682fdfd47b minor fixes
blanchet
parents: 53556
diff changeset
   269
(* Avoid scientific notation *)
f9682fdfd47b minor fixes
blanchet
parents: 53556
diff changeset
   270
fun safe_str_of_real r =
f9682fdfd47b minor fixes
blanchet
parents: 53556
diff changeset
   271
  if r < 0.00001 then "0.00001"
f9682fdfd47b minor fixes
blanchet
parents: 53556
diff changeset
   272
  else if r >= 1000000.0 then "1000000"
f9682fdfd47b minor fixes
blanchet
parents: 53556
diff changeset
   273
  else Markup.print_real r
f9682fdfd47b minor fixes
blanchet
parents: 53556
diff changeset
   274
54695
a9efdf970720 generate problems with type classes
blanchet
parents: 54693
diff changeset
   275
fun encode_feature (names, weight) =
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   276
  encode_str names ^ (if Real.== (weight, 1.0) then "" else "=" ^ safe_str_of_real weight)
54695
a9efdf970720 generate problems with type classes
blanchet
parents: 54693
diff changeset
   277
57010
121b63d7bcdb take weights into consideration in knn
blanchet
parents: 57009
diff changeset
   278
fun decode_feature s =
121b63d7bcdb take weights into consideration in knn
blanchet
parents: 57009
diff changeset
   279
  (case space_explode "=" s of
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   280
    [feat, weight] => (decode_str feat, Real.fromString weight |> the_default 1.0)
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   281
  | _ => (decode_str s, 1.0))
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   282
50356
41f8f1f2e15d added feature weights in MaSh
blanchet
parents: 50355
diff changeset
   283
val encode_features = map encode_feature #> space_implode " "
57010
121b63d7bcdb take weights into consideration in knn
blanchet
parents: 57009
diff changeset
   284
val decode_features = space_explode " " #> map decode_feature
50356
41f8f1f2e15d added feature weights in MaSh
blanchet
parents: 50355
diff changeset
   285
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   286
fun str_of_learn (name, parents, feats, deps) =
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   287
  "! " ^ encode_str name ^ ": " ^ encode_strs parents ^ "; " ^ encode_strs feats ^ "; " ^
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   288
  encode_strs deps ^ "\n"
48669
cdcdb0547f29 remember ATP flops to avoid repeating them too quickly
blanchet
parents: 48668
diff changeset
   289
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   290
fun str_of_relearn (name, deps) = "p " ^ encode_str name ^ ": " ^ encode_strs deps ^ "\n"
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   291
53099
5c7780d21d24 adapted to new MaSh syntax
blanchet
parents: 53098
diff changeset
   292
fun str_of_query max_suggs (learns, hints, parents, feats) =
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
   293
  implode (map str_of_learn learns) ^
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   294
  "? " ^ string_of_int max_suggs ^ " # " ^ encode_strs parents ^ "; " ^ encode_features feats ^
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
   295
  (if null hints then "" else "; " ^ encode_strs hints) ^ "\n"
48406
b002cc16aa99 honor suggested MaSh weights
blanchet
parents: 48404
diff changeset
   296
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   297
(* The suggested weights do not make much sense. *)
48406
b002cc16aa99 honor suggested MaSh weights
blanchet
parents: 48404
diff changeset
   298
fun extract_suggestion sugg =
55286
blanchet
parents: 55202
diff changeset
   299
  (case space_explode "=" sugg of
57125
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   300
    [name, weight] => SOME (decode_str name, Real.fromString weight |> the_default 1.0)
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   301
  | [name] => SOME (decode_str name, 1.0)
55286
blanchet
parents: 55202
diff changeset
   302
  | _ => NONE)
48406
b002cc16aa99 honor suggested MaSh weights
blanchet
parents: 48404
diff changeset
   303
50633
87961472b404 tuned ML function name
blanchet
parents: 50632
diff changeset
   304
fun extract_suggestions line =
55286
blanchet
parents: 55202
diff changeset
   305
  (case space_explode ":" line of
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   306
    [goal, suggs] => (decode_str goal, map_filter extract_suggestion (space_explode " " suggs))
55286
blanchet
parents: 55202
diff changeset
   307
  | _ => ("", []))
48311
3c4e10606567 implemented MaSh QUERY operation
blanchet
parents: 48309
diff changeset
   308
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   309
structure MaSh_Py =
50632
12c097ff3241 slightly more elegant naming convention (to keep low-level and high-level APIs separated)
blanchet
parents: 50631
diff changeset
   310
struct
12c097ff3241 slightly more elegant naming convention (to keep low-level and high-level APIs separated)
blanchet
parents: 50631
diff changeset
   311
53148
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
   312
fun shutdown ctxt overlord =
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   313
  (trace_msg ctxt (K "MaSh_Py shutdown");
53756
be91786f2419 MaSh tweaks to facilitate debugging
blanchet
parents: 53564
diff changeset
   314
   run_mash_tool ctxt overlord [shutdown_server_arg] false ([], K "") (K ()))
53152
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
   315
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
   316
fun save ctxt overlord =
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   317
  (trace_msg ctxt (K "MaSh_Py save");
53153
1e9735cd27aa better tracing + honor blocking better
blanchet
parents: 53152
diff changeset
   318
   run_mash_tool ctxt overlord [save_models_arg] true ([], K "") (K ()))
53142
966a251efd16 have kill_all also kill MaSh server + be paranoid about reloading after clear_state, to allow for easier experimentation
blanchet
parents: 53141
diff changeset
   319
53148
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
   320
fun unlearn ctxt overlord =
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   321
  (trace_msg ctxt (K "MaSh_Py unlearn");
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   322
   shutdown ctxt overlord;
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   323
   wipe_out_mash_state_dir ())
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   324
53757
8d1a059ebcdb reduce the number of emitted MaSh commands (among others to facilitate debugging)
blanchet
parents: 53756
diff changeset
   325
fun learn _ _ _ [] = ()
57006
blanchet
parents: 57005
diff changeset
   326
  | learn ctxt overlord save learns =
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   327
    (trace_msg ctxt (fn () =>
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   328
       "MaSh_Py learn {" ^ elide_string 1000 (space_implode " " (map #1 learns)) ^ "}");
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   329
     run_mash_tool ctxt overlord ([] |> save ? cons save_models_arg) false (learns, str_of_learn)
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   330
       (K ()))
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   331
53757
8d1a059ebcdb reduce the number of emitted MaSh commands (among others to facilitate debugging)
blanchet
parents: 53756
diff changeset
   332
fun relearn _ _ _ [] = ()
8d1a059ebcdb reduce the number of emitted MaSh commands (among others to facilitate debugging)
blanchet
parents: 53756
diff changeset
   333
  | relearn ctxt overlord save relearns =
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   334
    (trace_msg ctxt (fn () => "MaSh_Py relearn " ^
57006
blanchet
parents: 57005
diff changeset
   335
       elide_string 1000 (space_implode " " (map #1 relearns)));
53757
8d1a059ebcdb reduce the number of emitted MaSh commands (among others to facilitate debugging)
blanchet
parents: 53756
diff changeset
   336
     run_mash_tool ctxt overlord ([] |> save ? cons save_models_arg) false
57006
blanchet
parents: 57005
diff changeset
   337
       (relearns, str_of_relearn) (K ()))
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   338
53117
2381bdf47ba5 use new MaSh command-line arguments
blanchet
parents: 53116
diff changeset
   339
fun query ctxt overlord max_suggs (query as (_, _, _, feats)) =
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   340
  (trace_msg ctxt (fn () => "MaSh_Py query " ^ encode_features feats);
55286
blanchet
parents: 55202
diff changeset
   341
   run_mash_tool ctxt overlord [] false ([query], str_of_query max_suggs) (fn suggs =>
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   342
     (case suggs () of [] => [] | suggs => snd (extract_suggestions (List.last suggs))))
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   343
   handle List.Empty => [])
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   344
50632
12c097ff3241 slightly more elegant naming convention (to keep low-level and high-level APIs separated)
blanchet
parents: 50631
diff changeset
   345
end;
12c097ff3241 slightly more elegant naming convention (to keep low-level and high-level APIs separated)
blanchet
parents: 50631
diff changeset
   346
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   347
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   348
(*** Standard ML version of MaSh ***)
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   349
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   350
structure MaSh_SML =
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   351
struct
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   352
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   353
exception BOTTOM of int
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   354
57355
blanchet
parents: 57354
diff changeset
   355
fun heap cmp bnd al a =
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   356
  let
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   357
    fun maxson l i =
57017
blanchet
parents: 57014
diff changeset
   358
      let val i31 = i + i + i + 1 in
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   359
        if i31 + 2 < l then
57017
blanchet
parents: 57014
diff changeset
   360
          let val x = Unsynchronized.ref i31 in
blanchet
parents: 57014
diff changeset
   361
            if cmp (Array.sub (a, i31), Array.sub (a, i31 + 1)) = LESS then x := i31 + 1 else ();
blanchet
parents: 57014
diff changeset
   362
            if cmp (Array.sub (a, !x), Array.sub (a, i31 + 2)) = LESS then x := i31 + 2 else ();
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   363
            !x
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   364
          end
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   365
        else
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   366
          if i31 + 1 < l andalso cmp (Array.sub (a, i31), Array.sub (a, i31 + 1)) = LESS
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   367
          then i31 + 1 else if i31 < l then i31 else raise BOTTOM i
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   368
      end
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   369
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   370
    fun trickledown l i e =
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   371
      let val j = maxson l i in
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   372
        if cmp (Array.sub (a, j), e) = GREATER then
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   373
          (Array.update (a, i, Array.sub (a, j)); trickledown l j e)
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   374
        else
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   375
          Array.update (a, i, e)
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   376
      end
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   377
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   378
    fun trickle l i e = trickledown l i e handle BOTTOM i => Array.update (a, i, e)
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   379
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   380
    fun bubbledown l i =
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   381
      let val j = maxson l i in
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   382
        Array.update (a, i, Array.sub (a, j));
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   383
        bubbledown l j
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   384
      end
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   385
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   386
    fun bubble l i = bubbledown l i handle BOTTOM i => i
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   387
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   388
    fun trickleup i e =
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   389
      let val father = (i - 1) div 3 in
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   390
        if cmp (Array.sub (a, father), e) = LESS then
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   391
          (Array.update (a, i, Array.sub (a, father));
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   392
           if father > 0 then trickleup father e else Array.update (a, 0, e))
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   393
        else
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   394
          Array.update (a, i, e)
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   395
      end
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   396
57355
blanchet
parents: 57354
diff changeset
   397
    fun for i = if i < 0 then () else (trickle al i (Array.sub (a, i)); for (i - 1))
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   398
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   399
    fun for2 i =
57355
blanchet
parents: 57354
diff changeset
   400
      if i < Integer.max 2 (al - bnd) then
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   401
        ()
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   402
      else
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   403
        let val e = Array.sub (a, i) in
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   404
          Array.update (a, i, Array.sub (a, 0));
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   405
          trickleup (bubble i 0) e;
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   406
          for2 (i - 1)
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   407
        end
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   408
  in
57355
blanchet
parents: 57354
diff changeset
   409
    for (((al + 1) div 3) - 1);
blanchet
parents: 57354
diff changeset
   410
    for2 (al - 1);
blanchet
parents: 57354
diff changeset
   411
    if al > 1 then
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   412
      let val e = Array.sub (a, 1) in
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   413
        Array.update (a, 1, Array.sub (a, 0));
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   414
        Array.update (a, 0, e)
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   415
      end
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   416
    else
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   417
      ()
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   418
  end
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   419
57353
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   420
val number_of_nearest_neighbors = 10 (* FUDGE *)
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   421
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   422
exception EXIT of unit
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   423
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   424
(*
57102
blanchet
parents: 57101
diff changeset
   425
  num_facts = maximum number of theorems to check dependencies and symbols
blanchet
parents: 57101
diff changeset
   426
  num_visible_facts = do not return theorems over or equal to this number.
blanchet
parents: 57101
diff changeset
   427
    Must satisfy: num_visible_facts <= num_facts.
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   428
  get_deps = returns dependencies of a theorem
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   429
  get_sym_ths = get theorems that have this feature
57102
blanchet
parents: 57101
diff changeset
   430
  max_suggs = number of suggestions to return
blanchet
parents: 57101
diff changeset
   431
  feats = features of the goal
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   432
*)
57102
blanchet
parents: 57101
diff changeset
   433
fun k_nearest_neighbors num_facts num_visible_facts get_deps get_sym_ths max_suggs feats =
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   434
  let
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   435
    (* Can be later used for TFIDF *)
57017
blanchet
parents: 57014
diff changeset
   436
    fun sym_wght _ = 1.0
blanchet
parents: 57014
diff changeset
   437
57102
blanchet
parents: 57101
diff changeset
   438
    val overlaps_sqr = Array.tabulate (num_facts, rpair 0.0)
57017
blanchet
parents: 57014
diff changeset
   439
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   440
    fun inc_overlap j v =
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   441
      let
57017
blanchet
parents: 57014
diff changeset
   442
        val ov = snd (Array.sub (overlaps_sqr, j))
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   443
      in
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   444
        Array.update (overlaps_sqr, j, (j, v + ov))
57017
blanchet
parents: 57014
diff changeset
   445
      end
blanchet
parents: 57014
diff changeset
   446
57102
blanchet
parents: 57101
diff changeset
   447
    fun do_feat (s, con_wght) =
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   448
      let
57017
blanchet
parents: 57014
diff changeset
   449
        val sw = sym_wght s
blanchet
parents: 57014
diff changeset
   450
        val w2 = sw * sw * con_wght
blanchet
parents: 57014
diff changeset
   451
57102
blanchet
parents: 57101
diff changeset
   452
        fun do_th (j, prem_wght) = if j < num_facts then inc_overlap j (w2 * prem_wght) else ()
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   453
      in
57017
blanchet
parents: 57014
diff changeset
   454
        List.app do_th (get_sym_ths s)
blanchet
parents: 57014
diff changeset
   455
      end
blanchet
parents: 57014
diff changeset
   456
57102
blanchet
parents: 57101
diff changeset
   457
    val _ = List.app do_feat feats
57355
blanchet
parents: 57354
diff changeset
   458
    val _ = heap (Real.compare o pairself snd) num_facts num_facts overlaps_sqr
57353
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   459
    val no_recommends = Unsynchronized.ref 0
57102
blanchet
parents: 57101
diff changeset
   460
    val recommends = Array.tabulate (num_visible_facts, rpair 0.0)
57353
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   461
    val age = Unsynchronized.ref 1000000000.0
57017
blanchet
parents: 57014
diff changeset
   462
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   463
    fun inc_recommend j v =
57353
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   464
      let val ov = snd (Array.sub (recommends, j)) in
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   465
        if ov <= 0.0 then
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   466
          (no_recommends := !no_recommends + 1; Array.update (recommends, j, (j, !age + ov)))
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   467
        else
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   468
          (if ov < !age + 1000.0 then Array.update (recommends, j, (j, v + ov)) else ())
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   469
      end
57017
blanchet
parents: 57014
diff changeset
   470
57353
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   471
    val k = Unsynchronized.ref 0
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   472
    fun do_k k =
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   473
      if k >= num_visible_facts then
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   474
        raise EXIT ()
57017
blanchet
parents: 57014
diff changeset
   475
      else
blanchet
parents: 57014
diff changeset
   476
        let
57102
blanchet
parents: 57101
diff changeset
   477
          val (j, o2) = Array.sub (overlaps_sqr, num_facts - k - 1)
57017
blanchet
parents: 57014
diff changeset
   478
          val o1 = Math.sqrt o2
blanchet
parents: 57014
diff changeset
   479
          val _ = inc_recommend j o1
blanchet
parents: 57014
diff changeset
   480
          val ds = get_deps j
blanchet
parents: 57014
diff changeset
   481
          val l = Real.fromInt (length ds)
blanchet
parents: 57014
diff changeset
   482
        in
57353
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   483
          List.app (fn d => inc_recommend d (o1 / l)) ds
57017
blanchet
parents: 57014
diff changeset
   484
        end
blanchet
parents: 57014
diff changeset
   485
57353
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   486
    fun while1 () =
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   487
      if !k = number_of_nearest_neighbors then () else (do_k (!k); k := !k + 1; while1 ())
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   488
      handle EXIT () => ()
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   489
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   490
    fun while2 () =
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   491
      if !no_recommends >= max_suggs then ()
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   492
      else (do_k (!k); k := !k + 1; age := !age - 10000.0; while2 ())
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   493
      handle EXIT () => ()
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   494
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   495
    fun ret acc at =
57017
blanchet
parents: 57014
diff changeset
   496
      if at = Array.length recommends then acc else ret (Array.sub (recommends, at) :: acc) (at + 1)
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   497
  in
57353
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   498
    while1 (); while2 ();
57355
blanchet
parents: 57354
diff changeset
   499
    heap (Real.compare o pairself snd) max_suggs num_visible_facts recommends;
57102
blanchet
parents: 57101
diff changeset
   500
    ret [] (Integer.max 0 (num_visible_facts - max_suggs))
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   501
  end
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   502
57132
4ddf5a8f8f38 make SML code closer to Python code when 'nb_kuehlwein_style' is true
blanchet
parents: 57130
diff changeset
   503
val nb_def_prior_weight = 21 (* FUDGE *)
57095
001ec97c3e59 updated naive Bayes
blanchet
parents: 57089
diff changeset
   504
57355
blanchet
parents: 57354
diff changeset
   505
fun learn_facts tfreq sfreq dffreq num_facts get_deps get_feats num_feats =
57354
ded92100ffd7 refactoring
blanchet
parents: 57353
diff changeset
   506
  let
57355
blanchet
parents: 57354
diff changeset
   507
    fun learn_fact th feats deps =
57354
ded92100ffd7 refactoring
blanchet
parents: 57353
diff changeset
   508
      let
57355
blanchet
parents: 57354
diff changeset
   509
        fun add_th weight t =
blanchet
parents: 57354
diff changeset
   510
          let
blanchet
parents: 57354
diff changeset
   511
            val im = Array.sub (sfreq, t)
blanchet
parents: 57354
diff changeset
   512
            fun fold_fn s sf = Inttab.map_default (s, 0) (Integer.add weight) sf
blanchet
parents: 57354
diff changeset
   513
          in
blanchet
parents: 57354
diff changeset
   514
            Array.update (tfreq, t, weight + Array.sub (tfreq, t));
blanchet
parents: 57354
diff changeset
   515
            Array.update (sfreq, t, fold fold_fn feats im)
blanchet
parents: 57354
diff changeset
   516
          end
blanchet
parents: 57354
diff changeset
   517
blanchet
parents: 57354
diff changeset
   518
        fun add_sym s = Array.update (dffreq, s, 1 + Array.sub (dffreq, s))
57354
ded92100ffd7 refactoring
blanchet
parents: 57353
diff changeset
   519
      in
57355
blanchet
parents: 57354
diff changeset
   520
        add_th nb_def_prior_weight th;
blanchet
parents: 57354
diff changeset
   521
        List.app (add_th 1) deps;
blanchet
parents: 57354
diff changeset
   522
        List.app add_sym feats
57354
ded92100ffd7 refactoring
blanchet
parents: 57353
diff changeset
   523
      end
ded92100ffd7 refactoring
blanchet
parents: 57353
diff changeset
   524
57102
blanchet
parents: 57101
diff changeset
   525
    fun for i =
57355
blanchet
parents: 57354
diff changeset
   526
      if i = num_facts then () else (learn_fact i (get_feats i) (get_deps i); for (i + 1))
57299
d473cadda13b optimize log
blanchet
parents: 57298
diff changeset
   527
d473cadda13b optimize log
blanchet
parents: 57298
diff changeset
   528
    val ln_afreq = Math.ln (Real.fromInt num_facts)
57102
blanchet
parents: 57101
diff changeset
   529
  in
57299
d473cadda13b optimize log
blanchet
parents: 57298
diff changeset
   530
    for 0;
d473cadda13b optimize log
blanchet
parents: 57298
diff changeset
   531
    (Array.vector tfreq, Array.vector sfreq,
d473cadda13b optimize log
blanchet
parents: 57298
diff changeset
   532
     Vector.map (fn i => ln_afreq - Math.ln (Real.fromInt i)) (Array.vector dffreq))
57102
blanchet
parents: 57101
diff changeset
   533
  end
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   534
57355
blanchet
parents: 57354
diff changeset
   535
fun learn num_facts get_deps get_feats num_feats =
blanchet
parents: 57354
diff changeset
   536
  let
blanchet
parents: 57354
diff changeset
   537
    val tfreq = Array.array (num_facts, 0)
blanchet
parents: 57354
diff changeset
   538
    val sfreq = Array.array (num_facts, Inttab.empty)
blanchet
parents: 57354
diff changeset
   539
    val dffreq = Array.array (num_feats, 0)
blanchet
parents: 57354
diff changeset
   540
  in
blanchet
parents: 57354
diff changeset
   541
    learn_facts tfreq sfreq dffreq num_facts get_deps get_feats num_feats
blanchet
parents: 57354
diff changeset
   542
  end
blanchet
parents: 57354
diff changeset
   543
57298
2502adc3c3f6 enable TF-IDF
blanchet
parents: 57297
diff changeset
   544
fun naive_bayes_query (kuehlwein_log, kuehlwein_params) num_facts num_visible_facts max_suggs feats
57299
d473cadda13b optimize log
blanchet
parents: 57298
diff changeset
   545
    (tfreq, sfreq, idf) =
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   546
  let
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   547
    val tau = if kuehlwein_params then 0.05 else 0.02 (* FUDGE *)
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   548
    val pos_weight = if kuehlwein_params then 10.0 else 2.0 (* FUDGE *)
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   549
    val def_val = ~15.0 (* FUDGE *)
57124
e4c2c792226f make SML naive Bayes closer to Python version
blanchet
parents: 57122
diff changeset
   550
57299
d473cadda13b optimize log
blanchet
parents: 57298
diff changeset
   551
    val ln_afreq = Math.ln (Real.fromInt num_facts)
d473cadda13b optimize log
blanchet
parents: 57298
diff changeset
   552
    fun tfidf feat = Vector.sub (idf, feat) handle Subscript => ln_afreq (* TODO: clean up *)
57102
blanchet
parents: 57101
diff changeset
   553
blanchet
parents: 57101
diff changeset
   554
    fun log_posterior i =
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   555
      let
57102
blanchet
parents: 57101
diff changeset
   556
        val tfreq = Real.fromInt (Vector.sub (tfreq, i))
57097
blanchet
parents: 57096
diff changeset
   557
57298
2502adc3c3f6 enable TF-IDF
blanchet
parents: 57297
diff changeset
   558
        fun fold_feats (f, _) (res, sfh) =
57102
blanchet
parents: 57101
diff changeset
   559
          (case Inttab.lookup sfh f of
blanchet
parents: 57101
diff changeset
   560
            SOME sf =>
57298
2502adc3c3f6 enable TF-IDF
blanchet
parents: 57297
diff changeset
   561
            (res + tfidf f * Math.ln (pos_weight * Real.fromInt sf / tfreq),
57102
blanchet
parents: 57101
diff changeset
   562
             Inttab.delete f sfh)
57298
2502adc3c3f6 enable TF-IDF
blanchet
parents: 57297
diff changeset
   563
          | NONE => (res + tfidf f * def_val, sfh))
57097
blanchet
parents: 57096
diff changeset
   564
57102
blanchet
parents: 57101
diff changeset
   565
        val (res, sfh) = fold fold_feats feats (Math.ln tfreq, Vector.sub (sfreq, i))
57097
blanchet
parents: 57096
diff changeset
   566
57124
e4c2c792226f make SML naive Bayes closer to Python version
blanchet
parents: 57122
diff changeset
   567
        val fold_sfh =
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   568
          if kuehlwein_log then
57133
afa80e25a5f3 made 'Kuehlwein-style' be really like Python code, we now think
blanchet
parents: 57132
diff changeset
   569
            (fn (f, sf) => fn sow => sow - tfidf f * Math.ln (Real.fromInt sf / tfreq))
57124
e4c2c792226f make SML naive Bayes closer to Python version
blanchet
parents: 57122
diff changeset
   570
          else
e4c2c792226f make SML naive Bayes closer to Python version
blanchet
parents: 57122
diff changeset
   571
            (fn (f, sf) => fn sow =>
e4c2c792226f make SML naive Bayes closer to Python version
blanchet
parents: 57122
diff changeset
   572
               sow + tfidf f * Math.ln (1.0 + (1.0 - Real.fromInt sf) / tfreq))
57097
blanchet
parents: 57096
diff changeset
   573
57102
blanchet
parents: 57101
diff changeset
   574
        val sum_of_weights = Inttab.fold fold_sfh sfh 0.0
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   575
      in
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   576
        res + tau * sum_of_weights
57097
blanchet
parents: 57096
diff changeset
   577
      end
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   578
57102
blanchet
parents: 57101
diff changeset
   579
    val posterior = Array.tabulate (num_visible_facts, (fn j => (j, log_posterior j)))
blanchet
parents: 57101
diff changeset
   580
blanchet
parents: 57101
diff changeset
   581
    fun ret acc at =
blanchet
parents: 57101
diff changeset
   582
      if at = num_visible_facts then acc else ret (Array.sub (posterior, at) :: acc) (at + 1)
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   583
  in
57355
blanchet
parents: 57354
diff changeset
   584
    heap (Real.compare o pairself snd) max_suggs num_visible_facts posterior;
57102
blanchet
parents: 57101
diff changeset
   585
    ret [] (Integer.max 0 (num_visible_facts - max_suggs))
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   586
  end
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   587
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   588
fun naive_bayes opts num_facts num_visible_facts get_deps get_feats num_feats max_suggs feats =
57355
blanchet
parents: 57354
diff changeset
   589
  learn num_facts get_deps get_feats num_feats
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   590
  |> naive_bayes_query opts num_facts num_visible_facts max_suggs feats
57102
blanchet
parents: 57101
diff changeset
   591
57125
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   592
(* experimental *)
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   593
fun naive_bayes_py ctxt overlord num_facts num_visible_facts get_deps get_feats num_feats max_suggs
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   594
    feats =
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   595
  let
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   596
    fun name_of_fact j = "f" ^ string_of_int j
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   597
    fun fact_of_name s = the (Int.fromString (unprefix "f" s))
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   598
    fun name_of_feature j = "F" ^ string_of_int j
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   599
    fun parents_of j = if j = 0 then [] else [name_of_fact (j - 1)]
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   600
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   601
    val learns = map (fn j => (name_of_fact j, parents_of j, map name_of_feature (get_feats j),
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   602
      map name_of_fact (get_deps j))) (0 upto num_facts - 1)
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   603
    val parents' = parents_of num_visible_facts
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   604
    val feats' = map (apfst name_of_feature) feats
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   605
  in
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   606
    MaSh_Py.unlearn ctxt overlord;
57130
f810d15ae625 added sleep to give time for the server to shut down -- this is a hack, but it's only in experimental code that will hopefully soon go away
blanchet
parents: 57125
diff changeset
   607
    OS.Process.sleep (seconds 2.0); (* hack *)
57125
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   608
    MaSh_Py.query ctxt overlord max_suggs (learns, [], parents', feats')
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   609
    |> map (apfst fact_of_name)
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   610
  end
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   611
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   612
(* experimental *)
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   613
fun c_plus_plus_tool tool max_suggs learns cfeats =
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   614
  let
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   615
    val ser = string_of_int (serial ()) (* poor person's attempt at thread-safety *)
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   616
    val ocs = TextIO.openOut ("adv_syms" ^ ser)
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   617
    val ocd = TextIO.openOut ("adv_deps" ^ ser)
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   618
    val ocq = TextIO.openOut ("adv_seq" ^ ser)
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   619
    val occ = TextIO.openOut ("adv_conj" ^ ser)
57296
8a98f08a0523 tweaked experimental setup
blanchet
parents: 57294
diff changeset
   620
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   621
    fun os oc s = TextIO.output (oc, s)
57296
8a98f08a0523 tweaked experimental setup
blanchet
parents: 57294
diff changeset
   622
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   623
    fun ol _ _ _ [] = ()
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   624
      | ol _ f _ [e] = f e
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   625
      | ol oc f sep (h :: t) = (f h; os oc sep; ol oc f sep t)
57296
8a98f08a0523 tweaked experimental setup
blanchet
parents: 57294
diff changeset
   626
57294
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   627
    fun do_learn (name, feats, deps) =
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   628
      (os ocs name; os ocs ":";
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   629
       ol ocs (fn (sy, _) => (os ocs "\""; os ocs sy; os ocs "\"")) ", " feats; os ocs "\n";
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   630
       os ocd name; os ocd ":"; ol ocd (os ocd) " " deps; os ocd "\n"; os ocq name; os ocq "\n")
57296
8a98f08a0523 tweaked experimental setup
blanchet
parents: 57294
diff changeset
   631
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   632
    fun forkexec no =
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   633
      let
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   634
        val cmd =
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   635
          "~/misc/" ^ tool ^ " adv_syms" ^ ser ^ " adv_deps" ^ ser ^ " " ^ string_of_int no ^
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   636
          " adv_seq" ^ ser ^ " < adv_conj" ^ ser
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   637
      in
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   638
        fst (Isabelle_System.bash_output cmd)
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   639
        |> space_explode " "
57294
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   640
        |> filter_out (curry (op =) "")
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   641
      end
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   642
  in
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   643
    (List.app do_learn learns; ol occ (fn sy => (os occ "\""; os occ sy; os occ "\"")) ", " cfeats;
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   644
     TextIO.closeOut ocs; TextIO.closeOut ocd; TextIO.closeOut ocq; TextIO.closeOut occ;
57294
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   645
     forkexec max_suggs)
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   646
  end
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   647
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   648
val k_nearest_neighbors_cpp =
57353
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
   649
  c_plus_plus_tool ("newknn/knn" ^ " " ^ string_of_int number_of_nearest_neighbors)
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   650
val naive_bayes_cpp = c_plus_plus_tool "predict/nbayes"
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   651
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   652
fun add_to_xtab key (next, tab, keys) = (next + 1, Symtab.update_new (key, next) tab, key :: keys)
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   653
57010
121b63d7bcdb take weights into consideration in knn
blanchet
parents: 57009
diff changeset
   654
fun map_array_at ary f i = Array.update (ary, i, f (Array.sub (ary, i)))
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   655
57294
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   656
fun query ctxt overlord engine visible_facts max_suggs (learns0, hints, feats) =
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   657
  let
57013
ed95456499e6 better way to take invisible facts into account than 'island' business
blanchet
parents: 57012
diff changeset
   658
    val visible_fact_set = Symtab.make_set visible_facts
57294
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   659
    val learns =
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   660
      (learns0 |> List.partition (Symtab.defined visible_fact_set o #1) |> op @) @
57298
2502adc3c3f6 enable TF-IDF
blanchet
parents: 57297
diff changeset
   661
      (if null hints then [] else [(".hints", feats, hints)])
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   662
  in
57294
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   663
    if engine = MaSh_SML_kNN_Cpp then
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   664
      k_nearest_neighbors_cpp max_suggs learns (map fst feats)
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   665
    else if engine = MaSh_SML_NB_Cpp then
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   666
      naive_bayes_cpp max_suggs learns (map fst feats)
57294
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   667
    else
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   668
      let
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   669
        val (rev_depss, rev_featss, (num_facts, _, rev_facts), (num_feats, feat_tab, _)) =
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   670
          fold (fn (fact, feats, deps) =>
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   671
                fn (rev_depss, rev_featss, fact_xtab as (_, fact_tab, _), feat_xtab) =>
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   672
              let
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   673
                fun add_feat (feat, weight) (xtab as (n, tab, _)) =
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   674
                  (case Symtab.lookup tab feat of
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   675
                    SOME i => ((i, weight), xtab)
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   676
                  | NONE => ((n, weight), add_to_xtab feat xtab))
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   677
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   678
                val (feats', feat_xtab') = fold_map add_feat feats feat_xtab
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   679
              in
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   680
                (map_filter (Symtab.lookup fact_tab) deps :: rev_depss, feats' :: rev_featss,
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   681
                 add_to_xtab fact fact_xtab, feat_xtab')
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   682
              end)
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   683
            learns ([], [], (0, Symtab.empty, []), (0, Symtab.empty, []))
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   684
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   685
        val facts = rev rev_facts
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   686
        val fact_vec = Vector.fromList facts
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   687
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   688
        val deps_vec = Vector.fromList (rev rev_depss)
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   689
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   690
        val num_visible_facts = length visible_facts
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   691
        val get_deps = curry Vector.sub deps_vec
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   692
      in
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   693
        trace_msg ctxt (fn () => "MaSh_SML query " ^ encode_features feats ^ " from {" ^
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   694
          elide_string 1000 (space_implode " " (take num_visible_facts facts)) ^ "}");
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   695
        (if engine = MaSh_SML_kNN then
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   696
           let
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   697
             val facts_ary = Array.array (num_feats, [])
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   698
             val _ =
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   699
               fold (fn feats => fn fact =>
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   700
                   let val fact' = fact - 1 in
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   701
                     List.app (fn (feat, weight) =>
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   702
                       map_array_at facts_ary (cons (fact', weight)) feat) feats;
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   703
                     fact'
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   704
                   end)
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   705
                 rev_featss num_facts
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   706
             val get_facts = curry Array.sub facts_ary
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   707
             val feats' = map_filter (fn (feat, weight) =>
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   708
               Option.map (rpair weight) (Symtab.lookup feat_tab feat)) feats
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   709
           in
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   710
             k_nearest_neighbors num_facts num_visible_facts get_deps get_facts max_suggs feats'
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   711
           end
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   712
         else
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   713
           let
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   714
             val unweighted_feats_ary = Vector.fromList (map (map fst) (rev rev_featss))
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   715
             val get_unweighted_feats = curry Vector.sub unweighted_feats_ary
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   716
             val int_feats = map (apfst (the_default ~1 o Symtab.lookup feat_tab)) feats
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   717
           in
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   718
             (case engine of
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   719
               MaSh_SML_NB opts =>
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   720
               naive_bayes opts num_facts num_visible_facts get_deps get_unweighted_feats num_feats
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   721
                 max_suggs int_feats
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   722
             | MaSh_SML_NB_Py => naive_bayes_py ctxt overlord num_facts num_visible_facts get_deps
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   723
                 get_unweighted_feats num_feats max_suggs int_feats)
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   724
           end)
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   725
        |> map (curry Vector.sub fact_vec o fst)
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   726
      end
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   727
  end
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   728
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   729
end;
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   730
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   731
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   732
(*** Middle-level communication with MaSh ***)
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   733
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   734
datatype proof_kind = Isar_Proof | Automatic_Proof | Isar_Proof_wegen_Prover_Flop
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   735
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   736
fun str_of_proof_kind Isar_Proof = "i"
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
   737
  | str_of_proof_kind Automatic_Proof = "a"
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
   738
  | str_of_proof_kind Isar_Proof_wegen_Prover_Flop = "x"
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   739
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   740
fun proof_kind_of_str "a" = Automatic_Proof
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
   741
  | proof_kind_of_str "x" = Isar_Proof_wegen_Prover_Flop
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   742
  | proof_kind_of_str _ (* "i" *) = Isar_Proof
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   743
57011
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
   744
fun add_edge_to name parent =
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
   745
  Graph.default_node (parent, (Isar_Proof, [], []))
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
   746
  #> Graph.add_edge (parent, name)
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
   747
57061
be2602fbe585 properly mark relearns as dirty
blanchet
parents: 57060
diff changeset
   748
fun add_node kind name parents feats deps G =
be2602fbe585 properly mark relearns as dirty
blanchet
parents: 57060
diff changeset
   749
  (Graph.new_node (name, (kind, feats, deps)) G
be2602fbe585 properly mark relearns as dirty
blanchet
parents: 57060
diff changeset
   750
   handle Graph.DUP _ => Graph.map_node name (K (kind, feats, deps)) G)
be2602fbe585 properly mark relearns as dirty
blanchet
parents: 57060
diff changeset
   751
  |> fold (add_edge_to name) parents
57011
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
   752
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   753
fun try_graph ctxt when def f =
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   754
  f ()
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   755
  handle
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   756
    Graph.CYCLES (cycle :: _) =>
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   757
    (trace_msg ctxt (fn () => "Cycle involving " ^ commas cycle ^ " when " ^ when); def)
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   758
  | Graph.DUP name =>
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   759
    (trace_msg ctxt (fn () => "Duplicate fact " ^ quote name ^ " when " ^ when); def)
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   760
  | Graph.UNDEF name =>
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   761
    (trace_msg ctxt (fn () => "Unknown fact " ^ quote name ^ " when " ^ when); def)
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   762
  | exn =>
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   763
    if Exn.is_interrupt exn then
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   764
      reraise exn
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   765
    else
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   766
      (trace_msg ctxt (fn () => "Internal error when " ^ when ^ ":\n" ^ Runtime.exn_message exn);
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   767
       def)
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   768
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   769
fun graph_info G =
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   770
  string_of_int (length (Graph.keys G)) ^ " node(s), " ^
57006
blanchet
parents: 57005
diff changeset
   771
  string_of_int (fold (Integer.add o length o snd) (Graph.dest G) 0) ^ " edge(s), " ^
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   772
  string_of_int (length (Graph.maximals G)) ^ " maximal"
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   773
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
   774
type mash_state =
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   775
  {access_G : (proof_kind * (string * real) list * string list) Graph.T,
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
   776
   num_known_facts : int,
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
   777
   dirty : string list option}
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   778
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   779
val empty_state = {access_G = Graph.empty, num_known_facts = 0, dirty = SOME []} : mash_state
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   780
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   781
local
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   782
57010
121b63d7bcdb take weights into consideration in knn
blanchet
parents: 57009
diff changeset
   783
val version = "*** MaSh version 20140519 ***"
50357
187ae76a1757 more robustness in the face of MaSh format changes -- don't overwrite new versions with old versions
blanchet
parents: 50356
diff changeset
   784
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
   785
exception FILE_VERSION_TOO_NEW of unit
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   786
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   787
fun extract_node line =
55286
blanchet
parents: 55202
diff changeset
   788
  (case space_explode ":" line of
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   789
    [head, tail] =>
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   790
    (case (space_explode " " head, map (unprefix " ") (space_explode ";" tail)) of
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   791
      ([kind, name], [parents, feats, deps]) =>
57010
121b63d7bcdb take weights into consideration in knn
blanchet
parents: 57009
diff changeset
   792
      SOME (proof_kind_of_str kind, decode_str name, decode_strs parents, decode_features feats,
121b63d7bcdb take weights into consideration in knn
blanchet
parents: 57009
diff changeset
   793
        decode_strs deps)
55286
blanchet
parents: 55202
diff changeset
   794
    | _ => NONE)
blanchet
parents: 55202
diff changeset
   795
  | _ => NONE)
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   796
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   797
fun load_state ctxt overlord (time_state as (memory_time, _)) =
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   798
  let val path = mash_state_file () in
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   799
    (case try OS.FileSys.modTime (Path.implode (Path.expand path)) of
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   800
      NONE => time_state
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   801
    | SOME disk_time =>
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   802
      if Time.>= (memory_time, disk_time) then
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   803
        time_state
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   804
      else
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   805
        (disk_time,
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   806
         (case try File.read_lines path of
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   807
           SOME (version' :: node_lines) =>
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   808
           let
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   809
             fun extract_line_and_add_node line =
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   810
               (case extract_node line of
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   811
                 NONE => I (* should not happen *)
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   812
               | SOME (kind, name, parents, feats, deps) => add_node kind name parents feats deps)
57011
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
   813
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   814
             val (access_G, num_known_facts) =
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   815
               (case string_ord (version', version) of
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   816
                 EQUAL =>
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   817
                 (try_graph ctxt "loading state" Graph.empty (fn () =>
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   818
                    fold extract_line_and_add_node node_lines Graph.empty),
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   819
                  length node_lines)
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   820
               | LESS =>
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   821
                 (* cannot parse old file *)
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   822
                 (if the_mash_engine () = MaSh_Py then MaSh_Py.unlearn ctxt overlord
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   823
                  else wipe_out_mash_state_dir ();
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   824
                  (Graph.empty, 0))
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   825
               | GREATER => raise FILE_VERSION_TOO_NEW ())
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   826
           in
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   827
             trace_msg ctxt (fn () => "Loaded fact graph (" ^ graph_info access_G ^ ")");
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   828
             {access_G = access_G, num_known_facts = num_known_facts, dirty = SOME []}
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   829
           end
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   830
         | _ => empty_state)))
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   831
  end
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   832
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   833
fun str_of_entry (kind, name, parents, feats, deps) =
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   834
  str_of_proof_kind kind ^ " " ^ encode_str name ^ ": " ^ encode_strs parents ^ "; " ^
57010
121b63d7bcdb take weights into consideration in knn
blanchet
parents: 57009
diff changeset
   835
  encode_features feats ^ "; " ^ encode_strs deps ^ "\n"
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   836
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   837
fun save_state _ (time_state as (_, {dirty = SOME [], ...})) = time_state
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   838
  | save_state ctxt (memory_time, {access_G, num_known_facts, dirty}) =
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   839
    let
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   840
      fun append_entry (name, ((kind, feats, deps), (parents, _))) =
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   841
        cons (kind, name, Graph.Keys.dest parents, feats, deps)
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   842
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   843
      val path = mash_state_file ()
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   844
      val dirty' =
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   845
        (case try OS.FileSys.modTime (Path.implode path) of
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   846
          NONE => NONE
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   847
        | SOME disk_time => if Time.< (disk_time, memory_time) then dirty else NONE)
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   848
      val (banner, entries) =
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   849
        (case dirty' of
55286
blanchet
parents: 55202
diff changeset
   850
          SOME names => (NONE, fold (append_entry o Graph.get_entry access_G) names [])
blanchet
parents: 55202
diff changeset
   851
        | NONE => (SOME (version ^ "\n"), Graph.fold append_entry access_G []))
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   852
    in
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   853
      write_file banner (entries, str_of_entry) path;
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   854
      trace_msg ctxt (fn () =>
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   855
        "Saved fact graph (" ^ graph_info access_G ^
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   856
        (case dirty of
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   857
          SOME dirty => "; " ^ string_of_int (length dirty) ^ " dirty fact(s)"
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   858
        | _ => "") ^  ")");
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   859
      (Time.now (), {access_G = access_G, num_known_facts = num_known_facts, dirty = SOME []})
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   860
    end
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   861
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   862
val global_state =
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   863
  Synchronized.var "Sledgehammer_MaSh.global_state" (Time.zeroTime, empty_state)
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   864
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   865
in
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   866
53148
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
   867
fun map_state ctxt overlord f =
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   868
  Synchronized.change global_state (load_state ctxt overlord ##> f #> save_state ctxt)
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
   869
  handle FILE_VERSION_TOO_NEW () => ()
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   870
53148
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
   871
fun peek_state ctxt overlord f =
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   872
  Synchronized.change_result global_state (perhaps (try (load_state ctxt overlord)) #> `snd #>> f)
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   873
53148
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
   874
fun clear_state ctxt overlord =
57017
blanchet
parents: 57014
diff changeset
   875
  (* "MaSh_Py.unlearn" also removes the state file *)
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   876
  Synchronized.change global_state (fn _ =>
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   877
    (if the_mash_engine () = MaSh_Py then MaSh_Py.unlearn ctxt overlord
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   878
     else wipe_out_mash_state_dir ();
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   879
     (Time.zeroTime, empty_state)))
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   880
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   881
end
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   882
53558
f9682fdfd47b minor fixes
blanchet
parents: 53556
diff changeset
   883
fun mash_unlearn ctxt ({overlord, ...} : params) =
53559
3858246c7c8f when pouring in extra features into the goal, only consider facts from the current theory -- the bottom 10 facts of the last import might be completely unrelated
blanchet
parents: 53558
diff changeset
   884
  (clear_state ctxt overlord; Output.urgent_message "Reset MaSh.")
50570
blanchet
parents: 50557
diff changeset
   885
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   886
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   887
(*** Isabelle helpers ***)
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   888
50722
b422a48adc2d speed up generation of local theorem nicknames
blanchet
parents: 50718
diff changeset
   889
val local_prefix = "local" ^ Long_Name.separator
48378
9e96486d53ad handle local facts smoothly in MaSh
blanchet
parents: 48377
diff changeset
   890
50722
b422a48adc2d speed up generation of local theorem nicknames
blanchet
parents: 50718
diff changeset
   891
fun elided_backquote_thm threshold th =
57006
blanchet
parents: 57005
diff changeset
   892
  elide_string threshold (backquote_thm (Proof_Context.init_global (Thm.theory_of_thm th)) th)
48378
9e96486d53ad handle local facts smoothly in MaSh
blanchet
parents: 48377
diff changeset
   893
51181
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
   894
val thy_name_of_thm = Context.theory_name o Thm.theory_of_thm
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
   895
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50623
diff changeset
   896
fun nickname_of_thm th =
48394
82fc8c956cdc fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents: 48392
diff changeset
   897
  if Thm.has_name_hint th then
82fc8c956cdc fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents: 48392
diff changeset
   898
    let val hint = Thm.get_name_hint th in
50722
b422a48adc2d speed up generation of local theorem nicknames
blanchet
parents: 50718
diff changeset
   899
      (* There must be a better way to detect local facts. *)
55286
blanchet
parents: 55202
diff changeset
   900
      (case try (unprefix local_prefix) hint of
48394
82fc8c956cdc fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents: 48392
diff changeset
   901
        SOME suf =>
55286
blanchet
parents: 55202
diff changeset
   902
        thy_name_of_thm th ^ Long_Name.separator ^ suf ^ Long_Name.separator ^
blanchet
parents: 55202
diff changeset
   903
        elided_backquote_thm 50 th
blanchet
parents: 55202
diff changeset
   904
      | NONE => hint)
48394
82fc8c956cdc fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents: 48392
diff changeset
   905
    end
82fc8c956cdc fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents: 48392
diff changeset
   906
  else
50722
b422a48adc2d speed up generation of local theorem nicknames
blanchet
parents: 50718
diff changeset
   907
    elided_backquote_thm 200 th
48378
9e96486d53ad handle local facts smoothly in MaSh
blanchet
parents: 48377
diff changeset
   908
51134
d03ded5dcf65 more MaSh tracing
blanchet
parents: 51133
diff changeset
   909
fun find_suggested_facts ctxt facts =
48330
192444b53e86 speed up MaSh queries
blanchet
parents: 48329
diff changeset
   910
  let
51134
d03ded5dcf65 more MaSh tracing
blanchet
parents: 51133
diff changeset
   911
    fun add (fact as (_, th)) = Symtab.default (nickname_of_thm th, fact)
d03ded5dcf65 more MaSh tracing
blanchet
parents: 51133
diff changeset
   912
    val tab = fold add facts Symtab.empty
d03ded5dcf65 more MaSh tracing
blanchet
parents: 51133
diff changeset
   913
    fun lookup nick =
d03ded5dcf65 more MaSh tracing
blanchet
parents: 51133
diff changeset
   914
      Symtab.lookup tab nick
57006
blanchet
parents: 57005
diff changeset
   915
      |> tap (fn NONE => trace_msg ctxt (fn () => "Cannot find " ^ quote nick) | _ => ())
51134
d03ded5dcf65 more MaSh tracing
blanchet
parents: 51133
diff changeset
   916
  in map_filter lookup end
48311
3c4e10606567 implemented MaSh QUERY operation
blanchet
parents: 48309
diff changeset
   917
50383
4274b25ff4e7 take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents: 50382
diff changeset
   918
fun scaled_avg [] = 0
57006
blanchet
parents: 57005
diff changeset
   919
  | scaled_avg xs = Real.ceil (100000000.0 * fold (curry (op +)) xs 0.0) div length xs
48328
ca0b7d19dd62 attempt at meshing according to more meaningful factors
blanchet
parents: 48327
diff changeset
   920
50383
4274b25ff4e7 take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents: 50382
diff changeset
   921
fun avg [] = 0.0
4274b25ff4e7 take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents: 50382
diff changeset
   922
  | avg xs = fold (curry (op +)) xs 0.0 / Real.fromInt (length xs)
48313
0faafdffa662 mesh facts by taking into consideration whether a fact is known to MeSh
blanchet
parents: 48312
diff changeset
   923
50383
4274b25ff4e7 take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents: 50382
diff changeset
   924
fun normalize_scores _ [] = []
4274b25ff4e7 take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents: 50382
diff changeset
   925
  | normalize_scores max_facts xs =
57006
blanchet
parents: 57005
diff changeset
   926
    map (apsnd (curry Real.* (1.0 / avg (map snd (take max_facts xs))))) xs
50383
4274b25ff4e7 take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents: 50382
diff changeset
   927
57096
e4074b91b2a6 always remove duplicates in meshing + use weights for Naive Bayes
blanchet
parents: 57095
diff changeset
   928
fun mesh_facts fact_eq max_facts [(_, (sels, unks))] =
e4074b91b2a6 always remove duplicates in meshing + use weights for Naive Bayes
blanchet
parents: 57095
diff changeset
   929
    distinct fact_eq (map fst (take max_facts sels) @ take (max_facts - length sels) unks)
50861
fa4253914e98 honor unknown chained in MaSh and a few other tweaks
blanchet
parents: 50860
diff changeset
   930
  | mesh_facts fact_eq max_facts mess =
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
   931
    let
51029
211a9240b1e3 killed deadcode
blanchet
parents: 51025
diff changeset
   932
      val mess = mess |> map (apsnd (apfst (normalize_scores max_facts)))
57006
blanchet
parents: 57005
diff changeset
   933
51029
211a9240b1e3 killed deadcode
blanchet
parents: 51025
diff changeset
   934
      fun score_in fact (global_weight, (sels, unks)) =
57006
blanchet
parents: 57005
diff changeset
   935
        let val score_at = try (nth sels) #> Option.map (fn (_, score) => global_weight * score) in
55286
blanchet
parents: 55202
diff changeset
   936
          (case find_index (curry fact_eq fact o fst) sels of
54089
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
   937
            ~1 => if member fact_eq unks fact then NONE else SOME 0.0
55286
blanchet
parents: 55202
diff changeset
   938
          | rank => score_at rank)
50383
4274b25ff4e7 take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents: 50382
diff changeset
   939
        end
57006
blanchet
parents: 57005
diff changeset
   940
50383
4274b25ff4e7 take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents: 50382
diff changeset
   941
      fun weight_of fact = mess |> map_filter (score_in fact) |> scaled_avg
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
   942
    in
57006
blanchet
parents: 57005
diff changeset
   943
      fold (union fact_eq o map fst o take max_facts o fst o snd) mess []
blanchet
parents: 57005
diff changeset
   944
      |> map (`weight_of) |> sort (int_ord o swap o pairself fst)
blanchet
parents: 57005
diff changeset
   945
      |> map snd |> take max_facts
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
   946
    end
48312
b40722a81ac9 implemented meshing of Iter and MaSh results
blanchet
parents: 48311
diff changeset
   947
54693
dd5874e4553f more reasonable default weight
blanchet
parents: 54503
diff changeset
   948
val default_weight = 1.0
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   949
fun free_feature_of s = ("f" ^ s, 40.0 (* FUDGE *))
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   950
fun thy_feature_of s = ("y" ^ s, 8.0 (* FUDGE *))
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   951
fun type_feature_of s = ("t" ^ s, 4.0 (* FUDGE *))
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   952
fun class_feature_of s = ("s" ^ s, 1.0 (* FUDGE *))
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   953
val local_feature = ("local", 16.0 (* FUDGE *))
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   954
51135
e32114b25551 tuned code
blanchet
parents: 51134
diff changeset
   955
fun crude_theory_ord p =
50722
b422a48adc2d speed up generation of local theorem nicknames
blanchet
parents: 50718
diff changeset
   956
  if Theory.subthy p then
b422a48adc2d speed up generation of local theorem nicknames
blanchet
parents: 50718
diff changeset
   957
    if Theory.eq_thy p then EQUAL else LESS
48324
3ee5b5589402 speed up tautology/metaness check
blanchet
parents: 48323
diff changeset
   958
  else if Theory.subthy (swap p) then
3ee5b5589402 speed up tautology/metaness check
blanchet
parents: 48323
diff changeset
   959
    GREATER
55286
blanchet
parents: 55202
diff changeset
   960
  else
blanchet
parents: 55202
diff changeset
   961
    (case int_ord (pairself (length o Theory.ancestors_of) p) of
blanchet
parents: 55202
diff changeset
   962
      EQUAL => string_ord (pairself Context.theory_name p)
blanchet
parents: 55202
diff changeset
   963
    | order => order)
48324
3ee5b5589402 speed up tautology/metaness check
blanchet
parents: 48323
diff changeset
   964
51135
e32114b25551 tuned code
blanchet
parents: 51134
diff changeset
   965
fun crude_thm_ord p =
55286
blanchet
parents: 55202
diff changeset
   966
  (case crude_theory_ord (pairself theory_of_thm p) of
50359
da395f0e7dea tweaked order of theorems to avoid forward dependencies (MaSh)
blanchet
parents: 50357
diff changeset
   967
    EQUAL =>
57039
1ddd1f75fb40 added comment
blanchet
parents: 57029
diff changeset
   968
    (* The hack below is necessary because of odd dependencies that are not reflected in the theory
1ddd1f75fb40 added comment
blanchet
parents: 57029
diff changeset
   969
       comparison. *)
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50623
diff changeset
   970
    let val q = pairself nickname_of_thm p in
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50623
diff changeset
   971
      (* Hack to put "xxx_def" before "xxxI" and "xxxE" *)
55286
blanchet
parents: 55202
diff changeset
   972
      (case bool_ord (pairself (String.isSuffix "_def") (swap q)) of
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50623
diff changeset
   973
        EQUAL => string_ord q
55286
blanchet
parents: 55202
diff changeset
   974
      | ord => ord)
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50623
diff changeset
   975
    end
55286
blanchet
parents: 55202
diff changeset
   976
  | ord => ord)
48324
3ee5b5589402 speed up tautology/metaness check
blanchet
parents: 48323
diff changeset
   977
51136
fdcc06013f2d avoid crude/wrong theorem comparision
blanchet
parents: 51135
diff changeset
   978
val thm_less_eq = Theory.subthy o pairself theory_of_thm
fdcc06013f2d avoid crude/wrong theorem comparision
blanchet
parents: 51135
diff changeset
   979
fun thm_less p = thm_less_eq p andalso not (thm_less_eq (swap p))
fdcc06013f2d avoid crude/wrong theorem comparision
blanchet
parents: 51135
diff changeset
   980
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   981
val freezeT = Type.legacy_freeze_type
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   982
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   983
fun freeze (t $ u) = freeze t $ freeze u
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   984
  | freeze (Abs (s, T, t)) = Abs (s, freezeT T, freeze t)
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   985
  | freeze (Var ((s, _), T)) = Free (s, freezeT T)
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   986
  | freeze (Const (s, T)) = Const (s, freezeT T)
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   987
  | freeze (Free (s, T)) = Free (s, freezeT T)
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   988
  | freeze t = t
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   989
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   990
fun goal_of_thm thy = prop_of #> freeze #> cterm_of thy #> Goal.init
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   991
54141
f57f8e7a879f generate a comment storing the goal nickname in "learn_prover"
blanchet
parents: 54140
diff changeset
   992
fun run_prover_for_mash ctxt params prover goal_name facts goal =
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   993
  let
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   994
    val problem =
54141
f57f8e7a879f generate a comment storing the goal nickname in "learn_prover"
blanchet
parents: 54140
diff changeset
   995
      {comment = "Goal: " ^ goal_name, state = Proof.init ctxt, goal = goal, subgoal = 1,
f57f8e7a879f generate a comment storing the goal nickname in "learn_prover"
blanchet
parents: 54140
diff changeset
   996
       subgoal_count = 1, factss = [("", facts)]}
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   997
  in
54503
blanchet
parents: 54432
diff changeset
   998
    get_minimizing_prover ctxt MaSh (K ()) prover params (K (K (K ""))) problem
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   999
  end
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1000
48326
ef800e91d072 removed debugging output
blanchet
parents: 48325
diff changeset
  1001
val bad_types = [@{type_name prop}, @{type_name bool}, @{type_name fun}]
ef800e91d072 removed debugging output
blanchet
parents: 48325
diff changeset
  1002
53086
15fe0ca088b3 MaSh tweaking: shorter names + killed (broken) SNoW
blanchet
parents: 53085
diff changeset
  1003
val pat_tvar_prefix = "_"
15fe0ca088b3 MaSh tweaking: shorter names + killed (broken) SNoW
blanchet
parents: 53085
diff changeset
  1004
val pat_var_prefix = "_"
15fe0ca088b3 MaSh tweaking: shorter names + killed (broken) SNoW
blanchet
parents: 53085
diff changeset
  1005
53089
a58b3b8631c6 keep long names to stay on the safe side
blanchet
parents: 53086
diff changeset
  1006
(* try "Long_Name.base_name" for shorter names *)
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1007
fun massage_long_name s = s
53083
019ecbb18e3f generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents: 53082
diff changeset
  1008
57006
blanchet
parents: 57005
diff changeset
  1009
val crude_str_of_sort = space_implode ":" o map massage_long_name o subtract (op =) @{sort type}
53086
15fe0ca088b3 MaSh tweaking: shorter names + killed (broken) SNoW
blanchet
parents: 53085
diff changeset
  1010
15fe0ca088b3 MaSh tweaking: shorter names + killed (broken) SNoW
blanchet
parents: 53085
diff changeset
  1011
fun crude_str_of_typ (Type (s, [])) = massage_long_name s
57006
blanchet
parents: 57005
diff changeset
  1012
  | crude_str_of_typ (Type (s, Ts)) = massage_long_name s ^ implode (map crude_str_of_typ Ts)
53083
019ecbb18e3f generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents: 53082
diff changeset
  1013
  | crude_str_of_typ (TFree (_, S)) = crude_str_of_sort S
019ecbb18e3f generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents: 53082
diff changeset
  1014
  | crude_str_of_typ (TVar (_, S)) = crude_str_of_sort S
019ecbb18e3f generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents: 53082
diff changeset
  1015
53128
ea1b62ed5a54 get rid of some silly MaSh features
blanchet
parents: 53127
diff changeset
  1016
fun maybe_singleton_str _ "" = []
ea1b62ed5a54 get rid of some silly MaSh features
blanchet
parents: 53127
diff changeset
  1017
  | maybe_singleton_str pref s = [pref ^ s]
ea1b62ed5a54 get rid of some silly MaSh features
blanchet
parents: 53127
diff changeset
  1018
53148
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
  1019
val max_pat_breadth = 10 (* FUDGE *)
50585
306c7b807e13 contain exponential explosion of term patterns
blanchet
parents: 50584
diff changeset
  1020
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1021
fun term_features_of ctxt thy_name num_facts const_tab term_max_depth type_max_depth ts =
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1022
  let
50392
190053ee24ed expand type classes into their ancestors for MaSh
blanchet
parents: 50391
diff changeset
  1023
    val thy = Proof_Context.theory_of ctxt
53082
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
  1024
50393
d8aa26c78327 record free variables as a MaSh feature
blanchet
parents: 50392
diff changeset
  1025
    val fixes = map snd (Variable.dest_fixes ctxt)
50392
190053ee24ed expand type classes into their ancestors for MaSh
blanchet
parents: 50391
diff changeset
  1026
    val classes = Sign.classes_of thy
53082
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
  1027
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
  1028
    fun add_classes @{sort type} = I
50392
190053ee24ed expand type classes into their ancestors for MaSh
blanchet
parents: 50391
diff changeset
  1029
      | add_classes S =
190053ee24ed expand type classes into their ancestors for MaSh
blanchet
parents: 50391
diff changeset
  1030
        fold (`(Sorts.super_classes classes)
57006
blanchet
parents: 57005
diff changeset
  1031
          #> swap #> op ::
blanchet
parents: 57005
diff changeset
  1032
          #> subtract (op =) @{sort type} #> map massage_long_name
blanchet
parents: 57005
diff changeset
  1033
          #> map class_feature_of
blanchet
parents: 57005
diff changeset
  1034
          #> union (eq_fst (op =))) S
53082
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
  1035
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
  1036
    fun pattify_type 0 _ = []
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
  1037
      | pattify_type _ (Type (s, [])) =
53086
15fe0ca088b3 MaSh tweaking: shorter names + killed (broken) SNoW
blanchet
parents: 53085
diff changeset
  1038
        if member (op =) bad_types s then [] else [massage_long_name s]
53082
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
  1039
      | pattify_type depth (Type (s, U :: Ts)) =
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
  1040
        let
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
  1041
          val T = Type (s, Ts)
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1042
          val ps = take max_pat_breadth (pattify_type depth T)
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1043
          val qs = take max_pat_breadth ("" :: pattify_type (depth - 1) U)
57006
blanchet
parents: 57005
diff changeset
  1044
        in
blanchet
parents: 57005
diff changeset
  1045
          map_product (fn p => fn "" => p | q => p ^ "(" ^ q ^ ")") ps qs
blanchet
parents: 57005
diff changeset
  1046
        end
53128
ea1b62ed5a54 get rid of some silly MaSh features
blanchet
parents: 53127
diff changeset
  1047
      | pattify_type _ (TFree (_, S)) =
ea1b62ed5a54 get rid of some silly MaSh features
blanchet
parents: 53127
diff changeset
  1048
        maybe_singleton_str pat_tvar_prefix (crude_str_of_sort S)
ea1b62ed5a54 get rid of some silly MaSh features
blanchet
parents: 53127
diff changeset
  1049
      | pattify_type _ (TVar (_, S)) =
ea1b62ed5a54 get rid of some silly MaSh features
blanchet
parents: 53127
diff changeset
  1050
        maybe_singleton_str pat_tvar_prefix (crude_str_of_sort S)
57006
blanchet
parents: 57005
diff changeset
  1051
53082
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
  1052
    fun add_type_pat depth T =
53159
a5805fe4e91c repaired num_extra_feature_facts + tuning
blanchet
parents: 53156
diff changeset
  1053
      union (eq_fst (op =)) (map type_feature_of (pattify_type depth T))
57006
blanchet
parents: 57005
diff changeset
  1054
53082
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
  1055
    fun add_type_pats 0 _ = I
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
  1056
      | add_type_pats depth t =
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
  1057
        add_type_pat depth t #> add_type_pats (depth - 1) t
57006
blanchet
parents: 57005
diff changeset
  1058
53083
019ecbb18e3f generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents: 53082
diff changeset
  1059
    fun add_type T =
019ecbb18e3f generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents: 53082
diff changeset
  1060
      add_type_pats type_max_depth T
53156
f79f4693868b minor MaSh fix
blanchet
parents: 53155
diff changeset
  1061
      #> fold_atyps_sorts (add_classes o snd) T
57006
blanchet
parents: 57005
diff changeset
  1062
53084
877f5c28016f add subtypes as well as features in MaSh
blanchet
parents: 53083
diff changeset
  1063
    fun add_subtypes (T as Type (_, Ts)) = add_type T #> fold add_subtypes Ts
877f5c28016f add subtypes as well as features in MaSh
blanchet
parents: 53083
diff changeset
  1064
      | add_subtypes T = add_type T
53082
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
  1065
57099
blanchet
parents: 57098
diff changeset
  1066
    val base_weight_of_const = 16.0 (* FUDGE *)
blanchet
parents: 57098
diff changeset
  1067
    val weight_of_const =
blanchet
parents: 57098
diff changeset
  1068
      (if num_facts = 0 orelse Symtab.is_empty const_tab then
blanchet
parents: 57098
diff changeset
  1069
         K base_weight_of_const
53130
6741ba8d5c6d improve weight computation for complex terms
blanchet
parents: 53129
diff changeset
  1070
       else
57099
blanchet
parents: 57098
diff changeset
  1071
         fn s =>
53130
6741ba8d5c6d improve weight computation for complex terms
blanchet
parents: 53129
diff changeset
  1072
         let val count = Symtab.lookup const_tab s |> the_default 1 in
57099
blanchet
parents: 57098
diff changeset
  1073
           base_weight_of_const + Real.fromInt num_facts / Real.fromInt count
53130
6741ba8d5c6d improve weight computation for complex terms
blanchet
parents: 53129
diff changeset
  1074
         end)
57006
blanchet
parents: 57005
diff changeset
  1075
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1076
    fun pattify_term _ 0 _ = []
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1077
      | pattify_term _ _ (Const (s, _)) =
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1078
        if is_widely_irrelevant_const s then [] else [(massage_long_name s, weight_of_const s)]
54089
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
  1079
      | pattify_term _ _ (Free (s, T)) =
53128
ea1b62ed5a54 get rid of some silly MaSh features
blanchet
parents: 53127
diff changeset
  1080
        maybe_singleton_str pat_var_prefix (crude_str_of_typ T)
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1081
        |> map (rpair 1.0)
53090
1426c97311f2 treat frees as both consts and vars, for more hits
blanchet
parents: 53089
diff changeset
  1082
        |> (if member (op =) fixes s then
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1083
              cons (free_feature_of (massage_long_name
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1084
                  (thy_name ^ Long_Name.separator ^ s)))
53090
1426c97311f2 treat frees as both consts and vars, for more hits
blanchet
parents: 53089
diff changeset
  1085
            else
1426c97311f2 treat frees as both consts and vars, for more hits
blanchet
parents: 53089
diff changeset
  1086
              I)
54089
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
  1087
      | pattify_term _ _ (Var (_, T)) =
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1088
        maybe_singleton_str pat_var_prefix (crude_str_of_typ T) |> map (rpair default_weight)
54089
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
  1089
      | pattify_term Ts _ (Bound j) =
54695
a9efdf970720 generate problems with type classes
blanchet
parents: 54693
diff changeset
  1090
        maybe_singleton_str pat_var_prefix (crude_str_of_typ (nth Ts j))
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1091
        |> map (rpair default_weight)
54089
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
  1092
      | pattify_term Ts depth (t $ u) =
50339
d8dae91f3107 MaSh improvements: deeper patterns + more respect for chained facts
blanchet
parents: 50338
diff changeset
  1093
        let
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1094
          val ps = take max_pat_breadth (pattify_term Ts depth t)
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1095
          val qs = take max_pat_breadth (("", default_weight) :: pattify_term Ts (depth - 1) u)
53130
6741ba8d5c6d improve weight computation for complex terms
blanchet
parents: 53129
diff changeset
  1096
        in
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1097
          map_product (fn ppw as (p, pw) =>
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1098
            fn ("", _) => ppw
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1099
             | (q, qw) => (p ^ "(" ^ q ^ ")", pw + qw)) ps qs
53130
6741ba8d5c6d improve weight computation for complex terms
blanchet
parents: 53129
diff changeset
  1100
        end
54089
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
  1101
      | pattify_term _ _ _ = []
57006
blanchet
parents: 57005
diff changeset
  1102
54089
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
  1103
    fun add_term_pat Ts = union (eq_fst (op =)) oo pattify_term Ts
57006
blanchet
parents: 57005
diff changeset
  1104
53130
6741ba8d5c6d improve weight computation for complex terms
blanchet
parents: 53129
diff changeset
  1105
    fun add_term_pats _ 0 _ = I
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1106
      | add_term_pats Ts depth t = add_term_pat Ts depth t #> add_term_pats Ts (depth - 1) t
57006
blanchet
parents: 57005
diff changeset
  1107
53130
6741ba8d5c6d improve weight computation for complex terms
blanchet
parents: 53129
diff changeset
  1108
    fun add_term Ts = add_term_pats Ts term_max_depth
57006
blanchet
parents: 57005
diff changeset
  1109
53085
15483854c83e handle Bounds as well in MaSh features
blanchet
parents: 53084
diff changeset
  1110
    fun add_subterms Ts t =
55286
blanchet
parents: 55202
diff changeset
  1111
      (case strip_comb t of
54089
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
  1112
        (Const (s, T), args) =>
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
  1113
        (not (is_widely_irrelevant_const s) ? add_term Ts t)
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
  1114
        #> add_subtypes T
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
  1115
        #> fold (add_subterms Ts) args
50857
80768e28c9ee better handlig of built-ins -- at the top-level, not in subterms
blanchet
parents: 50841
diff changeset
  1116
      | (head, args) =>
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1117
        (case head of
53130
6741ba8d5c6d improve weight computation for complex terms
blanchet
parents: 53129
diff changeset
  1118
           Free (_, T) => add_term Ts t #> add_subtypes T
53084
877f5c28016f add subtypes as well as features in MaSh
blanchet
parents: 53083
diff changeset
  1119
         | Var (_, T) => add_subtypes T
53085
15483854c83e handle Bounds as well in MaSh features
blanchet
parents: 53084
diff changeset
  1120
         | Abs (_, T, body) => add_subtypes T #> add_subterms (T :: Ts) body
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1121
         | _ => I)
55286
blanchet
parents: 55202
diff changeset
  1122
        #> fold (add_subterms Ts) args)
57006
blanchet
parents: 57005
diff changeset
  1123
  in
blanchet
parents: 57005
diff changeset
  1124
    fold (add_subterms []) ts []
blanchet
parents: 57005
diff changeset
  1125
  end
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1126
53085
15483854c83e handle Bounds as well in MaSh features
blanchet
parents: 53084
diff changeset
  1127
val term_max_depth = 2
53155
2c585fdbe197 eliminated some needless MaSh features
blanchet
parents: 53154
diff changeset
  1128
val type_max_depth = 1
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1129
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1130
(* TODO: Generate type classes for types? *)
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1131
fun features_of ctxt thy num_facts const_tab (scope, _) ts =
50393
d8aa26c78327 record free variables as a MaSh feature
blanchet
parents: 50392
diff changeset
  1132
  let val thy_name = Context.theory_name thy in
d8aa26c78327 record free variables as a MaSh feature
blanchet
parents: 50392
diff changeset
  1133
    thy_feature_of thy_name ::
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1134
    term_features_of ctxt thy_name num_facts const_tab term_max_depth type_max_depth ts
50393
d8aa26c78327 record free variables as a MaSh feature
blanchet
parents: 50392
diff changeset
  1135
    |> scope <> Global ? cons local_feature
d8aa26c78327 record free variables as a MaSh feature
blanchet
parents: 50392
diff changeset
  1136
  end
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1137
57006
blanchet
parents: 57005
diff changeset
  1138
(* Too many dependencies is a sign that a decision procedure is at work. There is not much to learn
blanchet
parents: 57005
diff changeset
  1139
   from such proofs. *)
50434
960a3429615c more MaSh tweaking -- in particular, export the same facts in "MaSh_Export" as are later tried in "MaSh_Eval"
blanchet
parents: 50412
diff changeset
  1140
val max_dependencies = 20
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
  1141
54125
420b876ff1e2 if slicing is disabled, don't enforce last slice's "max_facts", but rather the maximum "max_facts"
blanchet
parents: 54123
diff changeset
  1142
val prover_default_max_facts = 25
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1143
48438
3e45c98fe127 distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents: 48436
diff changeset
  1144
(* "type_definition_xxx" facts are characterized by their use of "CollectI". *)
50755
4c781d65c0d6 get rid of spurious "Isar" proofs
blanchet
parents: 50754
diff changeset
  1145
val typedef_dep = nickname_of_thm @{thm CollectI}
57006
blanchet
parents: 57005
diff changeset
  1146
(* Mysterious parts of the class machinery create lots of proofs that refer exclusively to
blanchet
parents: 57005
diff changeset
  1147
   "someI_ex" (and to some internal constructions). *)
50755
4c781d65c0d6 get rid of spurious "Isar" proofs
blanchet
parents: 50754
diff changeset
  1148
val class_some_dep = nickname_of_thm @{thm someI_ex}
48438
3e45c98fe127 distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents: 48436
diff changeset
  1149
50828
91e6836bb68b don't learn from the proof of "psimps" etc.
blanchet
parents: 50827
diff changeset
  1150
val fundef_ths =
57006
blanchet
parents: 57005
diff changeset
  1151
  @{thms fundef_ex1_existence fundef_ex1_uniqueness fundef_ex1_iff fundef_default_value}
50828
91e6836bb68b don't learn from the proof of "psimps" etc.
blanchet
parents: 50827
diff changeset
  1152
  |> map nickname_of_thm
91e6836bb68b don't learn from the proof of "psimps" etc.
blanchet
parents: 50827
diff changeset
  1153
48438
3e45c98fe127 distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents: 48436
diff changeset
  1154
(* "Rep_xxx_inject", "Abs_xxx_inverse", etc., are derived using these facts. *)
3e45c98fe127 distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents: 48436
diff changeset
  1155
val typedef_ths =
57006
blanchet
parents: 57005
diff changeset
  1156
  @{thms type_definition.Abs_inverse type_definition.Rep_inverse type_definition.Rep
blanchet
parents: 57005
diff changeset
  1157
      type_definition.Rep_inject type_definition.Abs_inject type_definition.Rep_cases
blanchet
parents: 57005
diff changeset
  1158
      type_definition.Abs_cases type_definition.Rep_induct type_definition.Abs_induct
blanchet
parents: 57005
diff changeset
  1159
      type_definition.Rep_range type_definition.Abs_image}
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50623
diff changeset
  1160
  |> map nickname_of_thm
48438
3e45c98fe127 distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents: 48436
diff changeset
  1161
48441
2d2f009ca8eb remove MaSh junk associated with size functions
blanchet
parents: 48440
diff changeset
  1162
fun is_size_def [dep] th =
55642
63beb38e9258 adapted to renaming of datatype 'cases' and 'recs' to 'case' and 'rec'
blanchet
parents: 55286
diff changeset
  1163
    (case first_field ".rec" dep of
57006
blanchet
parents: 57005
diff changeset
  1164
      SOME (pref, _) =>
blanchet
parents: 57005
diff changeset
  1165
      (case first_field ".size" (nickname_of_thm th) of
blanchet
parents: 57005
diff changeset
  1166
        SOME (pref', _) => pref = pref'
blanchet
parents: 57005
diff changeset
  1167
      | NONE => false)
blanchet
parents: 57005
diff changeset
  1168
    | NONE => false)
48441
2d2f009ca8eb remove MaSh junk associated with size functions
blanchet
parents: 48440
diff changeset
  1169
  | is_size_def _ _ = false
2d2f009ca8eb remove MaSh junk associated with size functions
blanchet
parents: 48440
diff changeset
  1170
51177
e8c9755fd14e tuned code: factored out parent computation
blanchet
parents: 51176
diff changeset
  1171
fun trim_dependencies deps =
50755
4c781d65c0d6 get rid of spurious "Isar" proofs
blanchet
parents: 50754
diff changeset
  1172
  if length deps > max_dependencies then NONE else SOME deps
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1173
50755
4c781d65c0d6 get rid of spurious "Isar" proofs
blanchet
parents: 50754
diff changeset
  1174
fun isar_dependencies_of name_tabs th =
57306
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57305
diff changeset
  1175
  thms_in_proof max_dependencies (SOME name_tabs) th
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57305
diff changeset
  1176
  |> Option.map (fn deps =>
57006
blanchet
parents: 57005
diff changeset
  1177
    if deps = [typedef_dep] orelse deps = [class_some_dep] orelse
blanchet
parents: 57005
diff changeset
  1178
       exists (member (op =) fundef_ths) deps orelse exists (member (op =) typedef_ths) deps orelse
50828
91e6836bb68b don't learn from the proof of "psimps" etc.
blanchet
parents: 50827
diff changeset
  1179
       is_size_def deps th then
50755
4c781d65c0d6 get rid of spurious "Isar" proofs
blanchet
parents: 50754
diff changeset
  1180
      []
4c781d65c0d6 get rid of spurious "Isar" proofs
blanchet
parents: 50754
diff changeset
  1181
    else
57306
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57305
diff changeset
  1182
      deps)
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1183
57006
blanchet
parents: 57005
diff changeset
  1184
fun prover_dependencies_of ctxt (params as {verbose, max_facts, ...}) prover auto_level facts
blanchet
parents: 57005
diff changeset
  1185
    name_tabs th =
55286
blanchet
parents: 55202
diff changeset
  1186
  (case isar_dependencies_of name_tabs th of
57306
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57305
diff changeset
  1187
    SOME [] => (false, [])
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57305
diff changeset
  1188
  | isar_deps0 =>
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1189
    let
57306
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57305
diff changeset
  1190
      val isar_deps = these isar_deps0
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1191
      val thy = Proof_Context.theory_of ctxt
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1192
      val goal = goal_of_thm thy th
54141
f57f8e7a879f generate a comment storing the goal nickname in "learn_prover"
blanchet
parents: 54140
diff changeset
  1193
      val name = nickname_of_thm th
52196
2281f33e8da6 redid rac7830871177 to avoid duplicate fixed variable (e.g. lemma "P (a::nat)" proof - have "!!a::int. Q a" sledgehammer [e])
blanchet
parents: 52125
diff changeset
  1194
      val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal goal 1 ctxt
51136
fdcc06013f2d avoid crude/wrong theorem comparision
blanchet
parents: 51135
diff changeset
  1195
      val facts = facts |> filter (fn (_, th') => thm_less (th', th))
57006
blanchet
parents: 57005
diff changeset
  1196
51004
5f2788c38127 distinguish raw and non-raw facts, using raw for 10 000s of facts and non-raw after selection of some hundreds
blanchet
parents: 51003
diff changeset
  1197
      fun nickify ((_, stature), th) = ((nickname_of_thm th, stature), th)
57006
blanchet
parents: 57005
diff changeset
  1198
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50623
diff changeset
  1199
      fun is_dep dep (_, th) = nickname_of_thm th = dep
57006
blanchet
parents: 57005
diff changeset
  1200
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1201
      fun add_isar_dep facts dep accum =
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1202
        if exists (is_dep dep) accum then
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1203
          accum
55286
blanchet
parents: 55202
diff changeset
  1204
        else
blanchet
parents: 55202
diff changeset
  1205
          (case find_first (is_dep dep) facts of
blanchet
parents: 55202
diff changeset
  1206
            SOME ((_, status), th) => accum @ [(("", status), th)]
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1207
          | NONE => accum (* should not happen *))
57006
blanchet
parents: 57005
diff changeset
  1208
54123
271a8377656f remove overloading of "max_facts" -- it already controls the number of facts passed to ATPs for 'learn_prover'
blanchet
parents: 54115
diff changeset
  1209
      val mepo_facts =
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
  1210
        facts
54095
d80743f28fec simplify fudge factor code
blanchet
parents: 54091
diff changeset
  1211
        |> mepo_suggested_facts ctxt params (max_facts |> the_default prover_default_max_facts) NONE
d80743f28fec simplify fudge factor code
blanchet
parents: 54091
diff changeset
  1212
             hyp_ts concl_t
54123
271a8377656f remove overloading of "max_facts" -- it already controls the number of facts passed to ATPs for 'learn_prover'
blanchet
parents: 54115
diff changeset
  1213
      val facts =
271a8377656f remove overloading of "max_facts" -- it already controls the number of facts passed to ATPs for 'learn_prover'
blanchet
parents: 54115
diff changeset
  1214
        mepo_facts
50754
74a6adcb96ac also generate queries for goals with too many Isar dependencies
blanchet
parents: 50751
diff changeset
  1215
        |> fold (add_isar_dep facts) isar_deps
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50623
diff changeset
  1216
        |> map nickify
54123
271a8377656f remove overloading of "max_facts" -- it already controls the number of facts passed to ATPs for 'learn_prover'
blanchet
parents: 54115
diff changeset
  1217
      val num_isar_deps = length isar_deps
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1218
    in
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1219
      if verbose andalso auto_level = 0 then
57017
blanchet
parents: 57014
diff changeset
  1220
        Output.urgent_message ("MaSh: " ^ quote prover ^ " on " ^ quote name ^ " with " ^
blanchet
parents: 57014
diff changeset
  1221
          string_of_int num_isar_deps ^ " + " ^ string_of_int (length facts - num_isar_deps) ^
blanchet
parents: 57014
diff changeset
  1222
          " facts.")
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1223
      else
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1224
        ();
55286
blanchet
parents: 55202
diff changeset
  1225
      (case run_prover_for_mash ctxt params prover name facts goal of
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1226
        {outcome = NONE, used_facts, ...} =>
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1227
        (if verbose andalso auto_level = 0 then
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1228
           let val num_facts = length used_facts in
57017
blanchet
parents: 57014
diff changeset
  1229
             Output.urgent_message ("Found proof with " ^ string_of_int num_facts ^ " fact" ^
blanchet
parents: 57014
diff changeset
  1230
               plural_s num_facts ^ ".")
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1231
           end
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1232
         else
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1233
           ();
50754
74a6adcb96ac also generate queries for goals with too many Isar dependencies
blanchet
parents: 50751
diff changeset
  1234
         (true, map fst used_facts))
55286
blanchet
parents: 55202
diff changeset
  1235
      | _ => (false, isar_deps))
blanchet
parents: 55202
diff changeset
  1236
    end)
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1237
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1238
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1239
(*** High-level communication with MaSh ***)
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1240
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1241
(* In the following functions, chunks are risers w.r.t. "thm_less_eq". *)
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1242
51181
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1243
fun chunks_and_parents_for chunks th =
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1244
  let
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1245
    fun insert_parent new parents =
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1246
      let val parents = parents |> filter_out (fn p => thm_less_eq (p, new)) in
57096
e4074b91b2a6 always remove duplicates in meshing + use weights for Naive Bayes
blanchet
parents: 57095
diff changeset
  1247
        parents |> forall (fn p => not (thm_less_eq (new, p))) parents ? cons new
51181
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1248
      end
57006
blanchet
parents: 57005
diff changeset
  1249
51181
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1250
    fun rechunk seen (rest as th' :: ths) =
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1251
      if thm_less_eq (th', th) then (rev seen, rest)
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1252
      else rechunk (th' :: seen) ths
57006
blanchet
parents: 57005
diff changeset
  1253
51181
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1254
    fun do_chunk [] accum = accum
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1255
      | do_chunk (chunk as hd_chunk :: _) (chunks, parents) =
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1256
        if thm_less_eq (hd_chunk, th) then
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1257
          (chunk :: chunks, insert_parent hd_chunk parents)
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1258
        else if thm_less_eq (List.last chunk, th) then
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1259
          let val (front, back as hd_back :: _) = rechunk [] chunk in
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1260
            (front :: back :: chunks, insert_parent hd_back parents)
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1261
          end
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1262
        else
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1263
          (chunk :: chunks, parents)
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1264
  in
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1265
    fold_rev do_chunk chunks ([], [])
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1266
    |>> cons []
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1267
    ||> map nickname_of_thm
51181
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1268
  end
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1269
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1270
fun attach_parents_to_facts _ [] = []
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1271
  | attach_parents_to_facts old_facts (facts as (_, th) :: _) =
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1272
    let
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1273
      fun do_facts _ [] = []
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1274
        | do_facts (_, parents) [fact] = [(parents, fact)]
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1275
        | do_facts (chunks, parents)
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1276
                   ((fact as (_, th)) :: (facts as (_, th') :: _)) =
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1277
          let
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1278
            val chunks = app_hd (cons th) chunks
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1279
            val chunks_and_parents' =
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1280
              if thm_less_eq (th, th') andalso
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1281
                 thy_name_of_thm th = thy_name_of_thm th' then
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1282
                (chunks, [nickname_of_thm th])
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1283
              else
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1284
                chunks_and_parents_for chunks th'
57006
blanchet
parents: 57005
diff changeset
  1285
          in
blanchet
parents: 57005
diff changeset
  1286
            (parents, fact) :: do_facts chunks_and_parents' facts
blanchet
parents: 57005
diff changeset
  1287
          end
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1288
    in
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1289
      old_facts @ facts
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1290
      |> do_facts (chunks_and_parents_for [[]] th)
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1291
      |> drop (length old_facts)
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1292
    end
51177
e8c9755fd14e tuned code: factored out parent computation
blanchet
parents: 51176
diff changeset
  1293
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1294
fun maximal_wrt_graph G keys =
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1295
  let
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1296
    val tab = Symtab.empty |> fold (fn name => Symtab.default (name, ())) keys
57006
blanchet
parents: 57005
diff changeset
  1297
57012
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1298
    fun insert_new seen name = not (Symtab.defined seen name) ? insert (op =) name
57006
blanchet
parents: 57005
diff changeset
  1299
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1300
    fun num_keys keys = Graph.Keys.fold (K (Integer.add 1)) keys 0
57006
blanchet
parents: 57005
diff changeset
  1301
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1302
    fun find_maxes _ (maxs, []) = map snd maxs
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1303
      | find_maxes seen (maxs, new :: news) =
57012
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1304
        find_maxes (seen |> num_keys (Graph.imm_succs G new) > 1 ? Symtab.default (new, ()))
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1305
          (if Symtab.defined tab new then
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1306
             let
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1307
               val newp = Graph.all_preds G [new]
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1308
               fun is_ancestor x yp = member (op =) yp x
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1309
               val maxs = maxs |> filter (fn (_, max) => not (is_ancestor max newp))
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1310
             in
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1311
               if exists (is_ancestor new o fst) maxs then (maxs, news)
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1312
               else ((newp, new) :: filter_out (fn (_, max) => is_ancestor max newp) maxs, news)
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1313
             end
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1314
           else
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1315
             (maxs, Graph.Keys.fold (insert_new seen) (Graph.imm_preds G new) news))
57006
blanchet
parents: 57005
diff changeset
  1316
  in
blanchet
parents: 57005
diff changeset
  1317
    find_maxes Symtab.empty ([], Graph.maximals G)
blanchet
parents: 57005
diff changeset
  1318
  end
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1319
57012
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1320
fun maximal_wrt_access_graph access_G facts =
57013
ed95456499e6 better way to take invisible facts into account than 'island' business
blanchet
parents: 57012
diff changeset
  1321
  map (nickname_of_thm o snd) facts
57012
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1322
  |> maximal_wrt_graph access_G
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1323
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1324
fun is_fact_in_graph access_G = can (Graph.get_node access_G) o nickname_of_thm
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1325
53197
6c5e7143e1f6 tuned fudge factor in light of evaluation
blanchet
parents: 53159
diff changeset
  1326
val chained_feature_factor = 0.5 (* FUDGE *)
6c5e7143e1f6 tuned fudge factor in light of evaluation
blanchet
parents: 53159
diff changeset
  1327
val extra_feature_factor = 0.1 (* FUDGE *)
53201
2a2dc18f3e10 reverted 6c5e7143e1f6; took a better look at evaluation data this time
blanchet
parents: 53197
diff changeset
  1328
val num_extra_feature_facts = 10 (* FUDGE *)
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1329
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1330
(* FUDGE *)
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1331
fun weight_of_proximity_fact rank =
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1332
  Math.pow (1.3, 15.5 - 0.2 * Real.fromInt rank) + 15.0
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1333
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
  1334
fun weight_facts_smoothly facts =
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1335
  facts ~~ map weight_of_proximity_fact (0 upto length facts - 1)
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1336
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
  1337
(* FUDGE *)
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
  1338
fun steep_weight_of_fact rank =
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
  1339
  Math.pow (0.62, log2 (Real.fromInt (rank + 1)))
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
  1340
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
  1341
fun weight_facts_steeply facts =
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
  1342
  facts ~~ map steep_weight_of_fact (0 upto length facts - 1)
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
  1343
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1344
val max_proximity_facts = 100
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1345
54060
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1346
fun find_mash_suggestions ctxt max_facts suggs facts chained raw_unknown =
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1347
  let
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1348
    val inter_fact = inter (eq_snd Thm.eq_thm_prop)
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1349
    val raw_mash = find_suggested_facts ctxt facts suggs
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1350
    val proximate = take max_proximity_facts facts
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1351
    val unknown_chained = inter_fact raw_unknown chained
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1352
    val unknown_proximate = inter_fact raw_unknown proximate
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1353
    val mess =
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1354
      [(0.9 (* FUDGE *), (map (rpair 1.0) unknown_chained, [])),
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1355
       (0.4 (* FUDGE *), (weight_facts_smoothly unknown_proximate, [])),
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1356
       (0.1 (* FUDGE *), (weight_facts_steeply raw_mash, raw_unknown))]
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
  1357
    val unknown = raw_unknown
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
  1358
      |> fold (subtract (eq_snd Thm.eq_thm_prop)) [unknown_chained, unknown_proximate]
57006
blanchet
parents: 57005
diff changeset
  1359
  in
57150
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
  1360
    (mesh_facts (eq_snd (gen_eq_thm ctxt)) max_facts mess, unknown)
57006
blanchet
parents: 57005
diff changeset
  1361
  end
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1362
53127
60801776d8af weight MaSh constants by frequency
blanchet
parents: 53121
diff changeset
  1363
fun add_const_counts t =
57006
blanchet
parents: 57005
diff changeset
  1364
  fold (fn s => Symtab.map_default (s, 0) (Integer.add 1)) (Term.add_const_names t [])
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1365
54503
blanchet
parents: 54432
diff changeset
  1366
fun mash_suggested_facts ctxt ({debug, overlord, ...} : params) max_facts hyp_ts concl_t facts =
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1367
  let
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1368
    val thy = Proof_Context.theory_of ctxt
53559
3858246c7c8f when pouring in extra features into the goal, only consider facts from the current theory -- the bottom 10 facts of the last import might be completely unrelated
blanchet
parents: 53558
diff changeset
  1369
    val thy_name = Context.theory_name thy
57052
ea5912e3b008 until naive Bayes supports weights, don't incorporate 'extra' low-weight features
blanchet
parents: 57039
diff changeset
  1370
    val engine = the_mash_engine ()
ea5912e3b008 until naive Bayes supports weights, don't incorporate 'extra' low-weight features
blanchet
parents: 57039
diff changeset
  1371
53141
d27e99a6a679 take chained and proximate facts into consideration when computing MaSh features
blanchet
parents: 53140
diff changeset
  1372
    val facts = facts |> sort (crude_thm_ord o pairself snd o swap)
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1373
    val chained = facts |> filter (fn ((_, (scope, _)), _) => scope = Chained)
53141
d27e99a6a679 take chained and proximate facts into consideration when computing MaSh features
blanchet
parents: 53140
diff changeset
  1374
    val num_facts = length facts
57060
7a1167331c8c disable weights that cause more harm than they help in kNN
blanchet
parents: 57059
diff changeset
  1375
57096
e4074b91b2a6 always remove duplicates in meshing + use weights for Naive Bayes
blanchet
parents: 57095
diff changeset
  1376
    (* Weights appear to hurt kNN more than they help. *)
57104
b93e0680a5b3 disabled IDF for now -- empirical evidence points the wrong way (as usual)
blanchet
parents: 57103
diff changeset
  1377
    val const_tab = Symtab.empty |> engine <> MaSh_SML_kNN
b93e0680a5b3 disabled IDF for now -- empirical evidence points the wrong way (as usual)
blanchet
parents: 57103
diff changeset
  1378
      ? fold (add_const_counts o prop_of o snd) facts
54085
b6b41e1d5689 optimized built-in const check
blanchet
parents: 54064
diff changeset
  1379
53559
3858246c7c8f when pouring in extra features into the goal, only consider facts from the current theory -- the bottom 10 facts of the last import might be completely unrelated
blanchet
parents: 53558
diff changeset
  1380
    fun fact_has_right_theory (_, th) =
3858246c7c8f when pouring in extra features into the goal, only consider facts from the current theory -- the bottom 10 facts of the last import might be completely unrelated
blanchet
parents: 53558
diff changeset
  1381
      thy_name = Context.theory_name (theory_of_thm th)
56995
61855ade6c7e hide more consts to beautify documentation
blanchet
parents: 56303
diff changeset
  1382
53141
d27e99a6a679 take chained and proximate facts into consideration when computing MaSh features
blanchet
parents: 53140
diff changeset
  1383
    fun chained_or_extra_features_of factor (((_, stature), th), weight) =
d27e99a6a679 take chained and proximate facts into consideration when computing MaSh features
blanchet
parents: 53140
diff changeset
  1384
      [prop_of th]
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1385
      |> features_of ctxt (theory_of_thm th) num_facts const_tab stature
53141
d27e99a6a679 take chained and proximate facts into consideration when computing MaSh features
blanchet
parents: 53140
diff changeset
  1386
      |> map (apsnd (fn r => weight * factor * r))
54085
b6b41e1d5689 optimized built-in const check
blanchet
parents: 54064
diff changeset
  1387
57017
blanchet
parents: 57014
diff changeset
  1388
    fun query_args access_G =
blanchet
parents: 57014
diff changeset
  1389
      let
blanchet
parents: 57014
diff changeset
  1390
        val parents = maximal_wrt_access_graph access_G facts
blanchet
parents: 57014
diff changeset
  1391
        val hints = chained
blanchet
parents: 57014
diff changeset
  1392
          |> filter (is_fact_in_graph access_G o snd)
blanchet
parents: 57014
diff changeset
  1393
          |> map (nickname_of_thm o snd)
blanchet
parents: 57014
diff changeset
  1394
blanchet
parents: 57014
diff changeset
  1395
        val goal_feats =
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1396
          features_of ctxt thy num_facts const_tab (Local, General) (concl_t :: hyp_ts)
57017
blanchet
parents: 57014
diff changeset
  1397
        val chained_feats = chained
blanchet
parents: 57014
diff changeset
  1398
          |> map (rpair 1.0)
blanchet
parents: 57014
diff changeset
  1399
          |> map (chained_or_extra_features_of chained_feature_factor)
blanchet
parents: 57014
diff changeset
  1400
          |> rpair [] |-> fold (union (eq_fst (op =)))
57052
ea5912e3b008 until naive Bayes supports weights, don't incorporate 'extra' low-weight features
blanchet
parents: 57039
diff changeset
  1401
        val extra_feats =
57095
001ec97c3e59 updated naive Bayes
blanchet
parents: 57089
diff changeset
  1402
          facts
001ec97c3e59 updated naive Bayes
blanchet
parents: 57089
diff changeset
  1403
          |> take (Int.max (0, num_extra_feature_facts - length chained))
001ec97c3e59 updated naive Bayes
blanchet
parents: 57089
diff changeset
  1404
          |> filter fact_has_right_theory
001ec97c3e59 updated naive Bayes
blanchet
parents: 57089
diff changeset
  1405
          |> weight_facts_steeply
001ec97c3e59 updated naive Bayes
blanchet
parents: 57089
diff changeset
  1406
          |> map (chained_or_extra_features_of extra_feature_factor)
001ec97c3e59 updated naive Bayes
blanchet
parents: 57089
diff changeset
  1407
          |> rpair [] |-> fold (union (eq_fst (op =)))
57017
blanchet
parents: 57014
diff changeset
  1408
        val feats = fold (union (eq_fst (op =))) [chained_feats, extra_feats] goal_feats
blanchet
parents: 57014
diff changeset
  1409
          |> debug ? sort (Real.compare o swap o pairself snd)
blanchet
parents: 57014
diff changeset
  1410
      in
blanchet
parents: 57014
diff changeset
  1411
        (parents, hints, feats)
blanchet
parents: 57014
diff changeset
  1412
      end
blanchet
parents: 57014
diff changeset
  1413
blanchet
parents: 57014
diff changeset
  1414
    val (access_G, py_suggs) =
53148
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
  1415
      peek_state ctxt overlord (fn {access_G, ...} =>
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
  1416
        if Graph.is_empty access_G then
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
  1417
          (trace_msg ctxt (K "Nothing has been learned yet"); (access_G, []))
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
  1418
        else
57017
blanchet
parents: 57014
diff changeset
  1419
          (access_G,
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1420
           if engine = MaSh_Py then
57017
blanchet
parents: 57014
diff changeset
  1421
             let val (parents, hints, feats) = query_args access_G in
blanchet
parents: 57014
diff changeset
  1422
               MaSh_Py.query ctxt overlord max_facts ([], hints, parents, feats)
57125
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
  1423
               |> map fst
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1424
             end
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1425
           else
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1426
             []))
57017
blanchet
parents: 57014
diff changeset
  1427
blanchet
parents: 57014
diff changeset
  1428
    val sml_suggs =
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1429
      if engine = MaSh_Py then
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1430
        []
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1431
      else
57103
blanchet
parents: 57102
diff changeset
  1432
        let
blanchet
parents: 57102
diff changeset
  1433
          val (parents, hints, feats) = query_args access_G
blanchet
parents: 57102
diff changeset
  1434
          val visible_facts = Graph.all_preds access_G parents
blanchet
parents: 57102
diff changeset
  1435
          val learns =
blanchet
parents: 57102
diff changeset
  1436
            Graph.schedule (fn _ => fn (fact, (_, feats, deps)) => (fact, feats, deps)) access_G
blanchet
parents: 57102
diff changeset
  1437
        in
57125
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
  1438
          MaSh_SML.query ctxt overlord engine visible_facts max_facts (learns, hints, feats)
57017
blanchet
parents: 57014
diff changeset
  1439
        end
blanchet
parents: 57014
diff changeset
  1440
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1441
    val unknown = filter_out (is_fact_in_graph access_G o snd) facts
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1442
  in
57017
blanchet
parents: 57014
diff changeset
  1443
    find_mash_suggestions ctxt max_facts (py_suggs @ sml_suggs) facts chained unknown
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1444
    |> pairself (map fact_of_raw_fact)
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1445
  end
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1446
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
  1447
fun learn_wrt_access_graph ctxt (name, parents, feats, deps) (learns, G) =
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1448
  let
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
  1449
    fun maybe_learn_from from (accum as (parents, G)) =
57013
ed95456499e6 better way to take invisible facts into account than 'island' business
blanchet
parents: 57012
diff changeset
  1450
      try_graph ctxt "updating graph" accum (fn () =>
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
  1451
        (from :: parents, Graph.add_edge_acyclic (from, name) G))
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
  1452
    val G = G |> Graph.default_node (name, (Isar_Proof, feats, deps))
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
  1453
    val (parents, G) = ([], G) |> fold maybe_learn_from parents
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
  1454
    val (deps, _) = ([], G) |> fold maybe_learn_from deps
57006
blanchet
parents: 57005
diff changeset
  1455
  in
57010
121b63d7bcdb take weights into consideration in knn
blanchet
parents: 57009
diff changeset
  1456
    ((name, parents, map fst feats, deps) :: learns, G)
57006
blanchet
parents: 57005
diff changeset
  1457
  end
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1458
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
  1459
fun relearn_wrt_access_graph ctxt (name, deps) (relearns, G) =
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1460
  let
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
  1461
    fun maybe_relearn_from from (accum as (parents, G)) =
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1462
      try_graph ctxt "updating graph" accum (fn () =>
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
  1463
        (from :: parents, Graph.add_edge_acyclic (from, name) G))
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
  1464
    val G = G |> Graph.map_node name (fn (_, feats, _) => (Automatic_Proof, feats, deps))
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
  1465
    val (deps, _) = ([], G) |> fold maybe_relearn_from deps
57006
blanchet
parents: 57005
diff changeset
  1466
  in
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
  1467
    ((name, deps) :: relearns, G)
57006
blanchet
parents: 57005
diff changeset
  1468
  end
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1469
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1470
fun flop_wrt_access_graph name =
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1471
  Graph.map_node name (fn (_, feats, deps) => (Isar_Proof_wegen_Prover_Flop, feats, deps))
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1472
57273
01b68f625550 automatically learn MaSh facts also in 'blocking' mode
blanchet
parents: 57150
diff changeset
  1473
val learn_timeout_slack = 20.0
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1474
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1475
fun launch_thread timeout task =
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1476
  let
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1477
    val hard_timeout = time_mult learn_timeout_slack timeout
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1478
    val birth_time = Time.now ()
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1479
    val death_time = Time.+ (birth_time, hard_timeout)
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1480
    val desc = ("Machine learner for Sledgehammer", "")
57006
blanchet
parents: 57005
diff changeset
  1481
  in
blanchet
parents: 57005
diff changeset
  1482
    Async_Manager.thread MaShN birth_time death_time desc task
blanchet
parents: 57005
diff changeset
  1483
  end
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1484
57013
ed95456499e6 better way to take invisible facts into account than 'island' business
blanchet
parents: 57012
diff changeset
  1485
fun learned_proof_name () =
ed95456499e6 better way to take invisible facts into account than 'island' business
blanchet
parents: 57012
diff changeset
  1486
  Date.fmt ".%Y%m%d.%H%M%S." (Date.fromTimeLocal (Time.now ())) ^ serial_string ()
ed95456499e6 better way to take invisible facts into account than 'island' business
blanchet
parents: 57012
diff changeset
  1487
54503
blanchet
parents: 54432
diff changeset
  1488
fun mash_learn_proof ctxt ({overlord, timeout, ...} : params) t facts used_ths =
53819
e55d641d0a70 honor MaSh's zero-overhead policy -- no learning if the tool is disabled
blanchet
parents: 53790
diff changeset
  1489
  if is_mash_enabled () then
54816
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1490
    launch_thread timeout (fn () =>
57006
blanchet
parents: 57005
diff changeset
  1491
      let
blanchet
parents: 57005
diff changeset
  1492
        val thy = Proof_Context.theory_of ctxt
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1493
        val feats = features_of ctxt thy 0 Symtab.empty (Local, General) [t]
57006
blanchet
parents: 57005
diff changeset
  1494
      in
57011
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
  1495
        map_state ctxt overlord (fn state as {access_G, num_known_facts, dirty} =>
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
  1496
          let
57013
ed95456499e6 better way to take invisible facts into account than 'island' business
blanchet
parents: 57012
diff changeset
  1497
            val parents = maximal_wrt_access_graph access_G facts
57011
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
  1498
            val deps = used_ths
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
  1499
              |> filter (is_fact_in_graph access_G)
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
  1500
              |> map nickname_of_thm
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
  1501
          in
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1502
            if the_mash_engine () = MaSh_Py then
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1503
              (MaSh_Py.learn ctxt overlord true [("", parents, map fst feats, deps)]; state)
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1504
            else
57097
blanchet
parents: 57096
diff changeset
  1505
              let
blanchet
parents: 57096
diff changeset
  1506
                val name = learned_proof_name ()
blanchet
parents: 57096
diff changeset
  1507
                val access_G = access_G |> add_node Automatic_Proof name parents feats deps
blanchet
parents: 57096
diff changeset
  1508
              in
57012
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1509
                {access_G = access_G, num_known_facts = num_known_facts + 1,
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1510
                 dirty = Option.map (cons name) dirty}
43fd82a537a3 cleaner handling of learned proofs
blanchet
parents: 57011
diff changeset
  1511
              end
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
  1512
          end);
57006
blanchet
parents: 57005
diff changeset
  1513
        (true, "")
blanchet
parents: 57005
diff changeset
  1514
      end)
53819
e55d641d0a70 honor MaSh's zero-overhead policy -- no learning if the tool is disabled
blanchet
parents: 53790
diff changeset
  1515
  else
e55d641d0a70 honor MaSh's zero-overhead policy -- no learning if the tool is disabled
blanchet
parents: 53790
diff changeset
  1516
    ()
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1517
57353
ee493eb30c7b adaptive k-NN
blanchet
parents: 57306
diff changeset
  1518
fun sendback sub = Active.sendback_markup [Markup.padding_command] (sledgehammerN ^ " " ^ sub)
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1519
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1520
val commit_timeout = seconds 30.0
48332
271a4a6af734 optimize parent computation in MaSh + remove temporary files
blanchet
parents: 48330
diff changeset
  1521
50485
3c6ac2da2f45 merge aliased theorems in MaSh dependencies, modulo symmetry of equality
blanchet
parents: 50484
diff changeset
  1522
(* The timeout is understood in a very relaxed fashion. *)
54816
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1523
fun mash_learn_facts ctxt (params as {debug, verbose, overlord, ...}) prover save auto_level
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1524
    run_prover learn_timeout facts =
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
  1525
  let
48318
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
  1526
    val timer = Timer.startRealTimer ()
54816
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1527
    fun next_commit_time () = Time.+ (Timer.checkRealTimer timer, commit_timeout)
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1528
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1529
    val engine = the_mash_engine ()
53148
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
  1530
    val {access_G, ...} = peek_state ctxt overlord I
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1531
    val is_in_access_G = is_fact_in_graph access_G o snd
51177
e8c9755fd14e tuned code: factored out parent computation
blanchet
parents: 51176
diff changeset
  1532
    val no_new_facts = forall is_in_access_G facts
48308
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
  1533
  in
51177
e8c9755fd14e tuned code: factored out parent computation
blanchet
parents: 51176
diff changeset
  1534
    if no_new_facts andalso not run_prover then
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1535
      if auto_level < 2 then
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1536
        "No new " ^ (if run_prover then "automatic" else "Isar") ^ " proofs to learn." ^
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
  1537
        (if auto_level = 0 andalso not run_prover then
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1538
           "\n\nHint: Try " ^ sendback learn_proverN ^ " to learn from an automatic prover."
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1539
         else
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1540
           "")
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1541
      else
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1542
        ""
48308
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
  1543
    else
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
  1544
      let
50735
6b232d76cbc9 refined class handling, to prevent cycles in fact graph
blanchet
parents: 50732
diff changeset
  1545
        val name_tabs = build_name_tables nickname_of_thm facts
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1546
48439
67a6bcbd3587 removed MaSh junk arising from primrec definitions
blanchet
parents: 48438
diff changeset
  1547
        fun deps_of status th =
57017
blanchet
parents: 57014
diff changeset
  1548
          if status = Non_Rec_Def orelse status = Rec_Def then
48439
67a6bcbd3587 removed MaSh junk arising from primrec definitions
blanchet
parents: 48438
diff changeset
  1549
            SOME []
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
  1550
          else if run_prover then
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1551
            prover_dependencies_of ctxt params prover auto_level facts name_tabs th
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1552
            |> (fn (false, _) => NONE | (true, deps) => trim_dependencies deps)
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1553
          else
50735
6b232d76cbc9 refined class handling, to prevent cycles in fact graph
blanchet
parents: 50732
diff changeset
  1554
            isar_dependencies_of name_tabs th
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1555
48669
cdcdb0547f29 remember ATP flops to avoid repeating them too quickly
blanchet
parents: 48668
diff changeset
  1556
        fun do_commit [] [] [] state = state
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1557
          | do_commit learns relearns flops {access_G, num_known_facts, dirty} =
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1558
            let
50610
d9c4fbbb0c11 name tuning
blanchet
parents: 50608
diff changeset
  1559
              val was_empty = Graph.is_empty access_G
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1560
              val (learns, access_G) = ([], access_G) |> fold (learn_wrt_access_graph ctxt) learns
50631
b69079c14665 tuned ML function names
blanchet
parents: 50624
diff changeset
  1561
              val (relearns, access_G) =
b69079c14665 tuned ML function names
blanchet
parents: 50624
diff changeset
  1562
                ([], access_G) |> fold (relearn_wrt_access_graph ctxt) relearns
50610
d9c4fbbb0c11 name tuning
blanchet
parents: 50608
diff changeset
  1563
              val access_G = access_G |> fold flop_wrt_access_graph flops
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1564
              val num_known_facts = num_known_facts + length learns
48699
a89b83204c24 optimized saving
blanchet
parents: 48669
diff changeset
  1565
              val dirty =
57062
eb6777796782 avoid slow inspection of proof terms now that dependencies are stored in 'state'
blanchet
parents: 57061
diff changeset
  1566
                (case (was_empty, dirty) of
eb6777796782 avoid slow inspection of proof terms now that dependencies are stored in 'state'
blanchet
parents: 57061
diff changeset
  1567
                  (false, SOME names) => SOME (map #1 learns @ map #1 relearns @ names)
55286
blanchet
parents: 55202
diff changeset
  1568
                | _ => NONE)
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1569
            in
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1570
              if engine = MaSh_Py then
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1571
                (MaSh_Py.learn ctxt overlord (save andalso null relearns) (rev learns);
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1572
                 MaSh_Py.relearn ctxt overlord save relearns)
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
  1573
              else
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1574
                ();
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1575
              {access_G = access_G, num_known_facts = num_known_facts, dirty = dirty}
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1576
            end
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1577
50631
b69079c14665 tuned ML function names
blanchet
parents: 50624
diff changeset
  1578
        fun commit last learns relearns flops =
57017
blanchet
parents: 57014
diff changeset
  1579
          (if debug andalso auto_level = 0 then Output.urgent_message "Committing..." else ();
53148
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
  1580
           map_state ctxt overlord (do_commit (rev learns) relearns flops);
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1581
           if not last andalso auto_level = 0 then
50631
b69079c14665 tuned ML function names
blanchet
parents: 50624
diff changeset
  1582
             let val num_proofs = length learns + length relearns in
57017
blanchet
parents: 57014
diff changeset
  1583
               Output.urgent_message ("Learned " ^ string_of_int num_proofs ^ " " ^
blanchet
parents: 57014
diff changeset
  1584
                 (if run_prover then "automatic" else "Isar") ^ " proof" ^
blanchet
parents: 57014
diff changeset
  1585
                 plural_s num_proofs ^ " in the last " ^ string_of_time commit_timeout ^ ".")
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1586
             end
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1587
           else
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1588
             ())
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1589
51177
e8c9755fd14e tuned code: factored out parent computation
blanchet
parents: 51176
diff changeset
  1590
        fun learn_new_fact _ (accum as (_, (_, _, true))) = accum
e8c9755fd14e tuned code: factored out parent computation
blanchet
parents: 51176
diff changeset
  1591
          | learn_new_fact (parents, ((_, stature as (_, status)), th))
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1592
              (learns, (n, next_commit, _)) =
48318
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
  1593
            let
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50623
diff changeset
  1594
              val name = nickname_of_thm th
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
  1595
              val feats = features_of ctxt (theory_of_thm th) 0 Symtab.empty stature [prop_of th]
48439
67a6bcbd3587 removed MaSh junk arising from primrec definitions
blanchet
parents: 48438
diff changeset
  1596
              val deps = deps_of status th |> these
48394
82fc8c956cdc fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents: 48392
diff changeset
  1597
              val n = n |> not (null deps) ? Integer.add 1
50631
b69079c14665 tuned ML function names
blanchet
parents: 50624
diff changeset
  1598
              val learns = (name, parents, feats, deps) :: learns
b69079c14665 tuned ML function names
blanchet
parents: 50624
diff changeset
  1599
              val (learns, next_commit) =
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1600
                if Time.> (Timer.checkRealTimer timer, next_commit) then
50631
b69079c14665 tuned ML function names
blanchet
parents: 50624
diff changeset
  1601
                  (commit false learns [] []; ([], next_commit_time ()))
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1602
                else
50631
b69079c14665 tuned ML function names
blanchet
parents: 50624
diff changeset
  1603
                  (learns, next_commit)
54816
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1604
              val timed_out = Time.> (Timer.checkRealTimer timer, learn_timeout)
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1605
            in
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1606
              (learns, (n, next_commit, timed_out))
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1607
            end
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1608
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1609
        val n =
51177
e8c9755fd14e tuned code: factored out parent computation
blanchet
parents: 51176
diff changeset
  1610
          if no_new_facts then
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1611
            0
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1612
          else
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1613
            let
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1614
              val new_facts = facts
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1615
                |> sort (crude_thm_ord o pairself snd)
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1616
                |> attach_parents_to_facts []
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1617
                |> filter_out (is_in_access_G o snd)
51177
e8c9755fd14e tuned code: factored out parent computation
blanchet
parents: 51176
diff changeset
  1618
              val (learns, (n, _, _)) =
e8c9755fd14e tuned code: factored out parent computation
blanchet
parents: 51176
diff changeset
  1619
                ([], (0, next_commit_time (), false))
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1620
                |> fold learn_new_fact new_facts
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1621
            in
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1622
              commit true learns [] []; n
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1623
            end
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1624
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1625
        fun relearn_old_fact _ (accum as (_, (_, _, true))) = accum
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1626
          | relearn_old_fact ((_, (_, status)), th) ((relearns, flops), (n, next_commit, _)) =
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1627
            let
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50623
diff changeset
  1628
              val name = nickname_of_thm th
50631
b69079c14665 tuned ML function names
blanchet
parents: 50624
diff changeset
  1629
              val (n, relearns, flops) =
55286
blanchet
parents: 55202
diff changeset
  1630
                (case deps_of status th of
50631
b69079c14665 tuned ML function names
blanchet
parents: 50624
diff changeset
  1631
                  SOME deps => (n + 1, (name, deps) :: relearns, flops)
55286
blanchet
parents: 55202
diff changeset
  1632
                | NONE => (n, relearns, name :: flops))
50631
b69079c14665 tuned ML function names
blanchet
parents: 50624
diff changeset
  1633
              val (relearns, flops, next_commit) =
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1634
                if Time.> (Timer.checkRealTimer timer, next_commit) then
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1635
                  (commit false [] relearns flops; ([], [], next_commit_time ()))
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1636
                else
50631
b69079c14665 tuned ML function names
blanchet
parents: 50624
diff changeset
  1637
                  (relearns, flops, next_commit)
54816
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1638
              val timed_out = Time.> (Timer.checkRealTimer timer, learn_timeout)
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1639
            in
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1640
              ((relearns, flops), (n, next_commit, timed_out))
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1641
            end
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1642
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1643
        val n =
51177
e8c9755fd14e tuned code: factored out parent computation
blanchet
parents: 51176
diff changeset
  1644
          if not run_prover then
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1645
            n
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1646
          else
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1647
            let
48668
5d63c23b4042 remember which MaSh proofs were found using ATPs
blanchet
parents: 48667
diff changeset
  1648
              val max_isar = 1000 * max_dependencies
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1649
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1650
              fun priority_of th =
57062
eb6777796782 avoid slow inspection of proof terms now that dependencies are stored in 'state'
blanchet
parents: 57061
diff changeset
  1651
                random_range 0 max_isar +
eb6777796782 avoid slow inspection of proof terms now that dependencies are stored in 'state'
blanchet
parents: 57061
diff changeset
  1652
                (case try (Graph.get_node access_G) (nickname_of_thm th) of
eb6777796782 avoid slow inspection of proof terms now that dependencies are stored in 'state'
blanchet
parents: 57061
diff changeset
  1653
                  SOME (Isar_Proof, _, deps) => ~100 * length deps
eb6777796782 avoid slow inspection of proof terms now that dependencies are stored in 'state'
blanchet
parents: 57061
diff changeset
  1654
                | SOME (Automatic_Proof, _, _) => 2 * max_isar
eb6777796782 avoid slow inspection of proof terms now that dependencies are stored in 'state'
blanchet
parents: 57061
diff changeset
  1655
                | SOME (Isar_Proof_wegen_Prover_Flop, _, _) => max_isar
eb6777796782 avoid slow inspection of proof terms now that dependencies are stored in 'state'
blanchet
parents: 57061
diff changeset
  1656
                | NONE => 0)
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1657
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1658
              val old_facts = facts
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1659
                |> filter is_in_access_G
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1660
                |> map (`(priority_of o snd))
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1661
                |> sort (int_ord o pairself fst)
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1662
                |> map snd
50631
b69079c14665 tuned ML function names
blanchet
parents: 50624
diff changeset
  1663
              val ((relearns, flops), (n, _, _)) =
48669
cdcdb0547f29 remember ATP flops to avoid repeating them too quickly
blanchet
parents: 48668
diff changeset
  1664
                (([], []), (n, next_commit_time (), false))
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1665
                |> fold relearn_old_fact old_facts
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1666
            in
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1667
              commit true [] relearns flops; n
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1668
            end
48318
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
  1669
      in
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1670
        if verbose orelse auto_level < 2 then
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1671
          "Learned " ^ string_of_int n ^ " nontrivial " ^
54140
564b8adb0952 clarified message
blanchet
parents: 54131
diff changeset
  1672
          (if run_prover then "automatic and " else "") ^ "Isar proof" ^ plural_s n ^
57006
blanchet
parents: 57005
diff changeset
  1673
          (if verbose then " in " ^ string_of_time (Timer.checkRealTimer timer) else "") ^ "."
48319
340187063d84 use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents: 48318
diff changeset
  1674
        else
340187063d84 use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents: 48318
diff changeset
  1675
          ""
48318
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
  1676
      end
48308
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
  1677
  end
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
  1678
54123
271a8377656f remove overloading of "max_facts" -- it already controls the number of facts passed to ATPs for 'learn_prover'
blanchet
parents: 54115
diff changeset
  1679
fun mash_learn ctxt (params as {provers, timeout, ...}) fact_override chained run_prover =
48316
252f45c04042 drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents: 48315
diff changeset
  1680
  let
48396
dd82d190c2af name tuning
blanchet
parents: 48395
diff changeset
  1681
    val css = Sledgehammer_Fact.clasimpset_rule_table_of ctxt
48395
85a7fb65507a learning should honor the fact override and the chained facts
blanchet
parents: 48394
diff changeset
  1682
    val ctxt = ctxt |> Config.put instantiate_inducts false
57006
blanchet
parents: 57005
diff changeset
  1683
    val facts = nearly_all_facts ctxt false fact_override Symtab.empty css chained [] @{prop True}
54115
2b7e063c7abc improved duplicate detection in "build_name_tables" by ensuring that the earliest occurrence of a duplicate (if it exists) gets picked as the canonical instance
blanchet
parents: 54100
diff changeset
  1684
      |> sort (crude_thm_ord o pairself snd o swap)
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1685
    val num_facts = length facts
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1686
    val prover = hd provers
57006
blanchet
parents: 57005
diff changeset
  1687
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
  1688
    fun learn auto_level run_prover =
54816
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1689
      mash_learn_facts ctxt params prover true auto_level run_prover one_year facts
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1690
      |> Output.urgent_message
48316
252f45c04042 drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents: 48315
diff changeset
  1691
  in
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
  1692
    if run_prover then
57017
blanchet
parents: 57014
diff changeset
  1693
      (Output.urgent_message ("MaShing through " ^ string_of_int num_facts ^ " fact" ^
blanchet
parents: 57014
diff changeset
  1694
         plural_s num_facts ^ " for automatic proofs (" ^ quote prover ^ " timeout: " ^
blanchet
parents: 57014
diff changeset
  1695
         string_of_time timeout ^ ").\n\nCollecting Isar proofs first...");
50340
72519bf5f135 simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents: 50339
diff changeset
  1696
       learn 1 false;
57017
blanchet
parents: 57014
diff changeset
  1697
       Output.urgent_message "Now collecting automatic proofs. This may take several hours. You \
blanchet
parents: 57014
diff changeset
  1698
         \can safely stop the learning process at any point.";
50340
72519bf5f135 simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents: 50339
diff changeset
  1699
       learn 0 true)
72519bf5f135 simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents: 50339
diff changeset
  1700
    else
57006
blanchet
parents: 57005
diff changeset
  1701
      (Output.urgent_message ("MaShing through " ^ string_of_int num_facts ^ " fact" ^
blanchet
parents: 57005
diff changeset
  1702
         plural_s num_facts ^ " for Isar proofs...");
50340
72519bf5f135 simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents: 50339
diff changeset
  1703
       learn 0 false)
48316
252f45c04042 drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents: 48315
diff changeset
  1704
  end
48249
2bd242c56c90 dummy implementation
blanchet
parents: 48248
diff changeset
  1705
53148
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
  1706
fun mash_can_suggest_facts ctxt overlord =
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
  1707
  not (Graph.is_empty (#access_G (peek_state ctxt overlord I)))
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
  1708
57274
0acfdb151e52 more generous formula -- there are lots of duplicates out there
blanchet
parents: 57273
diff changeset
  1709
(* Generate more suggestions than requested, because some might be thrown out later for various
0acfdb151e52 more generous formula -- there are lots of duplicates out there
blanchet
parents: 57273
diff changeset
  1710
   reasons (e.g., duplicates). *)
0acfdb151e52 more generous formula -- there are lots of duplicates out there
blanchet
parents: 57273
diff changeset
  1711
fun generous_max_suggestions max_facts = 3 * max_facts div 2 + 25
50383
4274b25ff4e7 take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents: 50382
diff changeset
  1712
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50755
diff changeset
  1713
val mepo_weight = 0.5
4247cbd78aaf export MeSh data as well
blanchet
parents: 50755
diff changeset
  1714
val mash_weight = 0.5
4247cbd78aaf export MeSh data as well
blanchet
parents: 50755
diff changeset
  1715
53152
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1716
val max_facts_to_learn_before_query = 100
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1717
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1718
(* The threshold should be large enough so that MaSh does not get activated for Auto Sledgehammer
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1719
   and Try. *)
48318
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
  1720
val min_secs_for_learning = 15
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
  1721
54816
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1722
fun relevant_facts ctxt (params as {overlord, blocking, learn, fact_filter, timeout, ...}) prover
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1723
    max_facts ({add, only, ...} : fact_override) hyp_ts concl_t facts =
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
  1724
  if not (subset (op =) (the_list fact_filter, fact_filters)) then
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
  1725
    error ("Unknown fact filter: " ^ quote (the fact_filter) ^ ".")
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
  1726
  else if only then
57150
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
  1727
    [("", map fact_of_raw_fact facts)]
48321
c552d7f1720b learn from minimized ATP proofs
blanchet
parents: 48320
diff changeset
  1728
  else if max_facts <= 0 orelse null facts then
51010
afd0213a3dab tuned data structure
blanchet
parents: 51008
diff changeset
  1729
    [("", [])]
48288
255c6e1fd505 rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents: 48251
diff changeset
  1730
  else
255c6e1fd505 rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents: 48251
diff changeset
  1731
    let
53152
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1732
      fun maybe_launch_thread () =
57273
01b68f625550 automatically learn MaSh facts also in 'blocking' mode
blanchet
parents: 57150
diff changeset
  1733
        if not (Async_Manager.has_running_threads MaShN) andalso
54816
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1734
           Time.toSeconds timeout >= min_secs_for_learning then
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1735
          let val timeout = time_mult learn_timeout_slack timeout in
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1736
            launch_thread timeout
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1737
              (fn () => (true, mash_learn_facts ctxt params prover true 2 false timeout facts))
48319
340187063d84 use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents: 48318
diff changeset
  1738
          end
48318
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
  1739
        else
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
  1740
          ()
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
  1741
53152
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1742
      fun maybe_learn () =
53819
e55d641d0a70 honor MaSh's zero-overhead policy -- no learning if the tool is disabled
blanchet
parents: 53790
diff changeset
  1743
        if is_mash_enabled () andalso learn then
53152
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1744
          let
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1745
            val {access_G, num_known_facts, ...} = peek_state ctxt overlord I
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1746
            val is_in_access_G = is_fact_in_graph access_G o snd
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1747
          in
57011
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
  1748
            if length facts - num_known_facts <= max_facts_to_learn_before_query then
55286
blanchet
parents: 55202
diff changeset
  1749
              (case length (filter_out is_in_access_G facts) of
54012
7a8263843acb removed spurious save if nothing needs to bee learned
blanchet
parents: 53819
diff changeset
  1750
                0 => false
7a8263843acb removed spurious save if nothing needs to bee learned
blanchet
parents: 53819
diff changeset
  1751
              | num_facts_to_learn =>
7a8263843acb removed spurious save if nothing needs to bee learned
blanchet
parents: 53819
diff changeset
  1752
                if num_facts_to_learn <= max_facts_to_learn_before_query then
7a8263843acb removed spurious save if nothing needs to bee learned
blanchet
parents: 53819
diff changeset
  1753
                  (mash_learn_facts ctxt params prover false 2 false timeout facts
7a8263843acb removed spurious save if nothing needs to bee learned
blanchet
parents: 53819
diff changeset
  1754
                   |> (fn "" => () | s => Output.urgent_message (MaShN ^ ": " ^ s));
7a8263843acb removed spurious save if nothing needs to bee learned
blanchet
parents: 53819
diff changeset
  1755
                   true)
7a8263843acb removed spurious save if nothing needs to bee learned
blanchet
parents: 53819
diff changeset
  1756
                else
55286
blanchet
parents: 55202
diff changeset
  1757
                  (maybe_launch_thread (); false))
53152
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1758
            else
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1759
              (maybe_launch_thread (); false)
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1760
          end
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1761
        else
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1762
          false
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
  1763
53152
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1764
      val (save, effective_fact_filter) =
55286
blanchet
parents: 55202
diff changeset
  1765
        (case fact_filter of
53152
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1766
          SOME ff => (ff <> mepoN andalso maybe_learn (), ff)
48318
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
  1767
        | NONE =>
51024
98fb341d32e3 distinguish MeSh and smart -- with smart, allow combinations of MaSh, MeSh, and MePo in different slices -- and use MaSh also with SMT solvers, based on evaluation
blanchet
parents: 51020
diff changeset
  1768
          if is_mash_enabled () then
57107
2d502370ee99 changed MaSh to use SML version instead of Python version of naive Bayes by default (i.e. if MASH=yes in the settings, or 'fact_filter=mash' with no other explicit setting)
blanchet
parents: 57106
diff changeset
  1769
            (maybe_learn (), if mash_can_suggest_facts ctxt overlord then meshN else mepoN)
48407
47fe0ca12fc2 faster maximal node computation
blanchet
parents: 48406
diff changeset
  1770
          else
55286
blanchet
parents: 55202
diff changeset
  1771
            (false, mepoN))
54143
18def1c73c79 make sure add: doesn't add duplicates, and works for [no_atp] facts
blanchet
parents: 54141
diff changeset
  1772
18def1c73c79 make sure add: doesn't add duplicates, and works for [no_atp] facts
blanchet
parents: 54141
diff changeset
  1773
      val unique_facts = drop_duplicate_facts facts
48288
255c6e1fd505 rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents: 48251
diff changeset
  1774
      val add_ths = Attrib.eval_thms ctxt add
54143
18def1c73c79 make sure add: doesn't add duplicates, and works for [no_atp] facts
blanchet
parents: 54141
diff changeset
  1775
51004
5f2788c38127 distinguish raw and non-raw facts, using raw for 10 000s of facts and non-raw after selection of some hundreds
blanchet
parents: 51003
diff changeset
  1776
      fun in_add (_, th) = member Thm.eq_thm_prop add_ths th
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
  1777
51003
198cb05fb35b report (MeSh, MePo, MaSh) triple, to be able to use different filters in different slices
blanchet
parents: 51001
diff changeset
  1778
      fun add_and_take accepts =
198cb05fb35b report (MeSh, MePo, MaSh) triple, to be able to use different filters in different slices
blanchet
parents: 51001
diff changeset
  1779
        (case add_ths of
198cb05fb35b report (MeSh, MePo, MaSh) triple, to be able to use different filters in different slices
blanchet
parents: 51001
diff changeset
  1780
           [] => accepts
57150
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
  1781
         | _ =>
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
  1782
           (unique_facts |> filter in_add |> map fact_of_raw_fact) @ (accepts |> filter_out in_add))
48293
914ca0827804 renamed Sledgehammer options
blanchet
parents: 48292
diff changeset
  1783
        |> take max_facts
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
  1784
48406
b002cc16aa99 honor suggested MaSh weights
blanchet
parents: 48404
diff changeset
  1785
      fun mepo () =
54143
18def1c73c79 make sure add: doesn't add duplicates, and works for [no_atp] facts
blanchet
parents: 54141
diff changeset
  1786
        (mepo_suggested_facts ctxt params max_facts NONE hyp_ts concl_t unique_facts
54091
4df62d7eae34 parallelize MeSh
blanchet
parents: 54089
diff changeset
  1787
         |> weight_facts_steeply, [])
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
  1788
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
  1789
      fun mash () =
57108
dc0b4f50e288 more generous max number of suggestions, for more safety
blanchet
parents: 57107
diff changeset
  1790
        mash_suggested_facts ctxt params (generous_max_suggestions max_facts) hyp_ts concl_t facts
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
  1791
        |>> weight_facts_steeply
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
  1792
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
  1793
      val mess =
51003
198cb05fb35b report (MeSh, MePo, MaSh) triple, to be able to use different filters in different slices
blanchet
parents: 51001
diff changeset
  1794
        (* the order is important for the "case" expression below *)
54091
4df62d7eae34 parallelize MeSh
blanchet
parents: 54089
diff changeset
  1795
        [] |> effective_fact_filter <> mepoN ? cons (mash_weight, mash)
4df62d7eae34 parallelize MeSh
blanchet
parents: 54089
diff changeset
  1796
           |> effective_fact_filter <> mashN ? cons (mepo_weight, mepo)
4df62d7eae34 parallelize MeSh
blanchet
parents: 54089
diff changeset
  1797
           |> Par_List.map (apsnd (fn f => f ()))
57150
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
  1798
      val mesh = mesh_facts (eq_snd (gen_eq_thm ctxt)) max_facts mess |> add_and_take
48288
255c6e1fd505 rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents: 48251
diff changeset
  1799
    in
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1800
      if the_mash_engine () = MaSh_Py andalso save then MaSh_Py.save ctxt overlord else ();
55286
blanchet
parents: 55202
diff changeset
  1801
      (case (fact_filter, mess) of
51024
98fb341d32e3 distinguish MeSh and smart -- with smart, allow combinations of MaSh, MeSh, and MePo in different slices -- and use MaSh also with SMT solvers, based on evaluation
blanchet
parents: 51020
diff changeset
  1802
        (NONE, [(_, (mepo, _)), (_, (mash, _))]) =>
51010
afd0213a3dab tuned data structure
blanchet
parents: 51008
diff changeset
  1803
        [(meshN, mesh), (mepoN, mepo |> map fst |> add_and_take),
afd0213a3dab tuned data structure
blanchet
parents: 51008
diff changeset
  1804
         (mashN, mash |> map fst |> add_and_take)]
55286
blanchet
parents: 55202
diff changeset
  1805
      | _ => [(effective_fact_filter, mesh)])
48288
255c6e1fd505 rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents: 48251
diff changeset
  1806
    end
255c6e1fd505 rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents: 48251
diff changeset
  1807
53148
c898409d8630 fixed subtle bug with "take" + thread overlord through
blanchet
parents: 53142
diff changeset
  1808
fun kill_learners ctxt ({overlord, ...} : params) =
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
  1809
  (Async_Manager.kill_threads MaShN "learner";
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
  1810
   if the_mash_engine () = MaSh_Py then MaSh_Py.shutdown ctxt overlord else ())
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1811
48319
340187063d84 use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents: 48318
diff changeset
  1812
fun running_learners () = Async_Manager.running_threads MaShN "learner"
340187063d84 use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents: 48318
diff changeset
  1813
48248
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
  1814
end;