src/HOL/Tools/Sledgehammer/sledgehammer_mash.ML
author wenzelm
Thu, 01 Feb 2018 15:12:57 +0100
changeset 67560 0fa87bd86566
parent 65458 cf504b7a7aa7
child 69593 3dda49e08b9d
permissions -rw-r--r--
tuned signature: more operations;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
48380
d4b7c7be3116 renamed ML files
blanchet
parents: 48379
diff changeset
     1
(*  Title:      HOL/Tools/Sledgehammer/sledgehammer_mash.ML
48248
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
     2
    Author:     Jasmin Blanchette, TU Muenchen
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
     3
    Author:     Cezary Kaliszyk, University of Innsbruck
48248
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
     4
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
     5
Sledgehammer's machine-learning-based relevance filter (MaSh).
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
     6
*)
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
     7
48381
1b7d798460bb renamed ML structures
blanchet
parents: 48380
diff changeset
     8
signature SLEDGEHAMMER_MASH =
48248
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
     9
sig
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
    10
  type stature = ATP_Problem_Generate.stature
51004
5f2788c38127 distinguish raw and non-raw facts, using raw for 10 000s of facts and non-raw after selection of some hundreds
blanchet
parents: 51003
diff changeset
    11
  type raw_fact = Sledgehammer_Fact.raw_fact
48296
e7f01b7e244e gracefully handle the case of empty theories when going up the accessibility chain
blanchet
parents: 48293
diff changeset
    12
  type fact = Sledgehammer_Fact.fact
e7f01b7e244e gracefully handle the case of empty theories when going up the accessibility chain
blanchet
parents: 48293
diff changeset
    13
  type fact_override = Sledgehammer_Fact.fact_override
55201
1ee776da8da7 renamed ML file
blanchet
parents: 54816
diff changeset
    14
  type params = Sledgehammer_Prover.params
1ee776da8da7 renamed ML file
blanchet
parents: 54816
diff changeset
    15
  type prover_result = Sledgehammer_Prover.prover_result
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
    16
48308
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
    17
  val trace : bool Config.T
57150
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
    18
  val duplicates : bool Config.T
51008
e096c0dc538b more precise output of selected facts
blanchet
parents: 51007
diff changeset
    19
  val MePoN : string
48319
340187063d84 use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents: 48318
diff changeset
    20
  val MaShN : string
51008
e096c0dc538b more precise output of selected facts
blanchet
parents: 51007
diff changeset
    21
  val MeShN : string
48379
2b5ad61e2ccc renamed "iter" fact filter to "MePo" (Meng--Paulson)
blanchet
parents: 48378
diff changeset
    22
  val mepoN : string
2b5ad61e2ccc renamed "iter" fact filter to "MePo" (Meng--Paulson)
blanchet
parents: 48378
diff changeset
    23
  val mashN : string
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
    24
  val meshN : string
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
    25
  val unlearnN : string
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
    26
  val learn_isarN : string
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
    27
  val learn_proverN : string
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
    28
  val relearn_isarN : string
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
    29
  val relearn_proverN : string
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
    30
  val fact_filters : string list
50826
18ace05656cf start using MaSh hints
blanchet
parents: 50825
diff changeset
    31
  val encode_str : string -> string
18ace05656cf start using MaSh hints
blanchet
parents: 50825
diff changeset
    32
  val encode_strs : string list -> string
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
    33
  val decode_str : string -> string
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
    34
  val decode_strs : string -> string list
50632
12c097ff3241 slightly more elegant naming convention (to keep low-level and high-level APIs separated)
blanchet
parents: 50631
diff changeset
    35
57532
c7dc1f0a2b8a tuned terminology
blanchet
parents: 57531
diff changeset
    36
  datatype mash_algorithm =
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
    37
    MaSh_NB
57459
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
    38
  | MaSh_kNN
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
    39
  | MaSh_NB_kNN
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
    40
  | MaSh_NB_Ext
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
    41
  | MaSh_kNN_Ext
57106
52e1e424eec1 export more ML functions, for experimentation
blanchet
parents: 57104
diff changeset
    42
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57108
diff changeset
    43
  val is_mash_enabled : unit -> bool
57532
c7dc1f0a2b8a tuned terminology
blanchet
parents: 57531
diff changeset
    44
  val the_mash_algorithm : unit -> mash_algorithm
57557
242ce8d3d16b record MaSh algorithm in spying data
blanchet
parents: 57555
diff changeset
    45
  val str_of_mash_algorithm : mash_algorithm -> string
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57108
diff changeset
    46
61322
44f4ffe2b210 speed up MaSh duplicate check
blanchet
parents: 61321
diff changeset
    47
  val mesh_facts : ('a list -> 'a list) -> ('a * 'a -> bool) -> int ->
44f4ffe2b210 speed up MaSh duplicate check
blanchet
parents: 61321
diff changeset
    48
    (real * (('a * real) list * 'a list)) list -> 'a list
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
    49
  val nickname_of_thm : thm -> string
57006
blanchet
parents: 57005
diff changeset
    50
  val find_suggested_facts : Proof.context -> ('b * thm) list -> string list -> ('b * thm) list
60641
f6e8293747fd clarified context;
wenzelm
parents: 60640
diff changeset
    51
  val crude_thm_ord : Proof.context -> thm * thm -> order
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
    52
  val thm_less : thm * thm -> bool
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
    53
  val goal_of_thm : theory -> thm -> thm
57006
blanchet
parents: 57005
diff changeset
    54
  val run_prover_for_mash : Proof.context -> params -> string -> string -> fact list -> thm ->
blanchet
parents: 57005
diff changeset
    55
    prover_result
60640
58326c4a3ea2 tuned signature;
wenzelm
parents: 60638
diff changeset
    56
  val features_of : Proof.context -> string -> stature -> term list -> string list
51177
e8c9755fd14e tuned code: factored out parent computation
blanchet
parents: 51176
diff changeset
    57
  val trim_dependencies : string list -> string list option
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
    58
  val isar_dependencies_of : string Symtab.table * string Symtab.table -> thm -> string list option
57006
blanchet
parents: 57005
diff changeset
    59
  val prover_dependencies_of : Proof.context -> params -> string -> int -> raw_fact list ->
blanchet
parents: 57005
diff changeset
    60
    string Symtab.table * string Symtab.table -> thm -> bool * string list
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
    61
  val attach_parents_to_facts : ('a * thm) list -> ('a * thm) list ->
57006
blanchet
parents: 57005
diff changeset
    62
    (string list * ('a * thm)) list
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
    63
  val num_extra_feature_facts : int
53141
d27e99a6a679 take chained and proximate facts into consideration when computing MaSh features
blanchet
parents: 53140
diff changeset
    64
  val extra_feature_factor : real
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
    65
  val weight_facts_smoothly : 'a list -> ('a * real) list
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
    66
  val weight_facts_steeply : 'a list -> ('a * real) list
57432
blanchet
parents: 57431
diff changeset
    67
  val find_mash_suggestions : Proof.context -> int -> string list -> ('a * thm) list ->
blanchet
parents: 57431
diff changeset
    68
    ('a * thm) list -> ('a * thm) list -> ('a * thm) list * ('a * thm) list
60649
e007aa6a8aa2 more explicit use of context and elimination of Thm.theory_of_thm, although unclear (and untested?) situations remain;
wenzelm
parents: 60641
diff changeset
    69
  val mash_suggested_facts : Proof.context -> string -> params -> int -> term list -> term ->
57407
140e16bc2a40 use right theory name for theorems in evaluation driver
blanchet
parents: 57406
diff changeset
    70
    raw_fact list -> fact list * fact list
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
    71
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
    72
  val mash_unlearn : Proof.context -> unit
54503
blanchet
parents: 54432
diff changeset
    73
  val mash_learn_proof : Proof.context -> params -> term -> ('a * thm) list -> thm list -> unit
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
    74
  val mash_learn_facts : Proof.context -> params -> string -> int -> bool -> Time.time ->
57120
f8112c4b9cb8 extend exporter with new versions of MaSh
blanchet
parents: 57108
diff changeset
    75
    raw_fact list -> string
57006
blanchet
parents: 57005
diff changeset
    76
  val mash_learn : Proof.context -> params -> fact_override -> thm list -> bool -> unit
57432
blanchet
parents: 57431
diff changeset
    77
  val mash_can_suggest_facts : Proof.context -> bool
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
    78
  val mash_can_suggest_facts_fast : Proof.context -> bool
53819
e55d641d0a70 honor MaSh's zero-overhead policy -- no learning if the tool is disabled
blanchet
parents: 53790
diff changeset
    79
57108
dc0b4f50e288 more generous max number of suggestions, for more safety
blanchet
parents: 57107
diff changeset
    80
  val generous_max_suggestions : int -> int
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50755
diff changeset
    81
  val mepo_weight : real
4247cbd78aaf export MeSh data as well
blanchet
parents: 50755
diff changeset
    82
  val mash_weight : real
57006
blanchet
parents: 57005
diff changeset
    83
  val relevant_facts : Proof.context -> params -> string -> int -> fact_override -> term list ->
blanchet
parents: 57005
diff changeset
    84
    term -> raw_fact list -> (string * fact list) list
48248
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
    85
end;
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
    86
48381
1b7d798460bb renamed ML structures
blanchet
parents: 48380
diff changeset
    87
structure Sledgehammer_MaSh : SLEDGEHAMMER_MASH =
48248
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
    88
struct
48249
2bd242c56c90 dummy implementation
blanchet
parents: 48248
diff changeset
    89
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
    90
open ATP_Util
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
    91
open ATP_Problem_Generate
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
    92
open Sledgehammer_Util
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
    93
open Sledgehammer_Fact
55201
1ee776da8da7 renamed ML file
blanchet
parents: 54816
diff changeset
    94
open Sledgehammer_Prover
55202
824c48a539c9 renamed many Sledgehammer ML files to clarify structure
blanchet
parents: 55201
diff changeset
    95
open Sledgehammer_Prover_Minimize
48381
1b7d798460bb renamed ML structures
blanchet
parents: 48380
diff changeset
    96
open Sledgehammer_MePo
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
    97
61318
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
    98
val anonymous_proof_prefix = "."
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
    99
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   100
val trace = Attrib.setup_config_bool @{binding sledgehammer_mash_trace} (K false)
57150
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
   101
val duplicates = Attrib.setup_config_bool @{binding sledgehammer_fact_duplicates} (K false)
51032
69da236d7838 added option to use SNoW as machine learning algo
blanchet
parents: 51029
diff changeset
   102
48308
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
   103
fun trace_msg ctxt msg = if Config.get ctxt trace then tracing (msg ()) else ()
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
   104
57150
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
   105
fun gen_eq_thm ctxt = if Config.get ctxt duplicates then Thm.eq_thm_strict else Thm.eq_thm_prop
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
   106
51008
e096c0dc538b more precise output of selected facts
blanchet
parents: 51007
diff changeset
   107
val MePoN = "MePo"
48319
340187063d84 use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents: 48318
diff changeset
   108
val MaShN = "MaSh"
51008
e096c0dc538b more precise output of selected facts
blanchet
parents: 51007
diff changeset
   109
val MeShN = "MeSh"
48319
340187063d84 use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents: 48318
diff changeset
   110
48379
2b5ad61e2ccc renamed "iter" fact filter to "MePo" (Meng--Paulson)
blanchet
parents: 48378
diff changeset
   111
val mepoN = "mepo"
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
   112
val mashN = "mash"
48379
2b5ad61e2ccc renamed "iter" fact filter to "MePo" (Meng--Paulson)
blanchet
parents: 48378
diff changeset
   113
val meshN = "mesh"
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
   114
48379
2b5ad61e2ccc renamed "iter" fact filter to "MePo" (Meng--Paulson)
blanchet
parents: 48378
diff changeset
   115
val fact_filters = [meshN, mepoN, mashN]
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
   116
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   117
val unlearnN = "unlearn"
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   118
val learn_isarN = "learn_isar"
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
   119
val learn_proverN = "learn_prover"
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   120
val relearn_isarN = "relearn_isar"
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
   121
val relearn_proverN = "relearn_prover"
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   122
57368
blanchet
parents: 57367
diff changeset
   123
fun map_array_at ary f i = Array.update (ary, i, f (Array.sub (ary, i)))
blanchet
parents: 57367
diff changeset
   124
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
   125
type xtab = int * int Symtab.table
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
   126
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
   127
val empty_xtab = (0, Symtab.empty)
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
   128
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
   129
fun add_to_xtab key (next, tab) = (next + 1, Symtab.update_new (key, next) tab)
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
   130
fun maybe_add_to_xtab key = perhaps (try (add_to_xtab key))
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
   131
57552
1072599c43f6 no need for 'mash' subdirectory after removal of Python program
blanchet
parents: 57546
diff changeset
   132
fun state_file () = Path.expand (Path.explode "$ISABELLE_HOME_USER/mash_state")
1072599c43f6 no need for 'mash' subdirectory after removal of Python program
blanchet
parents: 57546
diff changeset
   133
val remove_state_file = try File.rm o state_file
48330
192444b53e86 speed up MaSh queries
blanchet
parents: 48329
diff changeset
   134
57532
c7dc1f0a2b8a tuned terminology
blanchet
parents: 57531
diff changeset
   135
datatype mash_algorithm =
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   136
  MaSh_NB
57459
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   137
| MaSh_kNN
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   138
| MaSh_NB_kNN
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   139
| MaSh_NB_Ext
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
   140
| MaSh_kNN_Ext
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   141
57532
c7dc1f0a2b8a tuned terminology
blanchet
parents: 57531
diff changeset
   142
fun mash_algorithm () =
61043
0810068379d8 eliminated obsolete environment variable
blanchet
parents: 60971
diff changeset
   143
  (case Options.default_string @{system_option MaSh} of
0810068379d8 eliminated obsolete environment variable
blanchet
parents: 60971
diff changeset
   144
    "yes" => SOME MaSh_NB_kNN
0810068379d8 eliminated obsolete environment variable
blanchet
parents: 60971
diff changeset
   145
  | "sml" => SOME MaSh_NB_kNN
0810068379d8 eliminated obsolete environment variable
blanchet
parents: 60971
diff changeset
   146
  | "nb" => SOME MaSh_NB
0810068379d8 eliminated obsolete environment variable
blanchet
parents: 60971
diff changeset
   147
  | "knn" => SOME MaSh_kNN
0810068379d8 eliminated obsolete environment variable
blanchet
parents: 60971
diff changeset
   148
  | "nb_knn" => SOME MaSh_NB_kNN
0810068379d8 eliminated obsolete environment variable
blanchet
parents: 60971
diff changeset
   149
  | "nb_ext" => SOME MaSh_NB_Ext
0810068379d8 eliminated obsolete environment variable
blanchet
parents: 60971
diff changeset
   150
  | "knn_ext" => SOME MaSh_kNN_Ext
0810068379d8 eliminated obsolete environment variable
blanchet
parents: 60971
diff changeset
   151
  | "none" => NONE
0810068379d8 eliminated obsolete environment variable
blanchet
parents: 60971
diff changeset
   152
  | "" => NONE
63692
1bc4bc2c9fd1 killed final stops in Sledgehammer and friends
blanchet
parents: 63518
diff changeset
   153
  | algorithm => (warning ("Unknown MaSh algorithm: " ^ quote algorithm); NONE))
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
   154
57532
c7dc1f0a2b8a tuned terminology
blanchet
parents: 57531
diff changeset
   155
val is_mash_enabled = is_some o mash_algorithm
c7dc1f0a2b8a tuned terminology
blanchet
parents: 57531
diff changeset
   156
val the_mash_algorithm = the_default MaSh_NB_kNN o mash_algorithm
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
   157
57557
242ce8d3d16b record MaSh algorithm in spying data
blanchet
parents: 57555
diff changeset
   158
fun str_of_mash_algorithm MaSh_NB = "nb"
242ce8d3d16b record MaSh algorithm in spying data
blanchet
parents: 57555
diff changeset
   159
  | str_of_mash_algorithm MaSh_kNN = "knn"
242ce8d3d16b record MaSh algorithm in spying data
blanchet
parents: 57555
diff changeset
   160
  | str_of_mash_algorithm MaSh_NB_kNN = "nb_knn"
242ce8d3d16b record MaSh algorithm in spying data
blanchet
parents: 57555
diff changeset
   161
  | str_of_mash_algorithm MaSh_NB_Ext = "nb_ext"
242ce8d3d16b record MaSh algorithm in spying data
blanchet
parents: 57555
diff changeset
   162
  | str_of_mash_algorithm MaSh_kNN_Ext = "knn_ext"
242ce8d3d16b record MaSh algorithm in spying data
blanchet
parents: 57555
diff changeset
   163
57459
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   164
fun scaled_avg [] = 0
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   165
  | scaled_avg xs = Real.ceil (100000000.0 * fold (curry (op +)) xs 0.0) div length xs
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   166
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   167
fun avg [] = 0.0
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   168
  | avg xs = fold (curry (op +)) xs 0.0 / Real.fromInt (length xs)
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   169
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   170
fun normalize_scores _ [] = []
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   171
  | normalize_scores max_facts xs =
62826
eb94e570c1a4 prefer infix operations;
wenzelm
parents: 62735
diff changeset
   172
    map (apsnd (curry (op *) (1.0 / avg (map snd (take max_facts xs))))) xs
57459
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   173
61322
44f4ffe2b210 speed up MaSh duplicate check
blanchet
parents: 61321
diff changeset
   174
fun mesh_facts maybe_distinct _ max_facts [(_, (sels, unks))] =
44f4ffe2b210 speed up MaSh duplicate check
blanchet
parents: 61321
diff changeset
   175
    map fst (take max_facts sels) @ take (max_facts - length sels) unks
44f4ffe2b210 speed up MaSh duplicate check
blanchet
parents: 61321
diff changeset
   176
    |> maybe_distinct
44f4ffe2b210 speed up MaSh duplicate check
blanchet
parents: 61321
diff changeset
   177
  | mesh_facts _ fact_eq max_facts mess =
57459
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   178
    let
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   179
      val mess = mess |> map (apsnd (apfst (normalize_scores max_facts)))
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   180
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   181
      fun score_in fact (global_weight, (sels, unks)) =
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   182
        let val score_at = try (nth sels) #> Option.map (fn (_, score) => global_weight * score) in
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   183
          (case find_index (curry fact_eq fact o fst) sels of
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   184
            ~1 => if member fact_eq unks fact then NONE else SOME 0.0
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   185
          | rank => score_at rank)
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   186
        end
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   187
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   188
      fun weight_of fact = mess |> map_filter (score_in fact) |> scaled_avg
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   189
    in
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   190
      fold (union fact_eq o map fst o take max_facts o fst o snd) mess []
59058
a78612c67ec0 renamed "pairself" to "apply2", in accordance to @{apply 2};
wenzelm
parents: 58919
diff changeset
   191
      |> map (`weight_of) |> sort (int_ord o apply2 fst o swap)
57459
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   192
      |> map snd |> take max_facts
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   193
    end
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   194
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   195
fun smooth_weight_of_fact rank = Math.pow (1.3, 15.5 - 0.2 * Real.fromInt rank) + 15.0 (* FUDGE *)
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   196
fun steep_weight_of_fact rank = Math.pow (0.62, log2 (Real.fromInt (rank + 1))) (* FUDGE *)
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   197
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   198
fun weight_facts_smoothly facts = facts ~~ map smooth_weight_of_fact (0 upto length facts - 1)
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   199
fun weight_facts_steeply facts = facts ~~ map steep_weight_of_fact (0 upto length facts - 1)
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   200
57660
86b853448f08 fixed sorting (broken since 9cc802a8ab06)
blanchet
parents: 57658
diff changeset
   201
fun sort_array_suffix cmp needed a =
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   202
  let
57406
e844dcf57deb killed dead code
blanchet
parents: 57405
diff changeset
   203
    exception BOTTOM of int
e844dcf57deb killed dead code
blanchet
parents: 57405
diff changeset
   204
57460
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
   205
    val al = Array.length a
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
   206
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   207
    fun maxson l i =
57017
blanchet
parents: 57014
diff changeset
   208
      let val i31 = i + i + i + 1 in
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   209
        if i31 + 2 < l then
57017
blanchet
parents: 57014
diff changeset
   210
          let val x = Unsynchronized.ref i31 in
67560
0fa87bd86566 tuned signature: more operations;
wenzelm
parents: 65458
diff changeset
   211
            if is_less (cmp (Array.sub (a, i31), Array.sub (a, i31 + 1))) then x := i31 + 1 else ();
0fa87bd86566 tuned signature: more operations;
wenzelm
parents: 65458
diff changeset
   212
            if is_less (cmp (Array.sub (a, !x), Array.sub (a, i31 + 2))) then x := i31 + 2 else ();
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   213
            !x
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   214
          end
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   215
        else
67560
0fa87bd86566 tuned signature: more operations;
wenzelm
parents: 65458
diff changeset
   216
          if i31 + 1 < l andalso is_less (cmp (Array.sub (a, i31), Array.sub (a, i31 + 1)))
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   217
          then i31 + 1 else if i31 < l then i31 else raise BOTTOM i
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   218
      end
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   219
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   220
    fun trickledown l i e =
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   221
      let val j = maxson l i in
67560
0fa87bd86566 tuned signature: more operations;
wenzelm
parents: 65458
diff changeset
   222
        if is_greater (cmp (Array.sub (a, j), e)) then
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   223
          (Array.update (a, i, Array.sub (a, j)); trickledown l j e)
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   224
        else
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   225
          Array.update (a, i, e)
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   226
      end
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   227
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   228
    fun trickle l i e = trickledown l i e handle BOTTOM i => Array.update (a, i, e)
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   229
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   230
    fun bubbledown l i =
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   231
      let val j = maxson l i in
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   232
        Array.update (a, i, Array.sub (a, j));
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   233
        bubbledown l j
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   234
      end
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   235
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   236
    fun bubble l i = bubbledown l i handle BOTTOM i => i
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   237
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   238
    fun trickleup i e =
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   239
      let val father = (i - 1) div 3 in
67560
0fa87bd86566 tuned signature: more operations;
wenzelm
parents: 65458
diff changeset
   240
        if is_less (cmp (Array.sub (a, father), e)) then
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   241
          (Array.update (a, i, Array.sub (a, father));
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   242
           if father > 0 then trickleup father e else Array.update (a, 0, e))
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   243
        else
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   244
          Array.update (a, i, e)
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   245
      end
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   246
57355
blanchet
parents: 57354
diff changeset
   247
    fun for i = if i < 0 then () else (trickle al i (Array.sub (a, i)); for (i - 1))
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   248
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   249
    fun for2 i =
57660
86b853448f08 fixed sorting (broken since 9cc802a8ab06)
blanchet
parents: 57658
diff changeset
   250
      if i < Integer.max 2 (al - needed) then
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   251
        ()
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   252
      else
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   253
        let val e = Array.sub (a, i) in
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   254
          Array.update (a, i, Array.sub (a, 0));
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   255
          trickleup (bubble i 0) e;
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   256
          for2 (i - 1)
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   257
        end
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   258
  in
57355
blanchet
parents: 57354
diff changeset
   259
    for (((al + 1) div 3) - 1);
blanchet
parents: 57354
diff changeset
   260
    for2 (al - 1);
blanchet
parents: 57354
diff changeset
   261
    if al > 1 then
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   262
      let val e = Array.sub (a, 1) in
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   263
        Array.update (a, 1, Array.sub (a, 0));
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   264
        Array.update (a, 0, e)
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   265
      end
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   266
    else
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   267
      ()
57009
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   268
  end
8cb6a5f1ae84 added SML implementation of MaSh
blanchet
parents: 57007
diff changeset
   269
57660
86b853448f08 fixed sorting (broken since 9cc802a8ab06)
blanchet
parents: 57658
diff changeset
   270
fun rev_sort_list_prefix cmp needed xs =
57460
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
   271
  let val ary = Array.fromList xs in
57660
86b853448f08 fixed sorting (broken since 9cc802a8ab06)
blanchet
parents: 57658
diff changeset
   272
    sort_array_suffix cmp needed ary;
86b853448f08 fixed sorting (broken since 9cc802a8ab06)
blanchet
parents: 57658
diff changeset
   273
    Array.foldl (op ::) [] ary
57460
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
   274
  end
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
   275
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
   276
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   277
(*** Convenience functions for synchronized access ***)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   278
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   279
fun synchronized_timed_value var time_limit =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   280
  Synchronized.timed_access var time_limit (fn value => SOME (value, value))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   281
fun synchronized_timed_change_result var time_limit f =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   282
  Synchronized.timed_access var time_limit (SOME o f)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   283
fun synchronized_timed_change var time_limit f =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   284
  synchronized_timed_change_result var time_limit (fn x => ((), f x))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   285
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   286
fun mash_time_limit _ = SOME (seconds 0.1)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   287
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   288
57460
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
   289
(*** Isabelle-agnostic machine learning ***)
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
   290
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
   291
structure MaSh =
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
   292
struct
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
   293
57574
e4ddd52e1b96 made SML/NJ happier
blanchet
parents: 57561
diff changeset
   294
fun select_visible_facts (big_number : real) recommends =
57364
c1060d10089f honor visible in SML naive Bayes
blanchet
parents: 57363
diff changeset
   295
  List.app (fn at =>
c1060d10089f honor visible in SML naive Bayes
blanchet
parents: 57363
diff changeset
   296
    let val (j, ov) = Array.sub (recommends, at) in
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
   297
      Array.update (recommends, at, (j, big_number + ov))
57364
c1060d10089f honor visible in SML naive Bayes
blanchet
parents: 57363
diff changeset
   298
    end)
c1060d10089f honor visible in SML naive Bayes
blanchet
parents: 57363
diff changeset
   299
57374
blanchet
parents: 57373
diff changeset
   300
fun wider_array_of_vector init vec =
blanchet
parents: 57373
diff changeset
   301
  let val ary = Array.array init in
blanchet
parents: 57373
diff changeset
   302
    Array.copyVec {src = vec, dst = ary, di = 0};
blanchet
parents: 57373
diff changeset
   303
    ary
blanchet
parents: 57373
diff changeset
   304
  end
blanchet
parents: 57373
diff changeset
   305
57531
4d9895d39b59 improvements to the machine learning algos (due to Cezary K.)
blanchet
parents: 57462
diff changeset
   306
val nb_def_prior_weight = 1000 (* FUDGE *)
57095
001ec97c3e59 updated naive Bayes
blanchet
parents: 57089
diff changeset
   307
57374
blanchet
parents: 57373
diff changeset
   308
fun learn_facts (tfreq0, sfreq0, dffreq0) num_facts0 num_facts num_feats depss featss =
57354
ded92100ffd7 refactoring
blanchet
parents: 57353
diff changeset
   309
  let
57374
blanchet
parents: 57373
diff changeset
   310
    val tfreq = wider_array_of_vector (num_facts, 0) tfreq0
blanchet
parents: 57373
diff changeset
   311
    val sfreq = wider_array_of_vector (num_facts, Inttab.empty) sfreq0
blanchet
parents: 57373
diff changeset
   312
    val dffreq = wider_array_of_vector (num_feats, 0) dffreq0
blanchet
parents: 57373
diff changeset
   313
blanchet
parents: 57373
diff changeset
   314
    fun learn_one th feats deps =
57354
ded92100ffd7 refactoring
blanchet
parents: 57353
diff changeset
   315
      let
57355
blanchet
parents: 57354
diff changeset
   316
        fun add_th weight t =
blanchet
parents: 57354
diff changeset
   317
          let
blanchet
parents: 57354
diff changeset
   318
            val im = Array.sub (sfreq, t)
57374
blanchet
parents: 57373
diff changeset
   319
            fun fold_fn s = Inttab.map_default (s, 0) (Integer.add weight)
57355
blanchet
parents: 57354
diff changeset
   320
          in
57368
blanchet
parents: 57367
diff changeset
   321
            map_array_at tfreq (Integer.add weight) t;
57355
blanchet
parents: 57354
diff changeset
   322
            Array.update (sfreq, t, fold fold_fn feats im)
blanchet
parents: 57354
diff changeset
   323
          end
blanchet
parents: 57354
diff changeset
   324
57368
blanchet
parents: 57367
diff changeset
   325
        val add_sym = map_array_at dffreq (Integer.add 1)
57354
ded92100ffd7 refactoring
blanchet
parents: 57353
diff changeset
   326
      in
57355
blanchet
parents: 57354
diff changeset
   327
        add_th nb_def_prior_weight th;
blanchet
parents: 57354
diff changeset
   328
        List.app (add_th 1) deps;
blanchet
parents: 57354
diff changeset
   329
        List.app add_sym feats
57354
ded92100ffd7 refactoring
blanchet
parents: 57353
diff changeset
   330
      end
ded92100ffd7 refactoring
blanchet
parents: 57353
diff changeset
   331
57102
blanchet
parents: 57101
diff changeset
   332
    fun for i =
57383
ba0fe0639bc8 right array indexing
blanchet
parents: 57382
diff changeset
   333
      if i = num_facts then ()
ba0fe0639bc8 right array indexing
blanchet
parents: 57382
diff changeset
   334
      else (learn_one i (Vector.sub (featss, i)) (Vector.sub (depss, i)); for (i + 1))
57102
blanchet
parents: 57101
diff changeset
   335
  in
57383
ba0fe0639bc8 right array indexing
blanchet
parents: 57382
diff changeset
   336
    for num_facts0;
57374
blanchet
parents: 57373
diff changeset
   337
    (Array.vector tfreq, Array.vector sfreq, Array.vector dffreq)
57102
blanchet
parents: 57101
diff changeset
   338
  end
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   339
57376
f40ac83d076c refactoring
blanchet
parents: 57375
diff changeset
   340
fun naive_bayes (tfreq, sfreq, dffreq) num_facts max_suggs visible_facts goal_feats =
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   341
  let
57531
4d9895d39b59 improvements to the machine learning algos (due to Cezary K.)
blanchet
parents: 57462
diff changeset
   342
    val tau = 0.2 (* FUDGE *)
4d9895d39b59 improvements to the machine learning algos (due to Cezary K.)
blanchet
parents: 57462
diff changeset
   343
    val pos_weight = 5.0 (* FUDGE *)
4d9895d39b59 improvements to the machine learning algos (due to Cezary K.)
blanchet
parents: 57462
diff changeset
   344
    val def_val = ~18.0 (* FUDGE *)
4d9895d39b59 improvements to the machine learning algos (due to Cezary K.)
blanchet
parents: 57462
diff changeset
   345
    val init_val = 30.0 (* FUDGE *)
57124
e4c2c792226f make SML naive Bayes closer to Python version
blanchet
parents: 57122
diff changeset
   346
57366
blanchet
parents: 57365
diff changeset
   347
    val ln_afreq = Math.ln (Real.fromInt num_facts)
57374
blanchet
parents: 57373
diff changeset
   348
    val idf = Vector.map (fn i => ln_afreq - Math.ln (Real.fromInt i)) dffreq
57366
blanchet
parents: 57365
diff changeset
   349
57359
dd89d0ec8e41 avoid subscripting array with ~1
blanchet
parents: 57358
diff changeset
   350
    fun tfidf feat = Vector.sub (idf, feat)
57102
blanchet
parents: 57101
diff changeset
   351
blanchet
parents: 57101
diff changeset
   352
    fun log_posterior i =
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   353
      let
57374
blanchet
parents: 57373
diff changeset
   354
        val tfreq = Real.fromInt (Vector.sub (tfreq, i))
57097
blanchet
parents: 57096
diff changeset
   355
57531
4d9895d39b59 improvements to the machine learning algos (due to Cezary K.)
blanchet
parents: 57462
diff changeset
   356
        fun add_feat (f, fw0) (res, sfh) =
57102
blanchet
parents: 57101
diff changeset
   357
          (case Inttab.lookup sfh f of
blanchet
parents: 57101
diff changeset
   358
            SOME sf =>
57401
02f56126b4e4 reintroduced 'extra features' but with lower weight than before (to account for tfidf)
blanchet
parents: 57387
diff changeset
   359
            (res + fw0 * tfidf f * Math.ln (pos_weight * Real.fromInt sf / tfreq),
57102
blanchet
parents: 57101
diff changeset
   360
             Inttab.delete f sfh)
57409
c9e8cd8ec9e2 correctly take weights into consideration
blanchet
parents: 57407
diff changeset
   361
          | NONE => (res + fw0 * tfidf f * def_val, sfh))
57097
blanchet
parents: 57096
diff changeset
   362
57531
4d9895d39b59 improvements to the machine learning algos (due to Cezary K.)
blanchet
parents: 57462
diff changeset
   363
        val (res, sfh) = fold add_feat goal_feats (init_val * Math.ln tfreq, Vector.sub (sfreq, i))
57097
blanchet
parents: 57096
diff changeset
   364
57409
c9e8cd8ec9e2 correctly take weights into consideration
blanchet
parents: 57407
diff changeset
   365
        fun fold_sfh (f, sf) sow =
59476
blanchet
parents: 59471
diff changeset
   366
          sow + tfidf f * Math.ln (1.0 - Real.fromInt (sf - 1) / tfreq)
57097
blanchet
parents: 57096
diff changeset
   367
57102
blanchet
parents: 57101
diff changeset
   368
        val sum_of_weights = Inttab.fold fold_sfh sfh 0.0
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   369
      in
57278
8f7d6f01a775 more MaSh engine variations, for evaluations
blanchet
parents: 57277
diff changeset
   370
        res + tau * sum_of_weights
57097
blanchet
parents: 57096
diff changeset
   371
      end
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   372
57356
9816f692b0ca refactoring
blanchet
parents: 57355
diff changeset
   373
    val posterior = Array.tabulate (num_facts, (fn j => (j, log_posterior j)))
57102
blanchet
parents: 57101
diff changeset
   374
57364
c1060d10089f honor visible in SML naive Bayes
blanchet
parents: 57363
diff changeset
   375
    fun ret at acc =
c1060d10089f honor visible in SML naive Bayes
blanchet
parents: 57363
diff changeset
   376
      if at = num_facts then acc else ret (at + 1) (Array.sub (posterior, at) :: acc)
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   377
  in
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
   378
    select_visible_facts 100000.0 posterior visible_facts;
59058
a78612c67ec0 renamed "pairself" to "apply2", in accordance to @{apply 2};
wenzelm
parents: 58919
diff changeset
   379
    sort_array_suffix (Real.compare o apply2 snd) max_suggs posterior;
57364
c1060d10089f honor visible in SML naive Bayes
blanchet
parents: 57363
diff changeset
   380
    ret (Integer.max 0 (num_facts - max_suggs)) []
57029
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   381
  end
75cc30d2b83f added naive Bayes ML implementation, due to Cezary Kaliszyk (like k-NN)
blanchet
parents: 57028
diff changeset
   382
59477
1b3385de296d less confusing constant
blanchet
parents: 59476
diff changeset
   383
val initial_k = 0
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   384
57459
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   385
fun k_nearest_neighbors dffreq num_facts num_feats depss featss max_suggs visible_facts goal_feats =
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   386
  let
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   387
    exception EXIT of unit
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   388
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   389
    val ln_afreq = Math.ln (Real.fromInt num_facts)
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   390
    fun tfidf feat = ln_afreq - Math.ln (Real.fromInt (Vector.sub (dffreq, feat)))
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   391
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   392
    val overlaps_sqr = Array.tabulate (num_facts, rpair 0.0)
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   393
57459
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   394
    val feat_facts = Array.array (num_feats, [])
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   395
    val _ = Vector.foldl (fn (feats, fact) =>
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   396
      (List.app (map_array_at feat_facts (cons fact)) feats; fact + 1)) 0 featss
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   397
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   398
    fun do_feat (s, sw0) =
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   399
      let
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   400
        val sw = sw0 * tfidf s
59476
blanchet
parents: 59471
diff changeset
   401
        val w6 = Math.pow (sw, 6.0 (* FUDGE *))
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   402
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   403
        fun inc_overlap j =
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   404
          let val (_, ov) = Array.sub (overlaps_sqr, j) in
57531
4d9895d39b59 improvements to the machine learning algos (due to Cezary K.)
blanchet
parents: 57462
diff changeset
   405
            Array.update (overlaps_sqr, j, (j, w6 + ov))
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   406
          end
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   407
      in
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   408
        List.app inc_overlap (Array.sub (feat_facts, s))
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   409
      end
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   410
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   411
    val _ = List.app do_feat goal_feats
59058
a78612c67ec0 renamed "pairself" to "apply2", in accordance to @{apply 2};
wenzelm
parents: 58919
diff changeset
   412
    val _ = sort_array_suffix (Real.compare o apply2 snd) num_facts overlaps_sqr
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   413
    val no_recommends = Unsynchronized.ref 0
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   414
    val recommends = Array.tabulate (num_facts, rpair 0.0)
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   415
    val age = Unsynchronized.ref 500000000.0
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   416
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   417
    fun inc_recommend v j =
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   418
      let val (_, ov) = Array.sub (recommends, j) in
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   419
        if ov <= 0.0 then
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   420
          (no_recommends := !no_recommends + 1; Array.update (recommends, j, (j, !age + ov)))
59476
blanchet
parents: 59471
diff changeset
   421
        else
blanchet
parents: 59471
diff changeset
   422
          Array.update (recommends, j, (j, v + ov))
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   423
      end
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   424
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   425
    val k = Unsynchronized.ref 0
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   426
    fun do_k k =
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   427
      if k >= num_facts then
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   428
        raise EXIT ()
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   429
      else
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   430
        let
57531
4d9895d39b59 improvements to the machine learning algos (due to Cezary K.)
blanchet
parents: 57462
diff changeset
   431
          val deps_factor = 2.7 (* FUDGE *)
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   432
          val (j, o2) = Array.sub (overlaps_sqr, num_facts - k - 1)
57531
4d9895d39b59 improvements to the machine learning algos (due to Cezary K.)
blanchet
parents: 57462
diff changeset
   433
          val _ = inc_recommend o2 j
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   434
          val ds = Vector.sub (depss, j)
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   435
          val l = Real.fromInt (length ds)
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   436
        in
57531
4d9895d39b59 improvements to the machine learning algos (due to Cezary K.)
blanchet
parents: 57462
diff changeset
   437
          List.app (inc_recommend (deps_factor * o2 / l)) ds
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   438
        end
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   439
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   440
    fun while1 () =
59477
1b3385de296d less confusing constant
blanchet
parents: 59476
diff changeset
   441
      if !k = initial_k + 1 then () else (do_k (!k); k := !k + 1; while1 ())
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   442
      handle EXIT () => ()
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   443
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   444
    fun while2 () =
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   445
      if !no_recommends >= max_suggs then ()
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   446
      else (do_k (!k); k := !k + 1; age := !age - 10000.0; while2 ())
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   447
      handle EXIT () => ()
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   448
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   449
    fun ret acc at =
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   450
      if at = num_facts then acc else ret (Array.sub (recommends, at) :: acc) (at + 1)
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   451
  in
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   452
    while1 ();
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   453
    while2 ();
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   454
    select_visible_facts 1000000000.0 recommends visible_facts;
59058
a78612c67ec0 renamed "pairself" to "apply2", in accordance to @{apply 2};
wenzelm
parents: 58919
diff changeset
   455
    sort_array_suffix (Real.compare o apply2 snd) max_suggs recommends;
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   456
    ret [] (Integer.max 0 (num_facts - max_suggs))
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   457
  end
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   458
57125
2f620ef839ee added another way of invoking Python code, for experiments
blanchet
parents: 57124
diff changeset
   459
(* experimental *)
57376
f40ac83d076c refactoring
blanchet
parents: 57375
diff changeset
   460
fun external_tool tool max_suggs learns goal_feats =
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   461
  let
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   462
    val ser = string_of_int (serial ()) (* poor person's attempt at thread-safety *)
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   463
    val ocs = TextIO.openOut ("adv_syms" ^ ser)
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   464
    val ocd = TextIO.openOut ("adv_deps" ^ ser)
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   465
    val ocq = TextIO.openOut ("adv_seq" ^ ser)
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   466
    val occ = TextIO.openOut ("adv_conj" ^ ser)
57296
8a98f08a0523 tweaked experimental setup
blanchet
parents: 57294
diff changeset
   467
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   468
    fun os oc s = TextIO.output (oc, s)
57296
8a98f08a0523 tweaked experimental setup
blanchet
parents: 57294
diff changeset
   469
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   470
    fun ol _ _ _ [] = ()
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   471
      | ol _ f _ [e] = f e
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   472
      | ol oc f sep (h :: t) = (f h; os oc sep; ol oc f sep t)
57296
8a98f08a0523 tweaked experimental setup
blanchet
parents: 57294
diff changeset
   473
57294
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   474
    fun do_learn (name, feats, deps) =
57357
30ee18eb23ac new version of adaptive k-NN with TFIDF
blanchet
parents: 57356
diff changeset
   475
      (os ocs name; os ocs ":"; ol ocs (os ocs o quote) ", " feats; os ocs "\n";
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   476
       os ocd name; os ocd ":"; ol ocd (os ocd) " " deps; os ocd "\n"; os ocq name; os ocq "\n")
57296
8a98f08a0523 tweaked experimental setup
blanchet
parents: 57294
diff changeset
   477
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   478
    fun forkexec no =
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   479
      let
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   480
        val cmd =
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   481
          "~/misc/" ^ tool ^ " adv_syms" ^ ser ^ " adv_deps" ^ ser ^ " " ^ string_of_int no ^
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   482
          " adv_seq" ^ ser ^ " < adv_conj" ^ ser
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   483
      in
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   484
        fst (Isabelle_System.bash_output cmd)
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   485
        |> space_explode " "
57294
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   486
        |> filter_out (curry (op =) "")
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   487
      end
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   488
  in
57401
02f56126b4e4 reintroduced 'extra features' but with lower weight than before (to account for tfidf)
blanchet
parents: 57387
diff changeset
   489
    (List.app do_learn learns; ol occ (os occ o quote) ", " (map fst goal_feats);
57297
3d4647ea3e57 added another experimental engine
blanchet
parents: 57296
diff changeset
   490
     TextIO.closeOut ocs; TextIO.closeOut ocd; TextIO.closeOut ocq; TextIO.closeOut occ;
57294
ef9d4e1ceb00 use strings to communicate with external process, to ease debugging
blanchet
parents: 57291
diff changeset
   491
     forkexec max_suggs)
57291
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   492
  end
1bac14e0a728 added experimental MaSh engine
blanchet
parents: 57281
diff changeset
   493
57561
8200e1eb367f made SML/NJ happier
blanchet
parents: 57557
diff changeset
   494
fun k_nearest_neighbors_ext max_suggs =
59477
1b3385de296d less confusing constant
blanchet
parents: 59476
diff changeset
   495
  external_tool ("newknn/knn" ^ " " ^ string_of_int initial_k) max_suggs
57561
8200e1eb367f made SML/NJ happier
blanchet
parents: 57557
diff changeset
   496
fun naive_bayes_ext max_suggs = external_tool "predict/nbayes" max_suggs
57374
blanchet
parents: 57373
diff changeset
   497
57532
c7dc1f0a2b8a tuned terminology
blanchet
parents: 57531
diff changeset
   498
fun query_external ctxt algorithm max_suggs learns goal_feats =
57432
blanchet
parents: 57431
diff changeset
   499
  (trace_msg ctxt (fn () => "MaSh query external " ^ commas (map fst goal_feats));
57532
c7dc1f0a2b8a tuned terminology
blanchet
parents: 57531
diff changeset
   500
   (case algorithm of
57458
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   501
     MaSh_NB_Ext => naive_bayes_ext max_suggs learns goal_feats
419180c354c0 tuned (reordered) code
blanchet
parents: 57432
diff changeset
   502
   | MaSh_kNN_Ext => k_nearest_neighbors_ext max_suggs learns goal_feats))
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
   503
57532
c7dc1f0a2b8a tuned terminology
blanchet
parents: 57531
diff changeset
   504
fun query_internal ctxt algorithm num_facts num_feats (fact_names, featss, depss)
57378
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   505
    (freqs as (_, _, dffreq)) visible_facts max_suggs goal_feats int_goal_feats =
57459
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   506
  let
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   507
    fun nb () =
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   508
      naive_bayes freqs num_facts max_suggs visible_facts int_goal_feats
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   509
      |> map fst
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   510
    fun knn () =
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   511
      k_nearest_neighbors dffreq num_facts num_feats depss featss max_suggs visible_facts
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   512
        int_goal_feats
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   513
      |> map fst
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   514
  in
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   515
    (trace_msg ctxt (fn () => "MaSh query internal " ^ commas (map fst goal_feats) ^ " from {" ^
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   516
       elide_string 1000 (space_implode " " (Vector.foldr (op ::) [] fact_names)) ^ "}");
57532
c7dc1f0a2b8a tuned terminology
blanchet
parents: 57531
diff changeset
   517
     (case algorithm of
57459
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   518
       MaSh_NB => nb ()
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   519
     | MaSh_kNN => knn ()
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   520
     | MaSh_NB_kNN =>
61322
44f4ffe2b210 speed up MaSh duplicate check
blanchet
parents: 61321
diff changeset
   521
       mesh_facts I (op =) max_suggs
57552
1072599c43f6 no need for 'mash' subdirectory after removal of Python program
blanchet
parents: 57546
diff changeset
   522
         [(0.5 (* FUDGE *), (weight_facts_steeply (nb ()), [])),
1072599c43f6 no need for 'mash' subdirectory after removal of Python program
blanchet
parents: 57546
diff changeset
   523
          (0.5 (* FUDGE *), (weight_facts_steeply (knn ()), []))])
57459
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   524
     |> map (curry Vector.sub fact_names))
22023ab4df3c mix NB and kNN
blanchet
parents: 57458
diff changeset
   525
   end
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   526
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   527
end;
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   528
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
   529
57432
blanchet
parents: 57431
diff changeset
   530
(*** Persistent, stringly-typed state ***)
blanchet
parents: 57431
diff changeset
   531
blanchet
parents: 57431
diff changeset
   532
fun meta_char c =
blanchet
parents: 57431
diff changeset
   533
  if Char.isAlphaNum c orelse c = #"_" orelse c = #"." orelse c = #"(" orelse c = #")" orelse
63696
af310e622d64 tuned MaSh's metacharacters to avoid needless decoding
blanchet
parents: 63695
diff changeset
   534
     c = #"," orelse c = #"'" then
57432
blanchet
parents: 57431
diff changeset
   535
    String.str c
blanchet
parents: 57431
diff changeset
   536
  else
blanchet
parents: 57431
diff changeset
   537
    (* fixed width, in case more digits follow *)
blanchet
parents: 57431
diff changeset
   538
    "%" ^ stringN_of_int 3 (Char.ord c)
blanchet
parents: 57431
diff changeset
   539
blanchet
parents: 57431
diff changeset
   540
fun unmeta_chars accum [] = String.implode (rev accum)
blanchet
parents: 57431
diff changeset
   541
  | unmeta_chars accum (#"%" :: d1 :: d2 :: d3 :: cs) =
blanchet
parents: 57431
diff changeset
   542
    (case Int.fromString (String.implode [d1, d2, d3]) of
blanchet
parents: 57431
diff changeset
   543
      SOME n => unmeta_chars (Char.chr n :: accum) cs
blanchet
parents: 57431
diff changeset
   544
    | NONE => "" (* error *))
blanchet
parents: 57431
diff changeset
   545
  | unmeta_chars _ (#"%" :: _) = "" (* error *)
blanchet
parents: 57431
diff changeset
   546
  | unmeta_chars accum (c :: cs) = unmeta_chars (c :: accum) cs
blanchet
parents: 57431
diff changeset
   547
blanchet
parents: 57431
diff changeset
   548
val encode_str = String.translate meta_char
63695
9ad6a63cc8bd optimization in MaSh parsing
blanchet
parents: 63692
diff changeset
   549
val encode_strs = map encode_str #> space_implode " "
57432
blanchet
parents: 57431
diff changeset
   550
63695
9ad6a63cc8bd optimization in MaSh parsing
blanchet
parents: 63692
diff changeset
   551
fun decode_str s =
9ad6a63cc8bd optimization in MaSh parsing
blanchet
parents: 63692
diff changeset
   552
  if String.isSubstring "%" s then unmeta_chars [] (String.explode s) else s;
9ad6a63cc8bd optimization in MaSh parsing
blanchet
parents: 63692
diff changeset
   553
9ad6a63cc8bd optimization in MaSh parsing
blanchet
parents: 63692
diff changeset
   554
fun decode_strs s =
9ad6a63cc8bd optimization in MaSh parsing
blanchet
parents: 63692
diff changeset
   555
  space_explode " " s |> String.isSubstring "%" s ? map decode_str;
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   556
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   557
datatype proof_kind = Isar_Proof | Automatic_Proof | Isar_Proof_wegen_Prover_Flop
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   558
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   559
fun str_of_proof_kind Isar_Proof = "i"
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
   560
  | str_of_proof_kind Automatic_Proof = "a"
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
   561
  | str_of_proof_kind Isar_Proof_wegen_Prover_Flop = "x"
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   562
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   563
fun proof_kind_of_str "a" = Automatic_Proof
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
   564
  | proof_kind_of_str "x" = Isar_Proof_wegen_Prover_Flop
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   565
  | proof_kind_of_str _ (* "i" *) = Isar_Proof
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   566
57011
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
   567
fun add_edge_to name parent =
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
   568
  Graph.default_node (parent, (Isar_Proof, [], []))
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
   569
  #> Graph.add_edge (parent, name)
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
   570
57661
1586d0479f2c eliminated source of 'DUP's in MaSh
blanchet
parents: 57660
diff changeset
   571
fun add_node kind name parents feats deps (accum as (access_G, (fact_xtab, feat_xtab), learns)) =
1586d0479f2c eliminated source of 'DUP's in MaSh
blanchet
parents: 57660
diff changeset
   572
  let val fact_xtab' = add_to_xtab name fact_xtab in
1586d0479f2c eliminated source of 'DUP's in MaSh
blanchet
parents: 57660
diff changeset
   573
    ((Graph.new_node (name, (kind, feats, deps)) access_G
1586d0479f2c eliminated source of 'DUP's in MaSh
blanchet
parents: 57660
diff changeset
   574
      handle Graph.DUP _ => Graph.map_node name (K (kind, feats, deps)) access_G)
1586d0479f2c eliminated source of 'DUP's in MaSh
blanchet
parents: 57660
diff changeset
   575
     |> fold (add_edge_to name) parents,
1586d0479f2c eliminated source of 'DUP's in MaSh
blanchet
parents: 57660
diff changeset
   576
     (fact_xtab', fold maybe_add_to_xtab feats feat_xtab),
1586d0479f2c eliminated source of 'DUP's in MaSh
blanchet
parents: 57660
diff changeset
   577
     (name, feats, deps) :: learns)
1586d0479f2c eliminated source of 'DUP's in MaSh
blanchet
parents: 57660
diff changeset
   578
  end
1586d0479f2c eliminated source of 'DUP's in MaSh
blanchet
parents: 57660
diff changeset
   579
  handle Symtab.DUP _ => accum (* robustness (in case the state file violates the invariant) *)
57011
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
   580
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   581
fun try_graph ctxt when def f =
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   582
  f ()
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   583
  handle
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   584
    Graph.CYCLES (cycle :: _) =>
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   585
    (trace_msg ctxt (fn () => "Cycle involving " ^ commas cycle ^ " when " ^ when); def)
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   586
  | Graph.DUP name =>
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   587
    (trace_msg ctxt (fn () => "Duplicate fact " ^ quote name ^ " when " ^ when); def)
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   588
  | Graph.UNDEF name =>
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   589
    (trace_msg ctxt (fn () => "Unknown fact " ^ quote name ^ " when " ^ when); def)
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   590
  | exn =>
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   591
    if Exn.is_interrupt exn then
62505
9e2a65912111 clarified modules;
wenzelm
parents: 61755
diff changeset
   592
      Exn.reraise exn
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   593
    else
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   594
      (trace_msg ctxt (fn () => "Internal error when " ^ when ^ ":\n" ^ Runtime.exn_message exn);
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   595
       def)
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   596
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   597
fun graph_info G =
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   598
  string_of_int (length (Graph.keys G)) ^ " node(s), " ^
57006
blanchet
parents: 57005
diff changeset
   599
  string_of_int (fold (Integer.add o length o snd) (Graph.dest G) 0) ^ " edge(s), " ^
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   600
  string_of_int (length (Graph.maximals G)) ^ " maximal"
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   601
57545
2126b355f0ca made SML/NJ happier
blanchet
parents: 57532
diff changeset
   602
type ffds = string vector * int list vector * int list vector
2126b355f0ca made SML/NJ happier
blanchet
parents: 57532
diff changeset
   603
type freqs = int vector * int Inttab.table vector * int vector
2126b355f0ca made SML/NJ happier
blanchet
parents: 57532
diff changeset
   604
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
   605
type mash_state =
57358
545d02691b32 killed dead data
blanchet
parents: 57357
diff changeset
   606
  {access_G : (proof_kind * string list * string list) Graph.T,
57374
blanchet
parents: 57373
diff changeset
   607
   xtabs : xtab * xtab,
57545
2126b355f0ca made SML/NJ happier
blanchet
parents: 57532
diff changeset
   608
   ffds : ffds,
2126b355f0ca made SML/NJ happier
blanchet
parents: 57532
diff changeset
   609
   freqs : freqs,
57365
blanchet
parents: 57364
diff changeset
   610
   dirty_facts : string list option}
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   611
57378
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   612
val empty_xtabs = (empty_xtab, empty_xtab)
57545
2126b355f0ca made SML/NJ happier
blanchet
parents: 57532
diff changeset
   613
val empty_ffds = (Vector.fromList [], Vector.fromList [], Vector.fromList []) : ffds
2126b355f0ca made SML/NJ happier
blanchet
parents: 57532
diff changeset
   614
val empty_freqs = (Vector.fromList [], Vector.fromList [], Vector.fromList []) : freqs
57378
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   615
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
   616
val empty_state =
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
   617
  {access_G = Graph.empty,
57378
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   618
   xtabs = empty_xtabs,
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   619
   ffds = empty_ffds,
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   620
   freqs = empty_freqs,
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
   621
   dirty_facts = SOME []} : mash_state
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
   622
57545
2126b355f0ca made SML/NJ happier
blanchet
parents: 57532
diff changeset
   623
fun recompute_ffds_freqs_from_learns (learns : (string * string list * string list) list)
2126b355f0ca made SML/NJ happier
blanchet
parents: 57532
diff changeset
   624
    ((num_facts, fact_tab), (num_feats, feat_tab)) num_facts0 (fact_names0, featss0, depss0) freqs0 =
57379
dcaf04545de2 avoid needless (trivial) reordering on load
blanchet
parents: 57378
diff changeset
   625
  let
57380
a212bee30bba more incremental learning of single fact
blanchet
parents: 57379
diff changeset
   626
    val fact_names = Vector.concat [fact_names0, Vector.fromList (map #1 learns)]
a212bee30bba more incremental learning of single fact
blanchet
parents: 57379
diff changeset
   627
    val featss = Vector.concat [featss0,
a212bee30bba more incremental learning of single fact
blanchet
parents: 57379
diff changeset
   628
      Vector.fromList (map (map_filter (Symtab.lookup feat_tab) o #2) learns)]
a212bee30bba more incremental learning of single fact
blanchet
parents: 57379
diff changeset
   629
    val depss = Vector.concat [depss0,
a212bee30bba more incremental learning of single fact
blanchet
parents: 57379
diff changeset
   630
      Vector.fromList (map (map_filter (Symtab.lookup fact_tab) o #3) learns)]
57379
dcaf04545de2 avoid needless (trivial) reordering on load
blanchet
parents: 57378
diff changeset
   631
  in
dcaf04545de2 avoid needless (trivial) reordering on load
blanchet
parents: 57378
diff changeset
   632
    ((fact_names, featss, depss),
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
   633
     MaSh.learn_facts freqs0 num_facts0 num_facts num_feats depss featss)
57379
dcaf04545de2 avoid needless (trivial) reordering on load
blanchet
parents: 57378
diff changeset
   634
  end
dcaf04545de2 avoid needless (trivial) reordering on load
blanchet
parents: 57378
diff changeset
   635
57378
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   636
fun reorder_learns (num_facts, fact_tab) learns =
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   637
  let val ary = Array.array (num_facts, ("", [], [])) in
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   638
    List.app (fn learn as (fact, _, _) =>
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   639
        Array.update (ary, the (Symtab.lookup fact_tab fact), learn))
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   640
      learns;
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   641
    Array.foldr (op ::) [] ary
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   642
  end
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   643
57380
a212bee30bba more incremental learning of single fact
blanchet
parents: 57379
diff changeset
   644
fun recompute_ffds_freqs_from_access_G access_G (xtabs as (fact_xtab, _)) =
57378
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   645
  let
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   646
    val learns =
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   647
      Graph.schedule (fn _ => fn (fact, (_, feats, deps)) => (fact, feats, deps)) access_G
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   648
      |> reorder_learns fact_xtab
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   649
  in
57380
a212bee30bba more incremental learning of single fact
blanchet
parents: 57379
diff changeset
   650
    recompute_ffds_freqs_from_learns learns xtabs 0 empty_ffds empty_freqs
57378
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   651
  end
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   652
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   653
local
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   654
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   655
val version = "*** MaSh version 20161123 ***"
50357
187ae76a1757 more robustness in the face of MaSh format changes -- don't overwrite new versions with old versions
blanchet
parents: 50356
diff changeset
   656
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
   657
exception FILE_VERSION_TOO_NEW of unit
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   658
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   659
fun extract_node line =
55286
blanchet
parents: 55202
diff changeset
   660
  (case space_explode ":" line of
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   661
    [head, tail] =>
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   662
    (case (space_explode " " head, map (unprefix " ") (space_explode ";" tail)) of
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   663
      ([kind, name], [parents, feats, deps]) =>
57358
545d02691b32 killed dead data
blanchet
parents: 57357
diff changeset
   664
      SOME (proof_kind_of_str kind, decode_str name, decode_strs parents, decode_strs feats,
57010
121b63d7bcdb take weights into consideration in knn
blanchet
parents: 57009
diff changeset
   665
        decode_strs deps)
55286
blanchet
parents: 55202
diff changeset
   666
    | _ => NONE)
blanchet
parents: 55202
diff changeset
   667
  | _ => NONE)
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   668
63697
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
   669
fun would_load_state (memory_time, _) =
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
   670
  let val path = state_file () in
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
   671
    (case try OS.FileSys.modTime (File.platform_path path) of
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
   672
      NONE => false
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
   673
    | SOME disk_time => memory_time < disk_time)
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
   674
  end;
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
   675
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
   676
fun load_state ctxt (time_state as (memory_time, _)) =
57552
1072599c43f6 no need for 'mash' subdirectory after removal of Python program
blanchet
parents: 57546
diff changeset
   677
  let val path = state_file () in
60971
6dfe08f5834e proper platform_path;
wenzelm
parents: 60948
diff changeset
   678
    (case try OS.FileSys.modTime (File.platform_path path) of
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   679
      NONE => time_state
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   680
    | SOME disk_time =>
62826
eb94e570c1a4 prefer infix operations;
wenzelm
parents: 62735
diff changeset
   681
      if memory_time >= disk_time then
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   682
        time_state
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   683
      else
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   684
        (disk_time,
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   685
         (case try File.read_lines path of
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   686
           SOME (version' :: node_lines) =>
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   687
           let
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   688
             fun extract_line_and_add_node line =
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   689
               (case extract_node line of
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   690
                 NONE => I (* should not happen *)
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   691
               | SOME (kind, name, parents, feats, deps) => add_node kind name parents feats deps)
57011
a4428f517f46 implemented learning of single proofs in SML MaSh
blanchet
parents: 57010
diff changeset
   692
57379
dcaf04545de2 avoid needless (trivial) reordering on load
blanchet
parents: 57378
diff changeset
   693
             val empty_G_etc = (Graph.empty, empty_xtabs, [])
dcaf04545de2 avoid needless (trivial) reordering on load
blanchet
parents: 57378
diff changeset
   694
dcaf04545de2 avoid needless (trivial) reordering on load
blanchet
parents: 57378
diff changeset
   695
             val (access_G, xtabs, rev_learns) =
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   696
               (case string_ord (version', version) of
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   697
                 EQUAL =>
57379
dcaf04545de2 avoid needless (trivial) reordering on load
blanchet
parents: 57378
diff changeset
   698
                 try_graph ctxt "loading state" empty_G_etc
dcaf04545de2 avoid needless (trivial) reordering on load
blanchet
parents: 57378
diff changeset
   699
                   (fn () => fold extract_line_and_add_node node_lines empty_G_etc)
57552
1072599c43f6 no need for 'mash' subdirectory after removal of Python program
blanchet
parents: 57546
diff changeset
   700
               | LESS => (remove_state_file (); empty_G_etc) (* cannot parse old file *)
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   701
               | GREATER => raise FILE_VERSION_TOO_NEW ())
57378
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   702
57380
a212bee30bba more incremental learning of single fact
blanchet
parents: 57379
diff changeset
   703
             val (ffds, freqs) =
a212bee30bba more incremental learning of single fact
blanchet
parents: 57379
diff changeset
   704
               recompute_ffds_freqs_from_learns (rev rev_learns) xtabs 0 empty_ffds empty_freqs
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   705
           in
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   706
             trace_msg ctxt (fn () => "Loaded fact graph (" ^ graph_info access_G ^ ")");
57378
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   707
             {access_G = access_G, xtabs = xtabs, ffds = ffds, freqs = freqs, dirty_facts = SOME []}
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   708
           end
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   709
         | _ => empty_state)))
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   710
  end
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   711
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   712
fun str_of_entry (kind, name, parents, feats, deps) =
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   713
  str_of_proof_kind kind ^ " " ^ encode_str name ^ ": " ^ encode_strs parents ^ "; " ^
57358
545d02691b32 killed dead data
blanchet
parents: 57357
diff changeset
   714
  encode_strs feats ^ "; " ^ encode_strs deps ^ "\n"
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   715
57365
blanchet
parents: 57364
diff changeset
   716
fun save_state _ (time_state as (_, {dirty_facts = SOME [], ...})) = time_state
57378
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   717
  | save_state ctxt (memory_time, {access_G, xtabs, ffds, freqs, dirty_facts}) =
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   718
    let
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   719
      fun append_entry (name, ((kind, feats, deps), (parents, _))) =
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   720
        cons (kind, name, Graph.Keys.dest parents, feats, deps)
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   721
57552
1072599c43f6 no need for 'mash' subdirectory after removal of Python program
blanchet
parents: 57546
diff changeset
   722
      val path = state_file ()
57365
blanchet
parents: 57364
diff changeset
   723
      val dirty_facts' =
60971
6dfe08f5834e proper platform_path;
wenzelm
parents: 60948
diff changeset
   724
        (case try OS.FileSys.modTime (File.platform_path path) of
57076
3d4b172d2209 automatically reload state file when it changes on disk
blanchet
parents: 57062
diff changeset
   725
          NONE => NONE
62826
eb94e570c1a4 prefer infix operations;
wenzelm
parents: 62735
diff changeset
   726
        | SOME disk_time => if disk_time <= memory_time then dirty_facts else NONE)
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   727
      val (banner, entries) =
57365
blanchet
parents: 57364
diff changeset
   728
        (case dirty_facts' of
55286
blanchet
parents: 55202
diff changeset
   729
          SOME names => (NONE, fold (append_entry o Graph.get_entry access_G) names [])
blanchet
parents: 55202
diff changeset
   730
        | NONE => (SOME (version ^ "\n"), Graph.fold append_entry access_G []))
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   731
    in
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
   732
      (case banner of SOME s => File.write path s | NONE => ();
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
   733
       entries |> chunk_list 500 |> List.app (File.append path o implode o map str_of_entry))
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
   734
      handle IO.Io _ => ();
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   735
      trace_msg ctxt (fn () =>
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   736
        "Saved fact graph (" ^ graph_info access_G ^
57365
blanchet
parents: 57364
diff changeset
   737
        (case dirty_facts of
blanchet
parents: 57364
diff changeset
   738
          SOME dirty_facts => "; " ^ string_of_int (length dirty_facts) ^ " dirty fact(s)"
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
   739
        | _ => "") ^  ")");
57378
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   740
      (Time.now (),
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
   741
       {access_G = access_G, xtabs = xtabs, ffds = ffds, freqs = freqs, dirty_facts = SOME []})
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   742
    end
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   743
57365
blanchet
parents: 57364
diff changeset
   744
val global_state = Synchronized.var "Sledgehammer_MaSh.global_state" (Time.zeroTime, empty_state)
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   745
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   746
in
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   747
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
   748
fun map_state ctxt f =
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   749
  (trace_msg ctxt (fn () => "Changing MaSh state");
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   750
   synchronized_timed_change global_state mash_time_limit
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   751
     (load_state ctxt ##> f #> save_state ctxt))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   752
  |> ignore
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
   753
  handle FILE_VERSION_TOO_NEW () => ()
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   754
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   755
fun peek_state ctxt =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   756
  (trace_msg ctxt (fn () => "Peeking at MaSh state");
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   757
   (case synchronized_timed_value global_state mash_time_limit of
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   758
     NONE => NONE
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   759
   | SOME state => if would_load_state state then NONE else SOME state))
63697
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
   760
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
   761
fun get_state ctxt =
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   762
  (trace_msg ctxt (fn () => "Retrieving MaSh state");
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   763
   synchronized_timed_change_result global_state mash_time_limit
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   764
     (perhaps (try (load_state ctxt)) #> `snd))
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   765
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   766
fun clear_state ctxt =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   767
  (trace_msg ctxt (fn () => "Clearing MaSh state");
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   768
   Synchronized.change global_state (fn _ => (remove_state_file (); (Time.zeroTime, empty_state))))
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   769
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   770
end
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   771
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   772
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   773
(*** Isabelle helpers ***)
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
   774
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   775
fun crude_printed_term size t =
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
   776
  let
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
   777
    fun term _ (res, 0) = (res, 0)
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   778
      | term (t $ u) (res, size) =
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
   779
        let
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   780
          val (res, size) = term t (res ^ "(", size)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   781
          val (res, size) = term u (res ^ " ", size)
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
   782
        in
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   783
          (res ^ ")", size)
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
   784
        end
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   785
      | term (Abs (s, _, t)) (res, size) = term t (res ^ "%" ^ s ^ ".", size - 1)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   786
      | term (Bound n) (res, size) = (res ^ "#" ^ string_of_int n, size - 1)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   787
      | term (Const (s, _)) (res, size) = (res ^ Long_Name.base_name s, size - 1)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   788
      | term (Free (s, _)) (res, size) = (res ^ s, size - 1)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   789
      | term (Var ((s, _), _)) (res, size) = (res ^ s, size - 1)
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
   790
  in
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   791
    fst (term t ("", size))
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
   792
  end
48378
9e96486d53ad handle local facts smoothly in MaSh
blanchet
parents: 48377
diff changeset
   793
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
   794
fun nickname_of_thm th =
48394
82fc8c956cdc fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents: 48392
diff changeset
   795
  if Thm.has_name_hint th then
82fc8c956cdc fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents: 48392
diff changeset
   796
    let val hint = Thm.get_name_hint th in
50722
b422a48adc2d speed up generation of local theorem nicknames
blanchet
parents: 50718
diff changeset
   797
      (* There must be a better way to detect local facts. *)
59888
27e4d0ab0948 tuned signature;
wenzelm
parents: 59877
diff changeset
   798
      (case Long_Name.dest_local hint of
60641
f6e8293747fd clarified context;
wenzelm
parents: 60640
diff changeset
   799
        SOME suf =>
65458
cf504b7a7aa7 tuned signature;
wenzelm
parents: 64522
diff changeset
   800
        Long_Name.implode [Thm.theory_name th, suf, crude_printed_term 25 (Thm.prop_of th)]
55286
blanchet
parents: 55202
diff changeset
   801
      | NONE => hint)
48394
82fc8c956cdc fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents: 48392
diff changeset
   802
    end
82fc8c956cdc fixed various issues with MaSh's file handling + tune output + generate local facts again + handle nameless facts gracefully
blanchet
parents: 48392
diff changeset
   803
  else
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   804
    crude_printed_term 50 (Thm.prop_of th)
48378
9e96486d53ad handle local facts smoothly in MaSh
blanchet
parents: 48377
diff changeset
   805
51134
d03ded5dcf65 more MaSh tracing
blanchet
parents: 51133
diff changeset
   806
fun find_suggested_facts ctxt facts =
48330
192444b53e86 speed up MaSh queries
blanchet
parents: 48329
diff changeset
   807
  let
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
   808
    fun add (fact as (_, th)) = Symtab.default (nickname_of_thm th, fact)
51134
d03ded5dcf65 more MaSh tracing
blanchet
parents: 51133
diff changeset
   809
    val tab = fold add facts Symtab.empty
d03ded5dcf65 more MaSh tracing
blanchet
parents: 51133
diff changeset
   810
    fun lookup nick =
d03ded5dcf65 more MaSh tracing
blanchet
parents: 51133
diff changeset
   811
      Symtab.lookup tab nick
57006
blanchet
parents: 57005
diff changeset
   812
      |> tap (fn NONE => trace_msg ctxt (fn () => "Cannot find " ^ quote nick) | _ => ())
51134
d03ded5dcf65 more MaSh tracing
blanchet
parents: 51133
diff changeset
   813
  in map_filter lookup end
48311
3c4e10606567 implemented MaSh QUERY operation
blanchet
parents: 48309
diff changeset
   814
57404
a68ae60c1504 got rid of hard-coded weights, now that we have TFIDF
blanchet
parents: 57403
diff changeset
   815
fun free_feature_of s = "f" ^ s
a68ae60c1504 got rid of hard-coded weights, now that we have TFIDF
blanchet
parents: 57403
diff changeset
   816
fun thy_feature_of s = "y" ^ s
a68ae60c1504 got rid of hard-coded weights, now that we have TFIDF
blanchet
parents: 57403
diff changeset
   817
fun type_feature_of s = "t" ^ s
a68ae60c1504 got rid of hard-coded weights, now that we have TFIDF
blanchet
parents: 57403
diff changeset
   818
fun class_feature_of s = "s" ^ s
a68ae60c1504 got rid of hard-coded weights, now that we have TFIDF
blanchet
parents: 57403
diff changeset
   819
val local_feature = "local"
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   820
60948
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   821
fun crude_thm_ord ctxt =
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   822
  let
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   823
    val ancestor_lengths =
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   824
      fold (fn thy => Symtab.update (Context.theory_name thy, length (Context.ancestors_of thy)))
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   825
        (Theory.nodes_of (Proof_Context.theory_of ctxt)) Symtab.empty
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   826
    val ancestor_length = Symtab.lookup ancestor_lengths o Context.theory_id_name
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
   827
60948
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   828
    fun crude_theory_ord p =
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   829
      if Context.eq_thy_id p then EQUAL
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   830
      else if Context.proper_subthy_id p then LESS
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   831
      else if Context.proper_subthy_id (swap p) then GREATER
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   832
      else
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   833
        (case apply2 ancestor_length p of
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   834
          (SOME m, SOME n) =>
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   835
            (case int_ord (m, n) of
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   836
              EQUAL => string_ord (apply2 Context.theory_id_name p)
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   837
            | ord => ord)
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   838
        | _ => string_ord (apply2 Context.theory_id_name p))
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   839
  in
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   840
    fn p =>
65458
cf504b7a7aa7 tuned signature;
wenzelm
parents: 64522
diff changeset
   841
      (case crude_theory_ord (apply2 Thm.theory_id p) of
60948
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   842
        EQUAL =>
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   843
        (* The hack below is necessary because of odd dependencies that are not reflected in the theory
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   844
           comparison. *)
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
   845
        let val q = apply2 nickname_of_thm p in
60948
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   846
          (* Hack to put "xxx_def" before "xxxI" and "xxxE" *)
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   847
          (case bool_ord (apply2 (String.isSuffix "_def") (swap q)) of
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   848
            EQUAL => string_ord q
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   849
          | ord => ord)
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   850
        end
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   851
      | ord => ord)
b710a5087116 prefer theory_id operations;
wenzelm
parents: 60649
diff changeset
   852
  end;
48324
3ee5b5589402 speed up tautology/metaness check
blanchet
parents: 48323
diff changeset
   853
65458
cf504b7a7aa7 tuned signature;
wenzelm
parents: 64522
diff changeset
   854
val thm_less_eq = Context.subthy_id o apply2 Thm.theory_id
51136
fdcc06013f2d avoid crude/wrong theorem comparision
blanchet
parents: 51135
diff changeset
   855
fun thm_less p = thm_less_eq p andalso not (thm_less_eq (swap p))
fdcc06013f2d avoid crude/wrong theorem comparision
blanchet
parents: 51135
diff changeset
   856
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   857
val freezeT = Type.legacy_freeze_type
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   858
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   859
fun freeze (t $ u) = freeze t $ freeze u
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   860
  | freeze (Abs (s, T, t)) = Abs (s, freezeT T, freeze t)
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   861
  | freeze (Var ((s, _), T)) = Free (s, freezeT T)
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   862
  | freeze (Const (s, T)) = Const (s, freezeT T)
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   863
  | freeze (Free (s, T)) = Free (s, freezeT T)
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   864
  | freeze t = t
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   865
59621
291934bac95e Thm.cterm_of and Thm.ctyp_of operate on local context;
wenzelm
parents: 59582
diff changeset
   866
fun goal_of_thm thy = Thm.prop_of #> freeze #> Thm.global_cterm_of thy #> Goal.init
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   867
54141
f57f8e7a879f generate a comment storing the goal nickname in "learn_prover"
blanchet
parents: 54140
diff changeset
   868
fun run_prover_for_mash ctxt params prover goal_name facts goal =
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   869
  let
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   870
    val problem =
54141
f57f8e7a879f generate a comment storing the goal nickname in "learn_prover"
blanchet
parents: 54140
diff changeset
   871
      {comment = "Goal: " ^ goal_name, state = Proof.init ctxt, goal = goal, subgoal = 1,
62735
23de054397e5 early warning when Sledgehammer finds a proof
blanchet
parents: 62505
diff changeset
   872
       subgoal_count = 1, factss = [("", facts)], found_proof = I}
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   873
  in
57735
056a55b44ec7 eliminated Sledgehammer's "min" subcommand (and lots of complications in the code)
blanchet
parents: 57662
diff changeset
   874
    get_minimizing_prover ctxt MaSh (K ()) prover params problem
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   875
  end
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
   876
48326
ef800e91d072 removed debugging output
blanchet
parents: 48325
diff changeset
   877
val bad_types = [@{type_name prop}, @{type_name bool}, @{type_name fun}]
ef800e91d072 removed debugging output
blanchet
parents: 48325
diff changeset
   878
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   879
val crude_str_of_sort = space_implode "," o map Long_Name.base_name o subtract (op =) @{sort type}
53086
15fe0ca088b3 MaSh tweaking: shorter names + killed (broken) SNoW
blanchet
parents: 53085
diff changeset
   880
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   881
fun crude_str_of_typ (Type (s, [])) = Long_Name.base_name s
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   882
  | crude_str_of_typ (Type (s, Ts)) = Long_Name.base_name s ^ implode (map crude_str_of_typ Ts)
53083
019ecbb18e3f generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents: 53082
diff changeset
   883
  | crude_str_of_typ (TFree (_, S)) = crude_str_of_sort S
019ecbb18e3f generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents: 53082
diff changeset
   884
  | crude_str_of_typ (TVar (_, S)) = crude_str_of_sort S
019ecbb18e3f generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents: 53082
diff changeset
   885
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   886
fun maybe_singleton_str "" = []
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   887
  | maybe_singleton_str s = [s]
53128
ea1b62ed5a54 get rid of some silly MaSh features
blanchet
parents: 53127
diff changeset
   888
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   889
val max_pat_breadth = 5 (* FUDGE *)
50585
306c7b807e13 contain exponential explosion of term patterns
blanchet
parents: 50584
diff changeset
   890
57406
e844dcf57deb killed dead code
blanchet
parents: 57405
diff changeset
   891
fun term_features_of ctxt thy_name term_max_depth type_max_depth ts =
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   892
  let
50392
190053ee24ed expand type classes into their ancestors for MaSh
blanchet
parents: 50391
diff changeset
   893
    val thy = Proof_Context.theory_of ctxt
53082
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
   894
50393
d8aa26c78327 record free variables as a MaSh feature
blanchet
parents: 50392
diff changeset
   895
    val fixes = map snd (Variable.dest_fixes ctxt)
50392
190053ee24ed expand type classes into their ancestors for MaSh
blanchet
parents: 50391
diff changeset
   896
    val classes = Sign.classes_of thy
53082
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
   897
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
   898
    fun add_classes @{sort type} = I
50392
190053ee24ed expand type classes into their ancestors for MaSh
blanchet
parents: 50391
diff changeset
   899
      | add_classes S =
190053ee24ed expand type classes into their ancestors for MaSh
blanchet
parents: 50391
diff changeset
   900
        fold (`(Sorts.super_classes classes)
57006
blanchet
parents: 57005
diff changeset
   901
          #> swap #> op ::
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   902
          #> subtract (op =) @{sort type}
57006
blanchet
parents: 57005
diff changeset
   903
          #> map class_feature_of
57404
a68ae60c1504 got rid of hard-coded weights, now that we have TFIDF
blanchet
parents: 57403
diff changeset
   904
          #> union (op =)) S
53082
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
   905
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
   906
    fun pattify_type 0 _ = []
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   907
      | pattify_type depth (Type (s, [])) = if member (op =) bad_types s then [] else [s]
53082
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
   908
      | pattify_type depth (Type (s, U :: Ts)) =
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
   909
        let
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
   910
          val T = Type (s, Ts)
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   911
          val ps = take max_pat_breadth (pattify_type depth T)
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   912
          val qs = take max_pat_breadth ("" :: pattify_type (depth - 1) U)
57006
blanchet
parents: 57005
diff changeset
   913
        in
blanchet
parents: 57005
diff changeset
   914
          map_product (fn p => fn "" => p | q => p ^ "(" ^ q ^ ")") ps qs
blanchet
parents: 57005
diff changeset
   915
        end
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   916
      | pattify_type _ (TFree (_, S)) = maybe_singleton_str (crude_str_of_sort S)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   917
      | pattify_type _ (TVar (_, S)) = maybe_singleton_str (crude_str_of_sort S)
57006
blanchet
parents: 57005
diff changeset
   918
53082
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
   919
    fun add_type_pat depth T =
57404
a68ae60c1504 got rid of hard-coded weights, now that we have TFIDF
blanchet
parents: 57403
diff changeset
   920
      union (op =) (map type_feature_of (pattify_type depth T))
57006
blanchet
parents: 57005
diff changeset
   921
53082
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
   922
    fun add_type_pats 0 _ = I
57404
a68ae60c1504 got rid of hard-coded weights, now that we have TFIDF
blanchet
parents: 57403
diff changeset
   923
      | add_type_pats depth t = add_type_pat depth t #> add_type_pats (depth - 1) t
57006
blanchet
parents: 57005
diff changeset
   924
53083
019ecbb18e3f generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents: 53082
diff changeset
   925
    fun add_type T =
019ecbb18e3f generate patterns for variables as well in MaSh (cf. HOL(y)Hammer)
blanchet
parents: 53082
diff changeset
   926
      add_type_pats type_max_depth T
53156
f79f4693868b minor MaSh fix
blanchet
parents: 53155
diff changeset
   927
      #> fold_atyps_sorts (add_classes o snd) T
57006
blanchet
parents: 57005
diff changeset
   928
53084
877f5c28016f add subtypes as well as features in MaSh
blanchet
parents: 53083
diff changeset
   929
    fun add_subtypes (T as Type (_, Ts)) = add_type T #> fold add_subtypes Ts
877f5c28016f add subtypes as well as features in MaSh
blanchet
parents: 53083
diff changeset
   930
      | add_subtypes T = add_type T
53082
369e39511555 generate deep type patterns in MaSh
blanchet
parents: 52697
diff changeset
   931
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   932
    fun pattify_term _ 0 _ = []
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   933
      | pattify_term _ depth (Const (s, _)) =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   934
        if is_widely_irrelevant_const s then [] else [s]
54089
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
   935
      | pattify_term _ _ (Free (s, T)) =
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   936
        maybe_singleton_str (crude_str_of_typ T)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   937
        |> (if member (op =) fixes s then cons (free_feature_of (Long_Name.append thy_name s))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   938
            else I)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   939
      | pattify_term _ _ (Var (_, T)) =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   940
        maybe_singleton_str (crude_str_of_typ T)
54089
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
   941
      | pattify_term Ts _ (Bound j) =
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   942
        maybe_singleton_str (crude_str_of_typ (nth Ts j))
54089
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
   943
      | pattify_term Ts depth (t $ u) =
50339
d8dae91f3107 MaSh improvements: deeper patterns + more respect for chained facts
blanchet
parents: 50338
diff changeset
   944
        let
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   945
          val ps = take max_pat_breadth (pattify_term Ts depth t)
57404
a68ae60c1504 got rid of hard-coded weights, now that we have TFIDF
blanchet
parents: 57403
diff changeset
   946
          val qs = take max_pat_breadth ("" :: pattify_term Ts (depth - 1) u)
53130
6741ba8d5c6d improve weight computation for complex terms
blanchet
parents: 53129
diff changeset
   947
        in
57404
a68ae60c1504 got rid of hard-coded weights, now that we have TFIDF
blanchet
parents: 57403
diff changeset
   948
          map_product (fn p => fn "" => p | q => p ^ "(" ^ q ^ ")") ps qs
53130
6741ba8d5c6d improve weight computation for complex terms
blanchet
parents: 53129
diff changeset
   949
        end
54089
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
   950
      | pattify_term _ _ _ = []
57006
blanchet
parents: 57005
diff changeset
   951
57404
a68ae60c1504 got rid of hard-coded weights, now that we have TFIDF
blanchet
parents: 57403
diff changeset
   952
    fun add_term_pat Ts = union (op =) oo pattify_term Ts
57006
blanchet
parents: 57005
diff changeset
   953
53130
6741ba8d5c6d improve weight computation for complex terms
blanchet
parents: 53129
diff changeset
   954
    fun add_term_pats _ 0 _ = I
57055
df3a26987a8d reverted '|' features in MaSh -- these sounded like a good idea but never really worked
blanchet
parents: 57052
diff changeset
   955
      | add_term_pats Ts depth t = add_term_pat Ts depth t #> add_term_pats Ts (depth - 1) t
57006
blanchet
parents: 57005
diff changeset
   956
53130
6741ba8d5c6d improve weight computation for complex terms
blanchet
parents: 53129
diff changeset
   957
    fun add_term Ts = add_term_pats Ts term_max_depth
57006
blanchet
parents: 57005
diff changeset
   958
53085
15483854c83e handle Bounds as well in MaSh features
blanchet
parents: 53084
diff changeset
   959
    fun add_subterms Ts t =
55286
blanchet
parents: 55202
diff changeset
   960
      (case strip_comb t of
54089
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
   961
        (Const (s, T), args) =>
b13f6731f873 use same relevance filter for ATP and SMT solvers -- attempting to filter out certain ground instances of polymorphic symbols like + and 0 has unexpected side-effects that lead to incompletenesses (relevant facts not being selected)
blanchet
parents: 54085
diff changeset
   962
        (not (is_widely_irrelevant_const s) ? add_term Ts t)
57404
a68ae60c1504 got rid of hard-coded weights, now that we have TFIDF
blanchet
parents: 57403
diff changeset
   963
        #> add_subtypes T #> fold (add_subterms Ts) args
50857
80768e28c9ee better handlig of built-ins -- at the top-level, not in subterms
blanchet
parents: 50841
diff changeset
   964
      | (head, args) =>
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   965
        (case head of
53130
6741ba8d5c6d improve weight computation for complex terms
blanchet
parents: 53129
diff changeset
   966
           Free (_, T) => add_term Ts t #> add_subtypes T
53084
877f5c28016f add subtypes as well as features in MaSh
blanchet
parents: 53083
diff changeset
   967
         | Var (_, T) => add_subtypes T
53085
15483854c83e handle Bounds as well in MaSh features
blanchet
parents: 53084
diff changeset
   968
         | Abs (_, T, body) => add_subtypes T #> add_subterms (T :: Ts) body
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   969
         | _ => I)
55286
blanchet
parents: 55202
diff changeset
   970
        #> fold (add_subterms Ts) args)
57006
blanchet
parents: 57005
diff changeset
   971
  in
blanchet
parents: 57005
diff changeset
   972
    fold (add_subterms []) ts []
blanchet
parents: 57005
diff changeset
   973
  end
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   974
53085
15483854c83e handle Bounds as well in MaSh features
blanchet
parents: 53084
diff changeset
   975
val term_max_depth = 2
53155
2c585fdbe197 eliminated some needless MaSh features
blanchet
parents: 53154
diff changeset
   976
val type_max_depth = 1
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   977
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   978
(* TODO: Generate type classes for types? *)
60640
58326c4a3ea2 tuned signature;
wenzelm
parents: 60638
diff changeset
   979
fun features_of ctxt thy_name (scope, _) ts =
58326c4a3ea2 tuned signature;
wenzelm
parents: 60638
diff changeset
   980
  thy_feature_of thy_name ::
58326c4a3ea2 tuned signature;
wenzelm
parents: 60638
diff changeset
   981
  term_features_of ctxt thy_name term_max_depth type_max_depth ts
58326c4a3ea2 tuned signature;
wenzelm
parents: 60638
diff changeset
   982
  |> scope <> Global ? cons local_feature
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   983
57006
blanchet
parents: 57005
diff changeset
   984
(* Too many dependencies is a sign that a decision procedure is at work. There is not much to learn
blanchet
parents: 57005
diff changeset
   985
   from such proofs. *)
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   986
val max_dependencies = 20 (* FUDGE *)
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
   987
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
   988
val prover_default_max_facts = 25 (* FUDGE *)
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
   989
48438
3e45c98fe127 distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents: 48436
diff changeset
   990
(* "type_definition_xxx" facts are characterized by their use of "CollectI". *)
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
   991
val typedef_dep = nickname_of_thm @{thm CollectI}
57006
blanchet
parents: 57005
diff changeset
   992
(* Mysterious parts of the class machinery create lots of proofs that refer exclusively to
blanchet
parents: 57005
diff changeset
   993
   "someI_ex" (and to some internal constructions). *)
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
   994
val class_some_dep = nickname_of_thm @{thm someI_ex}
48438
3e45c98fe127 distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents: 48436
diff changeset
   995
50828
91e6836bb68b don't learn from the proof of "psimps" etc.
blanchet
parents: 50827
diff changeset
   996
val fundef_ths =
57006
blanchet
parents: 57005
diff changeset
   997
  @{thms fundef_ex1_existence fundef_ex1_uniqueness fundef_ex1_iff fundef_default_value}
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
   998
  |> map nickname_of_thm
50828
91e6836bb68b don't learn from the proof of "psimps" etc.
blanchet
parents: 50827
diff changeset
   999
48438
3e45c98fe127 distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents: 48436
diff changeset
  1000
(* "Rep_xxx_inject", "Abs_xxx_inverse", etc., are derived using these facts. *)
3e45c98fe127 distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents: 48436
diff changeset
  1001
val typedef_ths =
57006
blanchet
parents: 57005
diff changeset
  1002
  @{thms type_definition.Abs_inverse type_definition.Rep_inverse type_definition.Rep
blanchet
parents: 57005
diff changeset
  1003
      type_definition.Rep_inject type_definition.Abs_inject type_definition.Rep_cases
blanchet
parents: 57005
diff changeset
  1004
      type_definition.Abs_cases type_definition.Rep_induct type_definition.Abs_induct
blanchet
parents: 57005
diff changeset
  1005
      type_definition.Rep_range type_definition.Abs_image}
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1006
  |> map nickname_of_thm
48438
3e45c98fe127 distinguish between recursive and nonrecursive definitions + clean up typedef dependencies in MaSh
blanchet
parents: 48436
diff changeset
  1007
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1008
fun is_size_def [dep] th =
55642
63beb38e9258 adapted to renaming of datatype 'cases' and 'recs' to 'case' and 'rec'
blanchet
parents: 55286
diff changeset
  1009
    (case first_field ".rec" dep of
57006
blanchet
parents: 57005
diff changeset
  1010
      SOME (pref, _) =>
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1011
      (case first_field ".size" (nickname_of_thm th) of
57006
blanchet
parents: 57005
diff changeset
  1012
        SOME (pref', _) => pref = pref'
blanchet
parents: 57005
diff changeset
  1013
      | NONE => false)
blanchet
parents: 57005
diff changeset
  1014
    | NONE => false)
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1015
  | is_size_def _ _ = false
48441
2d2f009ca8eb remove MaSh junk associated with size functions
blanchet
parents: 48440
diff changeset
  1016
51177
e8c9755fd14e tuned code: factored out parent computation
blanchet
parents: 51176
diff changeset
  1017
fun trim_dependencies deps =
50755
4c781d65c0d6 get rid of spurious "Isar" proofs
blanchet
parents: 50754
diff changeset
  1018
  if length deps > max_dependencies then NONE else SOME deps
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1019
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1020
fun isar_dependencies_of name_tabs th =
57306
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57305
diff changeset
  1021
  thms_in_proof max_dependencies (SOME name_tabs) th
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57305
diff changeset
  1022
  |> Option.map (fn deps =>
57006
blanchet
parents: 57005
diff changeset
  1023
    if deps = [typedef_dep] orelse deps = [class_some_dep] orelse
57757
blanchet
parents: 57735
diff changeset
  1024
        exists (member (op =) fundef_ths) deps orelse exists (member (op =) typedef_ths) deps orelse
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1025
        is_size_def deps th then
50755
4c781d65c0d6 get rid of spurious "Isar" proofs
blanchet
parents: 50754
diff changeset
  1026
      []
4c781d65c0d6 get rid of spurious "Isar" proofs
blanchet
parents: 50754
diff changeset
  1027
    else
57306
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57305
diff changeset
  1028
      deps)
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1029
57006
blanchet
parents: 57005
diff changeset
  1030
fun prover_dependencies_of ctxt (params as {verbose, max_facts, ...}) prover auto_level facts
blanchet
parents: 57005
diff changeset
  1031
    name_tabs th =
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1032
  (case isar_dependencies_of name_tabs th of
57306
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57305
diff changeset
  1033
    SOME [] => (false, [])
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57305
diff changeset
  1034
  | isar_deps0 =>
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1035
    let
57306
ff10067b2248 optimized traversal of proof terms by skipping bad apples (e.g. full_exhaustive_int'.pinduct)
blanchet
parents: 57305
diff changeset
  1036
      val isar_deps = these isar_deps0
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1037
      val thy = Proof_Context.theory_of ctxt
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1038
      val goal = goal_of_thm thy th
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1039
      val name = nickname_of_thm th
52196
2281f33e8da6 redid rac7830871177 to avoid duplicate fixed variable (e.g. lemma "P (a::nat)" proof - have "!!a::int. Q a" sledgehammer [e])
blanchet
parents: 52125
diff changeset
  1040
      val (_, hyp_ts, concl_t) = ATP_Util.strip_subgoal goal 1 ctxt
51136
fdcc06013f2d avoid crude/wrong theorem comparision
blanchet
parents: 51135
diff changeset
  1041
      val facts = facts |> filter (fn (_, th') => thm_less (th', th))
57006
blanchet
parents: 57005
diff changeset
  1042
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1043
      fun nickify ((_, stature), th) = ((nickname_of_thm th, stature), th)
57006
blanchet
parents: 57005
diff changeset
  1044
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1045
      fun is_dep dep (_, th) = (nickname_of_thm th = dep)
57006
blanchet
parents: 57005
diff changeset
  1046
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1047
      fun add_isar_dep facts dep accum =
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1048
        if exists (is_dep dep) accum then
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1049
          accum
55286
blanchet
parents: 55202
diff changeset
  1050
        else
blanchet
parents: 55202
diff changeset
  1051
          (case find_first (is_dep dep) facts of
blanchet
parents: 55202
diff changeset
  1052
            SOME ((_, status), th) => accum @ [(("", status), th)]
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1053
          | NONE => accum (* should not happen *))
57006
blanchet
parents: 57005
diff changeset
  1054
54123
271a8377656f remove overloading of "max_facts" -- it already controls the number of facts passed to ATPs for 'learn_prover'
blanchet
parents: 54115
diff changeset
  1055
      val mepo_facts =
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
  1056
        facts
54095
d80743f28fec simplify fudge factor code
blanchet
parents: 54091
diff changeset
  1057
        |> mepo_suggested_facts ctxt params (max_facts |> the_default prover_default_max_facts) NONE
d80743f28fec simplify fudge factor code
blanchet
parents: 54091
diff changeset
  1058
             hyp_ts concl_t
54123
271a8377656f remove overloading of "max_facts" -- it already controls the number of facts passed to ATPs for 'learn_prover'
blanchet
parents: 54115
diff changeset
  1059
      val facts =
271a8377656f remove overloading of "max_facts" -- it already controls the number of facts passed to ATPs for 'learn_prover'
blanchet
parents: 54115
diff changeset
  1060
        mepo_facts
50754
74a6adcb96ac also generate queries for goals with too many Isar dependencies
blanchet
parents: 50751
diff changeset
  1061
        |> fold (add_isar_dep facts) isar_deps
50624
4d0997abce79 improved thm order hack, in case the default names are overridden
blanchet
parents: 50623
diff changeset
  1062
        |> map nickify
54123
271a8377656f remove overloading of "max_facts" -- it already controls the number of facts passed to ATPs for 'learn_prover'
blanchet
parents: 54115
diff changeset
  1063
      val num_isar_deps = length isar_deps
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1064
    in
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1065
      if verbose andalso auto_level = 0 then
58843
521cea5fa777 discontinued obsolete Output.urgent_message;
wenzelm
parents: 57757
diff changeset
  1066
        writeln ("MaSh: " ^ quote prover ^ " on " ^ quote name ^ " with " ^
57017
blanchet
parents: 57014
diff changeset
  1067
          string_of_int num_isar_deps ^ " + " ^ string_of_int (length facts - num_isar_deps) ^
63692
1bc4bc2c9fd1 killed final stops in Sledgehammer and friends
blanchet
parents: 63518
diff changeset
  1068
          " facts")
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1069
      else
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1070
        ();
55286
blanchet
parents: 55202
diff changeset
  1071
      (case run_prover_for_mash ctxt params prover name facts goal of
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1072
        {outcome = NONE, used_facts, ...} =>
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1073
        (if verbose andalso auto_level = 0 then
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1074
           let val num_facts = length used_facts in
58843
521cea5fa777 discontinued obsolete Output.urgent_message;
wenzelm
parents: 57757
diff changeset
  1075
             writeln ("Found proof with " ^ string_of_int num_facts ^ " fact" ^
63692
1bc4bc2c9fd1 killed final stops in Sledgehammer and friends
blanchet
parents: 63518
diff changeset
  1076
               plural_s num_facts)
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1077
           end
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1078
         else
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1079
           ();
50754
74a6adcb96ac also generate queries for goals with too many Isar dependencies
blanchet
parents: 50751
diff changeset
  1080
         (true, map fst used_facts))
55286
blanchet
parents: 55202
diff changeset
  1081
      | _ => (false, isar_deps))
blanchet
parents: 55202
diff changeset
  1082
    end)
48251
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1083
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1084
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1085
(*** High-level communication with MaSh ***)
6cdcfbddc077 moved most of MaSh exporter code to Sledgehammer
blanchet
parents: 48249
diff changeset
  1086
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1087
(* In the following functions, chunks are risers w.r.t. "thm_less_eq". *)
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1088
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1089
fun chunks_and_parents_for chunks th =
51181
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1090
  let
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1091
    fun insert_parent new parents =
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1092
      let val parents = parents |> filter_out (fn p => thm_less_eq (p, new)) in
57096
e4074b91b2a6 always remove duplicates in meshing + use weights for Naive Bayes
blanchet
parents: 57095
diff changeset
  1093
        parents |> forall (fn p => not (thm_less_eq (new, p))) parents ? cons new
51181
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1094
      end
57006
blanchet
parents: 57005
diff changeset
  1095
51181
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1096
    fun rechunk seen (rest as th' :: ths) =
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1097
      if thm_less_eq (th', th) then (rev seen, rest)
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1098
      else rechunk (th' :: seen) ths
57006
blanchet
parents: 57005
diff changeset
  1099
51181
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1100
    fun do_chunk [] accum = accum
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1101
      | do_chunk (chunk as hd_chunk :: _) (chunks, parents) =
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1102
        if thm_less_eq (hd_chunk, th) then
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1103
          (chunk :: chunks, insert_parent hd_chunk parents)
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1104
        else if thm_less_eq (List.last chunk, th) then
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1105
          let val (front, back as hd_back :: _) = rechunk [] chunk in
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1106
            (front :: back :: chunks, insert_parent hd_back parents)
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1107
          end
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1108
        else
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1109
          (chunk :: chunks, parents)
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1110
  in
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1111
    fold_rev do_chunk chunks ([], [])
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1112
    |>> cons []
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1113
    ||> map nickname_of_thm
51181
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1114
  end
d0fa18638478 implement (more) accurate computation of parents
blanchet
parents: 51180
diff changeset
  1115
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1116
fun attach_parents_to_facts _ [] = []
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1117
  | attach_parents_to_facts old_facts (facts as (_, th) :: _) =
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1118
    let
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1119
      fun do_facts _ [] = []
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1120
        | do_facts (_, parents) [fact] = [(parents, fact)]
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1121
        | do_facts (chunks, parents)
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1122
                   ((fact as (_, th)) :: (facts as (_, th') :: _)) =
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1123
          let
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1124
            val chunks = app_hd (cons th) chunks
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1125
            val chunks_and_parents' =
60638
16d80e5ef2dc tuned signature;
wenzelm
parents: 59888
diff changeset
  1126
              if thm_less_eq (th, th') andalso
65458
cf504b7a7aa7 tuned signature;
wenzelm
parents: 64522
diff changeset
  1127
                Thm.theory_name th = Thm.theory_name th'
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1128
              then (chunks, [nickname_of_thm th])
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1129
              else chunks_and_parents_for chunks th'
57006
blanchet
parents: 57005
diff changeset
  1130
          in
blanchet
parents: 57005
diff changeset
  1131
            (parents, fact) :: do_facts chunks_and_parents' facts
blanchet
parents: 57005
diff changeset
  1132
          end
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1133
    in
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1134
      old_facts @ facts
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1135
      |> do_facts (chunks_and_parents_for [[]] th)
51182
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1136
      |> drop (length old_facts)
962190eab40d provide two modes for MaSh driver: linearized or real visibility
blanchet
parents: 51181
diff changeset
  1137
    end
51177
e8c9755fd14e tuned code: factored out parent computation
blanchet
parents: 51176
diff changeset
  1138
61318
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1139
fun maximal_wrt_graph _ [] = []
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1140
  | maximal_wrt_graph G keys =
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1141
    if can (Graph.get_node G o the_single) keys then
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1142
      keys
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1143
    else
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1144
      let
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1145
        val tab = Symtab.empty |> fold (fn name => Symtab.default (name, ())) keys
57006
blanchet
parents: 57005
diff changeset
  1146
61318
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1147
        fun insert_new seen name = not (Symtab.defined seen name) ? insert (op =) name
57006
blanchet
parents: 57005
diff changeset
  1148
61318
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1149
        fun num_keys keys = Graph.Keys.fold (K (Integer.add 1)) keys 0
57006
blanchet
parents: 57005
diff changeset
  1150
61318
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1151
        fun find_maxes _ (maxs, []) = map snd maxs
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1152
          | find_maxes seen (maxs, new :: news) =
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1153
            find_maxes (seen |> num_keys (Graph.imm_succs G new) > 1 ? Symtab.default (new, ()))
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1154
              (if Symtab.defined tab new then
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1155
                 let
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1156
                   val newp = Graph.all_preds G [new]
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1157
                   fun is_ancestor x yp = member (op =) yp x
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1158
                   val maxs = maxs |> filter (fn (_, max) => not (is_ancestor max newp))
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1159
                 in
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1160
                   if exists (is_ancestor new o fst) maxs then (maxs, news)
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1161
                   else ((newp, new) :: filter_out (fn (_, max) => is_ancestor max newp) maxs, news)
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1162
                 end
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1163
               else
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1164
                 (maxs, Graph.Keys.fold (insert_new seen) (Graph.imm_preds G new) news))
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1165
      in
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1166
        find_maxes Symtab.empty ([],
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1167
          G |> Graph.restrict (not o String.isPrefix anonymous_proof_prefix) |> Graph.maximals)
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1168
      end
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1169
63698
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1170
val max_facts_for_shuffle_cleanup = 20
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1171
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1172
fun maximal_wrt_access_graph _ [] = []
63698
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1173
  | maximal_wrt_access_graph access_G (fact :: facts) =
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1174
    let
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1175
      fun cleanup_wrt (_, th) =
65458
cf504b7a7aa7 tuned signature;
wenzelm
parents: 64522
diff changeset
  1176
        let val thy_id = Thm.theory_id th in
63698
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1177
          filter_out (fn (_, th') =>
65458
cf504b7a7aa7 tuned signature;
wenzelm
parents: 64522
diff changeset
  1178
            Context.proper_subthy_id (Thm.theory_id th', thy_id))
63698
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1179
        end
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1180
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1181
      fun shuffle_cleanup accum [] = accum
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1182
        | shuffle_cleanup accum (fact :: facts) =
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1183
          let
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1184
            val accum' = accum |> cleanup_wrt fact
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1185
            val facts' = facts |> cleanup_wrt fact
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1186
          in
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1187
            shuffle_cleanup accum' facts'
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1188
          end
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1189
    in
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1190
      fact :: cleanup_wrt fact facts
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1191
      |> (fn facts => facts
4de35d16e533 optimized parent computation in MaSh
blanchet
parents: 63697
diff changeset
  1192
        |> length facts <= max_facts_for_shuffle_cleanup ? shuffle_cleanup [])
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1193
      |> map (nickname_of_thm o snd)
57460
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
  1194
      |> maximal_wrt_graph access_G
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
  1195
    end
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1196
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1197
fun is_fact_in_graph access_G = can (Graph.get_node access_G) o nickname_of_thm
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1198
53197
6c5e7143e1f6 tuned fudge factor in light of evaluation
blanchet
parents: 53159
diff changeset
  1199
val chained_feature_factor = 0.5 (* FUDGE *)
57405
1f49da059947 reintroduced 'extra features' + only print message in verbose mode
blanchet
parents: 57404
diff changeset
  1200
val extra_feature_factor = 0.1 (* FUDGE *)
1f49da059947 reintroduced 'extra features' + only print message in verbose mode
blanchet
parents: 57404
diff changeset
  1201
val num_extra_feature_facts = 10 (* FUDGE *)
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1202
57660
86b853448f08 fixed sorting (broken since 9cc802a8ab06)
blanchet
parents: 57658
diff changeset
  1203
val max_proximity_facts = 100 (* FUDGE *)
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1204
54060
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1205
fun find_mash_suggestions ctxt max_facts suggs facts chained raw_unknown =
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1206
  let
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1207
    val inter_fact = inter (eq_snd Thm.eq_thm_prop)
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1208
    val raw_mash = find_suggested_facts ctxt facts suggs
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1209
    val proximate = take max_proximity_facts facts
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1210
    val unknown_chained = inter_fact raw_unknown chained
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1211
    val unknown_proximate = inter_fact raw_unknown proximate
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1212
    val mess =
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1213
      [(0.9 (* FUDGE *), (map (rpair 1.0) unknown_chained, [])),
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1214
       (0.4 (* FUDGE *), (weight_facts_smoothly unknown_proximate, [])),
5f96d29fb7c2 removed pointless special case
blanchet
parents: 54012
diff changeset
  1215
       (0.1 (* FUDGE *), (weight_facts_steeply raw_mash, raw_unknown))]
57007
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
  1216
    val unknown = raw_unknown
d3eed0518882 started work on MaSh/SML
blanchet
parents: 57006
diff changeset
  1217
      |> fold (subtract (eq_snd Thm.eq_thm_prop)) [unknown_chained, unknown_proximate]
57006
blanchet
parents: 57005
diff changeset
  1218
  in
61322
44f4ffe2b210 speed up MaSh duplicate check
blanchet
parents: 61321
diff changeset
  1219
    (mesh_facts (fact_distinct (op aconv)) (eq_snd (gen_eq_thm ctxt)) max_facts mess, unknown)
57006
blanchet
parents: 57005
diff changeset
  1220
  end
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1221
60649
e007aa6a8aa2 more explicit use of context and elimination of Thm.theory_of_thm, although unclear (and untested?) situations remain;
wenzelm
parents: 60641
diff changeset
  1222
fun mash_suggested_facts ctxt thy_name ({debug, ...} : params) max_suggs hyp_ts concl_t facts =
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1223
  let
57532
c7dc1f0a2b8a tuned terminology
blanchet
parents: 57531
diff changeset
  1224
    val algorithm = the_mash_algorithm ()
57052
ea5912e3b008 until naive Bayes supports weights, don't incorporate 'extra' low-weight features
blanchet
parents: 57039
diff changeset
  1225
57460
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
  1226
    val facts = facts
60641
f6e8293747fd clarified context;
wenzelm
parents: 60640
diff changeset
  1227
      |> rev_sort_list_prefix (crude_thm_ord ctxt o apply2 snd)
57460
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
  1228
        (Int.max (num_extra_feature_facts, max_proximity_facts))
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
  1229
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
  1230
    val chained = filter (fn ((_, (scope, _)), _) => scope = Chained) facts
54085
b6b41e1d5689 optimized built-in const check
blanchet
parents: 54064
diff changeset
  1231
65458
cf504b7a7aa7 tuned signature;
wenzelm
parents: 64522
diff changeset
  1232
    fun fact_has_right_theory (_, th) = thy_name = Thm.theory_name th
56995
61855ade6c7e hide more consts to beautify documentation
blanchet
parents: 56303
diff changeset
  1233
53141
d27e99a6a679 take chained and proximate facts into consideration when computing MaSh features
blanchet
parents: 53140
diff changeset
  1234
    fun chained_or_extra_features_of factor (((_, stature), th), weight) =
59582
0fbed69ff081 tuned signature -- prefer qualified names;
wenzelm
parents: 59486
diff changeset
  1235
      [Thm.prop_of th]
65458
cf504b7a7aa7 tuned signature;
wenzelm
parents: 64522
diff changeset
  1236
      |> features_of ctxt (Thm.theory_name th) stature
57404
a68ae60c1504 got rid of hard-coded weights, now that we have TFIDF
blanchet
parents: 57403
diff changeset
  1237
      |> map (rpair (weight * factor))
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1238
  in
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1239
    (case get_state ctxt of
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1240
      NONE => ([], [])
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1241
    | SOME {access_G, xtabs = ((num_facts, fact_tab), (num_feats, feat_tab)), ffds, freqs, ...} =>
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1242
      let
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1243
        val goal_feats0 =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1244
          features_of ctxt thy_name (Local, General) (concl_t :: hyp_ts)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1245
        val chained_feats = chained
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1246
          |> map (rpair 1.0)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1247
          |> map (chained_or_extra_features_of chained_feature_factor)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1248
          |> rpair [] |-> fold (union (eq_fst (op =)))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1249
        val extra_feats = facts
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1250
          |> take (Int.max (0, num_extra_feature_facts - length chained))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1251
          |> filter fact_has_right_theory
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1252
          |> weight_facts_steeply
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1253
          |> map (chained_or_extra_features_of extra_feature_factor)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1254
          |> rpair [] |-> fold (union (eq_fst (op =)))
57017
blanchet
parents: 57014
diff changeset
  1255
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1256
        val goal_feats =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1257
          fold (union (eq_fst (op =))) [chained_feats, extra_feats] (map (rpair 1.0) goal_feats0)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1258
          |> debug ? sort (Real.compare o swap o apply2 snd)
57460
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
  1259
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1260
        val parents = maximal_wrt_access_graph access_G facts
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1261
        val visible_facts = map_filter (Symtab.lookup fact_tab) (Graph.all_preds access_G parents)
57460
9cc802a8ab06 speed up MaSh a bit
blanchet
parents: 57459
diff changeset
  1262
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1263
        val suggs =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1264
          if algorithm = MaSh_NB_Ext orelse algorithm = MaSh_kNN_Ext then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1265
            let
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1266
              val learns =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1267
                Graph.schedule (fn _ => fn (fact, (_, feats, deps)) => (fact, feats, deps))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1268
                  access_G
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1269
            in
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1270
              MaSh.query_external ctxt algorithm max_suggs learns goal_feats
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1271
            end
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1272
          else
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1273
            let
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1274
              val int_goal_feats =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1275
                map_filter (fn (s, w) => Option.map (rpair w) (Symtab.lookup feat_tab s)) goal_feats
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1276
            in
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1277
              MaSh.query_internal ctxt algorithm num_facts num_feats ffds freqs visible_facts
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1278
                max_suggs goal_feats int_goal_feats
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1279
            end
57017
blanchet
parents: 57014
diff changeset
  1280
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1281
        val unknown = filter_out (is_fact_in_graph access_G o snd) facts
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1282
      in
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1283
        find_mash_suggestions ctxt max_suggs suggs facts chained unknown
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1284
        |> apply2 (map fact_of_raw_fact)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1285
      end)
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1286
  end
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1287
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1288
fun mash_unlearn ctxt = (clear_state ctxt; writeln "Reset MaSh")
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1289
57661
1586d0479f2c eliminated source of 'DUP's in MaSh
blanchet
parents: 57660
diff changeset
  1290
fun learn_wrt_access_graph ctxt (name, parents, feats, deps)
1586d0479f2c eliminated source of 'DUP's in MaSh
blanchet
parents: 57660
diff changeset
  1291
    (accum as (access_G, (fact_xtab, feat_xtab))) =
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1292
  let
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1293
    fun maybe_learn_from from (accum as (parents, access_G)) =
57013
ed95456499e6 better way to take invisible facts into account than 'island' business
blanchet
parents: 57012
diff changeset
  1294
      try_graph ctxt "updating graph" accum (fn () =>
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1295
        (from :: parents, Graph.add_edge_acyclic (from, name) access_G))
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1296
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1297
    val access_G = access_G |> Graph.default_node (name, (Isar_Proof, feats, deps))
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1298
    val (parents, access_G) = ([], access_G) |> fold maybe_learn_from parents
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1299
    val (deps, _) = ([], access_G) |> fold maybe_learn_from deps
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1300
57661
1586d0479f2c eliminated source of 'DUP's in MaSh
blanchet
parents: 57660
diff changeset
  1301
    val fact_xtab = add_to_xtab name fact_xtab
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1302
    val feat_xtab = fold maybe_add_to_xtab feats feat_xtab
57006
blanchet
parents: 57005
diff changeset
  1303
  in
57661
1586d0479f2c eliminated source of 'DUP's in MaSh
blanchet
parents: 57660
diff changeset
  1304
    (SOME (name, parents, feats, deps), (access_G, (fact_xtab, feat_xtab)))
57006
blanchet
parents: 57005
diff changeset
  1305
  end
57661
1586d0479f2c eliminated source of 'DUP's in MaSh
blanchet
parents: 57660
diff changeset
  1306
  handle Symtab.DUP _ => (NONE, accum) (* facts sometimes have the same name, confusingly *)
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1307
57381
blanchet
parents: 57380
diff changeset
  1308
fun relearn_wrt_access_graph ctxt (name, deps) access_G =
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1309
  let
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1310
    fun maybe_relearn_from from (accum as (parents, access_G)) =
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1311
      try_graph ctxt "updating graph" accum (fn () =>
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1312
        (from :: parents, Graph.add_edge_acyclic (from, name) access_G))
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1313
    val access_G =
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1314
      access_G |> Graph.map_node name (fn (_, feats, _) => (Automatic_Proof, feats, deps))
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1315
    val (deps, _) = ([], access_G) |> fold maybe_relearn_from deps
57006
blanchet
parents: 57005
diff changeset
  1316
  in
57381
blanchet
parents: 57380
diff changeset
  1317
    ((name, deps), access_G)
57006
blanchet
parents: 57005
diff changeset
  1318
  end
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1319
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1320
fun flop_wrt_access_graph name =
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1321
  Graph.map_node name (fn (_, feats, deps) => (Isar_Proof_wegen_Prover_Flop, feats, deps))
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1322
57273
01b68f625550 automatically learn MaSh facts also in 'blocking' mode
blanchet
parents: 57150
diff changeset
  1323
val learn_timeout_slack = 20.0
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1324
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1325
fun launch_thread timeout task =
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1326
  let
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1327
    val hard_timeout = time_mult learn_timeout_slack timeout
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1328
    val birth_time = Time.now ()
62826
eb94e570c1a4 prefer infix operations;
wenzelm
parents: 62735
diff changeset
  1329
    val death_time = birth_time + hard_timeout
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1330
    val desc = ("Machine learner for Sledgehammer", "")
57006
blanchet
parents: 57005
diff changeset
  1331
  in
59471
ca459352d8c5 more explicit indication of Async_Manager_Legacy as Proof General legacy;
wenzelm
parents: 59172
diff changeset
  1332
    Async_Manager_Legacy.thread MaShN birth_time death_time desc task
57006
blanchet
parents: 57005
diff changeset
  1333
  end
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1334
61318
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1335
fun anonymous_proof_name () =
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1336
  Date.fmt (anonymous_proof_prefix ^ "%Y%m%d.%H%M%S.") (Date.fromTimeLocal (Time.now ())) ^
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1337
  serial_string ()
57013
ed95456499e6 better way to take invisible facts into account than 'island' business
blanchet
parents: 57012
diff changeset
  1338
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1339
fun mash_learn_proof ctxt ({timeout, ...} : params) t facts used_ths =
57387
2b6fe2a48352 reintroduced MaSh hints, this time as persistent creatures
blanchet
parents: 57386
diff changeset
  1340
  if not (null used_ths) andalso is_mash_enabled () then
54816
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1341
    launch_thread timeout (fn () =>
57006
blanchet
parents: 57005
diff changeset
  1342
      let
blanchet
parents: 57005
diff changeset
  1343
        val thy = Proof_Context.theory_of ctxt
60640
58326c4a3ea2 tuned signature;
wenzelm
parents: 60638
diff changeset
  1344
        val feats = features_of ctxt (Context.theory_name thy) (Local, General) [t]
60641
f6e8293747fd clarified context;
wenzelm
parents: 60640
diff changeset
  1345
        val facts = rev_sort_list_prefix (crude_thm_ord ctxt o apply2 snd) 1 facts
57006
blanchet
parents: 57005
diff changeset
  1346
      in
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1347
        map_state ctxt
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1348
          (fn {access_G, xtabs as ((num_facts0, _), _), ffds, freqs, dirty_facts} =>
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1349
             let
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1350
               val parents = maximal_wrt_access_graph access_G facts
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1351
               val deps = used_ths
61321
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1352
                 |> filter (is_fact_in_graph access_G)
c982a4cc8dc4 sped up MaSh nickname generation
blanchet
parents: 61318
diff changeset
  1353
                 |> map nickname_of_thm
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1354
61318
6a5a188ab3e7 speed up MaSh
blanchet
parents: 61312
diff changeset
  1355
               val name = anonymous_proof_name ()
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1356
               val (access_G', xtabs', rev_learns) =
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1357
                 add_node Automatic_Proof name parents feats deps (access_G, xtabs, [])
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1358
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1359
               val (ffds', freqs') =
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1360
                 recompute_ffds_freqs_from_learns (rev rev_learns) xtabs' num_facts0 ffds freqs
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1361
             in
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1362
               {access_G = access_G', xtabs = xtabs', ffds = ffds', freqs = freqs',
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1363
                dirty_facts = Option.map (cons name) dirty_facts}
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1364
             end);
57006
blanchet
parents: 57005
diff changeset
  1365
        (true, "")
blanchet
parents: 57005
diff changeset
  1366
      end)
53819
e55d641d0a70 honor MaSh's zero-overhead policy -- no learning if the tool is disabled
blanchet
parents: 53790
diff changeset
  1367
  else
e55d641d0a70 honor MaSh's zero-overhead policy -- no learning if the tool is disabled
blanchet
parents: 53790
diff changeset
  1368
    ()
53095
667717a5ad80 learn MaSh facts on the fly
blanchet
parents: 53094
diff changeset
  1369
63518
ae8fd6fe63a1 tuned signature;
wenzelm
parents: 62826
diff changeset
  1370
fun sendback sub = Active.sendback_markup_command (sledgehammerN ^ " " ^ sub)
48392
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1371
ca998fa08cd9 added "learn_from_atp" command to MaSh, for patient users
blanchet
parents: 48390
diff changeset
  1372
val commit_timeout = seconds 30.0
48332
271a4a6af734 optimize parent computation in MaSh + remove temporary files
blanchet
parents: 48330
diff changeset
  1373
50485
3c6ac2da2f45 merge aliased theorems in MaSh dependencies, modulo symmetry of equality
blanchet
parents: 50484
diff changeset
  1374
(* The timeout is understood in a very relaxed fashion. *)
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1375
fun mash_learn_facts ctxt (params as {debug, verbose, ...}) prover auto_level run_prover
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1376
    learn_timeout facts =
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
  1377
  let
48318
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
  1378
    val timer = Timer.startRealTimer ()
62826
eb94e570c1a4 prefer infix operations;
wenzelm
parents: 62735
diff changeset
  1379
    fun next_commit_time () = Timer.checkRealTimer timer + commit_timeout
48308
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
  1380
  in
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1381
    (case get_state ctxt of
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1382
      NONE => "MaSh is busy\nPlease try again later"
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1383
    | SOME {access_G, ...} =>
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
  1384
      let
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1385
        val is_in_access_G = is_fact_in_graph access_G o snd
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1386
        val no_new_facts = forall is_in_access_G facts
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1387
      in
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1388
        if no_new_facts andalso not run_prover then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1389
          if auto_level < 2 then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1390
            "No new " ^ (if run_prover then "automatic" else "Isar") ^ " proofs to learn" ^
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1391
            (if auto_level = 0 andalso not run_prover then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1392
               "\n\nHint: Try " ^ sendback learn_proverN ^ " to learn from an automatic prover"
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1393
             else
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1394
               "")
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1395
          else
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1396
            ""
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1397
        else
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1398
          let
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1399
            val name_tabs = build_name_tables nickname_of_thm facts
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1400
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1401
            fun deps_of status th =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1402
              if status = Non_Rec_Def orelse status = Rec_Def then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1403
                SOME []
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1404
              else if run_prover then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1405
                prover_dependencies_of ctxt params prover auto_level facts name_tabs th
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1406
                |> (fn (false, _) => NONE | (true, deps) => trim_dependencies deps)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1407
              else
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1408
                isar_dependencies_of name_tabs th
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1409
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1410
            fun do_commit [] [] [] state = state
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1411
              | do_commit learns relearns flops
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1412
                  {access_G, xtabs as ((num_facts0, _), _), ffds, freqs, dirty_facts} =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1413
                let
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1414
                  val was_empty = Graph.is_empty access_G
57378
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
  1415
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1416
                  val (learns, (access_G', xtabs')) =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1417
                    fold_map (learn_wrt_access_graph ctxt) learns (access_G, xtabs)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1418
                    |>> map_filter I
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1419
                  val (relearns, access_G'') =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1420
                    fold_map (relearn_wrt_access_graph ctxt) relearns access_G'
57371
0b2bce982afd store string-to-index tables in memory
blanchet
parents: 57370
diff changeset
  1421
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1422
                  val access_G''' = access_G'' |> fold flop_wrt_access_graph flops
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1423
                  val dirty_facts' =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1424
                    (case (was_empty, dirty_facts) of
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1425
                      (false, SOME names) => SOME (map #1 learns @ map #1 relearns @ names)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1426
                    | _ => NONE)
57378
fe96689f393b recompute learning data at learning time, not query time
blanchet
parents: 57377
diff changeset
  1427
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1428
                  val (ffds', freqs') =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1429
                    if null relearns then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1430
                      recompute_ffds_freqs_from_learns
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1431
                        (map (fn (name, _, feats, deps) => (name, feats, deps)) learns) xtabs'
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1432
                        num_facts0 ffds freqs
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1433
                    else
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1434
                      recompute_ffds_freqs_from_access_G access_G''' xtabs'
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1435
                in
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1436
                  {access_G = access_G''', xtabs = xtabs', ffds = ffds', freqs = freqs',
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1437
                   dirty_facts = dirty_facts'}
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1438
                end
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1439
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1440
            fun commit last learns relearns flops =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1441
              (if debug andalso auto_level = 0 then writeln "Committing..." else ();
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1442
               map_state ctxt (do_commit (rev learns) relearns flops);
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1443
               if not last andalso auto_level = 0 then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1444
                 let val num_proofs = length learns + length relearns in
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1445
                   writeln ("Learned " ^ string_of_int num_proofs ^ " " ^
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1446
                     (if run_prover then "automatic" else "Isar") ^ " proof" ^
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1447
                     plural_s num_proofs ^ " in the last " ^ string_of_time commit_timeout)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1448
                 end
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1449
               else
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1450
                 ())
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1451
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1452
            fun learn_new_fact _ (accum as (_, (_, _, true))) = accum
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1453
              | learn_new_fact (parents, ((_, stature as (_, status)), th))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1454
                  (learns, (num_nontrivial, next_commit, _)) =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1455
                let
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1456
                  val name = nickname_of_thm th
65458
cf504b7a7aa7 tuned signature;
wenzelm
parents: 64522
diff changeset
  1457
                  val feats = features_of ctxt (Thm.theory_name th) stature [Thm.prop_of th]
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1458
                  val deps = these (deps_of status th)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1459
                  val num_nontrivial = num_nontrivial |> not (null deps) ? Integer.add 1
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1460
                  val learns = (name, parents, feats, deps) :: learns
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1461
                  val (learns, next_commit) =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1462
                    if Timer.checkRealTimer timer > next_commit then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1463
                      (commit false learns [] []; ([], next_commit_time ()))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1464
                    else
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1465
                      (learns, next_commit)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1466
                  val timed_out = Timer.checkRealTimer timer > learn_timeout
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1467
                in
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1468
                  (learns, (num_nontrivial, next_commit, timed_out))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1469
                end
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1470
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1471
            val (num_new_facts, num_nontrivial) =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1472
              if no_new_facts then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1473
                (0, 0)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1474
              else
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1475
                let
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1476
                  val new_facts = facts
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1477
                    |> sort (crude_thm_ord ctxt o apply2 snd)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1478
                    |> attach_parents_to_facts []
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1479
                    |> filter_out (is_in_access_G o snd)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1480
                  val (learns, (num_nontrivial, _, _)) =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1481
                    ([], (0, next_commit_time (), false))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1482
                    |> fold learn_new_fact new_facts
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1483
                in
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1484
                  commit true learns [] []; (length new_facts, num_nontrivial)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1485
                end
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1486
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1487
            fun relearn_old_fact _ (accum as (_, (_, _, true))) = accum
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1488
              | relearn_old_fact ((_, (_, status)), th)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1489
                  ((relearns, flops), (num_nontrivial, next_commit, _)) =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1490
                let
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1491
                  val name = nickname_of_thm th
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1492
                  val (num_nontrivial, relearns, flops) =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1493
                    (case deps_of status th of
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1494
                      SOME deps => (num_nontrivial + 1, (name, deps) :: relearns, flops)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1495
                    | NONE => (num_nontrivial, relearns, name :: flops))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1496
                  val (relearns, flops, next_commit) =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1497
                    if Timer.checkRealTimer timer > next_commit then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1498
                      (commit false [] relearns flops; ([], [], next_commit_time ()))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1499
                    else
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1500
                      (relearns, flops, next_commit)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1501
                  val timed_out = Timer.checkRealTimer timer > learn_timeout
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1502
                in
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1503
                  ((relearns, flops), (num_nontrivial, next_commit, timed_out))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1504
                end
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1505
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1506
            val num_nontrivial =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1507
              if not run_prover then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1508
                num_nontrivial
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1509
              else
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1510
                let
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1511
                  val max_isar = 1000 * max_dependencies
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1512
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1513
                  fun priority_of th =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1514
                    Random.random_range 0 max_isar +
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1515
                    (case try (Graph.get_node access_G) (nickname_of_thm th) of
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1516
                      SOME (Isar_Proof, _, deps) => ~100 * length deps
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1517
                    | SOME (Automatic_Proof, _, _) => 2 * max_isar
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1518
                    | SOME (Isar_Proof_wegen_Prover_Flop, _, _) => max_isar
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1519
                    | NONE => 0)
57005
33f3d2ea803d store all MaSh data on the Isabelle side, in preparation for replacing 'mash.py' with ML solution
blanchet
parents: 56995
diff changeset
  1520
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1521
                  val old_facts = facts
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1522
                    |> filter is_in_access_G
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1523
                    |> map (`(priority_of o snd))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1524
                    |> sort (int_ord o apply2 fst)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1525
                    |> map snd
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1526
                  val ((relearns, flops), (num_nontrivial, _, _)) =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1527
                    (([], []), (num_nontrivial, next_commit_time (), false))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1528
                    |> fold relearn_old_fact old_facts
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1529
                in
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1530
                  commit true [] relearns flops; num_nontrivial
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1531
                end
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1532
          in
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1533
            if verbose orelse auto_level < 2 then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1534
              "Learned " ^ string_of_int num_new_facts ^ " fact" ^ plural_s num_new_facts ^
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1535
              " and " ^ string_of_int num_nontrivial ^ " nontrivial " ^
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1536
              (if run_prover then "automatic and " else "") ^ "Isar proof" ^
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1537
              plural_s num_nontrivial ^
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1538
              (if verbose then " in " ^ string_of_time (Timer.checkRealTimer timer) else "")
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1539
            else
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1540
              ""
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1541
          end
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1542
      end)
48308
89674e5a4d35 make tracing an option
blanchet
parents: 48306
diff changeset
  1543
  end
48304
50e64af9c829 more work on MaSh
blanchet
parents: 48303
diff changeset
  1544
54123
271a8377656f remove overloading of "max_facts" -- it already controls the number of facts passed to ATPs for 'learn_prover'
blanchet
parents: 54115
diff changeset
  1545
fun mash_learn ctxt (params as {provers, timeout, ...}) fact_override chained run_prover =
48316
252f45c04042 drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents: 48315
diff changeset
  1546
  let
48396
dd82d190c2af name tuning
blanchet
parents: 48395
diff changeset
  1547
    val css = Sledgehammer_Fact.clasimpset_rule_table_of ctxt
48395
85a7fb65507a learning should honor the fact override and the chained facts
blanchet
parents: 48394
diff changeset
  1548
    val ctxt = ctxt |> Config.put instantiate_inducts false
58919
82a71046dce8 prefer explicit Keyword.keywords;
wenzelm
parents: 58843
diff changeset
  1549
    val facts =
82a71046dce8 prefer explicit Keyword.keywords;
wenzelm
parents: 58843
diff changeset
  1550
      nearly_all_facts ctxt false fact_override Keyword.empty_keywords css chained [] @{prop True}
60641
f6e8293747fd clarified context;
wenzelm
parents: 60640
diff changeset
  1551
      |> sort (crude_thm_ord ctxt o apply2 snd o swap)
48404
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1552
    val num_facts = length facts
0a261b4aa093 relearn ATP proofs
blanchet
parents: 48403
diff changeset
  1553
    val prover = hd provers
57006
blanchet
parents: 57005
diff changeset
  1554
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
  1555
    fun learn auto_level run_prover =
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1556
      mash_learn_facts ctxt params prover auto_level run_prover one_year facts
58843
521cea5fa777 discontinued obsolete Output.urgent_message;
wenzelm
parents: 57757
diff changeset
  1557
      |> writeln
48316
252f45c04042 drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents: 48315
diff changeset
  1558
  in
50484
8ec31bdb9d36 adopt the neutral "prover" terminology for MaSh rather than the ambiguous/wrong ATP terminology (which sometimes excludes SMT solvers)
blanchet
parents: 50450
diff changeset
  1559
    if run_prover then
58843
521cea5fa777 discontinued obsolete Output.urgent_message;
wenzelm
parents: 57757
diff changeset
  1560
      (writeln ("MaShing through " ^ string_of_int num_facts ^ " fact" ^
57017
blanchet
parents: 57014
diff changeset
  1561
         plural_s num_facts ^ " for automatic proofs (" ^ quote prover ^ " timeout: " ^
blanchet
parents: 57014
diff changeset
  1562
         string_of_time timeout ^ ").\n\nCollecting Isar proofs first...");
50340
72519bf5f135 simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents: 50339
diff changeset
  1563
       learn 1 false;
63692
1bc4bc2c9fd1 killed final stops in Sledgehammer and friends
blanchet
parents: 63518
diff changeset
  1564
       writeln "Now collecting automatic proofs\n\
1bc4bc2c9fd1 killed final stops in Sledgehammer and friends
blanchet
parents: 63518
diff changeset
  1565
         \This may take several hours; you can safely stop the learning process at any point";
50340
72519bf5f135 simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents: 50339
diff changeset
  1566
       learn 0 true)
72519bf5f135 simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents: 50339
diff changeset
  1567
    else
58843
521cea5fa777 discontinued obsolete Output.urgent_message;
wenzelm
parents: 57757
diff changeset
  1568
      (writeln ("MaShing through " ^ string_of_int num_facts ^ " fact" ^
57006
blanchet
parents: 57005
diff changeset
  1569
         plural_s num_facts ^ " for Isar proofs...");
50340
72519bf5f135 simplify MaSh term patterns + add missing global facts if there aren't too many
blanchet
parents: 50339
diff changeset
  1570
       learn 0 false)
48316
252f45c04042 drastic overhaul of MaSh data structures + fixed a few performance issues
blanchet
parents: 48315
diff changeset
  1571
  end
48249
2bd242c56c90 dummy implementation
blanchet
parents: 48248
diff changeset
  1572
63697
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
  1573
fun mash_can_suggest_facts ctxt =
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1574
  (case get_state ctxt of
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1575
    NONE => false
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1576
  | SOME {access_G, ...} => not (Graph.is_empty access_G))
63697
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
  1577
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1578
fun mash_can_suggest_facts_fast ctxt =
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1579
  (case peek_state ctxt of
63697
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
  1580
    NONE => false
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1581
  | SOME (_, {access_G, ...}) => not (Graph.is_empty access_G))
50311
c9d7ccd090e1 tuned order of functions
blanchet
parents: 50310
diff changeset
  1582
57274
0acfdb151e52 more generous formula -- there are lots of duplicates out there
blanchet
parents: 57273
diff changeset
  1583
(* Generate more suggestions than requested, because some might be thrown out later for various
0acfdb151e52 more generous formula -- there are lots of duplicates out there
blanchet
parents: 57273
diff changeset
  1584
   reasons (e.g., duplicates). *)
0acfdb151e52 more generous formula -- there are lots of duplicates out there
blanchet
parents: 57273
diff changeset
  1585
fun generous_max_suggestions max_facts = 3 * max_facts div 2 + 25
50383
4274b25ff4e7 take proximity into account for MaSh + fix a debilitating bug in feature generation
blanchet
parents: 50382
diff changeset
  1586
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1587
val mepo_weight = 0.5 (* FUDGE *)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1588
val mash_weight = 0.5 (* FUDGE *)
50814
4247cbd78aaf export MeSh data as well
blanchet
parents: 50755
diff changeset
  1589
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1590
val max_facts_to_learn_before_query = 100 (* FUDGE *)
53152
cbd3c7c48d2c learn new facts on query if there aren't too many of them in MaSh
blanchet
parents: 53150
diff changeset
  1591
63697
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
  1592
(* The threshold should be large enough so that MaSh does not get activated for Auto Sledgehammer. *)
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
  1593
val min_secs_for_learning = 10
48318
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
  1594
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1595
fun relevant_facts ctxt (params as {verbose, learn, fact_filter, timeout, ...}) prover
57405
1f49da059947 reintroduced 'extra features' + only print message in verbose mode
blanchet
parents: 57404
diff changeset
  1596
    max_facts ({add, only, ...} : fact_override) hyp_ts concl_t facts =
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
  1597
  if not (subset (op =) (the_list fact_filter, fact_filters)) then
63692
1bc4bc2c9fd1 killed final stops in Sledgehammer and friends
blanchet
parents: 63518
diff changeset
  1598
    error ("Unknown fact filter: " ^ quote (the fact_filter))
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
  1599
  else if only then
57150
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
  1600
    [("", map fact_of_raw_fact facts)]
48321
c552d7f1720b learn from minimized ATP proofs
blanchet
parents: 48320
diff changeset
  1601
  else if max_facts <= 0 orelse null facts then
51010
afd0213a3dab tuned data structure
blanchet
parents: 51008
diff changeset
  1602
    [("", [])]
48288
255c6e1fd505 rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents: 48251
diff changeset
  1603
  else
255c6e1fd505 rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents: 48251
diff changeset
  1604
    let
60649
e007aa6a8aa2 more explicit use of context and elimination of Thm.theory_of_thm, although unclear (and untested?) situations remain;
wenzelm
parents: 60641
diff changeset
  1605
      val thy_name = Context.theory_name (Proof_Context.theory_of ctxt)
57407
140e16bc2a40 use right theory name for theorems in evaluation driver
blanchet
parents: 57406
diff changeset
  1606
57555
f60d70566525 also learn when 'fact_filter =' is set explicitly
blanchet
parents: 57554
diff changeset
  1607
      fun maybe_launch_thread exact min_num_facts_to_learn =
59471
ca459352d8c5 more explicit indication of Async_Manager_Legacy as Proof General legacy;
wenzelm
parents: 59172
diff changeset
  1608
        if not (Async_Manager_Legacy.has_running_threads MaShN) andalso
54816
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1609
           Time.toSeconds timeout >= min_secs_for_learning then
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1610
          let val timeout = time_mult learn_timeout_slack timeout in
57405
1f49da059947 reintroduced 'extra features' + only print message in verbose mode
blanchet
parents: 57404
diff changeset
  1611
            (if verbose then
58843
521cea5fa777 discontinued obsolete Output.urgent_message;
wenzelm
parents: 57757
diff changeset
  1612
               writeln ("Started MaShing through " ^
63697
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
  1613
                 (if exact then "" else "up to ") ^ string_of_int min_num_facts_to_learn ^
63692
1bc4bc2c9fd1 killed final stops in Sledgehammer and friends
blanchet
parents: 63518
diff changeset
  1614
                 " fact" ^ plural_s min_num_facts_to_learn ^ " in the background")
57405
1f49da059947 reintroduced 'extra features' + only print message in verbose mode
blanchet
parents: 57404
diff changeset
  1615
             else
1f49da059947 reintroduced 'extra features' + only print message in verbose mode
blanchet
parents: 57404
diff changeset
  1616
               ());
54816
10d48c2a3e32 made timeouts in Sledgehammer not be 'option's -- simplified lots of code
blanchet
parents: 54799
diff changeset
  1617
            launch_thread timeout
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1618
              (fn () => (true, mash_learn_facts ctxt params prover 2 false timeout facts))
48319
340187063d84 use async manager to manage MaSh learners to make sure they get killed cleanly
blanchet
parents: 48318
diff changeset
  1619
          end
48318
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
  1620
        else
325c8fd0d762 more consolidation of MaSh code
blanchet
parents: 48316
diff changeset
  1621
          ()
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
  1622
63697
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
  1623
      val mash_enabled = is_mash_enabled ()
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1624
      val mash_fast = mash_can_suggest_facts_fast ctxt
63697
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
  1625
57555
f60d70566525 also learn when 'fact_filter =' is set explicitly
blanchet
parents: 57554
diff changeset
  1626
      fun please_learn () =
63697
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
  1627
        if mash_fast then
64522
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1628
          (case get_state ctxt of
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1629
            NONE => maybe_launch_thread false (length facts)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1630
          | SOME {access_G, xtabs = ((num_facts0, _), _), ...} =>
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1631
            let
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1632
              val is_in_access_G = is_fact_in_graph access_G o snd
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1633
              val min_num_facts_to_learn = length facts - num_facts0
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1634
            in
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1635
              if min_num_facts_to_learn <= max_facts_to_learn_before_query then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1636
                (case length (filter_out is_in_access_G facts) of
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1637
                  0 => ()
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1638
                | num_facts_to_learn =>
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1639
                  if num_facts_to_learn <= max_facts_to_learn_before_query then
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1640
                    mash_learn_facts ctxt params prover 2 false timeout facts
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1641
                    |> (fn "" => () | s => writeln (MaShN ^ ": " ^ s))
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1642
                  else
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1643
                    maybe_launch_thread true num_facts_to_learn)
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1644
              else
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1645
                maybe_launch_thread false min_num_facts_to_learn
b66f8caf86b6 made MaSh faster and less likely to hang seemingly forever
blanchet
parents: 63698
diff changeset
  1646
            end)
63697
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
  1647
        else
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
  1648
          maybe_launch_thread false (length facts)
57555
f60d70566525 also learn when 'fact_filter =' is set explicitly
blanchet
parents: 57554
diff changeset
  1649
f60d70566525 also learn when 'fact_filter =' is set explicitly
blanchet
parents: 57554
diff changeset
  1650
      val _ =
f60d70566525 also learn when 'fact_filter =' is set explicitly
blanchet
parents: 57554
diff changeset
  1651
        if learn andalso mash_enabled andalso fact_filter <> SOME mepoN then please_learn () else ()
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
  1652
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1653
      val effective_fact_filter =
55286
blanchet
parents: 55202
diff changeset
  1654
        (case fact_filter of
57431
02c408aed5ee killed Python version of MaSh, now that the SML version works adequately
blanchet
parents: 57409
diff changeset
  1655
          SOME ff => ff
63697
0afe49623cf9 avoid loading MaSh file first time around for higher responsiveness of Sledgehammer
blanchet
parents: 63696
diff changeset
  1656
        | NONE => if mash_enabled andalso mash_fast then meshN else mepoN)
54143
18def1c73c79 make sure add: doesn't add duplicates, and works for [no_atp] facts
blanchet
parents: 54141
diff changeset
  1657
18def1c73c79 make sure add: doesn't add duplicates, and works for [no_atp] facts
blanchet
parents: 54141
diff changeset
  1658
      val unique_facts = drop_duplicate_facts facts
48288
255c6e1fd505 rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents: 48251
diff changeset
  1659
      val add_ths = Attrib.eval_thms ctxt add
54143
18def1c73c79 make sure add: doesn't add duplicates, and works for [no_atp] facts
blanchet
parents: 54141
diff changeset
  1660
51004
5f2788c38127 distinguish raw and non-raw facts, using raw for 10 000s of facts and non-raw after selection of some hundreds
blanchet
parents: 51003
diff changeset
  1661
      fun in_add (_, th) = member Thm.eq_thm_prop add_ths th
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
  1662
51003
198cb05fb35b report (MeSh, MePo, MaSh) triple, to be able to use different filters in different slices
blanchet
parents: 51001
diff changeset
  1663
      fun add_and_take accepts =
198cb05fb35b report (MeSh, MePo, MaSh) triple, to be able to use different filters in different slices
blanchet
parents: 51001
diff changeset
  1664
        (case add_ths of
198cb05fb35b report (MeSh, MePo, MaSh) triple, to be able to use different filters in different slices
blanchet
parents: 51001
diff changeset
  1665
           [] => accepts
57150
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
  1666
         | _ =>
f591096a9c94 add option to keep duplicates, for more precise evaluation of relevance filters
blanchet
parents: 57133
diff changeset
  1667
           (unique_facts |> filter in_add |> map fact_of_raw_fact) @ (accepts |> filter_out in_add))
48293
914ca0827804 renamed Sledgehammer options
blanchet
parents: 48292
diff changeset
  1668
        |> take max_facts
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
  1669
48406
b002cc16aa99 honor suggested MaSh weights
blanchet
parents: 48404
diff changeset
  1670
      fun mepo () =
54143
18def1c73c79 make sure add: doesn't add duplicates, and works for [no_atp] facts
blanchet
parents: 54141
diff changeset
  1671
        (mepo_suggested_facts ctxt params max_facts NONE hyp_ts concl_t unique_facts
54091
4df62d7eae34 parallelize MeSh
blanchet
parents: 54089
diff changeset
  1672
         |> weight_facts_steeply, [])
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
  1673
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
  1674
      fun mash () =
61322
44f4ffe2b210 speed up MaSh duplicate check
blanchet
parents: 61321
diff changeset
  1675
        mash_suggested_facts ctxt thy_name params (generous_max_suggestions max_facts) hyp_ts
44f4ffe2b210 speed up MaSh duplicate check
blanchet
parents: 61321
diff changeset
  1676
          concl_t facts
53140
a1235e90da5f pour extra features from proximate facts into goal, in exporter
blanchet
parents: 53137
diff changeset
  1677
        |>> weight_facts_steeply
57018
142950e9c7e2 more flexible environment variable
blanchet
parents: 57017
diff changeset
  1678
48314
ee33ba3c0e05 added option to control which fact filter is used
blanchet
parents: 48313
diff changeset
  1679
      val mess =
51003
198cb05fb35b report (MeSh, MePo, MaSh) triple, to be able to use different filters in different slices
blanchet
parents: 51001
diff changeset
  1680
        (* the order is important for the "case" expression below *)
54091
4df62d7eae34 parallelize MeSh
blanchet
parents: 54089
diff changeset
  1681
        [] |> effective_fact_filter <> mepoN ? cons (mash_weight, mash)
4df62d7eae34 parallelize MeSh
blanchet
parents: 54089
diff changeset
  1682
           |> effective_fact_filter <> mashN ? cons (mepo_weight, mepo)
4df62d7eae34 parallelize MeSh
blanchet
parents: 54089
diff changeset
  1683
           |> Par_List.map (apsnd (fn f => f ()))
61322
44f4ffe2b210 speed up MaSh duplicate check
blanchet
parents: 61321
diff changeset
  1684
      val mesh =
44f4ffe2b210 speed up MaSh duplicate check
blanchet
parents: 61321
diff changeset
  1685
        mesh_facts (fact_distinct (op aconv)) (eq_snd (gen_eq_thm ctxt)) max_facts mess
44f4ffe2b210 speed up MaSh duplicate check
blanchet
parents: 61321
diff changeset
  1686
        |> add_and_take
48288
255c6e1fd505 rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents: 48251
diff changeset
  1687
    in
55286
blanchet
parents: 55202
diff changeset
  1688
      (case (fact_filter, mess) of
51024
98fb341d32e3 distinguish MeSh and smart -- with smart, allow combinations of MaSh, MeSh, and MePo in different slices -- and use MaSh also with SMT solvers, based on evaluation
blanchet
parents: 51020
diff changeset
  1689
        (NONE, [(_, (mepo, _)), (_, (mash, _))]) =>
57658
f55c173a3cbc beware of duplicate fact names
blanchet
parents: 57574
diff changeset
  1690
        [(meshN, mesh),
f55c173a3cbc beware of duplicate fact names
blanchet
parents: 57574
diff changeset
  1691
         (mepoN, mepo |> map fst |> add_and_take),
51010
afd0213a3dab tuned data structure
blanchet
parents: 51008
diff changeset
  1692
         (mashN, mash |> map fst |> add_and_take)]
55286
blanchet
parents: 55202
diff changeset
  1693
      | _ => [(effective_fact_filter, mesh)])
48288
255c6e1fd505 rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents: 48251
diff changeset
  1694
    end
255c6e1fd505 rationalize relevance filter, slowing moving code from Iter to MaSh
blanchet
parents: 48251
diff changeset
  1695
48248
b6eb45a52c28 split relevance filter code into three files
blanchet
parents:
diff changeset
  1696
end;