completely rewrote SH compress; added two parameters for experimentation/fine grained control
authorsmolkas
Tue, 09 Jul 2013 18:45:06 +0200
changeset 52556 c8357085217c
parent 52555 6811291d1869
child 52557 92ed2926596d
completely rewrote SH compress; added two parameters for experimentation/fine grained control
src/HOL/Tools/Sledgehammer/sledgehammer_compress.ML
src/HOL/Tools/Sledgehammer/sledgehammer_isar.ML
src/HOL/Tools/Sledgehammer/sledgehammer_minimize.ML
src/HOL/Tools/Sledgehammer/sledgehammer_preplay.ML
src/HOL/Tools/Sledgehammer/sledgehammer_proof.ML
src/HOL/Tools/Sledgehammer/sledgehammer_provers.ML
src/HOL/Tools/Sledgehammer/sledgehammer_reconstruct.ML
src/HOL/Tools/Sledgehammer/sledgehammer_util.ML
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_compress.ML	Tue Jul 09 18:44:59 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_compress.ML	Tue Jul 09 18:45:06 2013 +0200
@@ -2,18 +2,22 @@
     Author:     Jasmin Blanchette, TU Muenchen
     Author:     Steffen Juilf Smolka, TU Muenchen
 
-Compression of reconstructed isar proofs.
+Compression of isar proofs.
+
+PRE CONDITION: the proof must be labeled canocially, see
+Slegehammer_Proof.relabel_proof_canonically
 *)
 
 signature SLEDGEHAMMER_COMPRESS =
 sig
   type isar_proof = Sledgehammer_Proof.isar_proof
-  type preplay_time = Sledgehammer_Preplay.preplay_time
-  val compress_and_preplay_proof :
-    bool -> Proof.context -> string -> string -> bool -> Time.time option
-    -> bool -> real -> isar_proof -> isar_proof * (bool * preplay_time)
+  type preplay_interface = Sledgehammer_Preplay.preplay_interface
+
+  val compress_proof :
+    real -> int -> real -> preplay_interface -> isar_proof -> isar_proof
 end
 
+
 structure Sledgehammer_Compress : SLEDGEHAMMER_COMPRESS =
 struct
 
@@ -21,231 +25,345 @@
 open Sledgehammer_Proof
 open Sledgehammer_Preplay
 
-(* Parameters *)
-val merge_timeout_slack = 1.2
+
+(*** util ***)
 
-(* Data structures, orders *)
-val label_ord = prod_ord int_ord fast_string_ord o pairself swap
-structure Label_Table = Table(
-  type key = label
-  val ord = label_ord)
-
-(* clean vector interface *)
-fun get i v = Vector.sub (v, i)
-fun replace x i v = Vector.update (v, i, x)
-fun update f i v = replace (get i v |> f) i v
-fun v_fold_index f v s =
-  Vector.foldl (fn (x, (i, s)) => (i+1, f (i, x) s)) (0, s) v |> snd
+(* traverses steps in post-order and collects the steps with the given labels *)
+fun collect_successors steps lbls =
+  let
+    fun do_steps _ ([], accu) = ([], accu)
+      | do_steps [] (lbls, accu) = (lbls, accu)
+      | do_steps (step::steps) (lbls, accu) =
+          do_steps steps (do_step step (lbls, accu))
 
-(* Queue interface to table *)
-fun pop tab key =
-  (let val v = hd (Inttab.lookup_list tab key) in
-    (v, Inttab.remove_list (op =) (key, v) tab)
-  end) handle List.Empty => raise Fail "sledgehammer_compress: pop"
-fun pop_max tab = pop tab (fst (the (Inttab.max tab)))
-  handle Option.Option => raise Fail "sledgehammer_compress: pop_max"
-fun add_list tab xs = fold (Inttab.insert_list (op =)) xs tab
+    and do_step (Let _) x = x
+      | do_step (step as Prove (_, _, l, _, subproofs, _)) x =
+        (case do_subproofs subproofs x of
+          ([], accu) => ([], accu)
+        | (lbls as l'::lbls', accu) =>
+            if l=l'
+              then (lbls', step::accu)
+              else (lbls, accu))
 
-(* Main function for compresing proofs *)
-fun compress_and_preplay_proof debug ctxt type_enc lam_trans preplay
-      preplay_timeout preplay_trace isar_compress proof =
-  let
-    (* 60 seconds seems like a good interpreation of "no timeout" *)
-    val preplay_timeout = preplay_timeout |> the_default (seconds 60.0)
-
-    (* handle metis preplay fail *)
-    local
-      val metis_fail = Unsynchronized.ref false
-    in
-      fun handle_metis_fail try_metis () =
-        try_metis () handle exn =>
-          (if Exn.is_interrupt exn orelse debug then reraise exn
-           else metis_fail := true; some_preplay_time)
-      fun get_time lazy_time =
-        if !metis_fail andalso not (Lazy.is_finished lazy_time)
-          then some_preplay_time
-          else Lazy.force lazy_time
-      val metis_fail = fn () => !metis_fail
-    end
+    and do_subproofs [] x = x
+      | do_subproofs (proof::subproofs) x =
+          (case do_steps (steps_of_proof proof) x of
+            ([], accu) => ([], accu)
+          | x => do_subproofs subproofs x)
+  in
+    case do_steps steps (lbls, []) of
+      ([], succs) => rev succs
+    | _ => raise Fail "Sledgehammer_Compress: collect_successors"
+  end
 
-    (* compress top level steps - do not compress subproofs *)
-    fun compress_top_level on_top_level ctxt n steps =
-    let
-      (* proof step vector *)
-      val step_vect = steps |> map SOME |> Vector.fromList
-      val n_metis = add_metis_steps_top_level steps 0
-      val target_n_metis = Real.fromInt n_metis / isar_compress |> Real.round
-
-      (* table for mapping from (top-level-)label to step_vect position *)
-      fun update_table (i, Prove (_, _, l, _, _, _)) =
-            Label_Table.update_new (l, i)
-        | update_table _ = I
-      val label_index_table = fold_index update_table steps Label_Table.empty
-      val lookup_indices = map_filter (Label_Table.lookup label_index_table)
+(* traverses steps in reverse post-order and inserts the given updates *)
+fun update_steps steps updates =
+  let
+    fun do_steps [] updates = ([], updates)
+      | do_steps steps [] = (steps, [])
+      | do_steps (step::steps) updates =
+          do_step step (do_steps steps updates)
 
-      (* proof step references *)
-      fun refs step =
-        fold_isar_step
-          (byline_of_step
-           #> (fn SOME (By_Metis (lfs, _)) => append (lookup_indices lfs)
-                | _ => I))
-          step []
-      val refed_by_vect =
-        Vector.tabulate (Vector.length step_vect, K [])
-        |> fold_index (fn (i, step) => fold (update (cons i)) (refs step)) steps
-        |> Vector.map rev (* after rev, indices are sorted in ascending order *)
+    and do_step step (steps, []) = (step::steps, [])
+      | do_step (step as Let _) (steps, updates) = (step::steps, updates)
+      | do_step (Prove (qs, xs, l, t, subproofs, by))
+          (steps, updates as
+           Prove(qs', xs', l', t', subproofs', by') :: updates') =
+          let
+            val (subproofs, updates) =
+              if l=l'
+                then do_subproofs subproofs' updates'
+                else do_subproofs subproofs updates
+          in
+            if l=l'
+              then (Prove (qs', xs', l', t', subproofs, by') :: steps,
+                    updates)
+              else (Prove (qs, xs, l, t, subproofs, by) :: steps,
+                    updates)
+          end
+      | do_step _ _ =
+          raise Fail "Sledgehammer_Compress: update_steps (invalid update)"
 
-      (* candidates for elimination, use table as priority queue (greedy
-         algorithm) *)
-      fun add_if_cand step_vect (i, [j]) =
-          ((case (the (get i step_vect), the (get j step_vect)) of
-            (Prove (_, Fix [], _, t, _, By_Metis _),
-             Prove (_, _, _, _, _, By_Metis _)) => cons (Term.size_of_term t, i)
-          | _ => I)
-            handle Option.Option => raise Fail "sledgehammer_compress: add_if_cand")
-        | add_if_cand _ _ = I
-      val cand_tab =
-        v_fold_index (add_if_cand step_vect) refed_by_vect []
-        |> Inttab.make_list
-
-      (* cache metis preplay times in lazy time vector *)
-      val metis_time =
-        Vector.map
-          (if not preplay then K (zero_preplay_time) #> Lazy.value
-           else
-             the
-             #> try_metis debug preplay_trace type_enc lam_trans ctxt
-              preplay_timeout
-             #> handle_metis_fail
-             #> Lazy.lazy)
-          step_vect
-        handle Option.Option => raise Fail "sledgehammer_compress: metis_time"
+    and do_subproofs [] updates = ([], updates)
+      | do_subproofs steps [] = (steps, [])
+      | do_subproofs (proof::subproofs) updates =
+          do_proof proof (do_subproofs subproofs updates)
 
-      fun sum_up_time lazy_time_vector =
-        Vector.foldl
-          (apfst get_time #> uncurry add_preplay_time)
-          zero_preplay_time lazy_time_vector
-
-      (* Merging *)
-      fun merge
-          (Prove (_, Fix [], lbl1, _, subproofs1, By_Metis (lfs1, gfs1)))
-          (Prove (qs2, fix, lbl2, t, subproofs2, By_Metis (lfs2, gfs2))) =
-            let
-              val lfs = remove (op =) lbl1 lfs2 |> union (op =) lfs1
-              val gfs = union (op =) gfs1 gfs2
-              val subproofs = subproofs1 @ subproofs2
-            in Prove (qs2, fix, lbl2, t, subproofs, By_Metis (lfs, gfs)) end
-        | merge _ _ = raise Fail "sledgehammer_compress: unmergeable Isar steps"
+    and do_proof proof (proofs, []) = (proof :: proofs, [])
+      | do_proof (Proof (fix, assms, steps)) (proofs, updates) =
+          let val (steps, updates) = do_steps steps updates in
+            (Proof (fix, assms, steps) :: proofs, updates)
+          end
+  in
+    case do_steps steps (rev updates) of
+      (steps, []) => steps
+    | _ => raise Fail "Sledgehammer_Compress: update_steps"
+  end
 
-      fun try_merge metis_time (s1, i) (s2, j) =
-        if not preplay then (merge s1 s2 |> SOME, metis_time)
-        else
-          (case get i metis_time |> Lazy.force of
-            (true, _) => (NONE, metis_time)
-          | (_, t1) =>
-            (case get j metis_time |> Lazy.force of
-              (true, _) => (NONE, metis_time)
-            | (_, t2) =>
-              let
-                val s12 = merge s1 s2
-                val timeout = time_mult merge_timeout_slack (Time.+(t1, t2))
-              in
-                case try_metis_quietly debug preplay_trace type_enc
-                                                lam_trans ctxt timeout s12 () of
-                  (true, _) => (NONE, metis_time)
-                | exact_time =>
-                  (SOME s12, metis_time
-                             |> replace (zero_preplay_time |> Lazy.value) i
-                             |> replace (Lazy.value exact_time) j)
+(* tries merging the first step into the second step *)
+fun try_merge
+  (Prove (_, Fix [], lbl1, _, [], By_Metis (lfs1, gfs1)))
+  (Prove (qs2, fix, lbl2, t, subproofs, By_Metis (lfs2, gfs2))) =
+      let
+        val lfs = remove (op =) lbl1 lfs2 |> union (op =) lfs1
+        val gfs = union (op =) gfs1 gfs2
+      in
+        SOME (Prove (qs2, fix, lbl2, t, subproofs, By_Metis (lfs, gfs)))
+      end
+  | try_merge _ _ = NONE
 
-              end))
+
+
+(*** main function ***)
+
+(* PRE CONDITION: the proof must be labeled canocially, see
+   Slegehammer_Proof.relabel_proof_canonically *)
+fun compress_proof isar_compress isar_compress_degree merge_timeout_slack
+  { get_time, set_time, preplay_quietly, preplay_fail, ... } proof =
+  if isar_compress <= 1.0 then
+    proof
+  else
+  let
 
-      fun merge_steps metis_time step_vect refed_by cand_tab n' n_metis' =
-        if Inttab.is_empty cand_tab
-          orelse n_metis' <= target_n_metis
-          orelse (on_top_level andalso n'<3)
-          orelse metis_fail()
-        then
-          (Vector.foldr
-             (fn (NONE, steps) => steps | (SOME s, steps) => s :: steps)
-             [] step_vect,
-           sum_up_time metis_time)
-        else
-          let
-            val (i, cand_tab) = pop_max cand_tab
-            val j = get i refed_by |> the_single
-            val s1 = get i step_vect |> the
-            val s2 = get j step_vect |> the
-          in
-            case try_merge metis_time (s1, i) (s2, j) of
-              (NONE, metis_time) =>
-              merge_steps metis_time step_vect refed_by cand_tab n' n_metis'
-            | (s, metis_time) =>
-            let
-              val refs_s1 = refs s1
-              val refed_by = refed_by |> fold
-                (update (Ord_List.remove int_ord i #> Ord_List.insert int_ord j))
-                refs_s1
-              val shared_refs = Ord_List.inter int_ord refs_s1 (refs s2)
-              val new_candidates =
-                fold (add_if_cand step_vect)
-                  (map (fn i => (i, get i refed_by)) shared_refs) []
-              val cand_tab = add_list cand_tab new_candidates
-              val step_vect = step_vect |> replace NONE i |> replace s j
-            in
-              merge_steps metis_time step_vect refed_by cand_tab (n' - 1)
-                          (n_metis' - 1)
-            end
-          end
-        handle Option.Option => raise Fail "sledgehammer_compress: merge_steps"
-             | List.Empty => raise Fail "sledgehammer_compress: merge_steps"
-    in
-      merge_steps metis_time step_vect refed_by_vect cand_tab n n_metis
-    end
+    val (compress_further : unit -> bool,
+         decrement_step_count : unit -> unit) =
+      let
+        val number_of_steps = add_metis_steps (steps_of_proof proof) 0
+        val target_number_of_steps =
+          Real.fromInt number_of_steps / isar_compress
+          |> Real.round
+          |> curry Int.max 2 (* don't produce one-step isar proofs *)
+        val delta =
+          number_of_steps - target_number_of_steps |> Unsynchronized.ref
+      in
+        (fn () => not (preplay_fail ()) andalso !delta > 0,
+         fn () => delta := !delta - 1)
+      end
+
+
+    val (get_successors : label -> label list,
+         replace_successor: label -> label list -> label -> unit) =
+      let
 
-    fun do_proof on_top_level ctxt (Proof (Fix fix, Assume assms, steps)) =
-      let
-        (* Enrich context with top-level facts *)
-        val thy = Proof_Context.theory_of ctxt
-        (* TODO: add Skolem variables to context? *)
-        fun enrich_with_fact l t =
-          Proof_Context.put_thms false
-              (string_of_label l, SOME [Skip_Proof.make_thm thy t])
-        fun enrich_with_step (Prove (_, _, l, t, _, _)) = enrich_with_fact l t
-          | enrich_with_step _ = I
-        val enrich_with_steps = fold enrich_with_step
-        val enrich_with_assms = fold (uncurry enrich_with_fact)
-        val rich_ctxt =
-          ctxt |> enrich_with_assms assms |> enrich_with_steps steps
+        fun add_refs (Let _) tab = tab
+          | add_refs (Prove (_, _, l as v, _, _, By_Metis (lfs, _))) tab =
+              fold (fn lf as key => Canonical_Lbl_Tab.cons_list (key, v)) lfs tab
+
+        val tab =
+          Canonical_Lbl_Tab.empty
+          |> fold_isar_steps add_refs (steps_of_proof proof)
+          (* rev should have the same effect as sort canonical_label_ord *)
+          |> Canonical_Lbl_Tab.map (K rev)
+          |> Unsynchronized.ref
 
-        val n = List.length fix + List.length assms + List.length steps
+        fun get_successors l = Canonical_Lbl_Tab.lookup_list (!tab) l
+        fun set_successors l refs =
+          tab := Canonical_Lbl_Tab.update (l, refs) (!tab)
+        fun replace_successor old new dest =
+          set_successors dest
+            (get_successors dest |> Ord_List.remove canonical_label_ord old
+                                |> Ord_List.union canonical_label_ord new)
 
-        (* compress subproofs and top-levl steps *)
-        val ((steps, top_level_time), lower_level_time) =
-          steps |> do_subproofs rich_ctxt
-                |>> compress_top_level on_top_level rich_ctxt n
       in
-        (Proof (Fix fix, Assume assms, steps),
-          add_preplay_time lower_level_time top_level_time)
+         (get_successors, replace_successor)
       end
 
-    and do_subproofs ctxt subproofs =
+
+    (** elimination of trivial, one-step subproofs **)
+
+    fun elim_subproofs' time qs fix l t lfs gfs subs nontriv_subs =
+      if null subs orelse not (compress_further ()) then
+        (set_time l (false, time);
+         Prove (qs, fix, l, t, List.revAppend (nontriv_subs, subs),
+                By_Metis (lfs, gfs) ))
+      else
+        case subs of
+          ((sub as Proof(_, Assume assms, sub_steps)) :: subs) =>
+            (let
+
+              (* trivial subproofs have exactly one Prove step *)
+              val SOME (Prove (_, Fix [], l', _, [], By_Metis(lfs', gfs'))) =
+                (try the_single) sub_steps
+
+              (* only touch proofs that can be preplayed sucessfully *)
+              val (false, time') = get_time l'
+
+              (* merge steps *)
+              val subs'' = subs @ nontriv_subs
+              val lfs'' =
+                subtract (op =) (map fst assms) lfs'
+                |> union (op =) lfs
+              val gfs'' = union (op =) gfs' gfs
+              val by = By_Metis (lfs'', gfs'')
+              val step'' = Prove (qs, fix, l, t, subs'', by)
+
+              (* check if the modified step can be preplayed fast enough *)
+              val timeout = time_mult merge_timeout_slack (Time.+(time, time'))
+              val (false, time'') = preplay_quietly timeout step''
+
+            in
+              set_time l' zero_preplay_time; (* l' successfully eliminated! *)
+              map (replace_successor l' [l]) lfs';
+              decrement_step_count ();
+              elim_subproofs' time'' qs fix l t lfs'' gfs'' subs nontriv_subs
+            end
+            handle Bind =>
+              elim_subproofs' time qs fix l t lfs gfs subs (sub::nontriv_subs))
+        | _ => raise Fail "Sledgehammer_Compress: elim_subproofs'"
+
+
+    fun elim_subproofs (step as Let _) = step
+      | elim_subproofs
+        (step as Prove (qs, fix, l, t, subproofs, By_Metis(lfs, gfs))) =
+          if subproofs = [] then step else
+            case get_time l of
+              (true, _) => step (* timeout *)
+            | (false, time) =>
+                elim_subproofs' time qs fix l t lfs gfs subproofs []
+
+
+
+    (** top_level compression: eliminate steps by merging them into their
+        successors **)
+
+    fun compress_top_level steps =
       let
-        fun compress_each_and_collect_time compress subproofs =
-          let fun f_m proof time = compress proof ||> add_preplay_time time
-          in fold_map f_m subproofs zero_preplay_time end
-        val compress_subproofs =
-          compress_each_and_collect_time (do_proof false ctxt)
-        fun compress (Prove (qs, fix, l, t, subproofs, By_Metis facts)) =
-              let val (subproofs, time) = compress_subproofs subproofs
-              in (Prove (qs, fix, l, t, subproofs, By_Metis  facts), time) end
-          | compress atomic_step = (atomic_step, zero_preplay_time)
+
+        (* #successors, (size_of_term t, position) *)
+        fun cand_key (i, l, t_size) =
+          (get_successors l |> length, (t_size, i))
+
+        val compression_ord =
+          prod_ord int_ord (prod_ord (int_ord #> rev_order) int_ord)
+          #> rev_order
+
+        val cand_ord = pairself cand_key #> compression_ord
+
+        fun pop_next_cand candidates =
+          case max_list cand_ord candidates of
+            NONE => (NONE, [])
+          | cand as SOME (i, _, _) =>
+              (cand, filter_out (fn (j, _, _) => j=i) candidates)
+
+        val candidates =
+          let
+            fun add_cand (i, Let _) = I
+              | add_cand (i, Prove (_, _, l, t, _, _)) =
+                  cons (i, l, size_of_term t)
+          in
+            (steps
+            |> split_last |> fst (* last step must NOT be eliminated *)
+            |> fold_index add_cand) []
+          end
+
+        fun try_eliminate (cand as (i, l, _)) succ_lbls steps =
+          let
+            (* only touch steps that can be preplayed successfully *)
+            val (false, time) = get_time l
+
+            val succ_times = map (get_time #> (fn (false, t) => t)) succ_lbls
+
+            val timeslice =
+              time_mult (1.0 / (Real.fromInt (length succ_lbls))) time
+
+            val timeouts =
+              map (curry Time.+ timeslice #> time_mult merge_timeout_slack)
+                succ_times
+
+            val ((cand as Prove (_, _, l, _, _, By_Metis(lfs, _))) :: steps') =
+              drop i steps
+
+            (* FIXME: debugging *)
+            val _ = (if (label_of_step cand |> the) <> l then
+                      raise Fail "Sledgehammer_Compress: try_eliminate"
+                    else ())
+
+            val succs = collect_successors steps' succ_lbls
+            val succs' = map (try_merge cand #> the) succs
+
+            val preplay_times =
+              map (uncurry preplay_quietly) (timeouts ~~ succs')
+
+            (* ensure none of the modified successors timed out *)
+            val false = List.exists fst preplay_times
+
+            val (steps1, _::steps2) = chop i steps
+
+            (* replace successors with their modified versions *)
+            val steps2 = update_steps steps2 succs'
+
+          in
+            set_time l zero_preplay_time; (* candidate successfully eliminated *)
+            decrement_step_count ();
+            map (uncurry set_time) (succ_lbls ~~ preplay_times);
+            map (replace_successor l succ_lbls) lfs;
+            (* removing the step would mess up the indices
+               -> replace with dummy step instead *)
+            steps1 @ dummy_isar_step :: steps2
+          end
+          handle Bind => steps
+               | Match => steps
+               | Option.Option => steps
+
+        fun compression_loop candidates steps =
+          if not (compress_further ()) then
+            steps
+          else
+            case pop_next_cand candidates of
+              (NONE, _) => steps (* no more candidates for elimination *)
+            | (SOME (cand as (_, l, _)), candidates) =>
+              let
+                val successors = get_successors l
+              in
+                if length successors > isar_compress_degree
+                  then steps
+                  else compression_loop candidates
+                         (try_eliminate cand successors steps)
+              end
+
+
       in
-        compress_each_and_collect_time compress subproofs
+        compression_loop candidates steps
+        |> filter_out (fn step => step = dummy_isar_step)
       end
+
+
+
+    (** recusion over the proof tree **)
+    (*
+       Proofs are compressed bottom-up, beginning with the innermost
+       subproofs.
+       On the innermost proof level, the proof steps have no subproofs.
+       In the best case, these steps can be merged into just one step,
+       resulting in a trivial subproof. Going one level up, trivial subproofs
+       can be eliminated. In the best case, this once again leads to a proof
+       whose proof steps do not have subproofs. Applying this approach
+       recursively will result in a flat proof in the best cast.
+    *)
+
+    infix 1 ?>
+    fun x ?> f = if compress_further () then f x else x
+
+    fun do_proof (proof as (Proof (fix, assms, steps))) =
+      if compress_further ()
+        then Proof (fix, assms, do_steps steps)
+        else proof
+
+    and do_steps steps =
+      (* bottom-up: compress innermost proofs first *)
+      steps |> map (fn step => step ?> do_sub_levels)
+            ?> compress_top_level
+
+    and do_sub_levels (Let x) = Let x
+      | do_sub_levels (Prove (qs, xs, l, t, subproofs, by)) =
+          (* compress subproofs *)
+          Prove (qs, xs, l, t, map do_proof subproofs, by)
+            (* eliminate trivial subproofs *)
+            ?> elim_subproofs
+
   in
-    do_proof true ctxt proof
-    |> apsnd (pair (metis_fail ()))
+    do_proof proof
   end
 
+
 end
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_isar.ML	Tue Jul 09 18:44:59 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_isar.ML	Tue Jul 09 18:45:06 2013 +0200
@@ -100,6 +100,8 @@
    ("max_new_mono_instances", "smart"),
    ("isar_proofs", "smart"),
    ("isar_compress", "10"),
+   ("isar_compress_degree", "2"), (* TODO: document; right value?? *)
+   ("merge_timeout_slack", "1.2"), (* TODO: document *)
    ("slice", "true"),
    ("minimize", "smart"),
    ("preplay_timeout", "3"),
@@ -127,7 +129,7 @@
   ["debug", "verbose", "overlord", "type_enc", "strict", "lam_trans",
    "uncurried_aliases", "max_mono_iters", "max_new_mono_instances",
    "learn", "isar_proofs", "isar_compress", "timeout", "preplay_timeout"]
-(* TODO: add preplay_trace? *)
+(* TODO: add isar_compress_degree, merge_timeout_slack,  preplay_trace? *)
 
 val property_dependent_params = ["provers", "timeout"]
 
@@ -309,6 +311,8 @@
       lookup_option lookup_int "max_new_mono_instances"
     val isar_proofs = lookup_option lookup_bool "isar_proofs"
     val isar_compress = Real.max (1.0, lookup_real "isar_compress")
+    val isar_compress_degree = Int.max (1, lookup_int "isar_compress_degree")
+    val merge_timeout_slack = Real.max (1.0, lookup_real "merge_timeout_slack")
     val slice = mode <> Auto_Try andalso lookup_bool "slice"
     val minimize =
       if mode = Auto_Try then NONE else lookup_option lookup_bool "minimize"
@@ -325,8 +329,9 @@
      learn = learn, fact_filter = fact_filter, max_facts = max_facts,
      fact_thresholds = fact_thresholds, max_mono_iters = max_mono_iters,
      max_new_mono_instances = max_new_mono_instances, isar_proofs = isar_proofs,
-     isar_compress = isar_compress, slice = slice, minimize = minimize,
-     timeout = timeout, preplay_timeout = preplay_timeout,
+     isar_compress = isar_compress, isar_compress_degree = isar_compress_degree,
+     merge_timeout_slack = merge_timeout_slack, slice = slice,
+     minimize = minimize, timeout = timeout, preplay_timeout = preplay_timeout,
      preplay_trace = preplay_trace, expect = expect}
   end
 
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_minimize.ML	Tue Jul 09 18:44:59 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_minimize.ML	Tue Jul 09 18:45:06 2013 +0200
@@ -58,6 +58,7 @@
 fun test_facts ({debug, verbose, overlord, provers, max_mono_iters,
                  max_new_mono_instances, type_enc, strict, lam_trans,
                  uncurried_aliases, isar_proofs, isar_compress,
+                 isar_compress_degree, merge_timeout_slack,
                  preplay_timeout, preplay_trace, ...} : params)
                silent (prover : prover) timeout i n state facts =
   let
@@ -80,6 +81,8 @@
        fact_thresholds = (1.01, 1.01), max_mono_iters = max_mono_iters,
        max_new_mono_instances = max_new_mono_instances,
        isar_proofs = isar_proofs, isar_compress = isar_compress,
+       isar_compress_degree = isar_compress_degree,
+       merge_timeout_slack = merge_timeout_slack,
        slice = false, minimize = SOME false, timeout = timeout,
        preplay_timeout = preplay_timeout, preplay_trace = preplay_trace,
        expect = ""}
@@ -252,8 +255,8 @@
         ({debug, verbose, overlord, blocking, provers, type_enc, strict,
          lam_trans, uncurried_aliases, learn, fact_filter, max_facts,
          fact_thresholds, max_mono_iters, max_new_mono_instances, isar_proofs,
-         isar_compress, slice, minimize, timeout, preplay_timeout,
-         preplay_trace, expect} : params) =
+         isar_compress, isar_compress_degree, merge_timeout_slack, slice,
+         minimize, timeout, preplay_timeout, preplay_trace, expect} : params) =
   let
     fun lookup_override name default_value =
       case AList.lookup (op =) override_params name of
@@ -270,8 +273,9 @@
      learn = learn, fact_filter = fact_filter, max_facts = max_facts,
      fact_thresholds = fact_thresholds, max_mono_iters = max_mono_iters,
      max_new_mono_instances = max_new_mono_instances, isar_proofs = isar_proofs,
-     isar_compress = isar_compress, slice = slice, minimize = minimize,
-     timeout = timeout, preplay_timeout = preplay_timeout,
+     isar_compress = isar_compress, isar_compress_degree = isar_compress_degree,
+     merge_timeout_slack = merge_timeout_slack, slice = slice,
+     minimize = minimize, timeout = timeout, preplay_timeout = preplay_timeout,
      preplay_trace = preplay_trace, expect = expect}
   end
 
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_preplay.ML	Tue Jul 09 18:44:59 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_preplay.ML	Tue Jul 09 18:45:06 2013 +0200
@@ -7,16 +7,29 @@
 
 signature SLEDGEHAMMER_PREPLAY =
 sig
+  type isar_proof = Sledgehammer_Proof.isar_proof
   type isar_step = Sledgehammer_Proof.isar_step
+  type label = Sledgehammer_Proof.label
+
   eqtype preplay_time
   val zero_preplay_time : preplay_time
   val some_preplay_time : preplay_time
   val add_preplay_time : preplay_time -> preplay_time -> preplay_time
   val string_of_preplay_time : preplay_time -> string
-  val try_metis : bool -> bool -> string -> string -> Proof.context ->
-    Time.time -> isar_step -> unit -> preplay_time
-  val try_metis_quietly : bool -> bool -> string -> string -> Proof.context ->
-    Time.time -> isar_step -> unit -> preplay_time
+  val preplay : bool -> bool -> string -> string -> Proof.context ->
+    Time.time -> isar_step -> preplay_time
+
+  type preplay_interface =
+  { get_time : label -> preplay_time,
+    set_time : label -> preplay_time -> unit,
+    preplay_quietly : Time.time -> isar_step -> preplay_time,
+    preplay_fail : unit -> bool,
+    overall_preplay_stats : unit -> preplay_time * bool }
+
+  val proof_preplay_interface :
+    bool -> Proof.context -> string -> string -> bool -> Time.time option
+    -> bool -> isar_proof -> preplay_interface
+
 end
 
 structure Sledgehammer_Preplay : SLEDGEHAMMER_PREPLAY =
@@ -91,8 +104,10 @@
       |> thm_of_term ctxt
   end
 
-fun try_metis _ _ _ _ _ _ (Let _) = K zero_preplay_time
-  | try_metis debug trace type_enc lam_trans ctxt timeout
+
+(* main function for preplaying isar_steps *)
+fun preplay _ _ _ _ _ _ (Let _) = zero_preplay_time
+  | preplay debug trace type_enc lam_trans ctxt timeout
       (Prove (_, Fix xs, _, t, subproofs, By_Metis fact_names)) =
   let
     val (prop, obtain) =
@@ -126,20 +141,132 @@
       Metis_Tactic.metis_tac [type_enc] lam_trans ctxt facts 1
     fun run_tac () = goal tac
       handle ERROR msg => error ("preplay error: " ^ msg)
+    val preplay_time = take_time timeout run_tac ()
   in
-    fn () =>
-      let
-        val preplay_time = take_time timeout run_tac ()
-        (* tracing *)
-        val _ = if trace then preplay_trace ctxt facts prop preplay_time else ()
-      in
-        preplay_time
-      end
+    (* tracing *)
+    (if trace then preplay_trace ctxt facts prop preplay_time else () ;
+     preplay_time)
+  end
+
+
+
+(*** proof preplay interface ***)
+
+type preplay_interface =
+  { get_time : label -> preplay_time,
+    set_time : label -> preplay_time -> unit,
+    preplay_quietly : Time.time -> isar_step -> preplay_time,
+    preplay_fail : unit -> bool,
+    overall_preplay_stats : unit -> preplay_time * bool }
+
+
+(* enriches context with local proof facts *)
+fun enrich_context proof ctxt =
+  let
+    val thy = Proof_Context.theory_of ctxt
+
+    fun enrich_with_fact l t =
+      Proof_Context.put_thms false
+        (string_of_label l, SOME [Skip_Proof.make_thm thy t])
+
+    val enrich_with_assms = fold (uncurry enrich_with_fact)
+
+    fun enrich_with_proof (Proof (_, Assume assms, isar_steps)) =
+      enrich_with_assms assms #> fold enrich_with_step isar_steps
+
+    and enrich_with_step (Let _) = I
+      | enrich_with_step (Prove (_, _, l, t, subproofs, _)) =
+          enrich_with_fact l t #> fold enrich_with_proof subproofs
+  in
+    enrich_with_proof proof ctxt
   end
 
-(* this version treats exceptions like timeouts *)
-fun try_metis_quietly debug trace type_enc lam_trans ctxt timeout =
-  the_default (true, timeout) oo try
-  o try_metis debug trace type_enc lam_trans ctxt timeout
+
+(* Given a proof, produces an imperative preplay interface with a shared state.
+   The preplay times are caluclated lazyly and cached to avoid repeated
+   calculation.
+
+   PRE CONDITION: the proof must be labeled canocially, see
+   Slegehammer_Proof.relabel_proof_canonically
+*)
+fun proof_preplay_interface debug ctxt type_enc lam_trans do_preplay
+      preplay_timeout preplay_trace proof : preplay_interface =
+  if not do_preplay then
+    (* the dont_preplay option pretends that everything works just fine *)
+    { get_time = K zero_preplay_time,
+      set_time = K (K ()),
+      preplay_quietly = K (K zero_preplay_time),
+      preplay_fail = K false,
+      overall_preplay_stats = K (zero_preplay_time, false)}
+  else
+    let
+
+      (* 60 seconds seems like a good interpreation of "no timeout" *)
+      val preplay_timeout = preplay_timeout |> the_default (seconds 60.0)
+
+      (* add local proof facts to context *)
+      val ctxt = enrich_context proof ctxt
+
+      val fail = Unsynchronized.ref false
+      fun preplay_fail () = !fail
+
+      fun preplay' timeout step =
+        (* after preplay has failed once, exact preplay times are pointless *)
+        if preplay_fail () then some_preplay_time
+          else preplay debug preplay_trace type_enc lam_trans ctxt timeout step
+
+      (* preplay steps without registering preplay_fails, treating exceptions
+         like timeouts *)
+      fun preplay_quietly timeout step =
+        try (preplay' timeout) step
+        |> the_default (true, timeout)
+
+      val preplay_time_tab =
+        let
+          fun add_step_to_tab step tab =
+            case label_of_step step of
+              NONE => tab
+            | SOME l =>
+                Canonical_Lbl_Tab.update_new
+                  (l, (fn () => preplay' preplay_timeout step) |> Lazy.lazy)
+                  tab
+            handle (exn as Canonical_Lbl_Tab.DUP _) =>
+              raise Fail ("Sledgehammer_Preplay: preplay time table - "
+                          ^ PolyML.makestring exn)
+        in
+          Canonical_Lbl_Tab.empty
+          |> fold_isar_steps add_step_to_tab (steps_of_proof proof)
+          |> Unsynchronized.ref
+        end
+
+      fun register_preplay_fail lazy_time = Lazy.force lazy_time
+        handle exn =>
+          if Exn.is_interrupt exn orelse debug then reraise exn
+          else (fail := true; some_preplay_time)
+
+      fun get_time lbl =
+        register_preplay_fail
+          (Canonical_Lbl_Tab.lookup (!preplay_time_tab) lbl |> the)
+        handle
+          Option.Option =>
+            raise Fail "Sledgehammer_Preplay: preplay time table"
+
+      fun set_time lbl time =
+        preplay_time_tab :=
+          Canonical_Lbl_Tab.update (lbl, Lazy.value time) (!preplay_time_tab)
+
+      fun total_preplay_time () =
+        Canonical_Lbl_Tab.fold
+          (snd #> register_preplay_fail #> add_preplay_time)
+          (!preplay_time_tab) zero_preplay_time
+
+      fun overall_preplay_stats () = (total_preplay_time (), preplay_fail ())
+    in
+      { get_time = get_time,
+        set_time = set_time,
+        preplay_quietly = preplay_quietly,
+        preplay_fail = preplay_fail,
+        overall_preplay_stats = overall_preplay_stats}
+    end
 
 end
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_proof.ML	Tue Jul 09 18:44:59 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_proof.ML	Tue Jul 09 18:45:06 2013 +0200
@@ -28,6 +28,10 @@
   val no_label : label
   val no_facts : facts
 
+  (*val label_ord : label * label -> order*)
+
+  val dummy_isar_step : isar_step
+
   val string_of_label : label -> string
   val fix_of_proof : isar_proof -> fix
   val assms_of_proof : isar_proof -> assms
@@ -43,6 +47,13 @@
 
   val add_metis_steps_top_level : isar_step list -> int -> int
   val add_metis_steps : isar_step list -> int -> int
+
+  (** canonical proof labels: 1, 2, 3, ... in post traversal order **)
+  val canonical_label_ord : (label * label) -> order
+  val relabel_proof_canonically : isar_proof -> isar_proof
+
+  structure Canonical_Lbl_Tab : TABLE
+
 end
 
 structure Sledgehammer_Proof : SLEDGEHAMMER_PROOF =
@@ -68,6 +79,10 @@
 val no_label = ("", ~1)
 val no_facts = ([],[])
 
+(*val label_ord = pairself swap #> prod_ord int_ord fast_string_ord*)
+
+val dummy_isar_step = Let (Term.dummy, Term.dummy)
+
 fun string_of_label (s, num) = s ^ string_of_int num
 
 fun fix_of_proof (Proof (fix, _, _)) = fix
@@ -83,17 +98,64 @@
 fun byline_of_step (Prove (_, _, _, _, _, byline)) = SOME byline
   | byline_of_step _ = NONE
 
-fun fold_isar_step f step s =
-  fold (steps_of_proof #> fold (fold_isar_step f))
-    (the_default [] (subproofs_of_step step)) s
+fun fold_isar_steps f = fold (fold_isar_step f)
+and fold_isar_step f step s =
+  fold (steps_of_proof #> fold_isar_steps f)
+       (these (subproofs_of_step step)) s
     |> f step
 
-fun fold_isar_steps f = fold (fold_isar_step f)
-
 val add_metis_steps_top_level =
   fold (byline_of_step #> (fn SOME (By_Metis _) => Integer.add 1 | _ => I))
 
 val add_metis_steps =
   fold_isar_steps
     (byline_of_step #> (fn SOME (By_Metis _) => Integer.add 1 | _ => I))
+
+
+(** canonical proof labels: 1, 2, 3, ... in post traversal order **)
+
+val canonical_label_ord = pairself snd #> int_ord
+
+structure Canonical_Lbl_Tab = Table(
+  type key = label
+  val ord = canonical_label_ord)
+
+fun relabel_proof_canonically proof =
+  let
+    val lbl = pair ""
+
+    fun next_label l (next, subst) =
+      (lbl next, (next + 1, (l, lbl next) :: subst))
+
+    fun do_byline (By_Metis (lfs, gfs)) (_, subst) =
+      By_Metis (map (AList.lookup (op =) subst #> the) lfs, gfs)
+    handle Option.Option =>
+      raise Fail "Sledgehammer_Compress: relabel_proof_canonically"
+
+    fun do_assm (l, t) state =
+      let
+        val (l, state) = next_label l state
+      in ((l, t), state) end
+
+    fun do_proof (Proof (fix, Assume assms, steps)) state =
+      let
+        val (assms, state) = fold_map do_assm assms state
+        val (steps, state) = fold_map do_step steps state
+      in
+        (Proof (fix, Assume assms, steps), state)
+      end
+
+    and do_step (step as Let _) state = (step, state)
+      | do_step (Prove (qs, fix, l, t, subproofs, by)) state=
+      let
+        val by = do_byline by state
+        val (subproofs, state) = fold_map do_proof subproofs state
+        val (l, state) = next_label l state
+      in
+        (Prove (qs, fix, l, t, subproofs, by), state)
+      end
+  in
+    fst (do_proof proof (0, []))
+  end
+
 end
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_provers.ML	Tue Jul 09 18:44:59 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_provers.ML	Tue Jul 09 18:45:06 2013 +0200
@@ -36,6 +36,8 @@
      max_new_mono_instances : int option,
      isar_proofs : bool option,
      isar_compress : real,
+     isar_compress_degree : int,
+     merge_timeout_slack : real,
      slice : bool,
      minimize : bool option,
      timeout : Time.time option,
@@ -347,6 +349,8 @@
    max_new_mono_instances : int option,
    isar_proofs : bool option,
    isar_compress : real,
+   isar_compress_degree : int,
+   merge_timeout_slack : real,
    slice : bool,
    minimize : bool option,
    timeout : Time.time option,
@@ -679,6 +683,7 @@
         (params as {debug, verbose, overlord, type_enc, strict, lam_trans,
                     uncurried_aliases, fact_filter, max_facts, max_mono_iters,
                     max_new_mono_instances, isar_proofs, isar_compress,
+                    isar_compress_degree, merge_timeout_slack,
                     slice, timeout, preplay_timeout, preplay_trace, ...})
         minimize_command
         ({state, goal, subgoal, subgoal_count, factss, ...} : prover_problem) =
@@ -953,6 +958,7 @@
                     ()
                 val isar_params =
                   (debug, verbose, preplay_timeout, preplay_trace, isar_compress,
+                   isar_compress_degree, merge_timeout_slack,
                    pool, fact_names, lifted, sym_tab, atp_proof, goal)
                 val one_line_params =
                   (preplay, proof_banner mode name, used_facts,
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_reconstruct.ML	Tue Jul 09 18:44:59 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_reconstruct.ML	Tue Jul 09 18:45:06 2013 +0200
@@ -12,9 +12,9 @@
   type one_line_params = Sledgehammer_Print.one_line_params
 
   type isar_params =
-    bool * bool * Time.time option * bool * real * string Symtab.table
-    * (string * stature) list vector * (string * term) list * int Symtab.table
-    * string proof * thm
+    bool * bool * Time.time option * bool * real * int * real
+    * string Symtab.table * (string * stature) list vector
+    * (string * term) list * int Symtab.table * string proof * thm
 
   val lam_trans_of_atp_proof : string proof -> string -> string
   val is_typed_helper_used_in_atp_proof : string proof -> bool
@@ -44,6 +44,7 @@
 open Sledgehammer_Proof
 open Sledgehammer_Annotate
 open Sledgehammer_Print
+open Sledgehammer_Preplay
 open Sledgehammer_Compress
 
 structure String_Redirect = ATP_Proof_Redirect(
@@ -405,13 +406,14 @@
   in chain_proof end
 
 type isar_params =
-  bool * bool * Time.time option * bool * real * string Symtab.table
-  * (string * stature) list vector * (string * term) list * int Symtab.table
-  * string proof * thm
+  bool * bool * Time.time option * bool * real * int * real
+  * string Symtab.table * (string * stature) list vector
+  * (string * term) list * int Symtab.table * string proof * thm
 
 fun isar_proof_text ctxt isar_proofs
-    (debug, verbose, preplay_timeout, preplay_trace, isar_compress, pool,
-     fact_names, lifted, sym_tab, atp_proof, goal)
+    (debug, verbose, preplay_timeout, preplay_trace, isar_compress,
+     isar_compress_degree, merge_timeout_slack, pool, fact_names, lifted,
+     sym_tab, atp_proof, goal)
     (one_line_params as (_, _, _, _, subgoal, subgoal_count)) =
   let
     val (params, hyp_ts, concl_t) = strip_subgoal goal subgoal ctxt
@@ -426,7 +428,7 @@
       if is_typed_helper_used_in_atp_proof atp_proof then full_typesN
       else partial_typesN
     val lam_trans = lam_trans_of_atp_proof atp_proof metis_default_lam_trans
-    val preplay = preplay_timeout <> SOME Time.zeroTime
+    val do_preplay = preplay_timeout <> SOME Time.zeroTime
 
     fun isar_proof_of () =
       let
@@ -573,20 +575,24 @@
           chain_direct_proof
           #> kill_useless_labels_in_proof
           #> relabel_proof
-        val (isar_proof, (preplay_fail, preplay_time)) =
+        val (preplay_interface as { overall_preplay_stats, ... }, isar_proof) =
           refute_graph
           |> redirect_graph axioms tainted bot
           |> isar_proof_of_direct_proof
-          |> (if not preplay andalso isar_compress <= 1.0 then
-                rpair (false, (true, seconds 0.0))
-              else
-                compress_and_preplay_proof debug ctxt type_enc lam_trans preplay
-                  preplay_timeout preplay_trace
-                  (if isar_proofs = SOME true then isar_compress else 1000.0))
-          |>> clean_up_labels_in_proof
+          |> relabel_proof_canonically
+          |> `(proof_preplay_interface debug ctxt type_enc lam_trans do_preplay
+               preplay_timeout preplay_trace)
+        val isar_proof =
+          isar_proof
+          |> compress_proof
+               (if isar_proofs = SOME true then isar_compress else 1000.0)
+               (if isar_proofs = SOME true then isar_compress_degree else 100)
+               merge_timeout_slack preplay_interface
+          |> clean_up_labels_in_proof
         val isar_text =
           string_of_proof ctxt type_enc lam_trans subgoal subgoal_count
                           isar_proof
+        val (preplay_time, preplay_fail) = overall_preplay_stats ()
       in
         case isar_text of
           "" =>
@@ -603,9 +609,9 @@
                 in [string_of_int num_steps ^ " step" ^ plural_s num_steps] end
                else
                  []) @
-              (if preplay then
+              (if do_preplay then
                 [(if preplay_fail then "may fail, " else "") ^
-                   Sledgehammer_Preplay.string_of_preplay_time preplay_time]
+                   string_of_preplay_time preplay_time]
                else
                  [])
           in
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_util.ML	Tue Jul 09 18:44:59 2013 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_util.ML	Tue Jul 09 18:45:06 2013 +0200
@@ -26,6 +26,11 @@
   val one_year : Time.time
   val time_limit : Time.time option -> ('a -> 'b) -> 'a -> 'b
   val with_vanilla_print_mode : ('a -> 'b) -> 'a -> 'b
+
+  (** extrema **)
+  val max : ('a * 'a -> order) -> 'a -> 'a -> 'a
+  val max_option : ('a * 'a -> order) -> 'a option -> 'a option -> 'a option
+  val max_list : ('a * 'a -> order) -> 'a list -> 'a option
 end;
 
 structure Sledgehammer_Util : SLEDGEHAMMER_UTIL =
@@ -152,4 +157,12 @@
   Print_Mode.setmp (filter (curry (op =) Symbol.xsymbolsN)
                            (print_mode_value ())) f x
 
+(** extrema **)
+
+fun max ord x y = case ord(x,y) of LESS => y | _ => x
+
+fun max_option ord = max (option_ord ord)
+
+fun max_list ord xs = fold (SOME #> max_option ord) xs NONE
+
 end;