making quickcheck's result value more formal; allowing more result information to be returned after timeout; adding output of timing information in quickcheck
authorbulwahn
Wed, 23 Mar 2011 08:50:31 +0100
changeset 42088 8d00484551fe
parent 42087 5e236f6ef04f
child 42089 904897d0c5bd
making quickcheck's result value more formal; allowing more result information to be returned after timeout; adding output of timing information in quickcheck
src/HOL/Tools/Predicate_Compile/predicate_compile.ML
src/HOL/Tools/Predicate_Compile/predicate_compile_core.ML
src/Tools/quickcheck.ML
--- a/src/HOL/Tools/Predicate_Compile/predicate_compile.ML	Wed Mar 23 08:50:29 2011 +0100
+++ b/src/HOL/Tools/Predicate_Compile/predicate_compile.ML	Wed Mar 23 08:50:31 2011 +0100
@@ -130,12 +130,12 @@
 fun preprocess options t thy =
   let
     val _ = print_step options "Fetching definitions from theory..."
-    val gr = cond_timeit (!Quickcheck.timing) "preprocess-obtain graph"
+    val gr = cond_timeit (Config.get_global thy Quickcheck.timing) "preprocess-obtain graph"
           (fn () => Predicate_Compile_Data.obtain_specification_graph options thy t
           |> (fn gr => Term_Graph.subgraph (member (op =) (Term_Graph.all_succs gr [t])) gr))
     val _ = if !present_graph then Predicate_Compile_Data.present_graph gr else ()
   in
-    cond_timeit (!Quickcheck.timing) "preprocess-process"
+    cond_timeit (Config.get_global thy Quickcheck.timing) "preprocess-process"
       (fn () => (fold_rev (preprocess_strong_conn_constnames options gr)
         (Term_Graph.strong_conn gr) thy))
   end
--- a/src/HOL/Tools/Predicate_Compile/predicate_compile_core.ML	Wed Mar 23 08:50:29 2011 +0100
+++ b/src/HOL/Tools/Predicate_Compile/predicate_compile_core.ML	Wed Mar 23 08:50:31 2011 +0100
@@ -1363,7 +1363,7 @@
     val (lookup_mode, lookup_neg_mode, needs_random) = (modes_of compilation ctxt,
       modes_of (negative_compilation_of compilation) ctxt, needs_random ctxt)
     val ((moded_clauses, needs_random), errors) =
-      cond_timeit (!Quickcheck.timing) "Infering modes"
+      cond_timeit (Config.get ctxt Quickcheck.timing) "Infering modes"
       (fn _ => infer_modes mode_analysis_options
         options (lookup_mode, lookup_neg_mode, needs_random) ctxt preds all_modes param_vs clauses)
     val thy' = fold (fn (s, ms) => set_needs_random s ms) needs_random thy
@@ -1373,19 +1373,19 @@
     val _ = print_modes options modes
     val _ = print_step options "Defining executable functions..."
     val thy'' =
-      cond_timeit (!Quickcheck.timing) "Defining executable functions..."
+      cond_timeit (Config.get ctxt Quickcheck.timing) "Defining executable functions..."
       (fn _ => fold (#define_functions (dest_steps steps) options preds) modes thy'
       |> Theory.checkpoint)
     val ctxt'' = ProofContext.init_global thy''
     val _ = print_step options "Compiling equations..."
     val compiled_terms =
-      cond_timeit (!Quickcheck.timing) "Compiling equations...." (fn _ =>
+      cond_timeit (Config.get ctxt Quickcheck.timing) "Compiling equations...." (fn _ =>
         compile_preds options
           (#comp_modifiers (dest_steps steps)) ctxt'' all_vs param_vs preds moded_clauses)
     val _ = print_compiled_terms options ctxt'' compiled_terms
     val _ = print_step options "Proving equations..."
     val result_thms =
-      cond_timeit (!Quickcheck.timing) "Proving equations...." (fn _ =>
+      cond_timeit (Config.get ctxt Quickcheck.timing) "Proving equations...." (fn _ =>
       #prove (dest_steps steps) options thy'' clauses preds moded_clauses compiled_terms)
     val result_thms' = #add_code_equations (dest_steps steps) ctxt'' preds
       (maps_modes result_thms)
@@ -1393,7 +1393,7 @@
     val attrib = fn thy => Attrib.attribute_i thy (Attrib.internal (K (Thm.declaration_attribute
       (fn thm => Context.mapping (Code.add_eqn thm) I))))
     val thy''' =
-      cond_timeit (!Quickcheck.timing) "Setting code equations...." (fn _ =>
+      cond_timeit (Config.get ctxt Quickcheck.timing) "Setting code equations...." (fn _ =>
       fold (fn (name, result_thms) => fn thy => snd (Global_Theory.add_thmss
       [((Binding.qualify true (Long_Name.base_name name) (Binding.name qname), result_thms),
         [attrib thy ])] thy))
--- a/src/Tools/quickcheck.ML	Wed Mar 23 08:50:29 2011 +0100
+++ b/src/Tools/quickcheck.ML	Wed Mar 23 08:50:31 2011 +0100
@@ -9,12 +9,12 @@
   val setup: theory -> theory
   (* configuration *)
   val auto: bool Unsynchronized.ref
-  val timing : bool Unsynchronized.ref
   val tester : string Config.T
   val size : int Config.T
   val iterations : int Config.T
   val no_assms : bool Config.T
   val report : bool Config.T
+  val timing : bool Config.T
   val quiet : bool Config.T
   val timeout : real Config.T
   val finite_types : bool Config.T
@@ -33,13 +33,19 @@
   val add_batch_generator:
     string * (Proof.context -> term list -> (int -> term list option) list)
       -> Context.generic -> Context.generic
+  datatype result =
+    Result of
+     {counterexample : (string * term) list option,
+      evaluation_terms : (term * term) list option,
+      timings : (string * int) list,
+      reports : (int * report) list}
   (* testing terms and proof states *)
-  val test_term: Proof.context -> bool * bool -> term * term list ->
-    ((string * term) list * (term * term) list) option * ((string * int) list * ((int * report) list) option)
+  val test_term: Proof.context -> bool * bool -> term * term list -> result
   val test_goal_terms:
     Proof.context -> bool * bool -> (string * typ) list -> (term * term list) list  
-      -> ((string * term) list * (term * term) list) option * ((string * int) list * ((int * report) list) option) list
+      -> result list
   val quickcheck: (string * string list) list -> int -> Proof.state -> (string * term) list option
+  
   (* testing a batch of terms *)
   val test_terms: Proof.context -> term list -> (string * term) list option list option
 end;
@@ -51,8 +57,6 @@
 
 val auto = Unsynchronized.ref false;
 
-val timing = Unsynchronized.ref false;
-
 val _ =
   ProofGeneralPgip.add_preference Preferences.category_tracing
   (Unsynchronized.setmp auto true (fn () =>
@@ -66,6 +70,50 @@
   { iterations : int, raised_match_errors : int,
     satisfied_assms : int list, positive_concl_tests : int }
 
+(* Quickcheck Result *)
+
+datatype result = Result of
+  { counterexample : (string * term) list option, evaluation_terms : (term * term) list option,
+    timings : (string * int) list, reports : (int * report) list}
+
+val empty_result =
+  Result {counterexample = NONE, evaluation_terms = NONE, timings = [], reports = []}
+
+fun counterexample_of (Result r) = #counterexample r
+
+fun found_counterexample (Result r) = is_some (#counterexample r)
+
+fun response_of (Result r) = case (#counterexample r, #evaluation_terms r) of
+    (SOME ts, SOME evals) => SOME (ts, evals)
+  | (NONE, NONE) => NONE
+
+fun timings_of (Result r) = #timings r
+
+fun set_reponse names eval_terms (SOME ts) (Result r) =
+  let
+    val (ts1, ts2) = chop (length names) ts
+  in
+    Result {counterexample = SOME (names ~~ ts1), evaluation_terms = SOME (eval_terms ~~ ts2),
+      timings = #timings r, reports = #reports r}
+  end
+  | set_reponse _ _ NONE result = result
+
+fun cons_timing timing (Result r) =
+  Result {counterexample = #counterexample r, evaluation_terms = #evaluation_terms r,
+    timings = cons timing (#timings r), reports = #reports r}
+
+fun cons_report size (SOME report) (Result r) =
+  Result {counterexample = #counterexample r, evaluation_terms = #evaluation_terms r,
+    timings = #timings r, reports = cons (size, report) (#reports r)}
+  | cons_report _ NONE result = result
+
+fun add_timing timing result_ref = (result_ref := cons_timing timing (!result_ref))
+
+fun add_report size report result_ref = (result_ref := cons_report size report (!result_ref))
+
+fun add_response names eval_terms response result_ref =
+  (result_ref := set_reponse names eval_terms response (!result_ref))
+
 (* expectation *)
 
 datatype expectation = No_Expectation | No_Counterexample | Counterexample;
@@ -79,6 +127,7 @@
 val (iterations, setup_iterations) = Attrib.config_int "quickcheck_iterations" (K 100)
 val (no_assms, setup_no_assms) = Attrib.config_bool "quickcheck_no_assms" (K false)
 val (report, setup_report) = Attrib.config_bool "quickcheck_report" (K true)
+val (timing, setup_timing) = Attrib.config_bool "quickcheck_timing" (K false)
 val (quiet, setup_quiet) = Attrib.config_bool "quickcheck_quiet" (K false)
 val (timeout, setup_timeout) = Attrib.config_real "quickcheck_timeout" (K 30.0)
 val (finite_types, setup_finite_types) = Attrib.config_bool "quickcheck_finite_types" (K true)
@@ -86,8 +135,8 @@
   Attrib.config_int "quickcheck_finite_type_size" (K 3)
 
 val setup_config =
-  setup_tester #> setup_size #> setup_iterations #> setup_no_assms #> setup_report #> setup_quiet
-    #> setup_timeout #> setup_finite_types #> setup_finite_type_size
+  setup_tester #> setup_size #> setup_iterations #> setup_no_assms #> setup_report #> setup_timing
+    #> setup_quiet #> setup_timeout #> setup_finite_types #> setup_finite_type_size
 
 datatype test_params = Test_Params of
   {default_type: typ list, expect : expectation};
@@ -178,48 +227,44 @@
   else
     f ()
 
-fun mk_result names eval_terms ts =
-  let
-    val (ts1, ts2) = chop (length names) ts
-  in
-    (names ~~ ts1, eval_terms ~~ ts2) 
-  end
-
 fun test_term ctxt (limit_time, is_interactive) (t, eval_terms) =
   let
+    fun message s = if Config.get ctxt quiet then () else Output.urgent_message s
     val _ = check_test_term t
     val names = Term.add_free_names t []
     val current_size = Unsynchronized.ref 0
+    val current_result = Unsynchronized.ref empty_result 
     fun excipit () =
       "Quickcheck ran out of time while testing at size " ^ string_of_int (!current_size)
     val (test_fun, comp_time) = cpu_time "quickcheck compilation"
       (fn () => mk_tester ctxt (t, eval_terms));
-    fun with_size test_fun k reports =
+    val _ = add_timing comp_time current_result
+    fun with_size test_fun k =
       if k > Config.get ctxt size then
-        (NONE, reports)
+        NONE
       else
         let
-          val _ = if Config.get ctxt quiet then () else Output.urgent_message
-            ("Test data size: " ^ string_of_int k)
+          val _ = message ("Test data size: " ^ string_of_int k)
           val _ = current_size := k
-          val ((result, new_report), timing) =
+          val ((result, report), timing) =
             cpu_time ("size " ^ string_of_int k) (fn () => test_fun (k - 1))
-          val reports = case new_report of NONE => reports | SOME rep => (k, rep) :: reports
+          val _ = add_timing timing current_result
+          val _ = add_report k report current_result
         in
-          case result of NONE => with_size test_fun (k + 1) reports | SOME q => (SOME q, reports)
+          case result of NONE => with_size test_fun (k + 1) | SOME q => SOME q
         end;
   in
-    case test_fun of NONE => (NONE, ([comp_time], NONE))
+    case test_fun of NONE => !current_result
       | SOME test_fun =>
         limit ctxt (limit_time, is_interactive) (fn () =>
           let
-            val ((result, reports), exec_time) =
-              cpu_time "quickcheck execution" (fn () => with_size test_fun 1 [])
+            val (response, exec_time) =
+              cpu_time "quickcheck execution" (fn () => with_size test_fun 1)
+            val _ = add_response names eval_terms response current_result
+            val _ = add_timing exec_time current_result
           in
-            (Option.map (mk_result names eval_terms) result,
-              ([exec_time, comp_time],
-               if Config.get ctxt report andalso not (null reports) then SOME reports else NONE))
-          end) (fn () => (Output.urgent_message (excipit ()); (NONE, ([comp_time], NONE)))) ()
+            !current_result
+          end) (fn () => (message (excipit ()); !current_result)) ()
   end;
 
 fun test_terms ctxt ts =
@@ -244,17 +289,24 @@
 fun test_term_with_increasing_cardinality ctxt (limit_time, is_interactive) ts =
   let
     val thy = ProofContext.theory_of ctxt
+    fun message s = if Config.get ctxt quiet then () else Output.urgent_message s
     val (ts, eval_terms) = split_list ts
     val _ = map check_test_term ts
     val names = Term.add_free_names (hd ts) []
     val Ts = map snd (Term.add_frees (hd ts) [])
+    val current_result = Unsynchronized.ref empty_result 
     val (test_funs, comp_time) = cpu_time "quickcheck compilation"
       (fn () => map (mk_tester ctxt) (ts ~~ eval_terms))
+    val _ = add_timing comp_time current_result
     fun test_card_size (card, size) =
       (* FIXME: why decrement size by one? *)
-      case fst (the (nth test_funs (card - 1)) (size - 1)) of
-        SOME ts => SOME (mk_result names (nth eval_terms (card - 1)) ts)
-      | NONE => NONE
+      let
+        val (ts, timing) = cpu_time ("size " ^ string_of_int size ^ " and card " ^ string_of_int card)
+          (fn () => fst (the (nth test_funs (card - 1)) (size - 1)))
+        val _ = add_timing timing current_result
+      in
+        Option.map (pair card) ts
+      end
     val enumeration_card_size =
       if forall (fn T => Sign.of_sort thy (T,  ["Enum.enum"])) Ts then
         (* size does not matter *)
@@ -264,12 +316,15 @@
         map_product pair (1 upto (length ts)) (1 upto (Config.get ctxt size))
         |> sort (fn ((c1, s1), (c2, s2)) => int_ord ((c1 + s1), (c2 + s2)))
     in
-      if (forall is_none test_funs) then
-        (NONE, ([comp_time], NONE))
+      if (forall is_none test_funs) then !current_result
       else if (forall is_some test_funs) then
         limit ctxt (limit_time, is_interactive) (fn () =>
-          (get_first test_card_size enumeration_card_size, ([comp_time], NONE)))
-          (fn () => (Output.urgent_message "Quickcheck ran out of time"; (NONE, ([comp_time], NONE)))) ()
+          let
+            val _ = case get_first test_card_size enumeration_card_size of
+              SOME (card, ts) => add_response names (nth eval_terms (card - 1)) (SOME ts) current_result
+            | NONE => ()
+          in !current_result end)
+          (fn () => (message "Quickcheck ran out of time"; !current_result)) ()
       else
         error "Unexpectedly, testers of some cardinalities are executable but others are not"
     end
@@ -325,20 +380,25 @@
         [[]] => error error_msg
       | xs => xs
     val _ = if Config.get lthy quiet then () else warning error_msg
-    fun collect_results f reports [] = (NONE, rev reports)
-      | collect_results f reports (t :: ts) =
-        case f t of
-          (SOME res, report) => (SOME res, rev (report :: reports))
-        | (NONE, report) => collect_results f (report :: reports) ts
+    fun collect_results f [] results = results
+      | collect_results f (t :: ts) results =
+        let
+          val result = f t
+        in
+          if found_counterexample result then
+            (result :: results)
+          else
+            collect_results f ts (result :: results)
+        end
     fun test_term' goal =
       case goal of
         [(NONE, t)] => test_term lthy (limit_time, is_interactive) t
       | ts => test_term_with_increasing_cardinality lthy (limit_time, is_interactive) (map snd ts)
   in
     if Config.get lthy finite_types then
-      collect_results test_term' [] correct_inst_goals
+      collect_results test_term' correct_inst_goals []
     else
-      collect_results (test_term lthy (limit_time, is_interactive)) [] (maps (map snd) correct_inst_goals)
+      collect_results (test_term lthy (limit_time, is_interactive)) (maps (map snd) correct_inst_goals) []
   end;
 
 fun test_goal (time_limit, is_interactive) (insts, eval_terms) i state =
@@ -400,9 +460,15 @@
       (rev reports))
   | pretty_reports ctxt NONE = Pretty.str ""
 
-fun pretty_counterex_and_reports ctxt auto (cex, timing_and_reports) =
-  Pretty.chunks (pretty_counterex ctxt auto cex ::
-    map (pretty_reports ctxt) (map snd timing_and_reports))
+fun pretty_timings timings =
+  Pretty.chunks (Pretty.fbrk :: Pretty.str "timings:" ::
+    maps (fn (label, time) =>
+      Pretty.str (label ^ ": " ^ string_of_int time ^ " ms") :: Pretty.brk 1 :: []) (rev timings))
+
+fun pretty_counterex_and_reports ctxt auto (result :: _) =
+  Pretty.chunks (pretty_counterex ctxt auto (response_of result) ::
+    (* map (pretty_reports ctxt) (reports_of result) :: *)
+    (if Config.get ctxt timing then cons (pretty_timings (timings_of result)) else I) [])
 
 (* automatic testing *)
 
@@ -416,9 +482,11 @@
   in
     case res of
       NONE => (false, state)
-    | SOME (NONE, report) => (false, state)
-    | SOME (cex, report) => (true, Proof.goal_message (K (Pretty.chunks [Pretty.str "",
-        Pretty.mark Markup.hilite (pretty_counterex ctxt true cex)])) state)
+    | SOME (result :: _) => if found_counterexample result then
+        (true, Proof.goal_message (K (Pretty.chunks [Pretty.str "",
+          Pretty.mark Markup.hilite (pretty_counterex ctxt true (response_of result))])) state)
+      else
+        (false, state)
   end
 
 val setup = Auto_Tools.register_tool (auto, auto_quickcheck)
@@ -482,16 +550,23 @@
 
 fun quickcheck_params_cmd args = Context.theory_map (fold parse_test_param args);
 
+fun check_expectation state results =
+  (if found_counterexample (hd results) andalso expect (Proof.context_of state) = No_Counterexample
+  then
+    error ("quickcheck expected to find no counterexample but found one")
+  else
+    (if not (found_counterexample (hd results)) andalso expect (Proof.context_of state) = Counterexample
+    then
+      error ("quickcheck expected to find a counterexample but did not find one")
+    else ()))
+
 fun gen_quickcheck args i state =
   state
   |> Proof.map_context_result (fn ctxt => fold parse_test_param_inst args (([], []), ctxt))
   |> (fn ((insts, eval_terms), state') => test_goal (true, true) (insts, eval_terms) i state'
-  |> tap (fn (SOME x, _) => if expect (Proof.context_of state') = No_Counterexample then
-               error ("quickcheck expected to find no counterexample but found one") else ()
-           | (NONE, _) => if expect (Proof.context_of state') = Counterexample then
-               error ("quickcheck expected to find a counterexample but did not find one") else ()))
+  |> tap (check_expectation state'))
 
-fun quickcheck args i state = Option.map fst (fst (gen_quickcheck args i state));
+fun quickcheck args i state = counterexample_of (hd (gen_quickcheck args i state))
 
 fun quickcheck_cmd args i state =
   gen_quickcheck args i (Toplevel.proof_of state)