src/Tools/quickcheck.ML
changeset 35378 95d0e3adf38e
parent 35324 c9f428269b38
child 35379 d0c779d012dc
     1.1 --- a/src/Tools/quickcheck.ML	Tue Feb 23 16:58:21 2010 +0100
     1.2 +++ b/src/Tools/quickcheck.ML	Thu Feb 25 09:28:01 2010 +0100
     1.3 @@ -8,11 +8,16 @@
     1.4  sig
     1.5    val auto: bool Unsynchronized.ref
     1.6    val timing : bool Unsynchronized.ref
     1.7 +  datatype report = Report of
     1.8 +    { iterations : int, raised_match_errors : int,
     1.9 +      satisfied_assms : int list, positive_concl_tests : int }
    1.10 +  val gen_test_term: Proof.context -> bool -> bool -> string option -> int -> int -> term ->
    1.11 +    ((string * term) list option * ((string * int) list * (int * report list) list))
    1.12    val test_term: Proof.context -> bool -> string option -> int -> int -> term ->
    1.13      (string * term) list option
    1.14 -  val timed_test_term: Proof.context -> bool -> string option -> int -> int -> term ->
    1.15 -    ((string * term) list option * (string * int) list)
    1.16 -  val add_generator: string * (Proof.context -> term -> int -> term list option) -> theory -> theory
    1.17 +  val add_generator:
    1.18 +    string * (Proof.context -> bool -> term -> int -> term list option * (bool list * bool))
    1.19 +      -> theory -> theory
    1.20    val setup: theory -> theory
    1.21    val quickcheck: (string * string) list -> int -> Proof.state -> (string * term) list option
    1.22  end;
    1.23 @@ -33,31 +38,54 @@
    1.24        "auto-quickcheck"
    1.25        "Whether to run Quickcheck automatically.") ());
    1.26  
    1.27 +(* quickcheck report *)
    1.28 +
    1.29 +datatype single_report = Run of bool list * bool | MatchExc
    1.30 +
    1.31 +datatype report = Report of
    1.32 +  { iterations : int, raised_match_errors : int,
    1.33 +    satisfied_assms : int list, positive_concl_tests : int }
    1.34 +
    1.35 +fun collect_single_report single_report
    1.36 +    (Report {iterations = iterations, raised_match_errors = raised_match_errors,
    1.37 +    satisfied_assms = satisfied_assms, positive_concl_tests = positive_concl_tests}) =
    1.38 +  case single_report
    1.39 +  of MatchExc =>
    1.40 +    Report {iterations = iterations + 1, raised_match_errors = raised_match_errors + 1,
    1.41 +      satisfied_assms = satisfied_assms, positive_concl_tests = positive_concl_tests}
    1.42 +   | Run (assms, concl) =>
    1.43 +    Report {iterations = iterations + 1, raised_match_errors = raised_match_errors,
    1.44 +      satisfied_assms =
    1.45 +        map2 (fn b => fn s => if b then s + 1 else s) assms
    1.46 +         (if null satisfied_assms then replicate (length assms) 0 else satisfied_assms),
    1.47 +      positive_concl_tests = if concl then positive_concl_tests + 1 else positive_concl_tests}
    1.48  
    1.49  (* quickcheck configuration -- default parameters, test generators *)
    1.50  
    1.51  datatype test_params = Test_Params of
    1.52 -  { size: int, iterations: int, default_type: typ option, no_assms: bool };
    1.53 +  { size: int, iterations: int, default_type: typ option, no_assms: bool, report: bool};
    1.54  
    1.55 -fun dest_test_params (Test_Params { size, iterations, default_type, no_assms }) =
    1.56 -  ((size, iterations), (default_type, no_assms));
    1.57 -fun make_test_params ((size, iterations), (default_type, no_assms)) =
    1.58 +fun dest_test_params (Test_Params { size, iterations, default_type, no_assms, report }) =
    1.59 +  ((size, iterations), ((default_type, no_assms), report));
    1.60 +fun make_test_params ((size, iterations), ((default_type, no_assms), report)) =
    1.61    Test_Params { size = size, iterations = iterations, default_type = default_type,
    1.62 -                no_assms = no_assms };
    1.63 -fun map_test_params f (Test_Params { size, iterations, default_type, no_assms }) =
    1.64 -  make_test_params (f ((size, iterations), (default_type, no_assms)));
    1.65 +                no_assms = no_assms, report = report };
    1.66 +fun map_test_params f (Test_Params { size, iterations, default_type, no_assms, report }) =
    1.67 +  make_test_params (f ((size, iterations), ((default_type, no_assms), report)));
    1.68  fun merge_test_params (Test_Params { size = size1, iterations = iterations1, default_type = default_type1,
    1.69 -                                     no_assms = no_assms1 },
    1.70 +                                     no_assms = no_assms1, report = report1 },
    1.71    Test_Params { size = size2, iterations = iterations2, default_type = default_type2,
    1.72 -                no_assms = no_assms2 }) =
    1.73 +                no_assms = no_assms2, report = report2 }) =
    1.74    make_test_params ((Int.max (size1, size2), Int.max (iterations1, iterations2)),
    1.75 -    (case default_type1 of NONE => default_type2 | _ => default_type1, no_assms1 orelse no_assms2));
    1.76 +    ((case default_type1 of NONE => default_type2 | _ => default_type1, no_assms1 orelse no_assms2),
    1.77 +    report1 orelse report2));
    1.78  
    1.79  structure Data = Theory_Data
    1.80  (
    1.81 -  type T = (string * (Proof.context -> term -> int -> term list option)) list
    1.82 +  type T = (string * (Proof.context -> bool -> term -> int -> term list option * (bool list * bool))) list
    1.83      * test_params;
    1.84 -  val empty = ([], Test_Params { size = 10, iterations = 100, default_type = NONE, no_assms = false });
    1.85 +  val empty = ([], Test_Params
    1.86 +    { size = 10, iterations = 100, default_type = NONE, no_assms = false, report = false});
    1.87    val extend = I;
    1.88    fun merge ((generators1, params1), (generators2, params2)) : T =
    1.89      (AList.merge (op =) (K true) (generators1, generators2),
    1.90 @@ -66,7 +94,6 @@
    1.91  
    1.92  val add_generator = Data.map o apfst o AList.update (op =);
    1.93  
    1.94 -
    1.95  (* generating tests *)
    1.96  
    1.97  fun mk_tester_select name ctxt =
    1.98 @@ -74,14 +101,14 @@
    1.99     of NONE => error ("No such quickcheck generator: " ^ name)
   1.100      | SOME generator => generator ctxt;
   1.101  
   1.102 -fun mk_testers ctxt t =
   1.103 +fun mk_testers ctxt report t =
   1.104    (map snd o fst o Data.get o ProofContext.theory_of) ctxt
   1.105 -  |> map_filter (fn generator => try (generator ctxt) t);
   1.106 +  |> map_filter (fn generator => try (generator ctxt report) t);
   1.107  
   1.108 -fun mk_testers_strict ctxt t =
   1.109 +fun mk_testers_strict ctxt report t =
   1.110    let
   1.111      val generators = ((map snd o fst o Data.get o ProofContext.theory_of) ctxt)
   1.112 -    val testers = map (fn generator => Exn.capture (generator ctxt) t) generators;
   1.113 +    val testers = map (fn generator => Exn.capture (generator ctxt report) t) generators;
   1.114    in if forall (is_none o Exn.get_result) testers
   1.115      then [(Exn.release o snd o split_last) testers]
   1.116      else map_filter Exn.get_result testers
   1.117 @@ -106,36 +133,45 @@
   1.118      val time = Time.toMilliseconds (#cpu (end_timing start))
   1.119    in (Exn.release result, (description, time)) end
   1.120  
   1.121 -fun timed_test_term ctxt quiet generator_name size i t =
   1.122 +fun gen_test_term ctxt quiet report generator_name size i t =
   1.123    let
   1.124      val (names, t') = prep_test_term t;
   1.125      val (testers, comp_time) = cpu_time "quickcheck compilation"
   1.126        (fn () => (case generator_name
   1.127 -       of NONE => if quiet then mk_testers ctxt t' else mk_testers_strict ctxt t'
   1.128 -        | SOME name => [mk_tester_select name ctxt t']));
   1.129 -    fun iterate f 0 = NONE
   1.130 -      | iterate f j = case f () handle Match => (if quiet then ()
   1.131 -             else warning "Exception Match raised during quickcheck"; NONE)
   1.132 -          of NONE => iterate f (j - 1) | SOME q => SOME q;
   1.133 -    fun with_testers k [] = NONE
   1.134 +       of NONE => if quiet then mk_testers ctxt report t' else mk_testers_strict ctxt report t'
   1.135 +        | SOME name => [mk_tester_select name ctxt report t']));
   1.136 +    fun iterate f 0 report = (NONE, report)
   1.137 +      | iterate f j report =
   1.138 +        let
   1.139 +          val (test_result, single_report) = apsnd Run (f ()) handle Match => (if quiet then ()
   1.140 +             else warning "Exception Match raised during quickcheck"; (NONE, MatchExc))
   1.141 +          val report = collect_single_report single_report report
   1.142 +        in
   1.143 +          case test_result of NONE => iterate f (j - 1) report | SOME q => (SOME q, report)
   1.144 +        end
   1.145 +    val empty_report = Report { iterations = 0, raised_match_errors = 0,
   1.146 +      satisfied_assms = [], positive_concl_tests = 0 }
   1.147 +    fun with_testers k [] = (NONE, [])
   1.148        | with_testers k (tester :: testers) =
   1.149 -          case iterate (fn () => tester (k - 1)) i
   1.150 -           of NONE => with_testers k testers
   1.151 -            | SOME q => SOME q;
   1.152 -    fun with_size k = if k > size then NONE
   1.153 +          case iterate (fn () => tester (k - 1)) i empty_report
   1.154 +           of (NONE, report) => apsnd (cons report) (with_testers k testers)
   1.155 +            | (SOME q, report) => (SOME q, [report]);
   1.156 +    fun with_size k reports = if k > size then (NONE, reports)
   1.157        else (if quiet then () else priority ("Test data size: " ^ string_of_int k);
   1.158 -        case with_testers k testers
   1.159 -         of NONE => with_size (k + 1) | SOME q => SOME q);
   1.160 -    val (result, exec_time) = cpu_time "quickcheck execution"
   1.161 -      (fn () => case with_size 1
   1.162 -        of NONE => NONE
   1.163 -        | SOME ts => SOME (names ~~ ts))
   1.164 +        let
   1.165 +          val (result, new_report) = with_testers k testers
   1.166 +          val reports = ((k, new_report) :: reports)
   1.167 +        in case result of NONE => with_size (k + 1) reports | SOME q => (SOME q, reports) end);
   1.168 +    val ((result, reports), exec_time) = cpu_time "quickcheck execution"
   1.169 +      (fn () => apfst
   1.170 +         (fn result => case result of NONE => NONE
   1.171 +        | SOME ts => SOME (names ~~ ts)) (with_size 1 []))
   1.172    in
   1.173 -    (result, [exec_time, comp_time])
   1.174 +    (result, ([exec_time, comp_time], reports))
   1.175    end;
   1.176  
   1.177  fun test_term ctxt quiet generator_name size i t =
   1.178 -  fst (timed_test_term ctxt quiet generator_name size i t)
   1.179 +  fst (gen_test_term ctxt quiet false generator_name size i t)
   1.180  
   1.181  fun monomorphic_term thy insts default_T = 
   1.182    let
   1.183 @@ -152,7 +188,7 @@
   1.184        | subst T = T;
   1.185    in (map_types o map_atyps) subst end;
   1.186  
   1.187 -fun test_goal quiet generator_name size iterations default_T no_assms insts i assms state =
   1.188 +fun test_goal quiet report generator_name size iterations default_T no_assms insts i assms state =
   1.189    let
   1.190      val ctxt = Proof.context_of state;
   1.191      val thy = Proof.theory_of state;
   1.192 @@ -164,7 +200,7 @@
   1.193                                    subst_bounds (frees, strip gi))
   1.194        |> monomorphic_term thy insts default_T
   1.195        |> ObjectLogic.atomize_term thy;
   1.196 -  in test_term ctxt quiet generator_name size iterations gi' end;
   1.197 +  in gen_test_term ctxt quiet report generator_name size iterations gi' end;
   1.198  
   1.199  fun pretty_counterex ctxt NONE = Pretty.str "Quickcheck found no counterexample."
   1.200    | pretty_counterex ctxt (SOME cex) =
   1.201 @@ -172,6 +208,32 @@
   1.202          map (fn (s, t) =>
   1.203            Pretty.block [Pretty.str (s ^ " ="), Pretty.brk 1, Syntax.pretty_term ctxt t]) cex);
   1.204  
   1.205 +fun pretty_report (Report {iterations = iterations, raised_match_errors = raised_match_errors,
   1.206 +    satisfied_assms = satisfied_assms, positive_concl_tests = positive_concl_tests}) =
   1.207 +  let
   1.208 +    fun pretty_stat s i = Pretty.block ([Pretty.str (s ^ ": " ^ string_of_int i)])
   1.209 +  in
   1.210 +     ([pretty_stat "iterations" iterations,
   1.211 +     pretty_stat "match exceptions" raised_match_errors]
   1.212 +     @ map_index (fn (i, n) => pretty_stat ("satisfied " ^ string_of_int (i + 1) ^ ". assumption") n)
   1.213 +       satisfied_assms
   1.214 +     @ [pretty_stat "positive conclusion tests" positive_concl_tests])
   1.215 +  end
   1.216 +
   1.217 +fun pretty_reports' [report] = [Pretty.chunks (pretty_report report)]
   1.218 +  | pretty_reports' reports =
   1.219 +  map_index (fn (i, report) =>
   1.220 +    Pretty.chunks (Pretty.str (string_of_int (i + 1) ^ ". generator:\n") :: pretty_report report))
   1.221 +    reports
   1.222 +
   1.223 +fun pretty_reports ctxt reports =
   1.224 +  Pretty.chunks (Pretty.str "Quickcheck report:" ::
   1.225 +    maps (fn (size, reports) =>
   1.226 +      Pretty.str ("size " ^ string_of_int size ^ ":") :: pretty_reports' reports @ [Pretty.brk 1])
   1.227 +      (rev reports))
   1.228 +
   1.229 +fun pretty_counterex_and_reports ctxt (cex, (timing, reports)) =
   1.230 +  Pretty.chunks [pretty_counterex ctxt cex, pretty_reports ctxt reports]
   1.231  
   1.232  (* automatic testing *)
   1.233  
   1.234 @@ -182,15 +244,15 @@
   1.235      let
   1.236        val ctxt = Proof.context_of state;
   1.237        val assms = map term_of (Assumption.all_assms_of ctxt);
   1.238 -      val Test_Params { size, iterations, default_type, no_assms } =
   1.239 +      val Test_Params { size, iterations, default_type, no_assms, report } =
   1.240          (snd o Data.get o Proof.theory_of) state;
   1.241        val res =
   1.242 -        try (test_goal true NONE size iterations default_type no_assms [] 1 assms) state;
   1.243 +        try (test_goal true false NONE size iterations default_type no_assms [] 1 assms) state;
   1.244      in
   1.245        case res of
   1.246          NONE => (false, state)
   1.247 -      | SOME NONE => (false, state)
   1.248 -      | SOME cex => (true, Proof.goal_message (K (Pretty.chunks [Pretty.str "",
   1.249 +      | SOME (NONE, report) => (false, state)
   1.250 +      | SOME (cex, report) => (true, Proof.goal_message (K (Pretty.chunks [Pretty.str "",
   1.251            Pretty.mark Markup.hilite (pretty_counterex ctxt cex)])) state)
   1.252      end
   1.253  
   1.254 @@ -212,8 +274,10 @@
   1.255    | parse_test_param ctxt ("iterations", arg) =
   1.256        (apfst o apsnd o K) (read_nat arg)
   1.257    | parse_test_param ctxt ("default_type", arg) =
   1.258 -      (apsnd o apfst o K o SOME) (ProofContext.read_typ ctxt arg)
   1.259 +      (apsnd o apfst o apfst o K o SOME) (ProofContext.read_typ ctxt arg)
   1.260    | parse_test_param ctxt ("no_assms", arg) =
   1.261 +      (apsnd o apfst o apsnd o K) (read_bool arg)
   1.262 +  | parse_test_param ctxt ("report", arg) =
   1.263        (apsnd o apsnd o K) (read_bool arg)
   1.264    | parse_test_param ctxt (name, _) =
   1.265        error ("Unknown test parameter: " ^ name);
   1.266 @@ -235,22 +299,24 @@
   1.267      |> (Data.map o apsnd o map_test_params) f
   1.268    end;
   1.269  
   1.270 -fun quickcheck args i state =
   1.271 +fun gen_quickcheck args i state =
   1.272    let
   1.273      val thy = Proof.theory_of state;
   1.274      val ctxt = Proof.context_of state;
   1.275      val assms = map term_of (Assumption.all_assms_of ctxt);
   1.276      val default_params = (dest_test_params o snd o Data.get) thy;
   1.277      val f = fold (parse_test_param_inst ctxt) args;
   1.278 -    val (((size, iterations), (default_type, no_assms)), (generator_name, insts)) =
   1.279 +    val (((size, iterations), ((default_type, no_assms), report)), (generator_name, insts)) =
   1.280        f (default_params, (NONE, []));
   1.281    in
   1.282 -    test_goal false generator_name size iterations default_type no_assms insts i assms state
   1.283 +    test_goal false report generator_name size iterations default_type no_assms insts i assms state
   1.284    end;
   1.285  
   1.286 +fun quickcheck args i state = fst (gen_quickcheck args i state)
   1.287 +
   1.288  fun quickcheck_cmd args i state =
   1.289 -  quickcheck args i (Toplevel.proof_of state)
   1.290 -  |> Pretty.writeln o pretty_counterex (Toplevel.context_of state);
   1.291 +  gen_quickcheck args i (Toplevel.proof_of state)
   1.292 +  |> Pretty.writeln o pretty_counterex_and_reports (Toplevel.context_of state);
   1.293  
   1.294  local structure P = OuterParse and K = OuterKeyword in
   1.295