diff --git a/thys/SpecCheck/Examples/SpecCheck_Examples.thy b/thys/SpecCheck/Examples/SpecCheck_Examples.thy --- a/thys/SpecCheck/Examples/SpecCheck_Examples.thy +++ b/thys/SpecCheck/Examples/SpecCheck_Examples.thy @@ -1,129 +1,166 @@ \<^marker>\creator "Kevin Kappelmann"\ section \Examples\ theory SpecCheck_Examples imports SpecCheck_Dynamic begin -subsection \List examples\ - ML \ open SpecCheck open SpecCheck_Dynamic structure Gen = SpecCheck_Generator structure Prop = SpecCheck_Property structure Show = SpecCheck_Show structure Shrink = SpecCheck_Shrink structure Random = SpecCheck_Random \ +subsection \Unit Tests and Exceptions\ + +ML \ + fun faulty_abs n = + if n < ~10000 then error "out of bounds" + else if n < 0 then ~n + else n +\ + +ML_command \ +let + val check_unit_int_pair = check_unit_tests (Show.zip Show.int Show.int) + fun correctness_tests ctxt s = Lecker.test_group ctxt s [ + check_unit_int_pair [(~10, 10), (0, 0), (10, 10)] "correctness small values" + (Prop.prop (fn (n, exp) => faulty_abs n = exp)), + check_unit_int_pair [(~999999999, 999999999), (999999999, 999999999)] + "correctness large values" (Prop.prop (fn (n, exp) => faulty_abs n = exp)) + ] + fun exception_tests ctxt s = + let val exn_prop = Prop.expect_failure (ERROR "out of bounds") faulty_abs + in + Lecker.test_group ctxt s [ + check_unit_tests Show.int [~10, 0, 10] "expect exception for small values" exn_prop, + check_unit_tests Show.int [~999999999, ~99999999999999] "expect exception for large values" + exn_prop + ] + end +in + Lecker.test_group @{context} () [ + check_unit_tests Show.int [~10, 0, 10] "is idempotent" + (Prop.prop (fn n => faulty_abs (faulty_abs n) = faulty_abs n)), + correctness_tests, + exception_tests + ] +end +\ + +subsection \Randomised Tests\ + ML_command \ let val int_gen = Gen.range_int (~10000000, 10000000) val size_gen = Gen.nonneg 10 val check_list = check_shrink (Show.list Show.int) (Shrink.list Shrink.int) (Gen.list size_gen int_gen) fun list_test (k, f, xs) = AList.lookup (op=) (AList.map_entry (op=) k f xs) k = Option.map f (AList.lookup (op=) xs k) val list_test_show = Show.zip3 Show.int Show.none (Show.list (Show.zip Show.int Show.int)) val list_test_gen = Gen.zip3 int_gen (Gen.function' int_gen) (Gen.list size_gen (Gen.zip int_gen int_gen)) in Lecker.test_group @{context} (Random.new ()) [ Prop.prop (fn xs => rev xs = xs) |> check_list "rev = I", Prop.prop (fn xs => rev (rev xs) = xs) |> check_list "rev o rev = I", Prop.prop list_test |> check list_test_show list_test_gen "lookup map equiv map lookup" ] end \ text \The next three examples roughly correspond to the above test group (except that there's no shrinking). Compared to the string-based method, the method above is more flexible - you can change your generators, shrinking methods, and show instances - and robust - you are not reflecting strings (which might contain typos) but entering type-checked code. In exchange, it is a bit more work to set up the generators. However, in practice, one shouldn't rely on default generators in most cases anyway.\ ML_command \ check_dynamic @{context} "ALL xs. rev xs = xs"; \ ML_command \ check_dynamic @{context} "ALL xs. rev (rev xs) = xs"; \ subsection \AList Specification\ ML_command \ (*map_entry applies the function to the element*) check_dynamic @{context} (implode ["ALL k f xs. AList.lookup (op =) (AList.map_entry (op =) k f xs) k = ", "Option.map f (AList.lookup (op =) xs k)"]) \ ML_command \ (*update always results in an entry*) check_dynamic @{context} "ALL k v xs. AList.defined (op =) (AList.update (op =) (k, v) xs) k"; \ ML_command \ (*update always writes the value*) check_dynamic @{context} "ALL k v xs. AList.lookup (op =) (AList.update (op =) (k, v) xs) k = SOME v"; \ ML_command \ (*default always results in an entry*) check_dynamic @{context} "ALL k v xs. AList.defined (op =) (AList.default (op =) (k, v) xs) k"; \ ML_command \ (*delete always removes the entry*) check_dynamic @{context} "ALL k xs. not (AList.defined (op =) (AList.delete (op =) k xs) k)"; \ ML_command \ (*default writes the entry iff it didn't exist*) check_dynamic @{context} (implode ["ALL k v xs. (AList.lookup (op =) (AList.default (op =) (k, v) xs) k = ", "(if AList.defined (op =) xs k then AList.lookup (op =) xs k else SOME v))"]) \ subsection \Examples on Types and Terms\ ML_command \ check_dynamic @{context} "ALL f g t. map_types (g o f) t = (map_types f o map_types g) t"; \ ML_command \ check_dynamic @{context} "ALL f g t. map_types (f o g) t = (map_types f o map_types g) t"; \ text \One would think this holds:\ ML_command \ check_dynamic @{context} "ALL t ts. strip_comb (list_comb (t, ts)) = (t, ts)" \ text \But it only holds with this precondition:\ ML_command \ check_dynamic @{context} "ALL t ts. case t of _ $ _ => true | _ => strip_comb (list_comb (t, ts)) = (t, ts)" \ subsection \Some surprises\ ML_command \ check_dynamic @{context} "ALL Ts t. type_of1 (Ts, t) = fastype_of1 (Ts, t)" \ ML_command \ val thy = \<^theory>; check_dynamic (Context.the_local_context ()) "ALL t u. if Pattern.matches thy (t, u) then Term.could_unify (t, u) else true" \ end diff --git a/thys/SpecCheck/README.md b/thys/SpecCheck/README.md --- a/thys/SpecCheck/README.md +++ b/thys/SpecCheck/README.md @@ -1,63 +1,62 @@ # SpecCheck SpecCheck is a [QuickCheck](https://en.wikipedia.org/wiki/QuickCheck)-like testing framework for Isabelle/ML. You can use it to write specifications for ML functions. SpecCheck then checks whether your specification holds by testing your function against a given number of generated inputs. It helps you to identify bugs by printing counterexamples on failure and provides you timing information. -SpecCheck is customisable and allows you to specify your own input generators, +SpecCheck is customisable and allows you to specify your own input generators, unit tests, test output formats, as well as pretty printers and shrinking functions for counterexamples among other things. ## Quick Usage 1. Import `SpecCheck.SpecCheck` into your environment. 2. Write specifications using the ML invocation: `check show gen name prop ctxt seed` where - * `show` converts values into `Pretty.T` types to show the failing inputs. See `show/`. - * `gen` is the value generator used for the test. See `generators/`. + * `show` converts values into `Pretty.T` types to show the failing inputs. See `Show/`. + * `gen` is the value generator used for the test. See `Generators/`. * `name` is the name of the test case * `prop` is the property to be tested. See `property.ML` * `seed` is the initial seed for the generator. You can also choose to omit the show method for rapid testing or add a shrinking method à la QuickCheck to get better counterexamples. -If you want to write unit tests, use `check_list` instead of `check`. -A somewhat extensive example application can be found +You can find some simple examples in `Examples/` and a somewhat extensive example application [here](https://github.com/kappelmann/e-unification-isabelle/blob/master/Tests/first_order_unification_tests.ML). For more information see `speccheck.ML`. An experimental alternative allows you to specify tests using strings: 1. Import `SpecCheck_Dynamic.Dynamic` into your environment. 2. `check_property "ALL x. P x"` where `P x` is some ML code evaluating to a boolean ## Implementation Notes SpecCheck is based on [QCheck](https://github.com/league/qcheck), a testing framework for Standard ML by [Christopher League](https://contrapunctus.net/league/). As Isabelle/ML provides a rich and uniform ML platform, some features where removed or adapted, in particular: 1. Isabelle/ML provides common data structures, which we can use in the tool's implementation for storing data and printing output. 2. Implementations in Isabelle/ML checked with this tool commonly use Isabelle/ML's `int` type (which corresponds ML's `IntInf.int`), but do not use other integer types in ML such as ML's `Int.int`, `Word.word`, and others. 3. As Isabelle makes heavy use of parallelism, we avoid reference types. ## Next Steps * Implement more shrinking methods for commonly used types * Implement sizing methods (cf. QuickCheck's `sized`) ## License The source code originated from Christopher League's QCheck, which is licensed under the 2-clause BSD license. The current source code is licensed under the compatible 3-clause BSD license of Isabelle. ## Authors * Lukas Bulwahn * Nicolai Schaffroth * Sebastian Willenbrink * Kevin Kappelmann diff --git a/thys/SpecCheck/property.ML b/thys/SpecCheck/property.ML --- a/thys/SpecCheck/property.ML +++ b/thys/SpecCheck/property.ML @@ -1,46 +1,54 @@ (* Title: SpecCheck/property.ML Author: Lukas Bulwahn and Nicolai Schaffroth, TU Muenchen Author: Christopher League The base module of testable properties. A property is the type of values that SpecCheck knows how to test. Properties not only test whether a given predicate holds, but, for example, can also have preconditions. *) signature SPECCHECK_PROPERTY = sig type 'a pred = 'a -> bool (*the type of values testable by SpecCheck*) type 'a prop (*transforms a predicate into a testable property*) val prop : 'a pred -> 'a prop (*implication for properties: if the first argument evaluates to false, the test case is discarded*) val implies : 'a pred -> 'a prop -> 'a prop (*convenient notation for `implies` working on predicates*) val ==> : 'a pred * 'a pred -> 'a prop + val expect_failure : exn -> ('a -> 'b) -> 'a prop end structure SpecCheck_Property : SPECCHECK_PROPERTY = struct type 'a pred = 'a -> bool type 'a prop = 'a -> SpecCheck_Base.result_single fun apply f x = SpecCheck_Base.Result (f x) (*testcode may throw arbitrary exceptions; interrupts must not be caught!*) handle exn => if Exn.is_interrupt exn then Exn.reraise exn else SpecCheck_Base.Exception exn fun prop f x = apply f x fun implies cond prop x = if cond x then prop x else SpecCheck_Base.Discard fun ==> (p1, p2) = implies p1 (prop p2) +fun eq_exn (exn, exn') = exnName exn = exnName exn' andalso exnMessage exn = exnMessage exn' + +fun expect_failure exp_exn f = prop (fn x => + (f x; false) + handle exn => if Exn.is_interrupt exn then Exn.reraise exn else eq_exn (exn, exp_exn) + ) + end diff --git a/thys/SpecCheck/speccheck.ML b/thys/SpecCheck/speccheck.ML --- a/thys/SpecCheck/speccheck.ML +++ b/thys/SpecCheck/speccheck.ML @@ -1,205 +1,218 @@ (* Title: SpecCheck/speccheck.ML Author: Lukas Bulwahn, Nicolai Schaffroth, and Kevin Kappelmann TU Muenchen Author: Christopher League Specification-based testing of ML programs. *) signature SPECCHECK = sig (*tries to shrink a given (failing) input to a smaller, failing input*) val try_shrink : 'a SpecCheck_Property.prop -> 'a SpecCheck_Shrink.shrink -> 'a -> int -> SpecCheck_Base.stats -> ('a * SpecCheck_Base.stats) (*runs a property for a given test case and returns the result and the updated the statistics*) val run_a_test : 'a SpecCheck_Property.prop -> 'a -> SpecCheck_Base.stats -> SpecCheck_Base.result_single * SpecCheck_Base.stats (*returns the new state after executing the test*) val check_style : 'a SpecCheck_Output_Style_Types.output_style -> ('a SpecCheck_Show.show) option -> 'a SpecCheck_Shrink.shrink -> ('a, 's) SpecCheck_Generator.gen_state -> string -> 'a SpecCheck_Property.prop -> (Proof.context, 's) Lecker.test_state val check_shrink : 'a SpecCheck_Show.show -> 'a SpecCheck_Shrink.shrink -> ('a, 's) SpecCheck_Generator.gen_state -> string -> 'a SpecCheck_Property.prop -> (Proof.context, 's) Lecker.test_state val check : 'a SpecCheck_Show.show -> ('a, 's) SpecCheck_Generator.gen_state -> string -> 'a SpecCheck_Property.prop -> (Proof.context, 's) Lecker.test_state val check_base : ('a, 's) SpecCheck_Generator.gen_state -> string -> 'a SpecCheck_Property.prop -> (Proof.context, 's) Lecker.test_state (*returns all unprocessed elements of the sequence*) val check_seq_style : 'a SpecCheck_Output_Style_Types.output_style -> ('a SpecCheck_Show.show) option -> 'a Seq.seq -> string -> 'a SpecCheck_Property.prop -> Proof.context -> 'a Seq.seq val check_seq : 'a SpecCheck_Show.show -> 'a Seq.seq -> string -> 'a SpecCheck_Property.prop -> Proof.context -> 'a Seq.seq val check_seq_base : 'a Seq.seq -> string -> 'a SpecCheck_Property.prop -> Proof.context -> 'a Seq.seq - (*returns all unused elements of the list*) + (*returns all unused elements of the list; you can use this for unit tests*) val check_list_style : 'a SpecCheck_Output_Style_Types.output_style -> ('a SpecCheck_Show.show) option -> 'a list -> string -> 'a SpecCheck_Property.prop -> Proof.context -> 'a Seq.seq val check_list : 'a SpecCheck_Show.show -> 'a list -> string -> 'a SpecCheck_Property.prop -> Proof.context -> 'a Seq.seq val check_list_base : 'a list -> string -> 'a SpecCheck_Property.prop -> Proof.context -> 'a Seq.seq + (*returns the untouched, passed seed; useful for composing unit tests with randomised tests*) + val check_unit_tests_style : 'a SpecCheck_Output_Style_Types.output_style -> + ('a SpecCheck_Show.show) option -> 'a list -> string -> 'a SpecCheck_Property.prop -> + (Proof.context, 's) Lecker.test_state + val check_unit_tests : 'a SpecCheck_Show.show -> 'a list -> string -> + 'a SpecCheck_Property.prop -> (Proof.context, 's) Lecker.test_state + val check_unit_tests_base : 'a list -> string -> 'a SpecCheck_Property.prop -> + (Proof.context, 's) Lecker.test_state + end structure SpecCheck : SPECCHECK = struct open SpecCheck_Base fun run_a_test prop input { num_success_tests, num_failed_tests, num_discarded_tests, num_recently_discarded_tests, num_success_shrinks, num_failed_shrinks, timing } = let val (time, result) = Timing.timing (fn () => prop input) () val is_success = case result of Result is_success => is_success | _ => false val is_discard = case result of Discard => true | _ => false val is_failure = not is_discard andalso not is_success val num_success_tests = num_success_tests + (if is_success then 1 else 0) val num_failed_tests = num_failed_tests + (if is_failure then 1 else 0) val num_discarded_tests = num_discarded_tests + (if is_discard then 1 else 0) val num_recently_discarded_tests = if is_discard then num_recently_discarded_tests + 1 else 0 val timing = add_timing timing time val stats = { num_success_tests = num_success_tests, num_failed_tests = num_failed_tests, num_discarded_tests = num_discarded_tests, num_recently_discarded_tests = num_recently_discarded_tests, num_success_shrinks = num_success_shrinks, num_failed_shrinks = num_failed_shrinks, timing = timing } in (result, stats) end fun add_num_success_shrinks stats n = { num_success_tests = #num_success_tests stats, num_failed_tests = #num_failed_tests stats, num_discarded_tests = #num_discarded_tests stats, num_recently_discarded_tests = #num_recently_discarded_tests stats, num_success_shrinks = #num_success_shrinks stats + n, num_failed_shrinks = #num_failed_shrinks stats, timing = #timing stats } fun add_num_failed_shrinks stats n = { num_success_tests = #num_success_tests stats, num_failed_tests = #num_failed_tests stats, num_discarded_tests = #num_discarded_tests stats, num_recently_discarded_tests = #num_recently_discarded_tests stats, num_success_shrinks = #num_success_shrinks stats, num_failed_shrinks = #num_failed_shrinks stats + n, timing = #timing stats } fun try_shrink prop shrink input max_shrinks stats = let fun is_failure input = case run_a_test prop input empty_stats |> fst of Result is_success => not is_success | Discard => false | Exception _ => false (*do not count exceptions as a failure because the shrinker might just have broken some invariant of the function*) fun find_first_failure xq pulls_left stats = if pulls_left <= 0 then (NONE, pulls_left, stats) else case Seq.pull xq of NONE => (NONE, pulls_left, stats) | SOME (x, xq) => if is_failure x then (SOME x, pulls_left - 1, add_num_success_shrinks stats 1) else find_first_failure xq (pulls_left - 1) (add_num_failed_shrinks stats 1) in (*always try the first successful branch and abort without backtracking once no further shrink is possible.*) case find_first_failure (shrink input) max_shrinks stats of (*recursively shrink*) (SOME input, shrinks_left, stats) => try_shrink prop shrink input shrinks_left stats | (NONE, _, stats) => (input, stats) end fun test output_style init_stats show_opt shrink opt_gen name prop ctxt state = let val max_discard_ratio = Config.get ctxt SpecCheck_Configuration.max_discard_ratio val max_success = Config.get ctxt SpecCheck_Configuration.max_success (*number of counterexamples to generate before stopping the test*) val num_counterexamples = let val conf_num_counterexamples = Config.get ctxt SpecCheck_Configuration.num_counterexamples in if conf_num_counterexamples > 0 then conf_num_counterexamples else ~1 end val max_shrinks = Config.get ctxt SpecCheck_Configuration.max_shrinks fun run_tests opt_gen state stats counterexamples = (*stop the test run if enough (successful) tests were run or counterexamples were found*) if #num_success_tests stats >= max_success then (Success stats, state) else if num_tests stats >= max_success orelse #num_failed_tests stats = num_counterexamples then (Failure (stats, failure_data counterexamples), state) else (*test input and attempt to shrink on failure*) let val (input_opt, state) = opt_gen state in case input_opt of NONE => if #num_failed_tests stats > 0 then (Failure (stats, failure_data counterexamples), state) else (Success stats, state) | SOME input => let val (result, stats) = run_a_test prop input stats in case result of Result true => run_tests opt_gen state stats counterexamples | Result false => let val (counterexample, stats) = try_shrink prop shrink input max_shrinks stats in run_tests opt_gen state stats (counterexample :: counterexamples) end | Discard => if #num_recently_discarded_tests stats > max_discard_ratio then if #num_failed_tests stats > 0 then (Failure (stats, failure_data counterexamples), state) else (Gave_Up stats, state) else run_tests opt_gen state stats counterexamples | Exception exn => (Failure (stats, failure_data_exn (input :: counterexamples) exn), state) end end in Timing.timing (fn _ => run_tests opt_gen state init_stats []) () |> uncurry (apfst o output_style show_opt ctxt name) |> snd end fun check_style style show_opt shrink = test style empty_stats show_opt shrink o SpecCheck_Generator.map SOME fun check_shrink show = check_style SpecCheck_Default_Output_Style.default (SOME show) fun check show = check_shrink show SpecCheck_Shrink.none fun check_base gen = check_style SpecCheck_Default_Output_Style.default NONE SpecCheck_Shrink.none gen fun check_seq_style style show_opt xq name prop ctxt = test style empty_stats show_opt SpecCheck_Shrink.none SpecCheck_Generator.of_seq name prop ctxt xq - fun check_seq show = check_seq_style SpecCheck_Default_Output_Style.default (SOME show) fun check_seq_base xq = check_seq_style SpecCheck_Default_Output_Style.default NONE xq -fun check_list_style style show = check_seq_style style show o Seq.of_list +fun check_list_style style show_opt = check_seq_style style show_opt o Seq.of_list fun check_list show = check_seq show o Seq.of_list fun check_list_base xs = check_seq_base (Seq.of_list xs) +fun check_unit_tests_style style show_opt xs name prop ctxt seed = + check_list_style style show_opt xs name prop ctxt |> K seed +fun check_unit_tests show xs name prop ctxt seed = check_list show xs name prop ctxt |> K seed +fun check_unit_tests_base xs name prop ctxt seed = check_list_base xs name prop ctxt |> K seed + end