Skip to content

Commit 329ba04

Browse files
authored
Merge pull request #3537 from Zac-HD/example-xfail
New method: `@example(...).xfail()`
2 parents 679ce87 + e1deea8 commit 329ba04

File tree

6 files changed

+220
-14
lines changed

6 files changed

+220
-14
lines changed

hypothesis-python/RELEASE.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
RELEASE_TYPE: minor
2+
3+
A classic error when testing is to write a test function that can never fail,
4+
even on inputs that aren't allowed, or on inputs you provide by hand. By analogy to the
5+
design pattern of::
6+
7+
@pytest.mark.parametrize("arg", [
8+
..., # passing examples
9+
pytest.param(..., marks=[pytest.mark.xfail]) # expected-failing input
10+
])
11+
12+
we now support :obj:`@example(...).xfail() <hypothesis.example.xfail>`, with
13+
the same (optional) ``condition``, ``reason``, and ``raises`` arguments as
14+
``pytest.mark.xfail()``.
15+
16+
Naturally you can also write ``.via(...).xfail(...)``, or ``.xfail(...).via(...)``,
17+
if you wish to note the provenance of expected-failing examples.

hypothesis-python/docs/reproducing.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ Either are fine, and you can use one in one example and the other in another
7676
example if for some reason you really want to, but a single example must be
7777
consistent.
7878

79+
.. automethod:: hypothesis.example.xfail
80+
7981
.. automethod:: hypothesis.example.via
8082

8183
.. _reproducing-with-seed:

hypothesis-python/src/hypothesis/core.py

Lines changed: 96 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
Hashable,
3535
List,
3636
Optional,
37+
Tuple,
38+
Type,
3739
TypeVar,
3840
Union,
3941
overload,
@@ -92,10 +94,12 @@
9294
get_signature,
9395
impersonate,
9496
is_mock,
97+
nicerepr,
9598
proxies,
9699
repr_call,
97100
)
98101
from hypothesis.internal.scrutineer import Tracer, explanatory_lines
102+
from hypothesis.internal.validation import check_type
99103
from hypothesis.reporting import (
100104
current_verbosity,
101105
report,
@@ -134,6 +138,9 @@
134138
class Example:
135139
args = attr.ib()
136140
kwargs = attr.ib()
141+
# Plus two optional arguments for .xfail()
142+
raises = attr.ib(default=None)
143+
reason = attr.ib(default=None)
137144

138145

139146
class example:
@@ -156,6 +163,51 @@ def __call__(self, test: TestFunc) -> TestFunc:
156163
test.hypothesis_explicit_examples.append(self._this_example) # type: ignore
157164
return test
158165

166+
def xfail(
    self,
    condition: bool = True,
    *,
    reason: str = "",
    raises: Union[
        Type[BaseException], Tuple[Type[BaseException], ...]
    ] = BaseException,
) -> "example":
    """Mark this example as an expected failure, like pytest.mark.xfail().

    Expected-failing examples allow you to check that your test does fail on
    some examples, and therefore build confidence that *passing* tests are
    because your code is working, not because the test is missing something.

    .. code-block:: python

        @example(...).xfail()
        @example(...).xfail(reason="Prices must be non-negative")
        @example(...).xfail(raises=(KeyError, ValueError))
        @example(...).xfail(sys.version_info[:2] >= (3, 9), reason="needs py39+")
        @example(...).xfail(condition=sys.platform != "linux", raises=OSError)
        def test(x):
            pass
    """
    check_type(bool, condition, "condition")
    check_type(str, reason, "reason")

    def _is_exception_type(candidate):
        # True only for classes deriving from BaseException (not instances).
        return isinstance(candidate, type) and issubclass(candidate, BaseException)

    if isinstance(raises, tuple):
        # () -> expected to fail with no error, which is impossible
        acceptable = bool(raises) and all(map(_is_exception_type, raises))
    else:
        acceptable = _is_exception_type(raises)
    if not acceptable:
        raise InvalidArgument(
            f"raises={raises!r} must be an exception type or tuple of exception types"
        )

    # With a false condition the arguments are validated but the example is
    # left untouched, i.e. it is still expected to pass.
    if condition:
        self._this_example = attr.evolve(
            self._this_example, raises=raises, reason=reason
        )
    return self
210+
159211
def via(self, *whence: str) -> "example":
160212
"""Attach a machine-readable label noting whence this example came.
161213
@@ -400,9 +452,7 @@ def draw(self, strategy):
400452
assert self.__draws == 0
401453
self.__draws += 1
402454
# The main strategy for given is always a tuples strategy that returns
403-
# first positional arguments then keyword arguments. When building this
404-
# object already converted all positional arguments to keyword arguments,
405-
# so this is the correct format to return.
455+
# first positional arguments then keyword arguments.
406456
return self.__args, self.__kwargs
407457

408458

@@ -414,6 +464,7 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s
414464
]
415465

416466
for example in reversed(getattr(wrapped_test, "hypothesis_explicit_examples", ())):
467+
assert isinstance(example, Example)
417468
# All of this validation is to check that @example() got "the same" arguments
418469
# as @given, i.e. corresponding to the same parameters, even though they might
419470
# be any mixture of positional and keyword arguments.
@@ -455,12 +506,47 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s
455506
with local_settings(state.settings):
456507
fragments_reported = []
457508
try:
509+
adata = ArtificialDataForExample(arguments, example_kwargs)
510+
# Render the example's call arguments for error messages.  Positional and
# keyword fragments go through a single ", ".join so that a separator is
# also emitted between the last positional and the first keyword argument
# (joining the two groups separately and concatenating them dropped it).
bits = ", ".join(
    [nicerepr(x) for x in arguments]
    + [f"{k}={nicerepr(v)}" for k, v in example_kwargs.items()]
)
458513
with with_reporter(fragments_reported.append):
459-
state.execute_once(
460-
ArtificialDataForExample(arguments, example_kwargs),
461-
is_final=True,
462-
print_example=True,
463-
)
514+
if example.raises is None:
515+
state.execute_once(adata, is_final=True, print_example=True)
516+
else:
517+
# @example(...).xfail(...)
518+
try:
519+
state.execute_once(adata, is_final=True, print_example=True)
520+
except failure_exceptions_to_catch() as err:
521+
if not isinstance(err, example.raises):
522+
raise
523+
except example.raises as err:
524+
# We'd usually check this as early as possible, but it's
525+
# possible for failure_exceptions_to_catch() to grow when
526+
# e.g. pytest is imported between import- and test-time.
527+
raise InvalidArgument(
528+
f"@example({bits}) raised an expected {err!r}, "
529+
"but Hypothesis does not treat this as a test failure"
530+
) from err
531+
else:
532+
# Unexpectedly passing; always raise an error in this case.
533+
reason = f" because {example.reason}" * bool(example.reason)
534+
if example.raises is BaseException:
535+
name = "exception" # special-case no raises= arg
536+
elif not isinstance(example.raises, tuple):
537+
name = example.raises.__name__
538+
elif len(example.raises) == 1:
539+
name = example.raises[0].__name__
540+
else:
541+
name = (
542+
", ".join(ex.__name__ for ex in example.raises[:-1])
543+
+ f", or {example.raises[-1].__name__}"
544+
)
545+
vowel = name.upper()[0] in "AEIOU"
546+
raise AssertionError(
547+
f"Expected a{'n' * vowel} {name} from @example({bits})"
548+
f"{reason}, but no exception was raised."
549+
)
464550
except UnsatisfiedAssumption:
465551
# Odd though it seems, we deliberately support explicit examples that
466552
# are then rejected by a call to `assume()`. As well as iterative
@@ -478,7 +564,7 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s
478564
# One user error - whether misunderstanding or typo - we've seen a few
479565
# times is to pass strategies to @example() where values are expected.
480566
# Checking is easy, and false-positives not much of a problem, so:
481-
if any(
567+
if isinstance(err, failure_exceptions_to_catch()) and any(
482568
isinstance(arg, SearchStrategy)
483569
for arg in example.args + tuple(example.kwargs.values())
484570
):
@@ -494,6 +580,7 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s
494580
if (
495581
state.settings.report_multiple_bugs
496582
and pytest_shows_exceptiongroups
583+
and isinstance(err, failure_exceptions_to_catch())
497584
and not isinstance(err, skip_exceptions_to_reraise())
498585
):
499586
continue

hypothesis-python/tests/common/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ class ExcInfo:
8989
pass
9090

9191

92-
def fails_with(e):
92+
def fails_with(e, *, match=None):
9393
def accepts(f):
9494
@proxies(f)
9595
def inverted_test(*arguments, **kwargs):
@@ -98,7 +98,7 @@ def inverted_test(*arguments, **kwargs):
9898
# the `raises` context manager so that any problems in rigging the
9999
# PRNG don't accidentally count as the expected failure.
100100
with deterministic_PRNG():
101-
with raises(e):
101+
with raises(e, match=match):
102102
f(*arguments, **kwargs)
103103

104104
return inverted_test

hypothesis-python/tests/cover/test_example.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,14 @@ def test_interactive_example_does_not_emit_warning():
8787
child.sendline("quit(code=0)")
8888

8989

90+
@fails_with(KeyboardInterrupt)
@example(1)
@example(2)
@given(st.none())
def test_raises_keyboardinterrupt_immediately(_):
    # Both explicit examples raise KeyboardInterrupt; fails_with() requires
    # that the test as a whole surfaces it as a KeyboardInterrupt.
    raise KeyboardInterrupt
96+
97+
9098
def identity(decorator):
9199
# The "identity function hack" from https://peps.python.org/pep-0614/
92100
# Method-chaining decorators are otherwise a syntax error in Python <= 3.8
@@ -104,3 +112,93 @@ def test_invalid_example_via():
104112
example(x=False).via(100) # not a string!
105113
with pytest.raises(TypeError):
106114
example(x=False).via("abc", "def") # too many args
115+
116+
117+
@pytest.mark.parametrize(
    "kw",
    [
        {"condition": None},  # must be a bool
        {"reason": None},  # must be a string
        {"raises": None},  # not a BaseException (or even a type)
        {"raises": int},  # not a BaseException
        {"raises": [Exception]},  # not a tuple
        {"raises": (None,)},  # tuple containing a non-BaseException
        {"raises": ()},  # empty tuple doesn't make sense here
        # raising non-failure exceptions, eg KeyboardInterrupt, is tested below
    ],
    ids=repr,
)
def test_invalid_example_xfail_arguments(kw):
    # Each invalid keyword must be rejected by the .xfail() call itself;
    # no test function is decorated or run here.
    with pytest.raises(InvalidArgument):
        example(x=False).xfail(**kw)
134+
135+
136+
@identity(example(True).xfail())
@identity(example(True).xfail(reason="ignored for passing tests"))
@identity(example(True).xfail(raises=KeyError))
@identity(example(True).xfail(raises=(KeyError, ValueError)))
@identity(example(True).xfail(True, reason="..."))
@identity(example(False).xfail(condition=False))
@given(st.none())
def test_many_xfail_example_decorators(fails):
    # Every example(True) is marked xfail and raises KeyError below, which
    # matches each of the raises= specifications used above.  The
    # example(False) case does not raise, and its xfail mark is disabled by
    # condition=False.
    if fails:
        raise KeyError
146+
147+
148+
@fails_with(AssertionError)
@identity(example(x=True).xfail(raises=KeyError))
@given(st.none())
def test_xfail_reraises_non_specified_exception(x):
    # The example is expected to fail with KeyError, but the assertion
    # raises AssertionError instead; fails_with() checks that this
    # AssertionError is what the test reports.
    assert not x
153+
154+
155+
@fails_with(
    InvalidArgument,
    match=r"@example\(x=True\) raised an expected BaseException\('msg'\), "
    r"but Hypothesis does not treat this as a test failure",
)
@identity(example(True).xfail())
@given(st.none())
def test_must_raise_a_failure_exception(x):
    # .xfail() is called without raises=, and the example raises a bare
    # BaseException; the expected outcome is the InvalidArgument message
    # matched above.
    if x:
        raise BaseException("msg")
165+
166+
167+
@fails_with(
    AssertionError,
    match=r"Expected an exception from @example\(x=None\), but no exception was raised.",
)
@identity(example(None).xfail())
@given(st.none())
def test_error_on_unexpected_pass_base(x):
    # An xfail example that passes must be reported as an AssertionError;
    # with no raises= argument the message says "an exception".
    pass
175+
176+
177+
@fails_with(
    AssertionError,
    match=r"Expected an AssertionError from @example\(x=None\), but no exception was raised.",
)
@identity(example(None).xfail(raises=AssertionError))
@given(st.none())
def test_error_on_unexpected_pass_single(x):
    # Unexpected pass with a single exception type: the message names that
    # type, with the article "an" because the name starts with a vowel.
    pass
185+
186+
187+
@fails_with(
    AssertionError,
    match=r"Expected an AssertionError from @example\(x=None\), but no exception was raised.",
)
@identity(example(None).xfail(raises=(AssertionError,)))
@given(st.none())
def test_error_on_unexpected_pass_single_elem_tuple(x):
    # A one-element tuple for raises= must produce the same message as
    # passing the bare exception type.
    pass
195+
196+
197+
@fails_with(
    AssertionError,
    match=r"Expected a KeyError, or ValueError from @example\(x=None\), but no exception was raised.",
)
@identity(example(None).xfail(raises=(KeyError, ValueError)))
@given(st.none())
def test_error_on_unexpected_pass_multi(x):
    # With several exception types the message lists them as
    # "A, or B"; the article is "a" because "KeyError" starts with a
    # consonant.
    pass

hypothesis-python/tests/nocover/test_integer_ranges.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
import pytest
1212

13-
from hypothesis import given
13+
from hypothesis import given, settings
1414
from hypothesis.internal.conjecture.utils import integer_range
1515
from hypothesis.strategies import integers
1616
from hypothesis.strategies._internal.strategies import SearchStrategy
@@ -47,14 +47,16 @@ def test_intervals_shrink_to_center(lower_center_upper):
4747
def test_bounded_integers_distribution_of_bit_width_issue_1387_regression():
4848
values = []
4949

50+
@settings(database=None, max_examples=1000)
5051
@given(integers(0, 1e100))
5152
def test(x):
52-
values.append(x)
53+
if 2 <= x <= int(1e100) - 2: # skip forced-endpoints
54+
values.append(x)
5355

5456
test()
5557

5658
# We draw from a shaped distribution up to 128bit ~7/8 of the time, and
5759
# uniformly the rest. So we should get some very large but not too many.
5860
huge = sum(x > 1e97 for x in values)
59-
assert huge != 0
61+
assert huge != 0 or len(values) < 800
6062
assert huge <= 0.3 * len(values) # expected ~1/8

0 commit comments

Comments
 (0)