From 28ae065c991becd7031fd113376eaf13ff3b3a9e Mon Sep 17 00:00:00 2001
From: Bas Nijholt <bas@nijho.lt>
Date: Wed, 10 Jun 2026 13:02:04 -0700
Subject: [PATCH] Make flaky test reruns draw fresh random values

pytest-randomly reseeds the global RNG at the start of every test call
phase, including reruns triggered by the flaky plugin. A randomized test
that fails for the session seed therefore fails identically on all
retries, making @flaky.flaky useless and causing spurious CI failures.

Add a fresh_seed_each_run decorator that mixes the rerun attempt number
into the seed inside the test call (a fixture cannot do this, since
pytest-randomly reseeds after fixture setup), apply it to all flaky
tests, and add retries to the scaling-invariance test which had none.
---
 adaptive/tests/flaky_utils.py          | 39 ++++++++++++++++++++++++++
 adaptive/tests/test_average_learner.py |  2 ++
 adaptive/tests/test_learner1d.py       |  2 ++
 adaptive/tests/test_learners.py        |  4 +++
 4 files changed, 47 insertions(+)
 create mode 100644 adaptive/tests/flaky_utils.py

diff --git a/adaptive/tests/flaky_utils.py b/adaptive/tests/flaky_utils.py
new file mode 100644
index 000000000..8922b7455
--- /dev/null
+++ b/adaptive/tests/flaky_utils.py
@@ -0,0 +1,39 @@
+"""Make ``flaky`` reruns work with ``pytest-randomly``."""
+
+import functools as ft
+import random
+from collections import Counter
+
+import numpy as np
+
+_attempts: Counter = Counter()
+
+
+def fresh_seed_each_run(func):
+    """Make ``@flaky.flaky`` reruns draw new random values.
+
+    ``pytest-randomly`` reseeds the global RNG at the start of every test
+    call phase — including reruns triggered by the ``flaky`` plugin — so a
+    randomized test that fails for the session seed fails identically on
+    every rerun, making the retries useless. Reseeding cannot happen in a
+    fixture (``pytest-randomly`` reseeds in ``pytest_runtest_call``, after
+    fixture setup), so this mixes the attempt number into the seed at the
+    start of the test call itself. Each rerun gets new draws while the
+    whole sequence stays reproducible via ``--randomly-seed``.
+
+    Apply directly on the test function, below ``@flaky.flaky`` and any
+    parametrization.
+    """
+
+    @ft.wraps(func)
+    def wrapper(*args, **kwargs):
+        key = (func.__qualname__, repr(args), repr(kwargs))  # keyed per argument set
+        attempt = _attempts[key]  # Counter yields 0 on the first run
+        _attempts[key] += 1
+        if attempt:  # reruns only; the first run keeps its seed
+            seed = (random.getrandbits(32) + attempt) % 2**32  # stay below 2**32
+            random.seed(seed)
+            np.random.seed(seed)
+        return func(*args, **kwargs)
+
+    return wrapper
diff --git a/adaptive/tests/test_average_learner.py b/adaptive/tests/test_average_learner.py
index d0176858e..462b2c165 100644
--- a/adaptive/tests/test_average_learner.py
+++ b/adaptive/tests/test_average_learner.py
@@ -5,6 +5,7 @@
 
 from adaptive.learner import AverageLearner
 from adaptive.runner import simple
+from adaptive.tests.flaky_utils import fresh_seed_each_run
 
 
 def f_unused(seed):
@@ -28,6 +29,7 @@ def test_only_returns_new_points():
 
 
 @flaky.flaky(max_runs=5)
+@fresh_seed_each_run
 def test_avg_std_and_npoints():
     learner = AverageLearner(f_unused, atol=None, rtol=0.01)
 
diff --git a/adaptive/tests/test_learner1d.py b/adaptive/tests/test_learner1d.py
index e83629f35..a2cee8776 100644
--- a/adaptive/tests/test_learner1d.py
+++ b/adaptive/tests/test_learner1d.py
@@ -9,6 +9,7 @@
 from adaptive.learner import Learner1D
 from adaptive.learner.learner1D import curvature_loss_function
 from adaptive.runner import BlockingRunner, simple
+from adaptive.tests.flaky_utils import fresh_seed_each_run
 
 
 def flat_middle(x):
@@ -259,6 +260,7 @@ def test_ask_does_not_return_known_points_when_returning_bounds():
 
 
 @flaky.flaky(max_runs=3)
+@fresh_seed_each_run
 def test_tell_many():
     def f(x, offset=0.123214):
         a = 0.01
diff --git a/adaptive/tests/test_learners.py b/adaptive/tests/test_learners.py
index d8cb2eaf7..de2ca48ef 100644
--- a/adaptive/tests/test_learners.py
+++ b/adaptive/tests/test_learners.py
@@ -29,6 +29,7 @@
 )
 from adaptive.learner.learner1D import with_pandas
 from adaptive.runner import simple
+from adaptive.tests.flaky_utils import fresh_seed_each_run
 
 LOSS_FUNCTIONS = {
     Learner1D: (
@@ -514,7 +515,9 @@ def test_expected_loss_improvement_is_less_than_total_loss(
 
 # XXX: This *should* pass (https://github.com/python-adaptive/adaptive/issues/55)
 #      but we xfail it now, as Learner2D will be deprecated anyway
+@flaky.flaky(max_runs=5)
 @run_with(Learner1D, xfail(Learner2D), LearnerND, AverageLearner1D)
+@fresh_seed_each_run
 def test_learner_performance_is_invariant_under_scaling(
     learner_type, f, learner_kwargs
 ):
@@ -583,6 +586,7 @@ def scale_x(x):
     SequenceLearner,
     with_all_loss_functions=False,
 )
+@fresh_seed_each_run
 def test_balancing_learner(learner_type, f, learner_kwargs):
     """Test if the BalancingLearner works with the different types of learners."""
     learners = [