diff --git a/adaptive/learner/learnerND.py b/adaptive/learner/learnerND.py
index eeb94656..ab905609 100644
--- a/adaptive/learner/learnerND.py
+++ b/adaptive/learner/learnerND.py
@@ -377,6 +377,8 @@ def __init__(self, func, bounds, loss_per_simplex=None, *, anisotropic=False):
         # been returned has not been deleted. This checking is done by
         # _pop_highest_existing_simplex
         self._simplex_queue = SortedKeyList(key=_simplex_evaluation_priority)
+        self._next_bound_idx = 0
+        self._bound_match_tol = 1e-10
 
     def new(self) -> LearnerND:
         """Create a new learner with the same function and bounds."""
@@ -494,6 +496,7 @@ def load_dataframe(  # type: ignore[override]
         self.function = partial_function_from_dataframe(
             self.function, df, function_prefix
         )
+        self._next_bound_idx = 0
 
     @property
     def bounds_are_done(self):
@@ -605,6 +608,32 @@ def _simplex_exists(self, simplex):
         simplex = tuple(sorted(simplex))
         return simplex in self.tri.simplices
 
+    def _is_known_point(self, point):
+        """Return whether ``point`` is already evaluated or pending.
+
+        An exact lookup in ``self.data`` and ``self.pending_points`` is
+        tried first; only when it misses is every stored point compared
+        coordinate-wise with a tolerance scaled to ``self._bbox``.
+        """
+        point = tuple(map(float, point))
+        if point in self.data or point in self.pending_points:
+            return True
+
+        # Scale the tolerance with the coordinate magnitude so that float
+        # round-trip drift (e.g. through a dataframe) is matched in domains
+        # of any size.
+        tolerances = [
+            self._bound_match_tol * max(abs(lo), abs(hi), hi - lo)
+            for lo, hi in self._bbox
+        ]
+
+        def _close(other):
+            return all(
+                abs(a - b) <= tol for (a, b, tol) in zip(point, other, tolerances)
+            )
+
+        return any(map(_close, self.data)) or any(map(_close, self.pending_points))
+
     def inside_bounds(self, point):
         """Check whether a point is inside the bounds."""
         if self._interior is not None:
@@ -677,13 +706,19 @@ def ask(self, n, tell_pending=True):
 
     def _ask_bound_point(self):
         # get the next bound point that is still available
-        new_point = next(
-            p
-            for p in self._bounds_points
-            if p not in self.data and p not in self.pending_points
-        )
-        self.tell_pending(new_point)
-        return new_point, np.inf
+        while self._next_bound_idx < len(self._bounds_points):
+            new_point = self._bounds_points[self._next_bound_idx]
+            self._next_bound_idx += 1
+
+            if self._is_known_point(new_point):
+                continue
+
+            self.tell_pending(new_point)
+            return new_point, np.inf
+
+        # Unreachable: _ask only calls this method when _bounds_available,
+        # which guarantees an unknown bound point at index >= _next_bound_idx.
+        raise RuntimeError("No bound points available to ask.")
 
     def _ask_point_without_known_simplices(self):
         assert not self._bounds_available
@@ -756,13 +791,17 @@ def _ask_best_point(self):
     @property
     def _bounds_available(self):
-        return any(
-            (p not in self.pending_points and p not in self.data)
-            for p in self._bounds_points
-        )
+        # Skip past bound points that are already known, so the tolerance scan
+        # in _is_known_point is not repeated for them on every _ask call.
+        bounds_points = self._bounds_points
+        while self._next_bound_idx < len(bounds_points):
+            if not self._is_known_point(bounds_points[self._next_bound_idx]):
+                return True
+            self._next_bound_idx += 1
+        return False
 
     def _ask(self):
         if self._bounds_available:
-            return self._ask_bound_point()  # O(1)
+            return self._ask_bound_point()  # O(N) worst case, amortized O(1)
 
         if self.tri is None:
             # All bound points are pending or have been evaluated, but we do not
@@ -932,6 +971,9 @@ def remove_unfinished(self):
         self.pending_points = set()
         self._subtriangulations = {}
         self._pending_to_simplex = {}
+        # Discarded pending points may include bound points that were already
+        # consumed by _ask_bound_point; rescan them so they can be asked again.
+        self._next_bound_idx = 0
 
     ##########################
     # Plotting related stuff #
diff --git a/adaptive/tests/unit/test_learnernd_integration.py b/adaptive/tests/unit/test_learnernd_integration.py
index 93910837..64ea6214 100644
--- a/adaptive/tests/unit/test_learnernd_integration.py
+++ b/adaptive/tests/unit/test_learnernd_integration.py
@@ -1,6 +1,7 @@
 import math
 
 import pytest
+from scipy.spatial import ConvexHull
 
 from adaptive.learner import LearnerND
 from adaptive.learner.learnerND import curvature_loss_function
@@ -53,3 +54,50 @@ def test_learnerND_log_works():
     learner.ask(2)
     # At this point, there should! be one simplex in the triangulation,
     # furthermore the last two points that were asked should be in this simplex
+
+
+def test_learnerND_resume_after_loading_dataframe_convex_hull():
+    # Regression test for https://github.com/python-adaptive/adaptive/issues/470
+    pandas = pytest.importorskip("pandas")
+
+    hull_points = [
+        (4.375872112626925, 8.917730007820797),
+        (4.236547993389047, 6.458941130666561),
+        (6.027633760716439, 5.448831829968968),
+        (9.636627605010293, 3.8344151882577773),
+    ]
+
+    # Simulate float drift from a dataframe round-trip: one hull vertex is
+    # off by 1e-10, so exact membership checks miss it and the learner used
+    # to re-ask it, crashing with "Point already in triangulation.".
+    drifted = tuple(c + 1e-10 for c in hull_points[-1])
+    data_points = [*hull_points[:-1], drifted, (7.0, 6.0)]
+
+    df = pandas.DataFrame(data_points, columns=["x", "y"])
+    df["value"] = df["x"] + df["y"]
+
+    def some_f(xy):
+        return xy[0] + xy[1]
+
+    learner = LearnerND(some_f, ConvexHull(hull_points))
+    learner.load_dataframe(
+        df,
+        with_default_function_args=False,
+        point_names=("x", "y"),
+        value_name="value",
+    )
+
+    target = len(df) + 1
+    BlockingRunner(learner, npoints_goal=target)
+    assert learner.npoints >= target
+
+
+def test_learnerND_remove_unfinished_reasks_bound_points():
+    learner = LearnerND(ring_of_fire, bounds=[(-1, 1), (-1, 1)])
+    points, _ = learner.ask(4)
+    assert set(points) == set(learner._bounds_points)
+
+    # Discarding the pending bound points must make them available again.
+    learner.remove_unfinished()
+    points, _ = learner.ask(4)
+    assert set(points) == set(learner._bounds_points)