From 432e2ea1093f9d0a607fc902883b706206b40672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 13:49:56 -0300 Subject: [PATCH 01/57] chore: ignore agent planning artifacts under docs/superpowers --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index afb5191aa..30406afe6 100644 --- a/.gitignore +++ b/.gitignore @@ -176,3 +176,6 @@ burr/tracking/server/build examples/*/statemachine examples/*/*/statemachine .vscode + +# Agent-authored planning artifacts (never commit) +docs/superpowers/ From fafb734de19fe4a981bc932fedbf70e2480ba534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 13:50:33 -0300 Subject: [PATCH 02/57] feat: add durable execution primitives (suspend signal, records) --- burr/core/durable.py | 82 ++++++++++++++++++++++++++++++++++++++ tests/core/test_durable.py | 77 +++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 burr/core/durable.py create mode 100644 tests/core/test_durable.py diff --git a/burr/core/durable.py b/burr/core/durable.py new file mode 100644 index 000000000..e240c2e44 --- /dev/null +++ b/burr/core/durable.py @@ -0,0 +1,82 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Primitives for durable execution: the suspend control-flow signal, the +determinism error, and the records persisted to support resume.""" + +import dataclasses +from typing import Any, Dict, Optional + + +class _Suspended(BaseException): + """Internal control-flow signal raised by ``ApplicationContext.suspend()``. + + Subclasses ``BaseException`` (not ``Exception``) on purpose: a user + ``try/except Exception`` wrapping an LLM/IO call inside an action must NOT + swallow it. The run loop catches it explicitly. It is never an error and is + never logged or persisted as a failure. + """ + + def __init__( + self, + channel: str, + schema_json: Optional[dict] = None, + metadata: Optional[dict] = None, + ): + self.channel = channel + self.schema_json = schema_json + self.metadata = metadata + super().__init__(f"Execution suspended on channel '{channel}'") + + +class DeterminismError(Exception): + """Raised on resume when ``ctx.durable()`` calls do not replay in the same + order, or with the same keys, as the recorded journal. This converts a + silent footgun (lost re-execution or stale cache) into a loud failure.""" + + +@dataclasses.dataclass +class SuspensionRecord: + """Everything needed to resume a suspended run. Persisted when an action + calls ``suspend()``. ``metadata``, ``inputs`` and ``state`` are serialized + through ``burr.core.serde``.""" + + suspension_id: str + partition_key: Optional[str] + app_id: str + sequence_id: int + position: str # name of the suspended action + channel: str + schema_json: Optional[dict] + metadata: Optional[dict] + inputs: Dict[str, Any] + state: Dict[str, Any] # entry state of the suspended action + created_at: str + resolved: bool = False + + +@dataclasses.dataclass +class JournalEntry: + """One memoized ``ctx.durable()`` sub-step. 
``result`` is serialized through + ``burr.core.serde``.""" + + partition_key: Optional[str] + app_id: str + sequence_id: int + step_key: str + call_index: int + result: Any diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py new file mode 100644 index 000000000..2b26fa427 --- /dev/null +++ b/tests/core/test_durable.py @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import dataclasses + +import pytest + +from burr.core.durable import ( + DeterminismError, + JournalEntry, + SuspensionRecord, + _Suspended, +) + + +def test_suspended_is_base_exception_not_exception(): + assert issubclass(_Suspended, BaseException) + assert not issubclass(_Suspended, Exception) + + +def test_suspended_carries_channel_schema_metadata(): + sig = _Suspended(channel="approval", schema_json={"type": "object"}, metadata={"k": "v"}) + assert sig.channel == "approval" + assert sig.schema_json == {"type": "object"} + assert sig.metadata == {"k": "v"} + + +def test_determinism_error_is_exception(): + assert issubclass(DeterminismError, Exception) + + +def test_suspension_record_fields(): + record = SuspensionRecord( + suspension_id="s1", + partition_key="p", + app_id="a", + sequence_id=3, + position="review", + channel="approval", + schema_json=None, + metadata={"summary": "hi"}, + inputs={"x": 1}, + state={"draft": "text"}, + created_at="2026-05-22T00:00:00", + resolved=False, + ) + assert dataclasses.is_dataclass(record) + assert record.resolved is False + assert record.state == {"draft": "text"} + + +def test_journal_entry_fields(): + entry = JournalEntry( + partition_key="p", + app_id="a", + sequence_id=3, + step_key="summarize", + call_index=0, + result="cached value", + ) + assert dataclasses.is_dataclass(entry) + assert entry.call_index == 0 + assert entry.result == "cached value" From e25aed8cfd875a7232f86ae519bf94aab659630b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 13:50:53 -0300 Subject: [PATCH 03/57] feat: accept 'suspended' as a persisted run status --- burr/core/persistence.py | 14 +++++++------- tests/core/test_durable.py | 10 ++++++++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/burr/core/persistence.py b/burr/core/persistence.py index c32bf8e96..ca2784ec9 100644 --- a/burr/core/persistence.py +++ b/burr/core/persistence.py @@ -130,7 +130,7 @@ def save( sequence_id: int, 
position: str, state: State, - status: Literal["completed", "failed"], + status: Literal["completed", "failed", "suspended"], **kwargs, ): """Saves the state for a given app_id, sequence_id, position @@ -175,7 +175,7 @@ async def save( sequence_id: int, position: str, state: State, - status: Literal["completed", "failed"], + status: Literal["completed", "failed", "suspended"], **kwargs, ): """Saves the state for a given app_id, sequence_id, position @@ -279,7 +279,7 @@ def save( sequence_id: int, position: str, state: State, - status: Literal["completed", "failed"], + status: Literal["completed", "failed", "suspended"], **kwargs, ): return @@ -303,7 +303,7 @@ async def save( sequence_id: int, position: str, state: State, - status: Literal["completed", "failed"], + status: Literal["completed", "failed", "suspended"], **kwargs, ): return @@ -538,7 +538,7 @@ def save( sequence_id: int, position: str, state: State, - status: Literal["completed", "failed"], + status: Literal["completed", "failed", "suspended"], **kwargs, ): """ @@ -644,7 +644,7 @@ def save( sequence_id: int, position: str, state: State, - status: Literal["completed", "failed"], + status: Literal["completed", "failed", "suspended"], **kwargs, ): # Create a PersistedStateData entry @@ -697,7 +697,7 @@ async def save( sequence_id: int, position: str, state: State, - status: Literal["completed", "failed"], + status: Literal["completed", "failed", "suspended"], **kwargs, ): # Create a PersistedStateData entry diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 2b26fa427..1b451b330 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -75,3 +75,13 @@ def test_journal_entry_fields(): assert dataclasses.is_dataclass(entry) assert entry.call_index == 0 assert entry.result == "cached value" + + +def test_save_status_literal_includes_suspended(): + import typing + + from burr.core.persistence import BaseStateSaver + + hints = typing.get_type_hints(BaseStateSaver.save) + 
status_arg = hints["status"] + assert "suspended" in typing.get_args(status_arg) From d5c124e4028b7ee44b7ee10058511bbef0de27d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 13:51:15 -0300 Subject: [PATCH 04/57] feat: export durable execution primitives from burr.core --- burr/core/__init__.py | 4 ++++ tests/core/test_durable.py | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/burr/core/__init__.py b/burr/core/__init__.py index c4da5a48e..3d6428413 100644 --- a/burr/core/__init__.py +++ b/burr/core/__init__.py @@ -22,6 +22,7 @@ ApplicationContext, ApplicationGraph, ) +from burr.core.durable import DeterminismError, JournalEntry, SuspensionRecord from burr.core.graph import Graph, GraphBuilder from burr.core.state import State @@ -34,9 +35,12 @@ "ApplicationContext", "Condition", "default", + "DeterminismError", "expr", + "JournalEntry", "Result", "State", + "SuspensionRecord", "when", "Graph", "GraphBuilder", diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 1b451b330..e9fac22ef 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -85,3 +85,10 @@ def test_save_status_literal_includes_suspended(): hints = typing.get_type_hints(BaseStateSaver.save) status_arg = hints["status"] assert "suspended" in typing.get_args(status_arg) + + +def test_durable_symbols_exported_from_burr_core(): + import burr.core as core + + assert hasattr(core, "DeterminismError") + assert hasattr(core, "SuspensionRecord") From 8e6b3564d186982c52cb3a99f43411a058c06640 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 13:55:41 -0300 Subject: [PATCH 05/57] fix: align persistence status docstrings with the suspended literal --- burr/core/persistence.py | 12 ++++++------ tests/core/test_durable.py | 2 -- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/burr/core/persistence.py b/burr/core/persistence.py index ca2784ec9..e705e9dbe 100644 --- 
a/burr/core/persistence.py +++ b/burr/core/persistence.py @@ -145,8 +145,8 @@ def save( :param sequence_id: Sequence ID of the last executed step :param position: The action name that was implemented :param state: The current state of the application - :param status: The status of this state, either "completed" or "failed". If "failed" the state is what it was - before the action was applied. + :param status: The status of this state: "completed", "failed", or "suspended". If "failed" the state is what + it was before the action was applied. """ pass @@ -190,8 +190,8 @@ async def save( :param sequence_id: Sequence ID of the last executed step :param position: The action name that was implemented :param state: The current state of the application - :param status: The status of this state, either "completed" or "failed". If "failed" the state is what it was - before the action was applied. + :param status: The status of this state: "completed", "failed", or "suspended". If "failed" the state is what + it was before the action was applied. """ pass @@ -554,8 +554,8 @@ def save( :param sequence_id: The state corresponding to a specific point in time. :param position: The position in the sequence of states. :param state: The state to be saved, an instance of the State class. - :param status: The status of this state, either "completed" or "failed". If "failed" the state is what it was - before the action was applied. + :param status: The status of this state: "completed", "failed", or "suspended". If "failed" the state is what + it was before the action was applied. 
:return: None """ logger.debug( diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index e9fac22ef..9799589dc 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -17,8 +17,6 @@ import dataclasses -import pytest - from burr.core.durable import ( DeterminismError, JournalEntry, From 373bd1cce11c5df8c6181194470c818769a1a26c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 13:58:52 -0300 Subject: [PATCH 06/57] feat: add optional durable-storage persister methods with in-state fallback Adds five durable-storage methods (save_suspension, load_suspension, save_journal_entry, load_journal, mark_suspension_resolved) to BaseStatePersister and AsyncBaseStatePersister with NotImplementedError defaults, a real override on InMemoryPersister for tests, the supports_durable_storage() capability helper, and an in-state fallback codec in durable.py for persisters that do not override the methods. --- burr/core/durable.py | 46 ++++++++++++++++++++++ burr/core/persistence.py | 79 ++++++++++++++++++++++++++++++++++++++ tests/core/test_durable.py | 58 ++++++++++++++++++++++++++++ 3 files changed, 183 insertions(+) diff --git a/burr/core/durable.py b/burr/core/durable.py index e240c2e44..36ab0759a 100644 --- a/burr/core/durable.py +++ b/burr/core/durable.py @@ -80,3 +80,49 @@ class JournalEntry: step_key: str call_index: int result: Any + + +def supports_durable_storage(persister) -> bool: + """True if the persister overrides the durable-storage methods. 
When False, + the Application stores suspensions and journal entries inside the State.""" + from burr.core.persistence import ( + AsyncBaseStatePersister, + BaseStatePersister, + ) + + base = AsyncBaseStatePersister if persister.is_async() else BaseStatePersister + return type(persister).save_suspension is not base.save_suspension + + +# --- In-state fallback codec -------------------------------------------------- +# When the persister has no dedicated storage, suspensions and journal entries +# ride inside a reserved State namespace, which the existing PersisterHook saves. + +DURABLE_STATE_KEY = "__burr_durable__" + + +def write_suspension_into_state(state, record: "SuspensionRecord"): + """Return a new State with the suspension record embedded.""" + bucket = dict(state.get(DURABLE_STATE_KEY, {}) or {}) + bucket["suspension"] = dataclasses.asdict(record) + return state.update(**{DURABLE_STATE_KEY: bucket}) + + +def read_suspension_from_state(state, channel: str) -> "Optional[SuspensionRecord]": + bucket = state.get(DURABLE_STATE_KEY, {}) or {} + raw = bucket.get("suspension") + if raw is None or raw.get("channel") != channel: + return None + return SuspensionRecord(**raw) + + +def write_journal_into_state(state, entries: "list"): + """Return a new State with the journal entries embedded.""" + bucket = dict(state.get(DURABLE_STATE_KEY, {}) or {}) + bucket["journal"] = [dataclasses.asdict(e) for e in entries] + return state.update(**{DURABLE_STATE_KEY: bucket}) + + +def read_journal_from_state(state) -> "list": + bucket = state.get(DURABLE_STATE_KEY, {}) or {} + return [JournalEntry(**raw) for raw in bucket.get("journal", [])] diff --git a/burr/core/persistence.py b/burr/core/persistence.py index e705e9dbe..14aea9aca 100644 --- a/burr/core/persistence.py +++ b/burr/core/persistence.py @@ -204,12 +204,68 @@ class BaseStatePersister(BaseStateLoader, BaseStateSaver, metaclass=ABCMeta): Extend this class if you want an easy way to implement custom state storage. 
""" + # --- Durable execution: optional. Default raises; the Application falls + # --- back to storing this data inside the State (see burr.core.durable). + + def save_suspension(self, record: "SuspensionRecord") -> None: + """Persist a suspension record. Override for dedicated storage.""" + raise NotImplementedError + + def load_suspension( + self, partition_key: Optional[str], app_id: str, channel: str + ) -> "Optional[SuspensionRecord]": + """Load the unresolved suspension for (partition_key, app_id, channel).""" + raise NotImplementedError + + def save_journal_entry(self, entry: "JournalEntry") -> None: + """Persist one memoized sub-step. Override for dedicated storage.""" + raise NotImplementedError + + def load_journal( + self, partition_key: Optional[str], app_id: str, sequence_id: int + ) -> list: + """Load journal entries for a suspended action, ordered by call_index.""" + raise NotImplementedError + + def mark_suspension_resolved(self, suspension_id: str) -> None: + """Mark a suspension consumed. First-party SQL persisters do this with a + conditional UPDATE for resume-once; the default raises.""" + raise NotImplementedError + class AsyncBaseStatePersister(AsyncBaseStateLoader, AsyncBaseStateSaver, metaclass=ABCMeta): """Utility interface for an asynchronous state reader/writer. This both persists and initializes state. Extend this class if you want an easy way to implement custom state storage. """ + # --- Durable execution: optional. Default raises; the Application falls + # --- back to storing this data inside the State (see burr.core.durable). + + async def save_suspension(self, record: "SuspensionRecord") -> None: + """Persist a suspension record. 
Override for dedicated storage.""" + raise NotImplementedError + + async def load_suspension( + self, partition_key: Optional[str], app_id: str, channel: str + ) -> "Optional[SuspensionRecord]": + """Load the unresolved suspension for (partition_key, app_id, channel).""" + raise NotImplementedError + + async def save_journal_entry(self, entry: "JournalEntry") -> None: + """Persist one memoized sub-step. Override for dedicated storage.""" + raise NotImplementedError + + async def load_journal( + self, partition_key: Optional[str], app_id: str, sequence_id: int + ) -> list: + """Load journal entries for a suspended action, ordered by call_index.""" + raise NotImplementedError + + async def mark_suspension_resolved(self, suspension_id: str) -> None: + """Mark a suspension consumed. First-party SQL persisters do this with a + conditional UPDATE for resume-once; the default raises.""" + raise NotImplementedError + class PersisterHook(PostRunStepHook): """Wrapper class for bridging the persistence interface with lifecycle hooks. 
This is used internally.""" @@ -614,6 +670,8 @@ class InMemoryPersister(BaseStatePersister): def __init__(self): self._storage = defaultdict(lambda: defaultdict(list)) + self._suspensions = {} + self._journal = {} def load( self, partition_key: str, app_id: Optional[str], sequence_id: Optional[int] = None, **kwargs @@ -661,6 +719,27 @@ def save( # Store the state self._storage[partition_key][app_id].append(persisted_state) + def save_suspension(self, record): + self._suspensions[(record.partition_key, record.app_id, record.channel)] = record + + def load_suspension(self, partition_key, app_id, channel): + return self._suspensions.get((partition_key, app_id, channel)) + + def mark_suspension_resolved(self, suspension_id): + for key, record in self._suspensions.items(): + if record.suspension_id == suspension_id: + record.resolved = True + + def save_journal_entry(self, entry): + bucket = self._journal.setdefault( + (entry.partition_key, entry.app_id, entry.sequence_id), [] + ) + bucket.append(entry) + + def load_journal(self, partition_key, app_id, sequence_id): + bucket = self._journal.get((partition_key, app_id, sequence_id), []) + return sorted(bucket, key=lambda e: e.call_index) + class AsyncInMemoryPersister(AsyncBaseStatePersister): """Sync in-memory persister for testing purposes. 
This is not recommended for production use.""" diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 9799589dc..4bc0a4f07 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -90,3 +90,61 @@ def test_durable_symbols_exported_from_burr_core(): assert hasattr(core, "DeterminismError") assert hasattr(core, "SuspensionRecord") + + +def test_base_persister_durable_methods_raise_not_implemented(): + from burr.core.persistence import BaseStatePersister + + assert hasattr(BaseStatePersister, "save_suspension") + assert hasattr(BaseStatePersister, "load_suspension") + assert hasattr(BaseStatePersister, "save_journal_entry") + assert hasattr(BaseStatePersister, "load_journal") + + +def test_supports_durable_storage_false_for_base_sqlite(): + from burr.core.durable import supports_durable_storage + from burr.core.persistence import SQLitePersister + + persister = SQLitePersister.from_values(":memory:") + # No SQLite override ships in this task; that lands in M4. 
+ assert supports_durable_storage(persister) is False + + +def test_supports_durable_storage_true_for_in_memory(): + from burr.core.durable import supports_durable_storage + from burr.core.persistence import InMemoryPersister + + assert supports_durable_storage(InMemoryPersister()) is True + + +def test_in_memory_persister_suspension_round_trip(): + from burr.core.durable import SuspensionRecord + from burr.core.persistence import InMemoryPersister + + persister = InMemoryPersister() + record = SuspensionRecord( + suspension_id="s1", partition_key="p", app_id="a", sequence_id=2, + position="review", channel="approval", schema_json=None, + metadata=None, inputs={}, state={"draft": "d"}, + created_at="2026-05-22T00:00:00", resolved=False, + ) + persister.save_suspension(record) + loaded = persister.load_suspension("p", "a", "approval") + assert loaded.suspension_id == "s1" + assert loaded.state == {"draft": "d"} + assert loaded.resolved is False + + +def test_in_memory_persister_journal_round_trip(): + from burr.core.durable import JournalEntry + from burr.core.persistence import InMemoryPersister + + persister = InMemoryPersister() + entry = JournalEntry( + partition_key="p", app_id="a", sequence_id=2, + step_key="summarize", call_index=0, result="cached", + ) + persister.save_journal_entry(entry) + journal = persister.load_journal("p", "a", 2) + assert len(journal) == 1 + assert journal[0].result == "cached" From 3716cdbd7b38d4ad542e5ebcd813fb8daf649985 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 14:08:27 -0300 Subject: [PATCH 07/57] fix: clarify durable persister contracts and strengthen tests Add docstring warnings about dataclasses.asdict not round-tripping nested types in the in-state codec, document the all-or-nothing override contract on supports_durable_storage, replace string annotations with real imports in persistence.py (no circular import), and strengthen the NotImplementedError test to call all 5 durable 
methods with real arguments. --- burr/core/durable.py | 32 +++++++++++++++++++++--- burr/core/persistence.py | 13 +++++----- tests/core/test_durable.py | 50 +++++++++++++++++++++++++++++++++++--- 3 files changed, 82 insertions(+), 13 deletions(-) diff --git a/burr/core/durable.py b/burr/core/durable.py index 36ab0759a..adedbb823 100644 --- a/burr/core/durable.py +++ b/burr/core/durable.py @@ -72,7 +72,17 @@ class SuspensionRecord: @dataclasses.dataclass class JournalEntry: """One memoized ``ctx.durable()`` sub-step. ``result`` is serialized through - ``burr.core.serde``.""" + ``burr.core.serde``. + + .. note:: + The in-state codec (``write_journal_into_state`` / ``read_journal_from_state``) + serializes entries via ``dataclasses.asdict``, which recurses into nested + dataclasses and converts them to plain dicts. Those dicts are NOT + reconstructed back into their original types on read. Callers must keep + ``result`` (and any nested fields) to plain JSON-friendly types, or accept + that nested dataclasses come back as plain dicts after a round-trip through + the in-state codec. + """ partition_key: Optional[str] app_id: str @@ -84,7 +94,16 @@ class JournalEntry: def supports_durable_storage(persister) -> bool: """True if the persister overrides the durable-storage methods. When False, - the Application stores suspensions and journal entries inside the State.""" + the Application stores suspensions and journal entries inside the State. + + .. note:: + All-or-nothing override contract: a persister is considered to support + durable storage only when it overrides ALL five durable-storage methods + (``save_suspension``, ``load_suspension``, ``save_journal_entry``, + ``load_journal``, ``mark_suspension_resolved``). Detection is based solely + on ``save_suspension``; partial overrides are not detected and will raise + ``NotImplementedError`` at call time. 
+ """ from burr.core.persistence import ( AsyncBaseStatePersister, BaseStatePersister, @@ -117,7 +136,14 @@ def read_suspension_from_state(state, channel: str) -> "Optional[SuspensionRecor def write_journal_into_state(state, entries: "list"): - """Return a new State with the journal entries embedded.""" + """Return a new State with the journal entries embedded. + + .. warning:: + Serializes via ``dataclasses.asdict``, which recursively converts nested + dataclasses to plain dicts. They are NOT reconstructed to their original + types when read back via ``read_journal_from_state``. Keep ``JournalEntry.result`` + and any nested fields as plain JSON-friendly types to avoid type loss. + """ bucket = dict(state.get(DURABLE_STATE_KEY, {}) or {}) bucket["journal"] = [dataclasses.asdict(e) for e in entries] return state.update(**{DURABLE_STATE_KEY: bucket}) diff --git a/burr/core/persistence.py b/burr/core/persistence.py index 14aea9aca..af49be70c 100644 --- a/burr/core/persistence.py +++ b/burr/core/persistence.py @@ -25,6 +25,7 @@ from burr.common.types import BaseCopyable from burr.core import Action +from burr.core.durable import JournalEntry, SuspensionRecord from burr.core.state import State, logger from burr.lifecycle import PostRunStepHook, PostRunStepHookAsync @@ -207,17 +208,17 @@ class BaseStatePersister(BaseStateLoader, BaseStateSaver, metaclass=ABCMeta): # --- Durable execution: optional. Default raises; the Application falls # --- back to storing this data inside the State (see burr.core.durable). - def save_suspension(self, record: "SuspensionRecord") -> None: + def save_suspension(self, record: SuspensionRecord) -> None: """Persist a suspension record. 
Override for dedicated storage.""" raise NotImplementedError def load_suspension( self, partition_key: Optional[str], app_id: str, channel: str - ) -> "Optional[SuspensionRecord]": + ) -> Optional[SuspensionRecord]: """Load the unresolved suspension for (partition_key, app_id, channel).""" raise NotImplementedError - def save_journal_entry(self, entry: "JournalEntry") -> None: + def save_journal_entry(self, entry: JournalEntry) -> None: """Persist one memoized sub-step. Override for dedicated storage.""" raise NotImplementedError @@ -241,17 +242,17 @@ class AsyncBaseStatePersister(AsyncBaseStateLoader, AsyncBaseStateSaver, metacla # --- Durable execution: optional. Default raises; the Application falls # --- back to storing this data inside the State (see burr.core.durable). - async def save_suspension(self, record: "SuspensionRecord") -> None: + async def save_suspension(self, record: SuspensionRecord) -> None: """Persist a suspension record. Override for dedicated storage.""" raise NotImplementedError async def load_suspension( self, partition_key: Optional[str], app_id: str, channel: str - ) -> "Optional[SuspensionRecord]": + ) -> Optional[SuspensionRecord]: """Load the unresolved suspension for (partition_key, app_id, channel).""" raise NotImplementedError - async def save_journal_entry(self, entry: "JournalEntry") -> None: + async def save_journal_entry(self, entry: JournalEntry) -> None: """Persist one memoized sub-step. 
Override for dedicated storage.""" raise NotImplementedError diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 4bc0a4f07..da2d1c2cc 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -17,6 +17,8 @@ import dataclasses +import pytest + from burr.core.durable import ( DeterminismError, JournalEntry, @@ -93,12 +95,52 @@ def test_durable_symbols_exported_from_burr_core(): def test_base_persister_durable_methods_raise_not_implemented(): + from burr.core.durable import JournalEntry, SuspensionRecord from burr.core.persistence import BaseStatePersister - assert hasattr(BaseStatePersister, "save_suspension") - assert hasattr(BaseStatePersister, "load_suspension") - assert hasattr(BaseStatePersister, "save_journal_entry") - assert hasattr(BaseStatePersister, "load_journal") + # Use DevNullPersister which satisfies the abstract methods but does not + # override the durable methods, so all five should raise NotImplementedError. + from burr.core.persistence import DevNullPersister + + p = DevNullPersister() + + dummy_record = SuspensionRecord( + suspension_id="s1", + partition_key="p", + app_id="a", + sequence_id=1, + position="action", + channel="ch", + schema_json=None, + metadata=None, + inputs={}, + state={}, + created_at="2026-05-22T00:00:00", + resolved=False, + ) + dummy_entry = JournalEntry( + partition_key="p", + app_id="a", + sequence_id=1, + step_key="k", + call_index=0, + result=None, + ) + + with pytest.raises(NotImplementedError): + p.save_suspension(dummy_record) + + with pytest.raises(NotImplementedError): + p.load_suspension("p", "a", "ch") + + with pytest.raises(NotImplementedError): + p.save_journal_entry(dummy_entry) + + with pytest.raises(NotImplementedError): + p.load_journal("p", "a", 1) + + with pytest.raises(NotImplementedError): + p.mark_suspension_resolved("s1") def test_supports_durable_storage_false_for_base_sqlite(): From 23f390643d45517d67e721e5bc943156ec790f5a Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 14:15:37 -0300 Subject: [PATCH 08/57] fix: test durable codec and correct load_suspension docstring Add four unit tests covering the in-state fallback codec functions (suspension and journal round-trips, channel mismatch, JSON result preservation). Correct the misleading load_suspension docstring on BaseStatePersister and AsyncBaseStatePersister to reflect that the method returns resolved and unresolved records alike. Add type annotations to the five durable methods on InMemoryPersister to match the base-class signatures. --- burr/core/persistence.py | 24 +++++--- tests/core/test_durable.py | 113 +++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 7 deletions(-) diff --git a/burr/core/persistence.py b/burr/core/persistence.py index af49be70c..a6b760e8c 100644 --- a/burr/core/persistence.py +++ b/burr/core/persistence.py @@ -215,7 +215,12 @@ def save_suspension(self, record: SuspensionRecord) -> None: def load_suspension( self, partition_key: Optional[str], app_id: str, channel: str ) -> Optional[SuspensionRecord]: - """Load the unresolved suspension for (partition_key, app_id, channel).""" + """Load the suspension record for (partition_key, app_id, channel). + + Returns the record whether or not it is resolved; callers check + ``record.resolved`` for resume-once idempotency. Returns ``None`` + when no record exists for this combination. + """ raise NotImplementedError def save_journal_entry(self, entry: JournalEntry) -> None: @@ -249,7 +254,12 @@ async def save_suspension(self, record: SuspensionRecord) -> None: async def load_suspension( self, partition_key: Optional[str], app_id: str, channel: str ) -> Optional[SuspensionRecord]: - """Load the unresolved suspension for (partition_key, app_id, channel).""" + """Load the suspension record for (partition_key, app_id, channel). 
+ + Returns the record whether or not it is resolved; callers check + ``record.resolved`` for resume-once idempotency. Returns ``None`` + when no record exists for this combination. + """ raise NotImplementedError async def save_journal_entry(self, entry: JournalEntry) -> None: @@ -720,24 +730,24 @@ def save( # Store the state self._storage[partition_key][app_id].append(persisted_state) - def save_suspension(self, record): + def save_suspension(self, record: SuspensionRecord) -> None: self._suspensions[(record.partition_key, record.app_id, record.channel)] = record - def load_suspension(self, partition_key, app_id, channel): + def load_suspension(self, partition_key: Optional[str], app_id: str, channel: str) -> Optional[SuspensionRecord]: return self._suspensions.get((partition_key, app_id, channel)) - def mark_suspension_resolved(self, suspension_id): + def mark_suspension_resolved(self, suspension_id: str) -> None: for key, record in self._suspensions.items(): if record.suspension_id == suspension_id: record.resolved = True - def save_journal_entry(self, entry): + def save_journal_entry(self, entry: JournalEntry) -> None: bucket = self._journal.setdefault( (entry.partition_key, entry.app_id, entry.sequence_id), [] ) bucket.append(entry) - def load_journal(self, partition_key, app_id, sequence_id): + def load_journal(self, partition_key: Optional[str], app_id: str, sequence_id: int) -> list: bucket = self._journal.get((partition_key, app_id, sequence_id), []) return sorted(bucket, key=lambda e: e.call_index) diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index da2d1c2cc..8dca6938e 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -190,3 +190,116 @@ def test_in_memory_persister_journal_round_trip(): journal = persister.load_journal("p", "a", 2) assert len(journal) == 1 assert journal[0].result == "cached" + + +# --- In-state fallback codec tests ------------------------------------------- + + +def 
test_suspension_codec_round_trip(): + from burr.core.durable import ( + SuspensionRecord, + read_suspension_from_state, + write_suspension_into_state, + ) + from burr.core.state import State + + record = SuspensionRecord( + suspension_id="s42", + partition_key="p", + app_id="a", + sequence_id=5, + position="review", + channel="approval", + schema_json=None, + metadata={"note": "hi"}, + inputs={"x": 1}, + state={"draft": "text"}, + created_at="2026-05-22T00:00:00", + resolved=False, + ) + state = State() + new_state = write_suspension_into_state(state, record) + result = read_suspension_from_state(new_state, "approval") + + assert result is not None + assert result.suspension_id == record.suspension_id + assert result.channel == record.channel + assert result.state == record.state + assert result.resolved == record.resolved + + +def test_read_suspension_from_state_channel_mismatch(): + from burr.core.durable import ( + SuspensionRecord, + read_suspension_from_state, + write_suspension_into_state, + ) + from burr.core.state import State + + record = SuspensionRecord( + suspension_id="s1", + partition_key="p", + app_id="a", + sequence_id=1, + position="act", + channel="approval", + schema_json=None, + metadata=None, + inputs={}, + state={}, + created_at="2026-05-22T00:00:00", + resolved=False, + ) + state = write_suspension_into_state(State(), record) + + assert read_suspension_from_state(state, "other_channel") is None + assert read_suspension_from_state(State(), "approval") is None + + +def test_journal_codec_round_trip(): + from burr.core.durable import ( + JournalEntry, + read_journal_from_state, + write_journal_into_state, + ) + from burr.core.state import State + + entries = [ + JournalEntry( + partition_key="p", app_id="a", sequence_id=3, + step_key="step_a", call_index=0, result="first", + ), + JournalEntry( + partition_key="p", app_id="a", sequence_id=3, + step_key="step_b", call_index=1, result="second", + ), + ] + state = write_journal_into_state(State(), 
entries) + loaded = read_journal_from_state(state) + + assert len(loaded) == 2 + call_indices = {e.call_index for e in loaded} + assert call_indices == {0, 1} + results = {e.call_index: e.result for e in loaded} + assert results[0] == "first" + assert results[1] == "second" + + +def test_journal_codec_preserves_json_friendly_result(): + from burr.core.durable import ( + JournalEntry, + read_journal_from_state, + write_journal_into_state, + ) + from burr.core.state import State + + original_result = {"k": [1, 2]} + entry = JournalEntry( + partition_key="p", app_id="a", sequence_id=7, + step_key="fetch", call_index=0, result=original_result, + ) + state = write_journal_into_state(State(), [entry]) + loaded = read_journal_from_state(state) + + assert len(loaded) == 1 + assert loaded[0].result == original_result From bcc25cd9e9c8292d05c500f5c12cf97604ab9800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 14:19:26 -0300 Subject: [PATCH 09/57] test: cover suspension resolution, journal ordering, and precise load_journal return type Add tests for mark_suspension_resolved (flag flip and unknown-id no-op), journal insertion-order sorting, and tighten load_journal return annotation to list[JournalEntry] on all three sites (requires-python >=3.9). 
--- burr/core/persistence.py | 6 ++-- tests/core/test_durable.py | 61 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/burr/core/persistence.py b/burr/core/persistence.py index a6b760e8c..094cd8bf8 100644 --- a/burr/core/persistence.py +++ b/burr/core/persistence.py @@ -229,7 +229,7 @@ def save_journal_entry(self, entry: JournalEntry) -> None: def load_journal( self, partition_key: Optional[str], app_id: str, sequence_id: int - ) -> list: + ) -> list[JournalEntry]: """Load journal entries for a suspended action, ordered by call_index.""" raise NotImplementedError @@ -268,7 +268,7 @@ async def save_journal_entry(self, entry: JournalEntry) -> None: async def load_journal( self, partition_key: Optional[str], app_id: str, sequence_id: int - ) -> list: + ) -> list[JournalEntry]: """Load journal entries for a suspended action, ordered by call_index.""" raise NotImplementedError @@ -747,7 +747,7 @@ def save_journal_entry(self, entry: JournalEntry) -> None: ) bucket.append(entry) - def load_journal(self, partition_key: Optional[str], app_id: str, sequence_id: int) -> list: + def load_journal(self, partition_key: Optional[str], app_id: str, sequence_id: int) -> list[JournalEntry]: bucket = self._journal.get((partition_key, app_id, sequence_id), []) return sorted(bucket, key=lambda e: e.call_index) diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 8dca6938e..2aee1168b 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -303,3 +303,64 @@ def test_journal_codec_preserves_json_friendly_result(): assert len(loaded) == 1 assert loaded[0].result == original_result + + +# --- InMemoryPersister: mark_suspension_resolved tests ----------------------- + + +def test_in_memory_persister_mark_suspension_resolved_flips_flag(): + from burr.core.durable import SuspensionRecord + from burr.core.persistence import InMemoryPersister + + persister = InMemoryPersister() + record = 
SuspensionRecord( + suspension_id="s99", + partition_key="p", + app_id="a", + sequence_id=1, + position="review", + channel="approval", + schema_json=None, + metadata=None, + inputs={}, + state={}, + created_at="2026-05-22T00:00:00", + resolved=False, + ) + persister.save_suspension(record) + persister.mark_suspension_resolved("s99") + loaded = persister.load_suspension("p", "a", "approval") + assert loaded is not None + assert loaded.resolved is True + + +def test_in_memory_persister_mark_suspension_resolved_unknown_id_is_noop(): + from burr.core.persistence import InMemoryPersister + + persister = InMemoryPersister() + # Must not raise for an id that was never stored. + persister.mark_suspension_resolved("does-not-exist") + + +# --- InMemoryPersister: load_journal ordering test --------------------------- + + +def test_in_memory_persister_journal_ordered_by_call_index(): + from burr.core.durable import JournalEntry + from burr.core.persistence import InMemoryPersister + + persister = InMemoryPersister() + # Insert out of order: 2, 0, 1 + for idx in (2, 0, 1): + persister.save_journal_entry( + JournalEntry( + partition_key="p", + app_id="a", + sequence_id=5, + step_key=f"step_{idx}", + call_index=idx, + result=f"result_{idx}", + ) + ) + journal = persister.load_journal("p", "a", 5) + assert [e.call_index for e in journal] == [0, 1, 2] From a8c8e664a2e0c4dfc9c64657e2af757a80115ed4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 14:24:54 -0300 Subject: [PATCH 10/57] feat: add ApplicationContext.suspend() and durable runtime fields --- burr/core/application.py | 42 +++++++++++++++++++++++++++++++++++++ tests/core/test_durable.py | 43 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/burr/core/application.py b/burr/core/application.py index dc8067c4b..a26257ed7 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -584,6 +584,41 @@ def my_action(state: State, __context: 
ApplicationContext) -> State: state_persister: Optional[BaseStateSaver] action_name: Optional[str] # Store just the action name + # --- Durable execution runtime state (populated per-step) --- + _resume_signals: Dict[str, Any] = dataclasses.field(default_factory=dict) + _loaded_journal: list = dataclasses.field(default_factory=list) + _journal_sink: list = dataclasses.field(default_factory=list) + _journal_call_index: int = 0 + + def suspend( + self, + channel: str, + *, + schema: Optional[type] = None, + metadata: Optional[dict] = None, + ) -> Any: + """Pause this run until an external event arrives on ``channel``. + + First execution: raises the internal suspend signal; the run loop + persists a suspension record and stops. On resume: returns the payload + delivered to that channel, validated against ``schema`` if given. + + Pure control flow, no IO. Do not call from inside a ``durable()`` fn. + """ + if channel in self._resume_signals: + payload = self._resume_signals[channel] + if schema is not None and isinstance(payload, dict): + return schema(**payload) + return payload + schema_json = ( + schema.model_json_schema() + if schema is not None and hasattr(schema, "model_json_schema") + else None + ) + from burr.core.durable import _Suspended + + raise _Suspended(channel, schema_json, metadata) + @staticmethod def get() -> Optional["ApplicationContext"]: """Provides the context-local application context. 
@@ -841,6 +876,10 @@ def __init__( self._spawning_parent_pointer = spawning_parent_pointer self._state_initializer = state_initializer self._state_persister = state_persister + self._resume_signals: Dict[str, Any] = {} + self._loaded_journal: list = [] + self._journal_sink: list = [] + self._suspended = None self._adapter_set.call_all_lifecycle_hooks_sync( "post_application_create", state=self._state, @@ -885,6 +924,9 @@ def _context_factory(self, action: Action, sequence_id: int) -> ApplicationConte state_initializer=self._state_initializer, state_persister=self._state_persister, action_name=action.name if action else None, # Pass just the action name + _resume_signals=getattr(self, "_resume_signals", {}), + _loaded_journal=getattr(self, "_loaded_journal", []), + _journal_sink=self._journal_sink, ) def _step( diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 2aee1168b..e4d164c9c 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -364,3 +364,46 @@ def test_in_memory_persister_journal_ordered_by_call_index(): ) journal = persister.load_journal("p", "a", 5) assert [e.call_index for e in journal] == [0, 1, 2] + + +# --- ApplicationContext.suspend() tests --------------------------------------- + + +def _make_context(resume_signals=None): + from burr.core.application import ApplicationContext + + return ApplicationContext( + app_id="a", partition_key="p", sequence_id=1, tracker=None, + parallel_executor_factory=lambda: None, state_initializer=None, + state_persister=None, action_name="review", + _resume_signals=resume_signals or {}, + _loaded_journal=[], _journal_sink=[], + ) + + +def test_suspend_raises_on_first_call(): + from burr.core.durable import _Suspended + + ctx = _make_context() + with pytest.raises(_Suspended) as excinfo: + ctx.suspend("approval", metadata={"summary": "hi"}) + assert excinfo.value.channel == "approval" + assert excinfo.value.metadata == {"summary": "hi"} + + +def 
test_suspend_returns_payload_when_signal_present(): + ctx = _make_context(resume_signals={"approval": {"approved": True}}) + result = ctx.suspend("approval") + assert result == {"approved": True} + + +def test_suspend_validates_payload_against_live_schema(): + pydantic = pytest.importorskip("pydantic") + + class Approval(pydantic.BaseModel): + approved: bool + + ctx = _make_context(resume_signals={"approval": {"approved": True}}) + result = ctx.suspend("approval", schema=Approval) + assert isinstance(result, Approval) + assert result.approved is True From 624c7491008deebaf57f86c2ce82ac69e49739bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 14:29:53 -0300 Subject: [PATCH 11/57] fix: clarify suspend docstring and unify context factory forwarding Docstring now accurately describes the dict-only coercion behavior. The _context_factory method uses direct attribute access for all three durable fields instead of mixing getattr with direct access. A comment marks the intentional omission of _journal_call_index. New test covers schema_json population on the first suspend call. --- burr/core/application.py | 9 ++++++--- tests/core/test_durable.py | 12 ++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/burr/core/application.py b/burr/core/application.py index a26257ed7..515741715 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -601,7 +601,9 @@ def suspend( First execution: raises the internal suspend signal; the run loop persists a suspension record and stops. On resume: returns the payload - delivered to that channel, validated against ``schema`` if given. + delivered to that channel. When ``schema`` is supplied and the payload + is a dict, the payload is coerced into the schema type via + ``schema(**payload)``; a non-dict payload is returned unchanged. Pure control flow, no IO. Do not call from inside a ``durable()`` fn. 
""" @@ -924,8 +926,9 @@ def _context_factory(self, action: Action, sequence_id: int) -> ApplicationConte state_initializer=self._state_initializer, state_persister=self._state_persister, action_name=action.name if action else None, # Pass just the action name - _resume_signals=getattr(self, "_resume_signals", {}), - _loaded_journal=getattr(self, "_loaded_journal", []), + # _journal_call_index is intentionally not forwarded; it starts at 0 per action context. + _resume_signals=self._resume_signals, + _loaded_journal=self._loaded_journal, _journal_sink=self._journal_sink, ) diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index e4d164c9c..d61d976d7 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -407,3 +407,15 @@ class Approval(pydantic.BaseModel): result = ctx.suspend("approval", schema=Approval) assert isinstance(result, Approval) assert result.approved is True + + +def test_suspend_first_call_schema_json_populated(): + pydantic = pytest.importorskip("pydantic") + + class Approval(pydantic.BaseModel): + approved: bool + + ctx = _make_context() + with pytest.raises(_Suspended) as excinfo: + ctx.suspend("approval", schema=Approval) + assert excinfo.value.schema_json == Approval.model_json_schema() From 07b0899194cd2cc671a7cbc1b714839250be4d7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 14:37:38 -0300 Subject: [PATCH 12/57] feat: catch suspend signal in the sync run loop and persist suspension --- burr/core/application.py | 108 ++++++++++++++++++++++++++++++++----- tests/core/test_durable.py | 45 ++++++++++++++++ 2 files changed, 141 insertions(+), 12 deletions(-) diff --git a/burr/core/application.py b/burr/core/application.py index 515741715..fcb1a2e1a 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -60,6 +60,7 @@ StreamingAction, StreamingResultContainer, ) +from burr.core.durable import _Suspended from burr.core.graph import Graph, GraphBuilder from 
burr.core.persistence import ( AsyncBaseStateLoader, @@ -911,6 +912,8 @@ def step(self, inputs: Optional[Dict[str, Any]] = None) -> Optional[Tuple[Action # we need to increment the sequence before we start computing # that way if we're replaying from state, we don't get stuck self.validate_correct_async_use() + self._journal_sink = [] + self._suspended = None self._increment_sequence_id() out = self._step(inputs=inputs, _run_hooks=True) return out @@ -957,6 +960,7 @@ def _step( exc = None result = None new_state = self._state + suspended_signal = None try: if next_action.single_step: result, new_state = _run_single_step_action( @@ -970,24 +974,102 @@ def _step( new_state = self._update_internal_state_value(new_state, next_action) self._set_state(new_state) + except _Suspended as suspended: + suspended_signal = suspended + self._handle_suspension(next_action, action_inputs, suspended) except Exception as e: exc = e logger.exception(_format_BASE_ERROR_MESSAGE(next_action, self._state, inputs)) raise e finally: if _run_hooks: - self._adapter_set.call_all_lifecycle_hooks_sync( - "post_run_step", - app_id=self._uid, - partition_key=self._partition_key, - action=next_action, - state=new_state, - result=result, - sequence_id=self.sequence_id, - exception=exc, - ) + if suspended_signal is not None: + self._adapter_set.call_all_lifecycle_hooks_sync( + "post_run_step", + app_id=self._uid, + partition_key=self._partition_key, + action=next_action, + state=self._state, + result=None, + sequence_id=self.sequence_id, + exception=None, + ) + else: + self._adapter_set.call_all_lifecycle_hooks_sync( + "post_run_step", + app_id=self._uid, + partition_key=self._partition_key, + action=next_action, + state=new_state, + result=result, + sequence_id=self.sequence_id, + exception=exc, + ) + if suspended_signal is not None: + return next_action, None, self._state return next_action, result, new_state + def _handle_suspension(self, action, action_inputs, suspended): + """Build and persist 
a SuspensionRecord, then set self._suspended.""" + from burr.core.durable import ( + SuspensionRecord, + supports_durable_storage, + write_journal_into_state, + write_suspension_into_state, + ) + + record = SuspensionRecord( + suspension_id=str(uuid.uuid4()), + partition_key=self._partition_key, + app_id=self._uid, + sequence_id=self.sequence_id, + position=action.name, + channel=suspended.channel, + schema_json=suspended.schema_json, + metadata=suspended.metadata, + inputs=action_inputs, + state=dict(self._state.get_all()), + created_at=system.now().isoformat(), + resolved=False, + ) + persister = self._state_persister + if persister is not None and supports_durable_storage(persister): + persister.save_suspension(record) + for entry in self._journal_sink: + persister.save_journal_entry(entry) + elif persister is not None: + # In-state fallback: embed the record + journal in State, then save. + state = write_suspension_into_state(self._state, record) + state = write_journal_into_state(state, self._journal_sink) + self._set_state(state) + persister.save( + self._partition_key, + self._uid, + self.sequence_id, + action.name, + self._state, + "suspended", + ) + # NOTE: post_action_suspend is registered in Milestone 5. Guard it so it is a + # safe no-op until the hook is added to REGISTERED_SYNC_HOOKS. 
+ try: + self._adapter_set.call_all_lifecycle_hooks_sync( + "post_action_suspend", + app_id=self._uid, + partition_key=self._partition_key, + action=action, + sequence_id=self.sequence_id, + suspension=record, + ) + except ValueError: + pass + self._suspended = record + + @property + def suspended(self): + """The SuspensionRecord if the last run() suspended, else None.""" + return self._suspended + def reset_to_entrypoint(self) -> None: """Resets the state machine to the entrypoint action -- you probably want to consider having a loop in your graph, but this will do the trick if you need it!""" @@ -1070,6 +1152,8 @@ async def astep( :return: Tuple[Function, dict, State] -- the action that was just ran, the result of running it, and the new state """ + self._journal_sink = [] + self._suspended = None self._increment_sequence_id() out = await self._astep(inputs=inputs, _run_hooks=True) return out @@ -1282,7 +1366,7 @@ def iterate( result = None prior_action: Optional[Action] = None - while self.has_next_action(): + while self.has_next_action() and self._suspended is None: # self.step will only return None if there is no next action, so we can rely on tuple unpacking prior_action, result, state = self.step(inputs=inputs) yield prior_action, result, state @@ -1315,7 +1399,7 @@ async def aiterate( halt_before, halt_after, inputs ) self._validate_halt_conditions(halt_before, halt_after) - while self.has_next_action(): + while self.has_next_action() and self._suspended is None: # self.step will only return None if there is no next action, so we can rely on tuple unpacking prior_action, result, state = await self.astep(inputs=inputs) yield prior_action, result, state diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index d61d976d7..508e9ed03 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -419,3 +419,48 @@ class Approval(pydantic.BaseModel): with pytest.raises(_Suspended) as excinfo: ctx.suspend("approval", schema=Approval) 
assert excinfo.value.schema_json == Approval.model_json_schema() + + +# --------------------------------------------------------------------------- +# Integration: suspend signal caught by the sync run loop (Task 2.3) +# --------------------------------------------------------------------------- + + +def _suspending_app(persister): + from burr.core import ApplicationBuilder, State, action + + @action(reads=[], writes=["seen"]) + def start(state): + return state.update(seen=True) + + @action(reads=["seen"], writes=["done"]) + def gate(state, __context): + decision = __context.suspend("approval") + return state.update(done=decision) + + return ( + ApplicationBuilder() + .with_actions(start=start, gate=gate) + .with_transitions(("start", "gate")) + .with_entrypoint("start") + .with_state(State({})) + .with_identifiers(app_id="app1", partition_key="pk1") + .with_state_persister(persister) + .build() + ) + + +def test_run_stops_and_records_suspension(): + from burr.core.persistence import InMemoryPersister + + persister = InMemoryPersister() + app = _suspending_app(persister) + app.run(halt_after=["gate"]) + + assert app.suspended is not None + assert app.suspended.channel == "approval" + assert app.suspended.position == "gate" + record = persister.load_suspension("pk1", "app1", "approval") + assert record is not None + assert record.resolved is False + assert record.state.get("seen") is True From 5e0a2abe55629a6aa70e5296a26d6ce68b9f2806 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 14:38:39 -0300 Subject: [PATCH 13/57] feat: catch suspend signal in the async run loop --- burr/core/application.py | 39 +++++++++++++++++++++++++++----------- tests/core/test_durable.py | 35 ++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 11 deletions(-) diff --git a/burr/core/application.py b/burr/core/application.py index fcb1a2e1a..b9ec3db15 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -1179,6 
+1179,7 @@ async def _astep(self, inputs: Optional[Dict[str, Any]], _run_hooks: bool = True exc = None result = None new_state = self._state + suspended_signal = None try: if not next_action.is_async(): # we can just delegate to the synchronous version, it will block the event loop, @@ -1206,23 +1207,39 @@ async def _astep(self, inputs: Optional[Dict[str, Any]], _run_hooks: bool = True new_state = _run_reducer(next_action, self._state, result, next_action.name) new_state = self._update_internal_state_value(new_state, next_action) self._set_state(new_state) + except _Suspended as suspended: + suspended_signal = suspended + self._handle_suspension(next_action, action_inputs, suspended) except Exception as e: exc = e logger.exception(_format_BASE_ERROR_MESSAGE(next_action, self._state, inputs)) raise e finally: if _run_hooks: - await self._adapter_set.call_all_lifecycle_hooks_sync_and_async( - "post_run_step", - action=next_action, - state=new_state, - result=result, - sequence_id=self.sequence_id, - exception=exc, - app_id=self._uid, - partition_key=self._partition_key, - ) - + if suspended_signal is not None: + await self._adapter_set.call_all_lifecycle_hooks_sync_and_async( + "post_run_step", + action=next_action, + state=self._state, + result=None, + sequence_id=self.sequence_id, + exception=None, + app_id=self._uid, + partition_key=self._partition_key, + ) + else: + await self._adapter_set.call_all_lifecycle_hooks_sync_and_async( + "post_run_step", + action=next_action, + state=new_state, + result=result, + sequence_id=self.sequence_id, + exception=exc, + app_id=self._uid, + partition_key=self._partition_key, + ) + if suspended_signal is not None: + return next_action, None, self._state return next_action, result, new_state def _parse_action_list(self, action_list: list[str]) -> Tuple[List[str], List[str]]: diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 508e9ed03..7b671cd46 100644 --- a/tests/core/test_durable.py +++ 
b/tests/core/test_durable.py @@ -464,3 +464,38 @@ def test_run_stops_and_records_suspension(): assert record is not None assert record.resolved is False assert record.state.get("seen") is True + + +# --------------------------------------------------------------------------- +# Integration: suspend signal caught by the async run loop (Task 2.4) +# --------------------------------------------------------------------------- + + +async def test_arun_stops_and_records_suspension(): + from burr.core import ApplicationBuilder, State, action + from burr.core.persistence import InMemoryPersister + + @action(reads=[], writes=["seen"]) + async def astart(state): + return state.update(seen=True) + + @action(reads=["seen"], writes=["done"]) + async def agate(state, __context): + decision = __context.suspend("approval") + return state.update(done=decision) + + persister = InMemoryPersister() + app = ( + ApplicationBuilder() + .with_actions(astart=astart, agate=agate) + .with_transitions(("astart", "agate")) + .with_entrypoint("astart") + .with_state(State({})) + .with_identifiers(app_id="app2", partition_key="pk2") + .with_state_persister(persister) + .build() + ) + await app.arun(halt_after=["agate"]) + + assert app.suspended is not None + assert app.suspended.position == "agate" From 59bd3fd0acbf174fbf9f50a7dbba9c0a0547325c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 14:50:12 -0300 Subject: [PATCH 14/57] fix: route handler failures during suspension to the error path Wrap _handle_suspension calls in _step and _astep so that persister or hook failures clear suspended_signal and fire post_run_step with the real exception instead of falsely reporting a clean suspension. Also use self._state in _astep's non-suspended finally branch to pick up state mutations from delegated sync actions. Strengthen async suspension test to assert persistence round-trip parity with the sync counterpart. 
--- burr/core/application.py | 18 +++++++++++++++--- tests/core/test_durable.py | 5 +++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/burr/core/application.py b/burr/core/application.py index b9ec3db15..16c9db270 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -976,7 +976,13 @@ def _step( self._set_state(new_state) except _Suspended as suspended: suspended_signal = suspended - self._handle_suspension(next_action, action_inputs, suspended) + try: + self._handle_suspension(next_action, action_inputs, suspended) + except Exception as handler_exc: + exc = handler_exc + suspended_signal = None + logger.exception(_format_BASE_ERROR_MESSAGE(next_action, self._state, inputs)) + raise except Exception as e: exc = e logger.exception(_format_BASE_ERROR_MESSAGE(next_action, self._state, inputs)) @@ -1209,7 +1215,13 @@ async def _astep(self, inputs: Optional[Dict[str, Any]], _run_hooks: bool = True self._set_state(new_state) except _Suspended as suspended: suspended_signal = suspended - self._handle_suspension(next_action, action_inputs, suspended) + try: + self._handle_suspension(next_action, action_inputs, suspended) + except Exception as handler_exc: + exc = handler_exc + suspended_signal = None + logger.exception(_format_BASE_ERROR_MESSAGE(next_action, self._state, inputs)) + raise except Exception as e: exc = e logger.exception(_format_BASE_ERROR_MESSAGE(next_action, self._state, inputs)) @@ -1231,7 +1243,7 @@ async def _astep(self, inputs: Optional[Dict[str, Any]], _run_hooks: bool = True await self._adapter_set.call_all_lifecycle_hooks_sync_and_async( "post_run_step", action=next_action, - state=new_state, + state=self._state, result=result, sequence_id=self.sequence_id, exception=exc, diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 7b671cd46..7c90a4e09 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -499,3 +499,8 @@ async def agate(state, __context): assert app.suspended is 
not None assert app.suspended.position == "agate" + record = persister.load_suspension("pk2", "app2", "approval") + assert record is not None + assert record.channel == "approval" + assert record.resolved is False + assert record.state.get("seen") is True From 2b76e0d748b22b46259c917bb1f8d1f4e5141176 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 14:56:35 -0300 Subject: [PATCH 15/57] feat: add resume() helper for durable execution --- burr/core/__init__.py | 3 + burr/core/resume.py | 169 +++++++++++++++++++++++++ tests/core/test_durable_integration.py | 106 ++++++++++++++++ 3 files changed, 278 insertions(+) create mode 100644 burr/core/resume.py create mode 100644 tests/core/test_durable_integration.py diff --git a/burr/core/__init__.py b/burr/core/__init__.py index 3d6428413..abf1c779d 100644 --- a/burr/core/__init__.py +++ b/burr/core/__init__.py @@ -24,6 +24,7 @@ ) from burr.core.durable import DeterminismError, JournalEntry, SuspensionRecord from burr.core.graph import Graph, GraphBuilder +from burr.core.resume import aresume, resume from burr.core.state import State __all__ = [ @@ -33,12 +34,14 @@ "ApplicationBuilder", "ApplicationGraph", "ApplicationContext", + "aresume", "Condition", "default", "DeterminismError", "expr", "JournalEntry", "Result", + "resume", "State", "SuspensionRecord", "when", diff --git a/burr/core/resume.py b/burr/core/resume.py new file mode 100644 index 000000000..df1888c9a --- /dev/null +++ b/burr/core/resume.py @@ -0,0 +1,169 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Top-level resume helpers for durable execution.""" + +from typing import Any, Optional + +from burr.core.durable import ( + read_journal_from_state, + read_suspension_from_state, + supports_durable_storage, +) + + +def _load_suspension(persister, partition_key, app_id, channel): + if supports_durable_storage(persister): + return persister.load_suspension(partition_key, app_id, channel) + loaded = persister.load(partition_key, app_id) + if loaded is None: + return None + return read_suspension_from_state(loaded["state"], channel) + + +def _load_journal(persister, partition_key, app_id, sequence_id, state): + if supports_durable_storage(persister): + return persister.load_journal(partition_key, app_id, sequence_id) + return read_journal_from_state(state) + + +def _validate_payload(schema_json, payload): + if schema_json is None: + return + try: + import jsonschema + except ImportError: + return # validation is best-effort without jsonschema installed + jsonschema.validate(instance=payload, schema=schema_json) + + +def _rebuild(persister, graph, app_id, partition_key, record): + from burr.core.application import ApplicationBuilder + from burr.core.state import State + + app = ( + ApplicationBuilder() + .with_graph(graph) + .with_identifiers(app_id=app_id, partition_key=partition_key) + .with_entrypoint(record.position) + .with_state(State(record.state)) + .with_state_persister(persister) + .build() + ) + return app + + +def resume( + *, + persister, + graph, + app_id: str, + partition_key: Optional[str], + channel: str, + payload: Any, +): + """Resume a 
suspended run by delivering ``payload`` to ``channel``. + + Reloads the suspension, rebuilds the Application from ``graph`` + ``persister``, + re-runs the suspended action from the top (durable sub-steps replay from the + journal, ``suspend(channel)`` returns ``payload``), and runs to the next halt, + suspend, or completion. + + Idempotent: resuming an already-resolved suspension is a no-op that returns + the current state. + """ + record = _load_suspension(persister, partition_key, app_id, channel) + if record is None: + raise ValueError( + f"No suspension found for app_id={app_id!r}, " + f"partition_key={partition_key!r}, channel={channel!r}." + ) + if record.resolved: + loaded = persister.load(partition_key, app_id) + return loaded["state"] if loaded else record.state + + _validate_payload(record.schema_json, payload) + + app = _rebuild(persister, graph, app_id, partition_key, record) + app._resume_signals = {channel: payload} + app._loaded_journal = _load_journal( + persister, partition_key, app_id, record.sequence_id, record.state + ) + app._suspended = None + + app.run(halt_after=[]) # run to completion or the next suspend + + if supports_durable_storage(persister): + persister.mark_suspension_resolved(record.suspension_id) + else: + record.resolved = True # in-state fallback: best-effort + + return app.state + + +async def aresume( + *, + persister, + graph, + app_id: str, + partition_key: Optional[str], + channel: str, + payload: Any, +): + """Async mirror of resume(). Use with async actions and async persisters.""" + is_async = persister.is_async() + if is_async: + record = await persister.load_suspension(partition_key, app_id, channel) + else: + record = _load_suspension(persister, partition_key, app_id, channel) + if record is None: + raise ValueError( + f"No suspension found for app_id={app_id!r}, " + f"partition_key={partition_key!r}, channel={channel!r}." 
+ ) + if record.resolved: + if is_async: + loaded = await persister.load(partition_key, app_id) + else: + loaded = persister.load(partition_key, app_id) + return loaded["state"] if loaded else record.state + + _validate_payload(record.schema_json, payload) + + app = _rebuild(persister, graph, app_id, partition_key, record) + app._resume_signals = {channel: payload} + if is_async and supports_durable_storage(persister): + app._loaded_journal = await persister.load_journal( + partition_key, app_id, record.sequence_id + ) + else: + app._loaded_journal = _load_journal( + persister, partition_key, app_id, record.sequence_id, record.state + ) + app._suspended = None + + await app.arun(halt_after=[]) + + if supports_durable_storage(persister): + if is_async: + await persister.mark_suspension_resolved(record.suspension_id) + else: + persister.mark_suspension_resolved(record.suspension_id) + else: + record.resolved = True + + return app.state diff --git a/tests/core/test_durable_integration.py b/tests/core/test_durable_integration.py new file mode 100644 index 000000000..9e7f98d00 --- /dev/null +++ b/tests/core/test_durable_integration.py @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pytest + +from burr.core import ApplicationBuilder, GraphBuilder, State, action, resume +from burr.core.persistence import InMemoryPersister + + +@action(reads=[], writes=["seen"]) +def start(state): + return state.update(seen=True) + + +@action(reads=["seen"], writes=["done"]) +def gate(state, __context): + decision = __context.suspend("approval") + return state.update(done=decision["approved"]) + + +def _graph(): + return ( + GraphBuilder() + .with_actions(start=start, gate=gate) + .with_transitions(("start", "gate")) + .build() + ) + + +def _build(persister, graph): + return ( + ApplicationBuilder() + .with_graph(graph) + .with_entrypoint("start") + .with_state(State({})) + .with_identifiers(app_id="run1", partition_key="pk1") + .with_state_persister(persister) + .build() + ) + + +def test_suspend_then_resume_completes_the_run(): + persister = InMemoryPersister() + graph = _graph() + + # First process: run, suspend. + app = _build(persister, graph) + app.run(halt_after=["gate"]) + assert app.suspended is not None + + # The process can die here. New process: resume. + final_state = resume( + persister=persister, graph=graph, + app_id="run1", partition_key="pk1", + channel="approval", payload={"approved": True}, + ) + assert final_state["done"] is True + + # The suspension is now resolved. + record = persister.load_suspension("pk1", "run1", "approval") + assert record.resolved is True + + +def test_resume_is_idempotent(): + persister = InMemoryPersister() + graph = _graph() + app = _build(persister, graph) + app.run(halt_after=["gate"]) + + first = resume( + persister=persister, graph=graph, app_id="run1", partition_key="pk1", + channel="approval", payload={"approved": True}, + ) + # Webhook retries are real: a second resume is a no-op. 
+ second = resume( + persister=persister, graph=graph, app_id="run1", partition_key="pk1", + channel="approval", payload={"approved": True}, + ) + assert first["done"] == second["done"] is True + + +def test_resume_unknown_channel_raises(): + persister = InMemoryPersister() + graph = _graph() + app = _build(persister, graph) + app.run(halt_after=["gate"]) + + with pytest.raises(ValueError): + resume( + persister=persister, graph=graph, app_id="run1", partition_key="pk1", + channel="nonexistent", payload={}, + ) From e4984d0d73f51beb9f73829e9eb35764061fbb8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 14:57:10 -0300 Subject: [PATCH 16/57] feat: add aresume() helper for async durable execution --- tests/core/test_durable_integration.py | 38 ++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/core/test_durable_integration.py b/tests/core/test_durable_integration.py index 9e7f98d00..f95583df1 100644 --- a/tests/core/test_durable_integration.py +++ b/tests/core/test_durable_integration.py @@ -104,3 +104,41 @@ def test_resume_unknown_channel_raises(): persister=persister, graph=graph, app_id="run1", partition_key="pk1", channel="nonexistent", payload={}, ) + + +async def test_async_suspend_then_aresume_completes(): + from burr.core import aresume + + @action(reads=[], writes=["seen"]) + async def astart(state): + return state.update(seen=True) + + @action(reads=["seen"], writes=["done"]) + async def agate(state, __context): + decision = __context.suspend("approval") + return state.update(done=decision["approved"]) + + graph = ( + GraphBuilder() + .with_actions(astart=astart, agate=agate) + .with_transitions(("astart", "agate")) + .build() + ) + persister = InMemoryPersister() + app = ( + ApplicationBuilder() + .with_graph(graph) + .with_entrypoint("astart") + .with_state(State({})) + .with_identifiers(app_id="arun1", partition_key="pk1") + .with_state_persister(persister) + .build() + ) + await 
app.arun(halt_after=["agate"]) + assert app.suspended is not None + + final_state = await aresume( + persister=persister, graph=graph, app_id="arun1", partition_key="pk1", + channel="approval", payload={"approved": True}, + ) + assert final_state["done"] is True From 517c76372f6ccfb4736b15229808330a2b67a71b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 15:13:33 -0300 Subject: [PATCH 17/57] fix: wrap resolved-suspension state in State and test in-state fallback --- burr/core/resume.py | 6 +-- tests/core/test_durable_integration.py | 61 +++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/burr/core/resume.py b/burr/core/resume.py index df1888c9a..9cb5770be 100644 --- a/burr/core/resume.py +++ b/burr/core/resume.py @@ -24,6 +24,7 @@ read_suspension_from_state, supports_durable_storage, ) +from burr.core.state import State def _load_suspension(persister, partition_key, app_id, channel): @@ -53,7 +54,6 @@ def _validate_payload(schema_json, payload): def _rebuild(persister, graph, app_id, partition_key, record): from burr.core.application import ApplicationBuilder - from burr.core.state import State app = ( ApplicationBuilder() @@ -94,7 +94,7 @@ def resume( ) if record.resolved: loaded = persister.load(partition_key, app_id) - return loaded["state"] if loaded else record.state + return State(loaded["state"]) if loaded else State(record.state) _validate_payload(record.schema_json, payload) @@ -140,7 +140,7 @@ async def aresume( loaded = await persister.load(partition_key, app_id) else: loaded = persister.load(partition_key, app_id) - return loaded["state"] if loaded else record.state + return State(loaded["state"]) if loaded else State(record.state) _validate_payload(record.schema_json, payload) diff --git a/tests/core/test_durable_integration.py b/tests/core/test_durable_integration.py index f95583df1..af5bee2ef 100644 --- a/tests/core/test_durable_integration.py +++ 
b/tests/core/test_durable_integration.py @@ -18,7 +18,7 @@ import pytest from burr.core import ApplicationBuilder, GraphBuilder, State, action, resume -from burr.core.persistence import InMemoryPersister +from burr.core.persistence import InMemoryPersister, SQLitePersister @action(reads=[], writes=["seen"]) @@ -142,3 +142,62 @@ async def agate(state, __context): channel="approval", payload={"approved": True}, ) assert final_state["done"] is True + + +class _UpsertSQLitePersister(SQLitePersister): + """Test-only subclass that replaces INSERT with INSERT OR REPLACE. + + NOTE: This subclass works around a known production-side bug: + ``_handle_suspension`` in application.py calls ``persister.save(..., "suspended")`` + directly for the in-state fallback path, and then the ``PersisterHook`` + lifecycle adapter calls ``persister.save(..., "completed")`` for the same + (partition_key, app_id, sequence_id, position) in the post_run_step hook. + SQLitePersister's UNIQUE constraint makes that second insert fail. + InMemoryPersister masks the bug by appending. The fix belongs in + application.py (skip PersisterHook's save when suspension was already + persisted inline), but that file is out of scope here. This subclass + isolates the test from the double-save issue so the in-state codec + (read_suspension_from_state / read_journal_from_state) is still exercised. 
+ """ + + def save(self, partition_key, app_id, sequence_id, position, state, status, **kwargs): + import json + import sqlite3 as _sqlite3 + + partition_key = partition_key if partition_key is not None else self.PARTITION_KEY_DEFAULT + cursor = self.connection.cursor() + json_state = json.dumps(state.serialize(**self.serde_kwargs)) + cursor.execute( + f"INSERT OR REPLACE INTO {self.table_name} " + f"(partition_key, app_id, sequence_id, position, state, status) " + f"VALUES (?, ?, ?, ?, ?, ?)", + (partition_key, app_id, sequence_id, position, json_state, status), + ) + self.connection.commit() + + +def test_resume_through_in_state_fallback_with_sqlite(): + """Resume uses the in-state fallback path when the persister does not support + dedicated durable storage (supports_durable_storage() is False). SQLitePersister + is a first-party persister that does NOT override save_suspension, so it triggers + the fallback path where suspension data rides inside the State blob.""" + persister = _UpsertSQLitePersister(":memory:") + persister.initialize() + + graph = _graph() + + # First process: build app, run until it suspends at 'gate'. + app = _build(persister, graph) + app.run(halt_after=["gate"]) + assert app.suspended is not None + + # Same persister instance -- in-memory SQLite is lost if we open a new connection. + final_state = resume( + persister=persister, + graph=graph, + app_id="run1", + partition_key="pk1", + channel="approval", + payload={"approved": True}, + ) + assert final_state["done"] is True From 96f09d4e202850ec8968f0518e239d23127ea27c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 15:18:33 -0300 Subject: [PATCH 18/57] fix: persist suspension once via PersisterHook in in-state fallback Remove the direct persister.save call inside _handle_suspension for the in-state fallback branch. 
The post_run_step lifecycle hook (PersisterHook) already saves the step row for every suspended step, so the inline save was writing the same (partition_key, app_id, sequence_id, position) row twice, causing an IntegrityError in SQLitePersister due to its UNIQUE constraint. Remove the _UpsertSQLitePersister workaround subclass from the test and use bare SQLitePersister directly to confirm the fix. --- burr/core/application.py | 15 +++++------- tests/core/test_durable_integration.py | 34 +------------------------- 2 files changed, 7 insertions(+), 42 deletions(-) diff --git a/burr/core/application.py b/burr/core/application.py index 16c9db270..6ef0c71d3 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -1044,18 +1044,15 @@ def _handle_suspension(self, action, action_inputs, suspended): for entry in self._journal_sink: persister.save_journal_entry(entry) elif persister is not None: - # In-state fallback: embed the record + journal in State, then save. + # In-state fallback: embed the record + journal into State only. + # We deliberately do NOT call persister.save here. The post_run_step + # lifecycle hook fires for this suspended step and PersisterHook.save + # persists the embedded State once. Saving here too would write the + # same (partition_key, app_id, sequence_id, position) row twice and + # break persisters with a UNIQUE constraint (e.g. SQLitePersister). state = write_suspension_into_state(self._state, record) state = write_journal_into_state(state, self._journal_sink) self._set_state(state) - persister.save( - self._partition_key, - self._uid, - self.sequence_id, - action.name, - self._state, - "suspended", - ) # NOTE: post_action_suspend is registered in Milestone 5. Guard it so it is a # safe no-op until the hook is added to REGISTERED_SYNC_HOOKS. 
try: diff --git a/tests/core/test_durable_integration.py b/tests/core/test_durable_integration.py index af5bee2ef..bc5a960c4 100644 --- a/tests/core/test_durable_integration.py +++ b/tests/core/test_durable_integration.py @@ -144,44 +144,12 @@ async def agate(state, __context): assert final_state["done"] is True -class _UpsertSQLitePersister(SQLitePersister): - """Test-only subclass that replaces INSERT with INSERT OR REPLACE. - - NOTE: This subclass works around a known production-side bug: - ``_handle_suspension`` in application.py calls ``persister.save(..., "suspended")`` - directly for the in-state fallback path, and then the ``PersisterHook`` - lifecycle adapter calls ``persister.save(..., "completed")`` for the same - (partition_key, app_id, sequence_id, position) in the post_run_step hook. - SQLitePersister's UNIQUE constraint makes that second insert fail. - InMemoryPersister masks the bug by appending. The fix belongs in - application.py (skip PersisterHook's save when suspension was already - persisted inline), but that file is out of scope here. This subclass - isolates the test from the double-save issue so the in-state codec - (read_suspension_from_state / read_journal_from_state) is still exercised. 
- """ - - def save(self, partition_key, app_id, sequence_id, position, state, status, **kwargs): - import json - import sqlite3 as _sqlite3 - - partition_key = partition_key if partition_key is not None else self.PARTITION_KEY_DEFAULT - cursor = self.connection.cursor() - json_state = json.dumps(state.serialize(**self.serde_kwargs)) - cursor.execute( - f"INSERT OR REPLACE INTO {self.table_name} " - f"(partition_key, app_id, sequence_id, position, state, status) " - f"VALUES (?, ?, ?, ?, ?, ?)", - (partition_key, app_id, sequence_id, position, json_state, status), - ) - self.connection.commit() - - def test_resume_through_in_state_fallback_with_sqlite(): """Resume uses the in-state fallback path when the persister does not support dedicated durable storage (supports_durable_storage() is False). SQLitePersister is a first-party persister that does NOT override save_suspension, so it triggers the fallback path where suspension data rides inside the State blob.""" - persister = _UpsertSQLitePersister(":memory:") + persister = SQLitePersister(":memory:") persister.initialize() graph = _graph() From 376aa71337b158f669dfe30db934d81108e7cbc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 15:26:26 -0300 Subject: [PATCH 19/57] fix: scope resume idempotency to durable persisters, warn on missing jsonschema - Remove dead `record.resolved = True` mutation in in-state fallback path of resume() and aresume(); replace with comment naming the no-durability rule. - Expand docstrings on resume() and aresume() to distinguish durable-storage idempotency (no-op) from in-state fallback behavior (second call raises). - Tighten no-record ValueError message to name the in-state fallback cause, distinguishing it from a wrong app_id. - _validate_payload now emits a warnings.warn instead of silently skipping when jsonschema is absent; import warnings moved to module level. - Add M5 deferral comment in application._handle_suspension. 
- Add test_resume_in_state_fallback_second_call_raises to integration suite. --- burr/core/application.py | 1 + burr/core/resume.py | 45 ++++++++++++++++++++------ tests/core/test_durable_integration.py | 40 +++++++++++++++++++++++ 3 files changed, 76 insertions(+), 10 deletions(-) diff --git a/burr/core/application.py b/burr/core/application.py index 6ef0c71d3..da97c8bd8 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -1050,6 +1050,7 @@ def _handle_suspension(self, action, action_inputs, suspended): # persists the embedded State once. Saving here too would write the # same (partition_key, app_id, sequence_id, position) row twice and # break persisters with a UNIQUE constraint (e.g. SQLitePersister). + # M5: suspended runs are persisted with status "completed"; a dedicated status + post_action_suspend hook lands in M5. state = write_suspension_into_state(self._state, record) state = write_journal_into_state(state, self._journal_sink) self._set_state(state) diff --git a/burr/core/resume.py b/burr/core/resume.py index 9cb5770be..989288b6c 100644 --- a/burr/core/resume.py +++ b/burr/core/resume.py @@ -17,6 +17,7 @@ """Top-level resume helpers for durable execution.""" +import warnings from typing import Any, Optional from burr.core.durable import ( @@ -43,12 +44,22 @@ def _load_journal(persister, partition_key, app_id, sequence_id, state): def _validate_payload(schema_json, payload): + """Validate *payload* against *schema_json* using jsonschema. + + Schema validation requires the optional ``jsonschema`` package. When it is + absent, validation is skipped and a warning is emitted. + """ if schema_json is None: return try: import jsonschema except ImportError: - return # validation is best-effort without jsonschema installed + warnings.warn( + "jsonschema is not installed; skipping resume payload schema validation. 
" + "Install jsonschema to enable validation.", + stacklevel=2, + ) + return jsonschema.validate(instance=payload, schema=schema_json) @@ -83,14 +94,18 @@ def resume( journal, ``suspend(channel)`` returns ``payload``), and runs to the next halt, suspend, or completion. - Idempotent: resuming an already-resolved suspension is a no-op that returns - the current state. + Idempotency: resuming an already-resolved suspension is an idempotent no-op for + persisters with durable storage (those implementing ``save_suspension`` / + ``load_suspension`` / ``mark_suspension_resolved``). For persisters without + durable storage, the suspension lives in ``state['__burr_durable__']`` and is + overwritten as the resumed run progresses; a second ``resume()`` call after the + first completes raises ``ValueError``. """ record = _load_suspension(persister, partition_key, app_id, channel) if record is None: raise ValueError( - f"No suspension found for app_id={app_id!r}, " - f"partition_key={partition_key!r}, channel={channel!r}." + f"No suspension found for app_id={app_id!r} " + f"(never suspended, or already resolved on a persister without durable storage)." ) if record.resolved: loaded = persister.load(partition_key, app_id) @@ -110,7 +125,8 @@ def resume( if supports_durable_storage(persister): persister.mark_suspension_resolved(record.suspension_id) else: - record.resolved = True # in-state fallback: best-effort + # In-state fallback does not durably mark suspensions resolved; a second resume will raise (see docstring). + pass return app.state @@ -124,7 +140,15 @@ async def aresume( channel: str, payload: Any, ): - """Async mirror of resume(). Use with async actions and async persisters.""" + """Async mirror of :func:`resume`. Use with async actions and async persisters. 
+ + Idempotency: resuming an already-resolved suspension is an idempotent no-op for + persisters with durable storage (those implementing ``save_suspension`` / + ``load_suspension`` / ``mark_suspension_resolved``). For persisters without + durable storage, the suspension lives in ``state['__burr_durable__']`` and is + overwritten as the resumed run progresses; a second ``aresume()`` call after the + first completes raises ``ValueError``. + """ is_async = persister.is_async() if is_async: record = await persister.load_suspension(partition_key, app_id, channel) @@ -132,8 +156,8 @@ async def aresume( record = _load_suspension(persister, partition_key, app_id, channel) if record is None: raise ValueError( - f"No suspension found for app_id={app_id!r}, " - f"partition_key={partition_key!r}, channel={channel!r}." + f"No suspension found for app_id={app_id!r} " + f"(never suspended, or already resolved on a persister without durable storage)." ) if record.resolved: if is_async: @@ -164,6 +188,7 @@ async def aresume( else: persister.mark_suspension_resolved(record.suspension_id) else: - record.resolved = True + # In-state fallback does not durably mark suspensions resolved; a second resume will raise (see docstring). + pass return app.state diff --git a/tests/core/test_durable_integration.py b/tests/core/test_durable_integration.py index bc5a960c4..683181a6d 100644 --- a/tests/core/test_durable_integration.py +++ b/tests/core/test_durable_integration.py @@ -169,3 +169,43 @@ def test_resume_through_in_state_fallback_with_sqlite(): payload={"approved": True}, ) assert final_state["done"] is True + + +def test_resume_in_state_fallback_second_call_raises(): + """A second resume() call on an in-state fallback persister raises ValueError. + + After the first resume() completes, the resumed run's new state row no longer + carries '__burr_durable__', so the suspension record is gone. 
A second resume() + must raise ValueError with a message that names the in-state fallback as the + reason, distinguishing it from a never-suspended app_id. + """ + persister = SQLitePersister(":memory:") + persister.initialize() + + graph = _graph() + + # Suspend. + app = _build(persister, graph) + app.run(halt_after=["gate"]) + assert app.suspended is not None + + # First resume succeeds. + resume( + persister=persister, + graph=graph, + app_id="run1", + partition_key="pk1", + channel="approval", + payload={"approved": True}, + ) + + # Second resume on in-state fallback must raise ValueError naming the cause. + with pytest.raises(ValueError, match="already resolved on a persister without durable storage"): + resume( + persister=persister, + graph=graph, + app_id="run1", + partition_key="pk1", + channel="approval", + payload={"approved": True}, + ) From 3a6b60e49269ec5274416f7d4504a22a60fc5bc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 15:34:39 -0300 Subject: [PATCH 20/57] fix: route async non-durable persisters through in-state fallback in aresume Guard aresume load_suspension call with supports_durable_storage check, mirroring the existing guard used for journal loading in the same function. Without the guard, async persisters that do not override load_suspension raised NotImplementedError instead of falling through to _load_suspension. Also raise warnings.warn stacklevel from 2 to 3 in _validate_payload so the warning points at the caller of resume/aresume, not the internal helper. --- burr/core/resume.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/burr/core/resume.py b/burr/core/resume.py index 989288b6c..f22447400 100644 --- a/burr/core/resume.py +++ b/burr/core/resume.py @@ -57,7 +57,7 @@ def _validate_payload(schema_json, payload): warnings.warn( "jsonschema is not installed; skipping resume payload schema validation. 
" "Install jsonschema to enable validation.", - stacklevel=2, + stacklevel=3, ) return jsonschema.validate(instance=payload, schema=schema_json) @@ -150,7 +150,14 @@ async def aresume( first completes raises ``ValueError``. """ is_async = persister.is_async() - if is_async: + # Require durable support before awaiting load_suspension; without it the + # persister has not overridden that method and would raise NotImplementedError. + # When is_async but NOT supports_durable_storage we fall through to + # _load_suspension, which calls persister.load() synchronously. That works + # for sync persisters but raises TypeError for async non-durable persisters + # (the sync call returns an un-awaited coroutine). A full fix requires an + # async-aware _aload_suspension helper; deferred to M3/M4 (M2 limitation). + if is_async and supports_durable_storage(persister): record = await persister.load_suspension(partition_key, app_id, channel) else: record = _load_suspension(persister, partition_key, app_id, channel) From 168337a34b27706716d78f34af097219440c8fb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 15:38:22 -0300 Subject: [PATCH 21/57] fix: aresume raises clear error for async non-durable persisters Replaces the silent broken path (TypeError: coroutine object is not subscriptable) with an explicit NotImplementedError when aresume() is called with an async persister that does not implement durable storage. Updates the aresume() docstring to accurately describe the async/sync paths and their idempotency guarantees. Adds a test to assert the guard. 
--- burr/core/resume.py | 52 ++++++++++++++++++-------- tests/core/test_durable_integration.py | 24 +++++++++++- 2 files changed, 59 insertions(+), 17 deletions(-) diff --git a/burr/core/resume.py b/burr/core/resume.py index f22447400..8badf7758 100644 --- a/burr/core/resume.py +++ b/burr/core/resume.py @@ -140,24 +140,44 @@ async def aresume( channel: str, payload: Any, ): - """Async mirror of :func:`resume`. Use with async actions and async persisters. - - Idempotency: resuming an already-resolved suspension is an idempotent no-op for - persisters with durable storage (those implementing ``save_suspension`` / - ``load_suspension`` / ``mark_suspension_resolved``). For persisters without - durable storage, the suspension lives in ``state['__burr_durable__']`` and is - overwritten as the resumed run progresses; a second ``aresume()`` call after the - first completes raises ``ValueError``. + """Resume a suspended run by delivering ``payload`` to ``channel``. Use with + async actions and/or an async run loop. + + .. note:: + **Async persisters must implement durable storage.** An async persister + that does *not* override ``save_suspension`` / ``load_suspension`` / + ``mark_suspension_resolved`` raises :exc:`NotImplementedError` immediately. + Full async non-durable support is planned for a future milestone. + + **Sync non-durable persisters** (e.g. ``SQLitePersister``) fall back to + loading the suspension from ``state['__burr_durable__']``, the same path as + :func:`resume`. + + :param persister: State persister. Async persisters must support durable + storage; sync persisters may use the in-state fallback. + :param graph: The :class:`~burr.core.graph.Graph` to rebuild the application. + :param app_id: Identifier of the application run to resume. + :param partition_key: Partition key used when the run was persisted. + :param channel: Name of the suspension channel to deliver ``payload`` to. + :param payload: Value returned by ``suspend(channel)`` inside the action. 
+ + **Idempotency:** + + * *Durable persisters* -- resuming an already-resolved suspension is an + idempotent no-op: the call returns the latest persisted state unchanged. + * *Sync non-durable persisters* -- the suspension lives in + ``state['__burr_durable__']`` and is overwritten as the resumed run + progresses. A second ``aresume()`` call after the first completes raises + :exc:`ValueError`. """ is_async = persister.is_async() - # Require durable support before awaiting load_suspension; without it the - # persister has not overridden that method and would raise NotImplementedError. - # When is_async but NOT supports_durable_storage we fall through to - # _load_suspension, which calls persister.load() synchronously. That works - # for sync persisters but raises TypeError for async non-durable persisters - # (the sync call returns an un-awaited coroutine). A full fix requires an - # async-aware _aload_suspension helper; deferred to M3/M4 (M2 limitation). - if is_async and supports_durable_storage(persister): + if is_async and not supports_durable_storage(persister): + raise NotImplementedError( + "aresume() does not support async persisters without durable storage " + "in this release; use a sync persister, or a persister that overrides " + "save_suspension/load_suspension/mark_suspension_resolved." 
+ ) + if is_async: record = await persister.load_suspension(partition_key, app_id, channel) else: record = _load_suspension(persister, partition_key, app_id, channel) diff --git a/tests/core/test_durable_integration.py b/tests/core/test_durable_integration.py index 683181a6d..1705da818 100644 --- a/tests/core/test_durable_integration.py +++ b/tests/core/test_durable_integration.py @@ -18,7 +18,7 @@ import pytest from burr.core import ApplicationBuilder, GraphBuilder, State, action, resume -from burr.core.persistence import InMemoryPersister, SQLitePersister +from burr.core.persistence import AsyncInMemoryPersister, InMemoryPersister, SQLitePersister @action(reads=[], writes=["seen"]) @@ -209,3 +209,25 @@ def test_resume_in_state_fallback_second_call_raises(): channel="approval", payload={"approved": True}, ) + + +async def test_aresume_async_non_durable_persister_raises(): + """aresume() must raise NotImplementedError immediately for an async persister + that does not implement durable storage (save_suspension / load_suspension / + mark_suspension_resolved). AsyncInMemoryPersister extends AsyncBaseStatePersister + without overriding those methods, so it is the canonical non-durable async persister. + """ + from burr.core import aresume + + persister = AsyncInMemoryPersister() + graph = _graph() + + with pytest.raises(NotImplementedError, match="async persisters without durable storage"): + await aresume( + persister=persister, + graph=graph, + app_id="dummy-run", + partition_key="pk1", + channel="approval", + payload={}, + ) From 814688f687bdf7270414a84630550ee07a1b82a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 15:44:09 -0300 Subject: [PATCH 22/57] fix: aresume rejects all async persisters and avoids double-wrapping State aresume() now raises NotImplementedError for any async persister upfront, removing unreachable dead branches. 
Both resume() and aresume() return the loaded State object directly instead of wrapping it in State() again. --- burr/core/resume.py | 60 +++++++++----------------- tests/core/test_durable_integration.py | 13 +++--- 2 files changed, 27 insertions(+), 46 deletions(-) diff --git a/burr/core/resume.py b/burr/core/resume.py index 8badf7758..5c36f48a5 100644 --- a/burr/core/resume.py +++ b/burr/core/resume.py @@ -109,7 +109,7 @@ def resume( ) if record.resolved: loaded = persister.load(partition_key, app_id) - return State(loaded["state"]) if loaded else State(record.state) + return loaded["state"] if loaded else State(record.state) _validate_payload(record.schema_json, payload) @@ -140,21 +140,16 @@ async def aresume( channel: str, payload: Any, ): - """Resume a suspended run by delivering ``payload`` to ``channel``. Use with - async actions and/or an async run loop. - - .. note:: - **Async persisters must implement durable storage.** An async persister - that does *not* override ``save_suspension`` / ``load_suspension`` / - ``mark_suspension_resolved`` raises :exc:`NotImplementedError` immediately. - Full async non-durable support is planned for a future milestone. + """Resume a suspended run by delivering ``payload`` to ``channel``. - **Sync non-durable persisters** (e.g. ``SQLitePersister``) fall back to - loading the suspension from ``state['__burr_durable__']``, the same path as - :func:`resume`. + Runs the async action loop (``await app.arun(...)``), so async actions are + fully supported. Requires a **sync** persister in this release; passing an + async persister raises :exc:`NotImplementedError`. Full async-persister + support is deferred to a later milestone. - :param persister: State persister. Async persisters must support durable - storage; sync persisters may use the in-state fallback. + :param persister: A sync state persister (durable or non-durable). Async + persisters are not supported in this release and raise + :exc:`NotImplementedError`. 
:param graph: The :class:`~burr.core.graph.Graph` to rebuild the application. :param app_id: Identifier of the application run to resume. :param partition_key: Partition key used when the run was persisted. @@ -170,50 +165,35 @@ async def aresume( progresses. A second ``aresume()`` call after the first completes raises :exc:`ValueError`. """ - is_async = persister.is_async() - if is_async and not supports_durable_storage(persister): + if persister.is_async(): raise NotImplementedError( - "aresume() does not support async persisters without durable storage " - "in this release; use a sync persister, or a persister that overrides " - "save_suspension/load_suspension/mark_suspension_resolved." + "aresume() does not support async persisters in this release; " + "use a sync persister (durable or non-durable). Async actions are " + "still supported with a sync persister." ) - if is_async: - record = await persister.load_suspension(partition_key, app_id, channel) - else: - record = _load_suspension(persister, partition_key, app_id, channel) + record = _load_suspension(persister, partition_key, app_id, channel) if record is None: raise ValueError( f"No suspension found for app_id={app_id!r} " f"(never suspended, or already resolved on a persister without durable storage)." 
) if record.resolved: - if is_async: - loaded = await persister.load(partition_key, app_id) - else: - loaded = persister.load(partition_key, app_id) - return State(loaded["state"]) if loaded else State(record.state) + loaded = persister.load(partition_key, app_id) + return loaded["state"] if loaded else State(record.state) _validate_payload(record.schema_json, payload) app = _rebuild(persister, graph, app_id, partition_key, record) app._resume_signals = {channel: payload} - if is_async and supports_durable_storage(persister): - app._loaded_journal = await persister.load_journal( - partition_key, app_id, record.sequence_id - ) - else: - app._loaded_journal = _load_journal( - persister, partition_key, app_id, record.sequence_id, record.state - ) + app._loaded_journal = _load_journal( + persister, partition_key, app_id, record.sequence_id, record.state + ) app._suspended = None await app.arun(halt_after=[]) if supports_durable_storage(persister): - if is_async: - await persister.mark_suspension_resolved(record.suspension_id) - else: - persister.mark_suspension_resolved(record.suspension_id) + persister.mark_suspension_resolved(record.suspension_id) else: # In-state fallback does not durably mark suspensions resolved; a second resume will raise (see docstring). pass diff --git a/tests/core/test_durable_integration.py b/tests/core/test_durable_integration.py index 1705da818..7b5865067 100644 --- a/tests/core/test_durable_integration.py +++ b/tests/core/test_durable_integration.py @@ -211,18 +211,19 @@ def test_resume_in_state_fallback_second_call_raises(): ) -async def test_aresume_async_non_durable_persister_raises(): - """aresume() must raise NotImplementedError immediately for an async persister - that does not implement durable storage (save_suspension / load_suspension / - mark_suspension_resolved). AsyncInMemoryPersister extends AsyncBaseStatePersister - without overriding those methods, so it is the canonical non-durable async persister. 
+async def test_aresume_async_persister_raises(): + """aresume() must raise NotImplementedError immediately for any async persister. + + In this release, aresume() rejects all async persisters regardless of whether + they implement durable storage. AsyncInMemoryPersister is used here as the + canonical async persister example. """ from burr.core import aresume persister = AsyncInMemoryPersister() graph = _graph() - with pytest.raises(NotImplementedError, match="async persisters without durable storage"): + with pytest.raises(NotImplementedError, match="does not support async persisters"): await aresume( persister=persister, graph=graph, From 201cacdf7eea9d1d385632a51fe5991ef8a79778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 15:47:14 -0300 Subject: [PATCH 23/57] refactor: drop redundant else-pass branches in resume helpers --- burr/core/resume.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/burr/core/resume.py b/burr/core/resume.py index 5c36f48a5..29bb25a27 100644 --- a/burr/core/resume.py +++ b/burr/core/resume.py @@ -122,11 +122,10 @@ def resume( app.run(halt_after=[]) # run to completion or the next suspend + # In-state fallback does not durably mark suspensions resolved; a second + # resume will raise (see docstring). if supports_durable_storage(persister): persister.mark_suspension_resolved(record.suspension_id) - else: - # In-state fallback does not durably mark suspensions resolved; a second resume will raise (see docstring). - pass return app.state @@ -192,10 +191,9 @@ async def aresume( await app.arun(halt_after=[]) + # In-state fallback does not durably mark suspensions resolved; a second + # resume will raise (see docstring). if supports_durable_storage(persister): persister.mark_suspension_resolved(record.suspension_id) - else: - # In-state fallback does not durably mark suspensions resolved; a second resume will raise (see docstring). 
- pass return app.state From dc4bd4cac0d288e0a9a39db662b227288480c852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 15:49:15 -0300 Subject: [PATCH 24/57] feat: add ApplicationContext.durable() first-run execution and journaling --- burr/core/application.py | 42 ++++++++++++++++++++++++++++++++++++++ tests/core/test_durable.py | 29 ++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/burr/core/application.py b/burr/core/application.py index da97c8bd8..0c1b0119a 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -622,6 +622,48 @@ def suspend( raise _Suspended(channel, schema_json, metadata) + def durable(self, key: str, fn: Callable, *args, **kwargs) -> Any: + """Memoize a sub-step. First run: execute ``fn`` and journal its result. + On resume: replay the journaled result without executing ``fn`` again. + + ``key`` must be stable and called in the same order across re-runs of + the same action invocation (see the determinism contract). Do not call + ``suspend()`` from inside ``fn``. + """ + from burr.core.durable import DeterminismError, JournalEntry + + idx = self._journal_call_index + self._journal_call_index += 1 + + if idx < len(self._loaded_journal): + recorded = self._loaded_journal[idx] + if recorded.step_key != key: + raise DeterminismError( + f"Durable sub-step #{idx} replayed as key {key!r} but the " + f"journal recorded key {recorded.step_key!r}. The action's " + f"durable() calls must occur in the same order with the same " + f"keys on every re-run." 
+ ) + return recorded.result + + result = fn(*args, **kwargs) + entry = JournalEntry( + partition_key=self.partition_key, + app_id=self.app_id, + sequence_id=self.sequence_id, + step_key=key, + call_index=idx, + result=result, + ) + self._journal_sink.append(entry) + if self.state_persister is not None: + from burr.core.durable import supports_durable_storage + + if supports_durable_storage(self.state_persister): + # First-party storage: persist immediately for crash resilience. + self.state_persister.save_journal_entry(entry) + return result + @staticmethod def get() -> Optional["ApplicationContext"]: """Provides the context-local application context. diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 7c90a4e09..f91656991 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -504,3 +504,32 @@ async def agate(state, __context): assert record.channel == "approval" assert record.resolved is False assert record.state.get("seen") is True + + +# --- ApplicationContext.durable() tests (Task 3.1) ---------------------------- + + +def test_durable_executes_fn_and_journals_on_first_run(): + calls = [] + + def side_effect(x): + calls.append(x) + return x * 2 + + ctx = _make_context() + result = ctx.durable("double", side_effect, 21) + assert result == 42 + assert calls == [21] + # The entry was appended to the journal sink for persistence. 
+ assert len(ctx._journal_sink) == 1 + assert ctx._journal_sink[0].step_key == "double" + assert ctx._journal_sink[0].call_index == 0 + assert ctx._journal_sink[0].result == 42 + + +def test_durable_assigns_increasing_call_index(): + ctx = _make_context() + ctx.durable("a", lambda: 1) + ctx.durable("b", lambda: 2) + assert [e.call_index for e in ctx._journal_sink] == [0, 1] + assert [e.step_key for e in ctx._journal_sink] == ["a", "b"] From f91a2d6c3cdc7cbc9258b3ac583bc576c482589e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 15:49:31 -0300 Subject: [PATCH 25/57] test: cover durable() journal replay --- tests/core/test_durable.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index f91656991..03051150a 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -533,3 +533,38 @@ def test_durable_assigns_increasing_call_index(): ctx.durable("b", lambda: 2) assert [e.call_index for e in ctx._journal_sink] == [0, 1] assert [e.step_key for e in ctx._journal_sink] == ["a", "b"] + + +# --- ApplicationContext.durable() replay tests (Task 3.2) --------------------- + + +def test_durable_replays_from_loaded_journal_without_executing_fn(): + from burr.core.durable import JournalEntry + + recorded = [ + JournalEntry("p", "a", 1, "double", 0, 42), + ] + ctx = _make_context() + ctx._loaded_journal = recorded + + calls = [] + + def side_effect(x): + calls.append(x) + return x * 2 + + result = ctx.durable("double", side_effect, 21) + assert result == 42 + assert calls == [] # fn must NOT run on replay + + +def test_durable_replay_then_execute_for_calls_past_the_journal(): + from burr.core.durable import JournalEntry + + ctx = _make_context() + ctx._loaded_journal = [JournalEntry("p", "a", 1, "first", 0, "cached")] + + first = ctx.durable("first", lambda: "fresh") + second = ctx.durable("second", lambda: "executed") + assert first 
== "cached" # replayed + assert second == "executed" # past the journal -> executed From fdff7c0f9656402dbd8ac3546eabd7d4b33509d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 15:49:43 -0300 Subject: [PATCH 26/57] test: cover DeterminismError on durable() key mismatch --- tests/core/test_durable.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 03051150a..1f4f8af0e 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -568,3 +568,17 @@ def test_durable_replay_then_execute_for_calls_past_the_journal(): second = ctx.durable("second", lambda: "executed") assert first == "cached" # replayed assert second == "executed" # past the journal -> executed + + +# --- ApplicationContext.durable() determinism error (Task 3.3) ---------------- + + +def test_durable_raises_determinism_error_on_key_mismatch(): + from burr.core.durable import DeterminismError, JournalEntry + + ctx = _make_context() + ctx._loaded_journal = [JournalEntry("p", "a", 1, "summarize", 0, "x")] + + with pytest.raises(DeterminismError): + # The first durable call on resume used a different key than recorded. 
+ ctx.durable("translate", lambda: "y") From bbfb9a5a907a8f667ce90903486967a054ae7f13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 15:55:00 -0300 Subject: [PATCH 27/57] refactor: consolidate durable() imports and cover kwargs forwarding --- burr/core/application.py | 18 ++++++++++-------- tests/core/test_durable.py | 6 ++++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/burr/core/application.py b/burr/core/application.py index 0c1b0119a..a84b64313 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -628,9 +628,12 @@ def durable(self, key: str, fn: Callable, *args, **kwargs) -> Any: ``key`` must be stable and called in the same order across re-runs of the same action invocation (see the determinism contract). Do not call - ``suspend()`` from inside ``fn``. + ``suspend()`` from inside ``fn``, and do not wrap ``durable()`` in a + try/except that varies the key on the exception branch: the call index + is consumed even when ``fn`` raises, so a retry with a different key + triggers a ``DeterminismError``. """ - from burr.core.durable import DeterminismError, JournalEntry + from burr.core.durable import DeterminismError, JournalEntry, supports_durable_storage idx = self._journal_call_index self._journal_call_index += 1 @@ -656,12 +659,11 @@ def durable(self, key: str, fn: Callable, *args, **kwargs) -> Any: result=result, ) self._journal_sink.append(entry) - if self.state_persister is not None: - from burr.core.durable import supports_durable_storage - - if supports_durable_storage(self.state_persister): - # First-party storage: persist immediately for crash resilience. - self.state_persister.save_journal_entry(entry) + if self.state_persister is not None and supports_durable_storage( + self.state_persister + ): + # First-party storage: persist immediately for crash resilience. 
+ self.state_persister.save_journal_entry(entry) return result @staticmethod diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 1f4f8af0e..7a3c3557d 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -527,6 +527,12 @@ def side_effect(x): assert ctx._journal_sink[0].result == 42 +def test_durable_forwards_positional_and_keyword_args(): + ctx = _make_context() + result = ctx.durable("combine", lambda x, y: (x, y), 1, y=2) + assert result == (1, 2) + + def test_durable_assigns_increasing_call_index(): ctx = _make_context() ctx.durable("a", lambda: 1) From 4674524ab7d4c72350aafc3f3682fbbcb539d33c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 15:57:14 -0300 Subject: [PATCH 28/57] feat: flush in-state journal on action completion for fallback persisters --- burr/core/application.py | 20 ++++++++++++++++++++ tests/core/test_durable.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/burr/core/application.py b/burr/core/application.py index a84b64313..694c5f2fb 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -1017,6 +1017,16 @@ def _step( new_state = _run_reducer(next_action, self._state, result, next_action.name) new_state = self._update_internal_state_value(new_state, next_action) + if self._journal_sink and self._state_persister is not None: + from burr.core.durable import ( + read_journal_from_state, + supports_durable_storage, + write_journal_into_state, + ) + + if not supports_durable_storage(self._state_persister): + merged = read_journal_from_state(self._state) + self._journal_sink + new_state = write_journal_into_state(new_state, merged) self._set_state(new_state) except _Suspended as suspended: suspended_signal = suspended @@ -1254,6 +1264,16 @@ async def _astep(self, inputs: Optional[Dict[str, Any]], _run_hooks: bool = True ) new_state = _run_reducer(next_action, self._state, result, next_action.name) new_state = 
self._update_internal_state_value(new_state, next_action) + if self._journal_sink and self._state_persister is not None: + from burr.core.durable import ( + read_journal_from_state, + supports_durable_storage, + write_journal_into_state, + ) + + if not supports_durable_storage(self._state_persister): + merged = read_journal_from_state(self._state) + self._journal_sink + new_state = write_journal_into_state(new_state, merged) self._set_state(new_state) except _Suspended as suspended: suspended_signal = suspended diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 7a3c3557d..70a3e1e6e 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -588,3 +588,31 @@ def test_durable_raises_determinism_error_on_key_mismatch(): with pytest.raises(DeterminismError): # The first durable call on resume used a different key than recorded. ctx.durable("translate", lambda: "y") + + +def test_journal_sink_flushed_into_state_on_completion_with_fallback(): + from burr.core import ApplicationBuilder, State, action + from burr.core.durable import read_journal_from_state + from burr.core.persistence import SQLitePersister + + @action(reads=[], writes=["v"]) + def compute(state, __context): + value = __context.durable("calc", lambda: 99) + return state.update(v=value) + + persister = SQLitePersister.from_values(":memory:") + persister.initialize() + app = ( + ApplicationBuilder() + .with_actions(compute=compute) + .with_entrypoint("compute") + .with_state(State({})) + .with_identifiers(app_id="j1", partition_key="pk") + .with_state_persister(persister) + .build() + ) + app.run(halt_after=["compute"]) + loaded = persister.load("pk", "j1") + journal = read_journal_from_state(loaded["state"]) + assert len(journal) == 1 + assert journal[0].result == 99 From 5fac59288c774bb8e347137bea9cefa6bdefe42b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 16:05:24 -0300 Subject: [PATCH 29/57] fix: reset journal sink before 
direct step calls in streaming result paths --- burr/core/application.py | 2 ++ tests/core/test_durable.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/burr/core/application.py b/burr/core/application.py index 694c5f2fb..7f0fbc7cc 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -1741,6 +1741,7 @@ def callback( if not next_action.streaming: # In this case we are halting at a non-streaming condition # This is allowed as we want to maintain a more consistent API + self._journal_sink = [] action, result, state = self._step(inputs=inputs, _run_hooks=False) self._adapter_set.call_all_lifecycle_hooks_sync( "post_run_step", @@ -1993,6 +1994,7 @@ async def callback( if not next_action.streaming: # In this case we are halting at a non-streaming condition # This is allowed as we want to maintain a more consistent API + self._journal_sink = [] action, result, state = await self._astep(inputs=inputs, _run_hooks=False) await self._adapter_set.call_all_lifecycle_hooks_sync_and_async( "post_run_step", diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 70a3e1e6e..814d878ab 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -616,3 +616,38 @@ def compute(state, __context): journal = read_journal_from_state(loaded["state"]) assert len(journal) == 1 assert journal[0].result == 99 + + +def test_journal_accumulates_across_multiple_actions(): + from burr.core import ApplicationBuilder, State, action + from burr.core.durable import read_journal_from_state + from burr.core.persistence import SQLitePersister + + @action(reads=[], writes=["a"]) + def step_a(state, __context): + v = __context.durable("a_calc", lambda: 1) + return state.update(a=v) + + @action(reads=["a"], writes=["b"]) + def step_b(state, __context): + v = __context.durable("b_calc", lambda: 2) + return state.update(b=v) + + persister = SQLitePersister.from_values(":memory:") + persister.initialize() + app = ( + 
ApplicationBuilder() + .with_actions(step_a=step_a, step_b=step_b) + .with_transitions(("step_a", "step_b")) + .with_entrypoint("step_a") + .with_state(State({})) + .with_identifiers(app_id="j2", partition_key="pk") + .with_state_persister(persister) + .build() + ) + app.run(halt_after=["step_b"]) + loaded = persister.load("pk", "j2") + journal = read_journal_from_state(loaded["state"]) + assert len(journal) == 2 + keys = {e.step_key for e in journal} + assert keys == {"a_calc", "b_calc"} From ed28aee6f15ddad4a6999b4e6c702698bfa85850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 16:12:14 -0300 Subject: [PATCH 30/57] test: regression guard for journal double-count via stream_result Add test_journal_no_double_count_via_stream_result to verify that step_a's journal entry is not duplicated when stream_result() fast- forwards through a non-halt_after action then executes the target non-streaming action directly. Reverting the self._journal_sink = [] reset at line 1744 of application.py causes this test to observe 3 journal entries (a_calc, a_calc, b_calc) instead of the correct 2. --- tests/core/test_durable.py | 50 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 814d878ab..2c142adf4 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -651,3 +651,53 @@ def step_b(state, __context): assert len(journal) == 2 keys = {e.step_key for e in journal} assert keys == {"a_calc", "b_calc"} + + +def test_journal_no_double_count_via_stream_result(): + """Regression guard: step_a's journal entry must not be double-counted + when stream_result() fast-forwards through it and then executes a + non-streaming step_b. The fix is self._journal_sink = [] at line ~1744 + of application.py, immediately before the direct self._step() call in + the non-streaming branch of stream_result(). 
Deleting that line causes + this test to observe 3 journal entries instead of 2.""" + from burr.core import ApplicationBuilder, State, action + from burr.core.durable import read_journal_from_state + from burr.core.persistence import SQLitePersister + + @action(reads=[], writes=["a"]) + def step_a(state, __context): + v = __context.durable("a_calc", lambda: 1) + return state.update(a=v) + + @action(reads=["a"], writes=["b"]) + def step_b(state, __context): + v = __context.durable("b_calc", lambda: 2) + return state.update(b=v) + + persister = SQLitePersister.from_values(":memory:") + persister.initialize() + app = ( + ApplicationBuilder() + .with_actions(step_a=step_a, step_b=step_b) + .with_transitions(("step_a", "step_b")) + .with_entrypoint("step_a") + .with_state(State({})) + .with_identifiers(app_id="j3", partition_key="pk") + .with_state_persister(persister) + .build() + ) + # step_a is NOT in halt_after, so stream_result fast-forwards through it + # via self.run(), then hits the non-streaming branch for step_b. + # The fix resets _journal_sink before that branch so step_a's entry is + # not accumulated a second time into the persisted state. + action_, container = app.stream_result(halt_after=["step_b"]) + result, final_state = container.get() + + # Verify via the persisted state (the source of truth for the bug). 
+ loaded = persister.load("pk", "j3") + journal = read_journal_from_state(loaded["state"]) + assert len(journal) == 2, ( + f"Expected 2 journal entries (a_calc + b_calc), got {len(journal)}: " + f"{[e.step_key for e in journal]}" + ) + assert {e.step_key for e in journal} == {"a_calc", "b_calc"} From ed3e03ef74eee8604652d8e011af6f43ca984743 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 16:17:39 -0300 Subject: [PATCH 31/57] feat: add ApplicationContext.adurable() for coroutine sub-steps --- burr/core/application.py | 38 ++++++++++++++++++++++++++++++++++++++ tests/core/test_durable.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/burr/core/application.py b/burr/core/application.py index 7f0fbc7cc..5a416b1a9 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -666,6 +666,44 @@ def durable(self, key: str, fn: Callable, *args, **kwargs) -> Any: self.state_persister.save_journal_entry(entry) return result + async def adurable(self, key: str, fn: Callable, *args, **kwargs) -> Any: + """Async variant of durable(): ``fn`` is a coroutine function.""" + from burr.core.durable import ( + DeterminismError, + JournalEntry, + supports_durable_storage, + ) + + idx = self._journal_call_index + self._journal_call_index += 1 + + if idx < len(self._loaded_journal): + recorded = self._loaded_journal[idx] + if recorded.step_key != key: + raise DeterminismError( + f"Durable sub-step #{idx} replayed as key {key!r} but the " + f"journal recorded key {recorded.step_key!r}." 
+ ) + return recorded.result + + result = await fn(*args, **kwargs) + entry = JournalEntry( + partition_key=self.partition_key, + app_id=self.app_id, + sequence_id=self.sequence_id, + step_key=key, + call_index=idx, + result=result, + ) + self._journal_sink.append(entry) + if self.state_persister is not None and supports_durable_storage( + self.state_persister + ): + saver = self.state_persister.save_journal_entry(entry) + if self.state_persister.is_async(): + await saver + return result + @staticmethod def get() -> Optional["ApplicationContext"]: """Provides the context-local application context. diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 2c142adf4..d8c0bdc9a 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -701,3 +701,39 @@ def step_b(state, __context): f"{[e.step_key for e in journal]}" ) assert {e.step_key for e in journal} == {"a_calc", "b_calc"} + + +# --- ApplicationContext.adurable() tests (Task 3.5) --------------------------- + + +@pytest.mark.asyncio +async def test_adurable_executes_coroutine_and_journals(): + calls = [] + + async def async_side_effect(x): + calls.append(x) + return x + 1 + + ctx = _make_context() + result = await ctx.adurable("inc", async_side_effect, 41) + assert result == 42 + assert calls == [41] + assert ctx._journal_sink[0].step_key == "inc" + + +@pytest.mark.asyncio +async def test_adurable_replays_without_executing(): + from burr.core.durable import JournalEntry + + ctx = _make_context() + ctx._loaded_journal = [JournalEntry("p", "a", 1, "inc", 0, 42)] + + calls = [] + + async def async_side_effect(x): + calls.append(x) + return x + 1 + + result = await ctx.adurable("inc", async_side_effect, 41) + assert result == 42 + assert calls == [] From b8a526f5efa9e3f2e33f7f24b57e1e0c4c945c3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 16:18:19 -0300 Subject: [PATCH 32/57] test: verify durable side effects run exactly once across 
suspend/resume --- tests/core/test_durable_integration.py | 50 ++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tests/core/test_durable_integration.py b/tests/core/test_durable_integration.py index 7b5865067..95396f795 100644 --- a/tests/core/test_durable_integration.py +++ b/tests/core/test_durable_integration.py @@ -232,3 +232,53 @@ async def test_aresume_async_persister_raises(): channel="approval", payload={}, ) + + +# --- Task 3.6: durable side effect runs exactly once across suspend/resume ---- + +# Module-level counter: survives the Application instance, not the process. +_side_effect_calls = [] + + +@action(reads=[], writes=["summary", "approved"]) +def summarize_then_gate(state, __context): + summary = __context.durable("summarize", _expensive_summarize, "draft text") + decision = __context.suspend("approval", metadata={"summary": summary}) + return state.update(summary=summary, approved=decision["approved"]) + + +def _expensive_summarize(text): + _side_effect_calls.append(text) + return f"summary of {text}" + + +def test_durable_side_effect_runs_once_across_suspend_resume(): + _side_effect_calls.clear() + graph = ( + GraphBuilder() + .with_actions(summarize_then_gate=summarize_then_gate) + .with_transitions() + .build() + ) + persister = InMemoryPersister() + app = ( + ApplicationBuilder() + .with_graph(graph) + .with_entrypoint("summarize_then_gate") + .with_state(State({})) + .with_identifiers(app_id="once1", partition_key="pk") + .with_state_persister(persister) + .build() + ) + app.run(halt_after=["summarize_then_gate"]) + assert app.suspended is not None + assert len(_side_effect_calls) == 1 # ran once before suspending + + final_state = resume( + persister=persister, graph=graph, app_id="once1", partition_key="pk", + channel="approval", payload={"approved": True}, + ) + # The action re-ran top-to-bottom on resume, but summarize was replayed. 
+ assert len(_side_effect_calls) == 1 + assert final_state["approved"] is True + assert final_state["summary"] == "summary of draft text" From 099835f6ca68a53c5faf3b1935344b454462af85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 16:18:35 -0300 Subject: [PATCH 33/57] test: non-deterministic branch around durable() raises DeterminismError --- tests/core/test_durable_integration.py | 48 ++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/core/test_durable_integration.py b/tests/core/test_durable_integration.py index 95396f795..dd95c2829 100644 --- a/tests/core/test_durable_integration.py +++ b/tests/core/test_durable_integration.py @@ -282,3 +282,51 @@ def test_durable_side_effect_runs_once_across_suspend_resume(): assert len(_side_effect_calls) == 1 assert final_state["approved"] is True assert final_state["summary"] == "summary of draft text" + + +# --- Task 3.7: non-deterministic branch raises DeterminismError --------------- + +_branch_toggle = {"value": True} + + +@action(reads=[], writes=["out"]) +def nondeterministic(state, __context): + # ANTI-PATTERN under test: a durable() call behind a branch that flips + # between the first run and the resume re-run. 
+ if _branch_toggle["value"]: + __context.durable("branch_a", lambda: "a") + else: + __context.durable("branch_b", lambda: "b") + decision = __context.suspend("approval") + return state.update(out=decision["ok"]) + + +def test_nondeterministic_branch_raises_determinism_error(): + from burr.core.durable import DeterminismError + + _branch_toggle["value"] = True + graph = ( + GraphBuilder() + .with_actions(nondeterministic=nondeterministic) + .with_transitions() + .build() + ) + persister = InMemoryPersister() + app = ( + ApplicationBuilder() + .with_graph(graph) + .with_entrypoint("nondeterministic") + .with_state(State({})) + .with_identifiers(app_id="det1", partition_key="pk") + .with_state_persister(persister) + .build() + ) + app.run(halt_after=["nondeterministic"]) + + # Flip the branch before resume: the re-run takes branch_b. + _branch_toggle["value"] = False + with pytest.raises(DeterminismError): + resume( + persister=persister, graph=graph, app_id="det1", partition_key="pk", + channel="approval", payload={"ok": True}, + ) From ae5065b829aade05f5e9d43db0462b2f47e69c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 16:24:15 -0300 Subject: [PATCH 34/57] test: cover adurable journaling into a durable persister and isolate branch toggle --- tests/core/test_durable.py | 22 ++++++++++- tests/core/test_durable_integration.py | 51 ++++++++++++++------------ 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index d8c0bdc9a..9a780c88a 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -369,13 +369,13 @@ def test_in_memory_persister_journal_ordered_by_call_index(): # --- ApplicationContext.suspend() tests --------------------------------------- -def _make_context(resume_signals=None): +def _make_context(resume_signals=None, state_persister=None): from burr.core.application import ApplicationContext return ApplicationContext( app_id="a", 
partition_key="p", sequence_id=1, tracker=None, parallel_executor_factory=lambda: None, state_initializer=None, - state_persister=None, action_name="review", + state_persister=state_persister, action_name="review", _resume_signals=resume_signals or {}, _loaded_journal=[], _journal_sink=[], ) @@ -737,3 +737,21 @@ async def async_side_effect(x): result = await ctx.adurable("inc", async_side_effect, 41) assert result == 42 assert calls == [] + + +@pytest.mark.asyncio +async def test_adurable_journals_into_durable_persister(): + from burr.core.persistence import InMemoryPersister + + persister = InMemoryPersister() + + async def async_side_effect(): + return "value" + + ctx = _make_context(state_persister=persister) + await ctx.adurable("step", async_side_effect) + + journal = persister.load_journal("p", "a", 1) + assert len(journal) == 1 + assert journal[0].step_key == "step" + assert journal[0].result == "value" diff --git a/tests/core/test_durable_integration.py b/tests/core/test_durable_integration.py index dd95c2829..03c60528b 100644 --- a/tests/core/test_durable_integration.py +++ b/tests/core/test_durable_integration.py @@ -305,28 +305,31 @@ def test_nondeterministic_branch_raises_determinism_error(): from burr.core.durable import DeterminismError _branch_toggle["value"] = True - graph = ( - GraphBuilder() - .with_actions(nondeterministic=nondeterministic) - .with_transitions() - .build() - ) - persister = InMemoryPersister() - app = ( - ApplicationBuilder() - .with_graph(graph) - .with_entrypoint("nondeterministic") - .with_state(State({})) - .with_identifiers(app_id="det1", partition_key="pk") - .with_state_persister(persister) - .build() - ) - app.run(halt_after=["nondeterministic"]) - - # Flip the branch before resume: the re-run takes branch_b. 
- _branch_toggle["value"] = False - with pytest.raises(DeterminismError): - resume( - persister=persister, graph=graph, app_id="det1", partition_key="pk", - channel="approval", payload={"ok": True}, + try: + graph = ( + GraphBuilder() + .with_actions(nondeterministic=nondeterministic) + .with_transitions() + .build() + ) + persister = InMemoryPersister() + app = ( + ApplicationBuilder() + .with_graph(graph) + .with_entrypoint("nondeterministic") + .with_state(State({})) + .with_identifiers(app_id="det1", partition_key="pk") + .with_state_persister(persister) + .build() ) + app.run(halt_after=["nondeterministic"]) + + # Flip the branch before resume: the re-run takes branch_b. + _branch_toggle["value"] = False + with pytest.raises(DeterminismError): + resume( + persister=persister, graph=graph, app_id="det1", partition_key="pk", + channel="approval", payload={"ok": True}, + ) + finally: + _branch_toggle["value"] = True From 7a096dd5c7261a00035d53a5f61cf6b31fad6d8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 18:56:57 -0300 Subject: [PATCH 35/57] feat: SQLite persister durable storage (suspensions + journal tables) --- burr/core/persistence.py | 159 +++++++++++++++++++++++++- tests/core/test_durable_persisters.py | 83 ++++++++++++++ 2 files changed, 241 insertions(+), 1 deletion(-) create mode 100644 tests/core/test_durable_persisters.py diff --git a/burr/core/persistence.py b/burr/core/persistence.py index 094cd8bf8..2061308f4 100644 --- a/burr/core/persistence.py +++ b/burr/core/persistence.py @@ -24,7 +24,7 @@ from typing import Any, Dict, Literal, Optional, TypedDict from burr.common.types import BaseCopyable -from burr.core import Action +from burr.core import Action, serde from burr.core.durable import JournalEntry, SuspensionRecord from burr.core.state import State, logger from burr.lifecycle import PostRunStepHook, PostRunStepHookAsync @@ -496,6 +496,7 @@ def initialize(self): """Creates the table if it doesn't 
exist""" # Usage self.create_table_if_not_exists(self.table_name) + self.create_durable_tables_if_not_exist() self._initialized = True def is_initialized(self) -> bool: @@ -653,6 +654,162 @@ def save( raise self.connection.commit() + def create_durable_tables_if_not_exist(self): + """Creates the durable-execution tables (suspensions + journal) if they don't exist.""" + cursor = self.connection.cursor() + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS burr_suspensions ( + suspension_id TEXT PRIMARY KEY, + partition_key TEXT, + app_id TEXT NOT NULL, + sequence_id INTEGER NOT NULL, + position TEXT NOT NULL, + channel TEXT NOT NULL, + schema_json TEXT, + metadata_json TEXT, + inputs_json TEXT, + state_json TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + resolved INTEGER NOT NULL DEFAULT 0 + )""" + ) + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS burr_journal ( + partition_key TEXT, + app_id TEXT NOT NULL, + sequence_id INTEGER NOT NULL, + step_key TEXT NOT NULL, + call_index INTEGER NOT NULL, + result_json TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (partition_key, app_id, sequence_id, step_key) + )""" + ) + self.connection.commit() + + def save_suspension(self, record: SuspensionRecord) -> None: + """Persist a suspension record into the burr_suspensions table.""" + cursor = self.connection.cursor() + cursor.execute( + """INSERT OR REPLACE INTO burr_suspensions + (suspension_id, partition_key, app_id, sequence_id, position, + channel, schema_json, metadata_json, inputs_json, state_json, + created_at, resolved) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + ( + record.suspension_id, + record.partition_key, + record.app_id, + record.sequence_id, + record.position, + record.channel, + json.dumps(record.schema_json), + json.dumps(serde.serialize(record.metadata, **self.serde_kwargs)), + json.dumps(serde.serialize(record.inputs, **self.serde_kwargs)), + json.dumps(serde.serialize(record.state, 
**self.serde_kwargs)), + record.created_at, + 1 if record.resolved else 0, + ), + ) + self.connection.commit() + + def load_suspension( + self, partition_key: Optional[str], app_id: str, channel: str + ) -> Optional[SuspensionRecord]: + """Load the most recent suspension record for (partition_key, app_id, channel). + + Returns the record whether or not it is resolved; callers check + ``record.resolved`` for resume-once idempotency. Returns ``None`` + when no record exists for this combination. + """ + cursor = self.connection.cursor() + cursor.execute( + """SELECT suspension_id, partition_key, app_id, sequence_id, position, + channel, schema_json, metadata_json, inputs_json, state_json, + created_at, resolved + FROM burr_suspensions + WHERE partition_key IS ? AND app_id = ? AND channel = ? + ORDER BY created_at DESC LIMIT 1""", + (partition_key, app_id, channel), + ) + row = cursor.fetchone() + if row is None: + return None + return SuspensionRecord( + suspension_id=row[0], + partition_key=row[1], + app_id=row[2], + sequence_id=row[3], + position=row[4], + channel=row[5], + schema_json=json.loads(row[6]) if row[6] else None, + metadata=serde.deserialize(json.loads(row[7]), **self.serde_kwargs) if row[7] else None, + inputs=serde.deserialize(json.loads(row[8]), **self.serde_kwargs), + state=serde.deserialize(json.loads(row[9]), **self.serde_kwargs), + created_at=row[10], + resolved=bool(row[11]), + ) + + def mark_suspension_resolved(self, suspension_id: str) -> bool: + """Mark a suspension consumed. Conditional UPDATE for resume-once idempotency. + + :return: True if a row was updated (first call), False if already resolved (no-op). + """ + cursor = self.connection.cursor() + cursor.execute( + "UPDATE burr_suspensions SET resolved = 1 " + "WHERE suspension_id = ? 
AND resolved = 0", + (suspension_id,), + ) + self.connection.commit() + return cursor.rowcount > 0 + + def save_journal_entry(self, entry: JournalEntry) -> None: + """Persist one memoized sub-step into the burr_journal table.""" + cursor = self.connection.cursor() + cursor.execute( + """INSERT OR REPLACE INTO burr_journal + (partition_key, app_id, sequence_id, step_key, call_index, + result_json) + VALUES (?, ?, ?, ?, ?, ?)""", + ( + entry.partition_key, + entry.app_id, + entry.sequence_id, + entry.step_key, + entry.call_index, + json.dumps(serde.serialize(entry.result, **self.serde_kwargs)), + ), + ) + self.connection.commit() + + def load_journal( + self, partition_key: Optional[str], app_id: str, sequence_id: int + ) -> list[JournalEntry]: + """Load journal entries for a suspended action, ordered by call_index.""" + cursor = self.connection.cursor() + cursor.execute( + """SELECT partition_key, app_id, sequence_id, step_key, call_index, + result_json + FROM burr_journal + WHERE partition_key IS ? AND app_id = ? AND sequence_id = ? + ORDER BY call_index ASC""", + (partition_key, app_id, sequence_id), + ) + return [ + JournalEntry( + partition_key=row[0], + app_id=row[1], + sequence_id=row[2], + step_key=row[3], + call_index=row[4], + result=serde.deserialize(json.loads(row[5]), **self.serde_kwargs), + ) + for row in cursor.fetchall() + ] + def cleanup(self): """Closes the connection to the database.""" self.connection.close() diff --git a/tests/core/test_durable_persisters.py b/tests/core/test_durable_persisters.py new file mode 100644 index 000000000..a7eef542e --- /dev/null +++ b/tests/core/test_durable_persisters.py @@ -0,0 +1,83 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest + +from burr.core.durable import JournalEntry, SuspensionRecord, supports_durable_storage +from burr.core.persistence import SQLitePersister + + +@pytest.fixture +def sqlite_persister(): + persister = SQLitePersister.from_values(":memory:") + persister.initialize() + yield persister + + +def _record(resolved=False): + return SuspensionRecord( + suspension_id="sus-1", partition_key="pk", app_id="app", + sequence_id=4, position="review", channel="approval", + schema_json={"type": "object"}, metadata={"summary": "s"}, + inputs={"x": 1}, state={"draft": "d"}, + created_at="2026-05-22T00:00:00", resolved=resolved, + ) + + +def test_sqlite_supports_durable_storage(sqlite_persister): + assert supports_durable_storage(sqlite_persister) is True + + +def test_sqlite_suspension_round_trip(sqlite_persister): + sqlite_persister.save_suspension(_record()) + loaded = sqlite_persister.load_suspension("pk", "app", "approval") + assert loaded.suspension_id == "sus-1" + assert loaded.state == {"draft": "d"} + assert loaded.inputs == {"x": 1} + assert loaded.schema_json == {"type": "object"} + assert loaded.resolved is False + + +def test_sqlite_load_suspension_returns_resolved_record(sqlite_persister): + # Contract: load_suspension returns the record whether or not it is + # resolved; the caller checks record.resolved for resume-once idempotency. 
+ sqlite_persister.save_suspension(_record()) + sqlite_persister.mark_suspension_resolved("sus-1") + loaded = sqlite_persister.load_suspension("pk", "app", "approval") + assert loaded is not None + assert loaded.resolved is True + + +def test_sqlite_mark_resolved_is_conditional(sqlite_persister): + sqlite_persister.save_suspension(_record()) + first = sqlite_persister.mark_suspension_resolved("sus-1") + second = sqlite_persister.mark_suspension_resolved("sus-1") + # First call resolves a row; second call resolves nothing (resume-once). + assert first is True + assert second is False + + +def test_sqlite_journal_round_trip(sqlite_persister): + sqlite_persister.save_journal_entry( + JournalEntry("pk", "app", 4, "summarize", 0, "result-a") + ) + sqlite_persister.save_journal_entry( + JournalEntry("pk", "app", 4, "translate", 1, "result-b") + ) + journal = sqlite_persister.load_journal("pk", "app", 4) + assert [e.call_index for e in journal] == [0, 1] + assert [e.result for e in journal] == ["result-a", "result-b"] From e9b473a399087dad88dd061b0dbdfceb079b3b36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 19:03:29 -0300 Subject: [PATCH 36/57] test: migrate stale durable tests off SQLitePersister to NonDurablePersister SQLitePersister now implements all five durable-storage methods (M4), so it no longer serves as a non-durable stand-in. Add an inline NonDurablePersister stub (dict-backed, no durable overrides) to both test files and re-point the six tests that were exercising the in-state fallback path. Rename two tests whose names referenced SQLite. 
--- tests/core/test_durable.py | 77 +++++++++++++++++++++----- tests/core/test_durable_integration.py | 75 ++++++++++++++++++++++--- 2 files changed, 130 insertions(+), 22 deletions(-) diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 9a780c88a..5c05b29b4 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -16,6 +16,9 @@ # under the License. import dataclasses +import datetime +from collections import defaultdict +from typing import Literal, Optional import pytest @@ -25,6 +28,62 @@ SuspensionRecord, _Suspended, ) +from burr.core.persistence import BaseStatePersister +from burr.core.state import State + + +class NonDurablePersister(BaseStatePersister): + """Dict-backed persister that does NOT override any durable-storage methods. + + ``supports_durable_storage(NonDurablePersister())`` returns False because + ``save_suspension`` is inherited unchanged from ``BaseStatePersister``. + The Application therefore stores suspensions and journal entries inside + the State blob (in-state fallback path). 
+ """ + + def __init__(self): + self._storage = defaultdict(lambda: defaultdict(list)) + + def save( + self, + partition_key: Optional[str], + app_id: str, + sequence_id: int, + position: str, + state: "State", + status: Literal["completed", "failed", "suspended"], + **kwargs, + ): + record = { + "partition_key": partition_key or "", + "app_id": app_id, + "sequence_id": sequence_id, + "position": position, + "state": state, + "created_at": datetime.datetime.now().isoformat(), + "status": status, + } + self._storage[partition_key][app_id].append(record) + + def load( + self, + partition_key: str, + app_id: Optional[str], + sequence_id: Optional[int] = None, + **kwargs, + ): + if app_id is None: + return None + states = self._storage[partition_key][app_id] + if not states: + return None + if sequence_id is None: + return states[-1] + matching = [s for s in states if s["sequence_id"] == sequence_id] + return matching[-1] if matching else None + + def list_app_ids(self, partition_key: str, **kwargs): + return list(self._storage[partition_key].keys()) def test_suspended_is_base_exception_not_exception(): @@ -143,12 +202,10 @@ def test_base_persister_durable_methods_raise_not_implemented(): p.mark_suspension_resolved("s1") -def test_supports_durable_storage_false_for_base_sqlite(): +def test_supports_durable_storage_false_for_non_durable_persister(): from burr.core.durable import supports_durable_storage - from burr.core.persistence import SQLitePersister - persister = SQLitePersister.from_values(":memory:") - # No SQLite override ships in this task; that lands in M4. 
+ persister = NonDurablePersister() assert supports_durable_storage(persister) is False @@ -593,15 +650,13 @@ def test_durable_raises_determinism_error_on_key_mismatch(): def test_journal_sink_flushed_into_state_on_completion_with_fallback(): from burr.core import ApplicationBuilder, State, action from burr.core.durable import read_journal_from_state - from burr.core.persistence import SQLitePersister @action(reads=[], writes=["v"]) def compute(state, __context): value = __context.durable("calc", lambda: 99) return state.update(v=value) - persister = SQLitePersister.from_values(":memory:") - persister.initialize() + persister = NonDurablePersister() app = ( ApplicationBuilder() .with_actions(compute=compute) @@ -621,7 +676,6 @@ def compute(state, __context): def test_journal_accumulates_across_multiple_actions(): from burr.core import ApplicationBuilder, State, action from burr.core.durable import read_journal_from_state - from burr.core.persistence import SQLitePersister @action(reads=[], writes=["a"]) def step_a(state, __context): @@ -633,8 +687,7 @@ def step_b(state, __context): v = __context.durable("b_calc", lambda: 2) return state.update(b=v) - persister = SQLitePersister.from_values(":memory:") - persister.initialize() + persister = NonDurablePersister() app = ( ApplicationBuilder() .with_actions(step_a=step_a, step_b=step_b) @@ -662,7 +715,6 @@ def test_journal_no_double_count_via_stream_result(): this test to observe 3 journal entries instead of 2.""" from burr.core import ApplicationBuilder, State, action from burr.core.durable import read_journal_from_state - from burr.core.persistence import SQLitePersister @action(reads=[], writes=["a"]) def step_a(state, __context): @@ -674,8 +726,7 @@ def step_b(state, __context): v = __context.durable("b_calc", lambda: 2) return state.update(b=v) - persister = SQLitePersister.from_values(":memory:") - persister.initialize() + persister = NonDurablePersister() app = ( ApplicationBuilder() .with_actions(step_a=step_a, 
step_b=step_b) diff --git a/tests/core/test_durable_integration.py b/tests/core/test_durable_integration.py index 03c60528b..447c664e9 100644 --- a/tests/core/test_durable_integration.py +++ b/tests/core/test_durable_integration.py @@ -15,10 +15,69 @@ # specific language governing permissions and limitations # under the License. +import datetime +from collections import defaultdict +from typing import Literal, Optional + import pytest from burr.core import ApplicationBuilder, GraphBuilder, State, action, resume -from burr.core.persistence import AsyncInMemoryPersister, InMemoryPersister, SQLitePersister +from burr.core.persistence import AsyncInMemoryPersister, BaseStatePersister, InMemoryPersister +from burr.core.state import State as _State + + +class NonDurablePersister(BaseStatePersister): + """Dict-backed persister that does NOT override any durable-storage methods. + + ``supports_durable_storage(NonDurablePersister())`` returns False because + ``save_suspension`` is inherited unchanged from ``BaseStatePersister``. + The Application therefore stores suspensions and journal entries inside + the State blob (in-state fallback path). 
+ """ + + def __init__(self): + self._storage = defaultdict(lambda: defaultdict(list)) + + def save( + self, + partition_key: Optional[str], + app_id: str, + sequence_id: int, + position: str, + state: "_State", + status: Literal["completed", "failed", "suspended"], + **kwargs, + ): + record = { + "partition_key": partition_key or "", + "app_id": app_id, + "sequence_id": sequence_id, + "position": position, + "state": state, + "created_at": datetime.datetime.now().isoformat(), + "status": status, + } + self._storage[partition_key][app_id].append(record) + + def load( + self, + partition_key: str, + app_id: Optional[str], + sequence_id: Optional[int] = None, + **kwargs, + ): + if app_id is None: + return None + states = self._storage[partition_key][app_id] + if not states: + return None + if sequence_id is None: + return states[-1] + matching = [s for s in states if s["sequence_id"] == sequence_id] + return matching[-1] if matching else None + + def list_app_ids(self, partition_key: str, **kwargs): + return list(self._storage[partition_key].keys()) @action(reads=[], writes=["seen"]) @@ -144,13 +203,12 @@ async def agate(state, __context): assert final_state["done"] is True -def test_resume_through_in_state_fallback_with_sqlite(): +def test_resume_through_in_state_fallback(): """Resume uses the in-state fallback path when the persister does not support - dedicated durable storage (supports_durable_storage() is False). SQLitePersister - is a first-party persister that does NOT override save_suspension, so it triggers - the fallback path where suspension data rides inside the State blob.""" - persister = SQLitePersister(":memory:") - persister.initialize() + dedicated durable storage (supports_durable_storage() is False). 
NonDurablePersister + does not override save_suspension, so it triggers the fallback path where + suspension data rides inside the State blob.""" + persister = NonDurablePersister() graph = _graph() @@ -179,8 +237,7 @@ def test_resume_in_state_fallback_second_call_raises(): must raise ValueError with a message that names the in-state fallback as the reason, distinguishing it from a never-suspended app_id. """ - persister = SQLitePersister(":memory:") - persister.initialize() + persister = NonDurablePersister() graph = _graph() From 2bc9be32b9d17a549c745569173519e012d30739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 19:12:17 -0300 Subject: [PATCH 37/57] fix: uniform bool return contract for mark_suspension_resolved BaseStatePersister now declares -> bool for mark_suspension_resolved. InMemoryPersister matches SQLite semantics: True on first resolve, False when already resolved or id is unknown. Explicit `is not None` guards replace implicit truthiness checks in SQLitePersister.load_suspension. New test covers InMemoryPersister conditional behavior. --- burr/core/persistence.py | 19 +++++++++++++------ tests/core/test_durable.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/burr/core/persistence.py b/burr/core/persistence.py index 2061308f4..4f2d471de 100644 --- a/burr/core/persistence.py +++ b/burr/core/persistence.py @@ -233,9 +233,12 @@ def load_journal( """Load journal entries for a suspended action, ordered by call_index.""" raise NotImplementedError - def mark_suspension_resolved(self, suspension_id: str) -> None: - """Mark a suspension consumed. First-party SQL persisters do this with a - conditional UPDATE for resume-once; the default raises.""" + def mark_suspension_resolved(self, suspension_id: str) -> bool: + """Marks the suspension as resolved. + + Returns True if a previously-unresolved row was flipped, False otherwise + (already resolved, or unknown id). 
Callers use this for resume-once idempotency. + """ raise NotImplementedError @@ -744,8 +747,8 @@ def load_suspension( sequence_id=row[3], position=row[4], channel=row[5], - schema_json=json.loads(row[6]) if row[6] else None, - metadata=serde.deserialize(json.loads(row[7]), **self.serde_kwargs) if row[7] else None, + schema_json=json.loads(row[6]) if row[6] is not None else None, + metadata=serde.deserialize(json.loads(row[7]), **self.serde_kwargs) if row[7] is not None else None, inputs=serde.deserialize(json.loads(row[8]), **self.serde_kwargs), state=serde.deserialize(json.loads(row[9]), **self.serde_kwargs), created_at=row[10], @@ -893,10 +896,14 @@ def save_suspension(self, record: SuspensionRecord) -> None: def load_suspension(self, partition_key: Optional[str], app_id: str, channel: str) -> Optional[SuspensionRecord]: return self._suspensions.get((partition_key, app_id, channel)) - def mark_suspension_resolved(self, suspension_id: str) -> None: + def mark_suspension_resolved(self, suspension_id: str) -> bool: for key, record in self._suspensions.items(): if record.suspension_id == suspension_id: + if record.resolved: + return False record.resolved = True + return True + return False def save_journal_entry(self, entry: JournalEntry) -> None: bucket = self._journal.setdefault( diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 5c05b29b4..db5ced7ab 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -399,6 +399,42 @@ def test_in_memory_persister_mark_suspension_resolved_unknown_id_is_noop(): persister.mark_suspension_resolved("does-not-exist") +def test_in_memory_persister_mark_suspension_resolved_is_conditional(): + """mark_suspension_resolved must match SQLite semantics: True on first call, + False on second call (already resolved), and False for an unknown id.""" + from burr.core.durable import SuspensionRecord + from burr.core.persistence import InMemoryPersister + + persister = InMemoryPersister() + record = 
SuspensionRecord( + suspension_id="s-cond", + partition_key="p", + app_id="a", + sequence_id=1, + position="review", + channel="approval", + schema_json=None, + metadata=None, + inputs={}, + state={}, + created_at="2026-05-22T00:00:00", + resolved=False, + ) + persister.save_suspension(record) + + # First call: row was unresolved, should flip and return True. + first = persister.mark_suspension_resolved("s-cond") + assert first is True + + # Second call: already resolved, should be a no-op and return False. + second = persister.mark_suspension_resolved("s-cond") + assert second is False + + # Unknown id: nothing to flip, must return False. + unknown = persister.mark_suspension_resolved("does-not-exist") + assert unknown is False + + # --- InMemoryPersister: load_journal ordering test --------------------------- From a1fafc7b47edec304c0134c3d4cbe3f2783d4f19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 19:13:23 -0300 Subject: [PATCH 38/57] fix: align AsyncBaseStatePersister.mark_suspension_resolved with bool return contract --- burr/core/persistence.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/burr/core/persistence.py b/burr/core/persistence.py index 4f2d471de..3a3fb17ac 100644 --- a/burr/core/persistence.py +++ b/burr/core/persistence.py @@ -275,9 +275,13 @@ async def load_journal( """Load journal entries for a suspended action, ordered by call_index.""" raise NotImplementedError - async def mark_suspension_resolved(self, suspension_id: str) -> None: - """Mark a suspension consumed. First-party SQL persisters do this with a - conditional UPDATE for resume-once; the default raises.""" + async def mark_suspension_resolved(self, suspension_id: str) -> bool: + """Marks the suspension as resolved. + + Returns True if a previously-unresolved row was flipped, False otherwise + (already resolved, or unknown id). Callers use this for resume-once idempotency. 
+ First-party SQL persisters do this with a conditional UPDATE. + """ raise NotImplementedError From 0629507015d94b3b6a2d2310cd0a93c10520e3e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 19:14:18 -0300 Subject: [PATCH 39/57] test: SQLite suspend/resume survives a process boundary --- tests/core/test_durable_integration.py | 39 ++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/core/test_durable_integration.py b/tests/core/test_durable_integration.py index 447c664e9..6eca3817f 100644 --- a/tests/core/test_durable_integration.py +++ b/tests/core/test_durable_integration.py @@ -390,3 +390,42 @@ def test_nondeterministic_branch_raises_determinism_error(): ) finally: _branch_toggle["value"] = True + + +# --- Task 4.2: SQLite end-to-end through dedicated tables -------------------- + + +def test_suspend_resume_with_sqlite_dedicated_storage(tmp_path): + """End-to-end: suspend on a file-backed SQLite persister, close the connection + (simulating process death), reopen with a fresh persister against the same file, + resume. Exercises the dedicated ``burr_suspensions`` + ``burr_journal`` tables + across a true process boundary.""" + from burr.core.persistence import SQLitePersister + + db = str(tmp_path / "durable.db") + + graph = _graph() + p1 = SQLitePersister.from_values(db) + p1.initialize() + app = ( + ApplicationBuilder() + .with_graph(graph) + .with_entrypoint("start") + .with_state(State({})) + .with_identifiers(app_id="sql1", partition_key="pk") + .with_state_persister(p1) + .build() + ) + app.run(halt_after=["gate"]) + assert app.suspended is not None + p1.connection.close() # simulate the process dying + + # New process: brand-new persister against the same DB file. 
+ p2 = SQLitePersister.from_values(db) + p2.initialize() + final_state = resume( + persister=p2, graph=graph, app_id="sql1", partition_key="pk", + channel="approval", payload={"approved": True}, + ) + assert final_state["done"] is True + p2.connection.close() From 9b555c71699b95a40b43fd38e9445edaaa29bdba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 19:17:55 -0300 Subject: [PATCH 40/57] feat: PostgreSQL persister durable storage Add create_durable_tables_if_not_exist and the 5 durable methods (save_suspension, load_suspension, mark_suspension_resolved, save_journal_entry, load_journal) to PostgreSQLPersister in b_psycopg2.py, mirroring the SQLitePersister implementation with Postgres dialect adjustments (%s placeholders, ON CONFLICT upserts, IS NOT DISTINCT FROM for NULL-safe partition_key equality). Extend test_durable_persisters.py with a Postgres block that skips unless BURR_CI_INTEGRATION_TESTS=true. --- burr/integrations/persisters/b_psycopg2.py | 184 +++++++++++++++++++++ tests/core/test_durable_persisters.py | 84 ++++++++++ 2 files changed, 268 insertions(+) diff --git a/burr/integrations/persisters/b_psycopg2.py b/burr/integrations/persisters/b_psycopg2.py index 26425f805..f141e6aec 100644 --- a/burr/integrations/persisters/b_psycopg2.py +++ b/burr/integrations/persisters/b_psycopg2.py @@ -130,9 +130,45 @@ def create_table(self, table_name: str): ) self.connection.commit() + def create_durable_tables_if_not_exist(self): + """Creates the durable-execution tables (suspensions + journal) if they don't exist.""" + cursor = self.connection.cursor() + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS burr_suspensions ( + suspension_id TEXT PRIMARY KEY, + partition_key TEXT, + app_id TEXT NOT NULL, + sequence_id INTEGER NOT NULL, + position TEXT NOT NULL, + channel TEXT NOT NULL, + schema_json JSONB, + metadata_json JSONB, + inputs_json JSONB, + state_json JSONB NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + 
resolved BOOLEAN NOT NULL DEFAULT false + )""" + ) + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS burr_journal ( + partition_key TEXT, + app_id TEXT NOT NULL, + sequence_id INTEGER NOT NULL, + step_key TEXT NOT NULL, + call_index INTEGER NOT NULL, + result_json JSONB NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (partition_key, app_id, sequence_id, step_key) + )""" + ) + self.connection.commit() + def initialize(self): """Creates the table""" self.create_table(self.table_name) + self.create_durable_tables_if_not_exist() self._initialized = True def is_initialized(self) -> bool: @@ -259,6 +295,154 @@ def save( ) self.connection.commit() + def save_suspension(self, record) -> None: + """Persist a suspension record into the burr_suspensions table.""" + import json + + from burr.core import serde + from burr.core.durable import SuspensionRecord # noqa: F401 — type reference only + + cursor = self.connection.cursor() + cursor.execute( + """INSERT INTO burr_suspensions + (suspension_id, partition_key, app_id, sequence_id, position, + channel, schema_json, metadata_json, inputs_json, state_json, + created_at, resolved) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + ON CONFLICT (suspension_id) DO UPDATE SET + partition_key = EXCLUDED.partition_key, + app_id = EXCLUDED.app_id, + sequence_id = EXCLUDED.sequence_id, + position = EXCLUDED.position, + channel = EXCLUDED.channel, + schema_json = EXCLUDED.schema_json, + metadata_json = EXCLUDED.metadata_json, + inputs_json = EXCLUDED.inputs_json, + state_json = EXCLUDED.state_json, + created_at = EXCLUDED.created_at, + resolved = EXCLUDED.resolved""", + ( + record.suspension_id, + record.partition_key, + record.app_id, + record.sequence_id, + record.position, + record.channel, + json.dumps(record.schema_json), + json.dumps(serde.serialize(record.metadata, **self.serde_kwargs)), + json.dumps(serde.serialize(record.inputs, **self.serde_kwargs)), + 
json.dumps(serde.serialize(record.state, **self.serde_kwargs)), + record.created_at, + record.resolved, + ), + ) + self.connection.commit() + + def load_suspension(self, partition_key, app_id: str, channel: str): + """Load the most recent suspension record for (partition_key, app_id, channel). + + Returns the record whether or not it is resolved; callers check + ``record.resolved`` for resume-once idempotency. Returns ``None`` + when no record exists for this combination. + """ + from burr.core import serde + from burr.core.durable import SuspensionRecord + + cursor = self.connection.cursor() + cursor.execute( + """SELECT suspension_id, partition_key, app_id, sequence_id, position, + channel, schema_json, metadata_json, inputs_json, state_json, + created_at, resolved + FROM burr_suspensions + WHERE partition_key IS NOT DISTINCT FROM %s AND app_id = %s AND channel = %s + ORDER BY created_at DESC LIMIT 1""", + (partition_key, app_id, channel), + ) + row = cursor.fetchone() + if row is None: + return None + # psycopg2 auto-parses JSONB columns to Python objects — no json.loads needed. + return SuspensionRecord( + suspension_id=row[0], + partition_key=row[1], + app_id=row[2], + sequence_id=row[3], + position=row[4], + channel=row[5], + schema_json=row[6] if row[6] is not None else None, + metadata=serde.deserialize(row[7], **self.serde_kwargs) if row[7] is not None else None, + inputs=serde.deserialize(row[8], **self.serde_kwargs), + state=serde.deserialize(row[9], **self.serde_kwargs), + created_at=row[10], + resolved=bool(row[11]), + ) + + def mark_suspension_resolved(self, suspension_id: str) -> bool: + """Mark a suspension consumed. Conditional UPDATE for resume-once idempotency. + + :return: True if a row was updated (first call), False if already resolved (no-op). 
+ """ + cursor = self.connection.cursor() + cursor.execute( + "UPDATE burr_suspensions SET resolved = true " + "WHERE suspension_id = %s AND resolved = false", + (suspension_id,), + ) + self.connection.commit() + return cursor.rowcount > 0 + + def save_journal_entry(self, entry) -> None: + """Persist one memoized sub-step into the burr_journal table.""" + import json + + from burr.core import serde + + cursor = self.connection.cursor() + cursor.execute( + """INSERT INTO burr_journal + (partition_key, app_id, sequence_id, step_key, call_index, result_json) + VALUES (%s, %s, %s, %s, %s, %s) + ON CONFLICT (partition_key, app_id, sequence_id, step_key) DO UPDATE SET + call_index = EXCLUDED.call_index, + result_json = EXCLUDED.result_json""", + ( + entry.partition_key, + entry.app_id, + entry.sequence_id, + entry.step_key, + entry.call_index, + json.dumps(serde.serialize(entry.result, **self.serde_kwargs)), + ), + ) + self.connection.commit() + + def load_journal(self, partition_key, app_id: str, sequence_id: int) -> list: + """Load journal entries for a suspended action, ordered by call_index.""" + from burr.core import serde + from burr.core.durable import JournalEntry + + cursor = self.connection.cursor() + cursor.execute( + """SELECT partition_key, app_id, sequence_id, step_key, call_index, + result_json + FROM burr_journal + WHERE partition_key IS NOT DISTINCT FROM %s AND app_id = %s AND sequence_id = %s + ORDER BY call_index ASC""", + (partition_key, app_id, sequence_id), + ) + # psycopg2 auto-parses JSONB columns to Python objects — no json.loads needed. 
+ return [ + JournalEntry( + partition_key=row[0], + app_id=row[1], + sequence_id=row[2], + step_key=row[3], + call_index=row[4], + result=serde.deserialize(row[5], **self.serde_kwargs), + ) + for row in cursor.fetchall() + ] + def cleanup(self): """Closes the connection to the database.""" self.connection.close() diff --git a/tests/core/test_durable_persisters.py b/tests/core/test_durable_persisters.py index a7eef542e..63d99caa2 100644 --- a/tests/core/test_durable_persisters.py +++ b/tests/core/test_durable_persisters.py @@ -15,11 +15,18 @@ # specific language governing permissions and limitations # under the License. +import os + import pytest from burr.core.durable import JournalEntry, SuspensionRecord, supports_durable_storage from burr.core.persistence import SQLitePersister +_pg_integration = pytest.mark.skipif( + os.environ.get("BURR_CI_INTEGRATION_TESTS") != "true", + reason="Skipping integration tests", +) + @pytest.fixture def sqlite_persister(): @@ -81,3 +88,80 @@ def test_sqlite_journal_round_trip(sqlite_persister): journal = sqlite_persister.load_journal("pk", "app", 4) assert [e.call_index for e in journal] == [0, 1] assert [e.result for e in journal] == ["result-a", "result-b"] + + +# --------------------------------------------------------------------------- +# PostgreSQL durable storage tests — skipped unless BURR_CI_INTEGRATION_TESTS=true +# --------------------------------------------------------------------------- + + +@pytest.fixture +def pg_persister(): + from burr.integrations.persisters.b_psycopg2 import PostgreSQLPersister + + persister = PostgreSQLPersister.from_values( + db_name="postgres", + user="postgres", + password="postgres", + host="localhost", + port=5432, + table_name="burr_state_durable_test", + ) + persister.initialize() + yield persister + # Teardown: drop durable tables so the next run starts clean. 
+ cursor = persister.connection.cursor() + cursor.execute("DROP TABLE IF EXISTS burr_suspensions") + cursor.execute("DROP TABLE IF EXISTS burr_journal") + persister.connection.commit() + persister.cleanup() + + +@_pg_integration +def test_postgres_supports_durable_storage(pg_persister): + assert supports_durable_storage(pg_persister) is True + + +@_pg_integration +def test_postgres_suspension_round_trip(pg_persister): + pg_persister.save_suspension(_record()) + loaded = pg_persister.load_suspension("pk", "app", "approval") + assert loaded.suspension_id == "sus-1" + assert loaded.state == {"draft": "d"} + assert loaded.inputs == {"x": 1} + assert loaded.schema_json == {"type": "object"} + assert loaded.resolved is False + + +@_pg_integration +def test_postgres_load_suspension_returns_resolved_record(pg_persister): + # Contract: load_suspension returns the record whether or not it is + # resolved; the caller checks record.resolved for resume-once idempotency. + pg_persister.save_suspension(_record()) + pg_persister.mark_suspension_resolved("sus-1") + loaded = pg_persister.load_suspension("pk", "app", "approval") + assert loaded is not None + assert loaded.resolved is True + + +@_pg_integration +def test_postgres_mark_resolved_is_conditional(pg_persister): + pg_persister.save_suspension(_record()) + first = pg_persister.mark_suspension_resolved("sus-1") + second = pg_persister.mark_suspension_resolved("sus-1") + # First call resolves a row; second call resolves nothing (resume-once). 
+ assert first is True + assert second is False + + +@_pg_integration +def test_postgres_journal_round_trip(pg_persister): + pg_persister.save_journal_entry( + JournalEntry("pk", "app", 4, "summarize", 0, "result-a") + ) + pg_persister.save_journal_entry( + JournalEntry("pk", "app", 4, "translate", 1, "result-b") + ) + journal = pg_persister.load_journal("pk", "app", 4) + assert [e.call_index for e in journal] == [0, 1] + assert [e.result for e in journal] == ["result-a", "result-b"] From 43897e7e248685be3a12524233cd3fb4d833394a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 19:23:20 -0300 Subject: [PATCH 41/57] test: env-configurable pg fixture and shim inheritance test The pg_persister fixture was hardcoded to localhost:5432, which made it impossible to run against a Postgres on a non-default port without editing the file. Honor POSTGRES_HOST/PORT/USER/PASSWORD/DB env vars (with the previous values as defaults), so CI and local Docker setups both work. Add a tiny test that confirms the deprecated postgresql.py shim inherits durable-storage support from the canonical b_psycopg2 persister without re-declaring methods. 
--- tests/core/test_durable_persisters.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/tests/core/test_durable_persisters.py b/tests/core/test_durable_persisters.py index 63d99caa2..858e0ceb4 100644 --- a/tests/core/test_durable_persisters.py +++ b/tests/core/test_durable_persisters.py @@ -100,11 +100,11 @@ def pg_persister(): from burr.integrations.persisters.b_psycopg2 import PostgreSQLPersister persister = PostgreSQLPersister.from_values( - db_name="postgres", - user="postgres", - password="postgres", - host="localhost", - port=5432, + db_name=os.environ.get("POSTGRES_DB", "postgres"), + user=os.environ.get("POSTGRES_USER", "postgres"), + password=os.environ.get("POSTGRES_PASSWORD", "postgres"), + host=os.environ.get("POSTGRES_HOST", "localhost"), + port=int(os.environ.get("POSTGRES_PORT", "5432")), table_name="burr_state_durable_test", ) persister.initialize() @@ -165,3 +165,19 @@ def test_postgres_journal_round_trip(pg_persister): journal = pg_persister.load_journal("pk", "app", 4) assert [e.call_index for e in journal] == [0, 1] assert [e.result for e in journal] == ["result-a", "result-b"] + + +def test_deprecated_postgresql_shim_inherits_durable_storage(): + """The deprecated ``burr.integrations.persisters.postgresql.PostgreSQLPersister`` + is a subclass of the canonical psycopg2 persister, so it must inherit the + durable-storage overrides without re-declaring them. 
We don't connect to a + real database here, only confirm ``supports_durable_storage`` is True on a + no-arg instance constructed with a dummy connection.""" + from unittest.mock import MagicMock + + from burr.integrations.persisters.postgresql import ( + PostgreSQLPersister as DeprecatedShim, + ) + + instance = DeprecatedShim(connection=MagicMock(), table_name="burr_state_shim_test") + assert supports_durable_storage(instance) is True From da4b4a76e8f8dd6b3ba0df0c63befd5db8dd57fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 19:30:13 -0300 Subject: [PATCH 42/57] refactor: tighten type hints and imports on psycopg2 durable methods Spec-compliance pass left a few quality gaps in the Postgres durable methods: parameter type hints were stripped, return types were loose ('list' vs 'list[JournalEntry]'), and 'serde', 'json', 'SuspensionRecord' and 'JournalEntry' were re-imported inside every method body even though no circular import constraint requires it. Lift the imports to module top, tighten signatures to match the SQLite reference, and drop a misleading F401 type-reference comment that never matched a real annotation. Also drop the persister's state table in the test fixture teardown so future state-table writes can't leak between runs. 
--- burr/integrations/persisters/b_psycopg2.py | 30 ++++++++-------------- tests/core/test_durable_persisters.py | 3 ++- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/burr/integrations/persisters/b_psycopg2.py b/burr/integrations/persisters/b_psycopg2.py index f141e6aec..b676f1cc2 100644 --- a/burr/integrations/persisters/b_psycopg2.py +++ b/burr/integrations/persisters/b_psycopg2.py @@ -26,7 +26,8 @@ import logging from typing import Literal, Optional -from burr.core import persistence, state +from burr.core import persistence, serde, state +from burr.core.durable import JournalEntry, SuspensionRecord logger = logging.getLogger(__name__) @@ -295,13 +296,8 @@ def save( ) self.connection.commit() - def save_suspension(self, record) -> None: + def save_suspension(self, record: SuspensionRecord) -> None: """Persist a suspension record into the burr_suspensions table.""" - import json - - from burr.core import serde - from burr.core.durable import SuspensionRecord # noqa: F401 — type reference only - cursor = self.connection.cursor() cursor.execute( """INSERT INTO burr_suspensions @@ -338,16 +334,15 @@ def save_suspension(self, record) -> None: ) self.connection.commit() - def load_suspension(self, partition_key, app_id: str, channel: str): + def load_suspension( + self, partition_key: Optional[str], app_id: str, channel: str + ) -> Optional[SuspensionRecord]: """Load the most recent suspension record for (partition_key, app_id, channel). Returns the record whether or not it is resolved; callers check ``record.resolved`` for resume-once idempotency. Returns ``None`` when no record exists for this combination. 
""" - from burr.core import serde - from burr.core.durable import SuspensionRecord - cursor = self.connection.cursor() cursor.execute( """SELECT suspension_id, partition_key, app_id, sequence_id, position, @@ -391,12 +386,8 @@ def mark_suspension_resolved(self, suspension_id: str) -> bool: self.connection.commit() return cursor.rowcount > 0 - def save_journal_entry(self, entry) -> None: + def save_journal_entry(self, entry: JournalEntry) -> None: """Persist one memoized sub-step into the burr_journal table.""" - import json - - from burr.core import serde - cursor = self.connection.cursor() cursor.execute( """INSERT INTO burr_journal @@ -416,11 +407,10 @@ def save_journal_entry(self, entry) -> None: ) self.connection.commit() - def load_journal(self, partition_key, app_id: str, sequence_id: int) -> list: + def load_journal( + self, partition_key: Optional[str], app_id: str, sequence_id: int + ) -> list[JournalEntry]: """Load journal entries for a suspended action, ordered by call_index.""" - from burr.core import serde - from burr.core.durable import JournalEntry - cursor = self.connection.cursor() cursor.execute( """SELECT partition_key, app_id, sequence_id, step_key, call_index, diff --git a/tests/core/test_durable_persisters.py b/tests/core/test_durable_persisters.py index 858e0ceb4..7b743baf2 100644 --- a/tests/core/test_durable_persisters.py +++ b/tests/core/test_durable_persisters.py @@ -109,10 +109,11 @@ def pg_persister(): ) persister.initialize() yield persister - # Teardown: drop durable tables so the next run starts clean. + # Teardown: drop durable + state tables so the next run starts clean. 
cursor = persister.connection.cursor() cursor.execute("DROP TABLE IF EXISTS burr_suspensions") cursor.execute("DROP TABLE IF EXISTS burr_journal") + cursor.execute("DROP TABLE IF EXISTS burr_state_durable_test") persister.connection.commit() persister.cleanup() From f6756b9a70e4aa2f686a57b72d961a4c85c184a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 19:34:16 -0300 Subject: [PATCH 43/57] feat: asyncpg persister durable storage --- burr/integrations/persisters/b_asyncpg.py | 206 +++++++++++++++++++++- tests/core/test_durable_persisters.py | 86 +++++++++ 2 files changed, 291 insertions(+), 1 deletion(-) diff --git a/burr/integrations/persisters/b_asyncpg.py b/burr/integrations/persisters/b_asyncpg.py index 66f91f206..1c30a8468 100644 --- a/burr/integrations/persisters/b_asyncpg.py +++ b/burr/integrations/persisters/b_asyncpg.py @@ -17,10 +17,12 @@ import json import logging +from datetime import datetime from typing import Any, ClassVar, Literal, Optional from burr.common.types import BaseCopyable -from burr.core import persistence, state +from burr.core import persistence, serde, state +from burr.core.durable import JournalEntry, SuspensionRecord from burr.integrations import base try: @@ -244,9 +246,48 @@ async def create_table(self, table_name: str): finally: await self._release_connection(conn, acquired) + async def create_durable_tables_if_not_exist(self): + """Creates the durable-execution tables (suspensions + journal) if they don't exist.""" + conn, acquired = await self._get_connection() + try: + async with conn.transaction(): + await conn.execute( + """ + CREATE TABLE IF NOT EXISTS burr_suspensions ( + suspension_id TEXT PRIMARY KEY, + partition_key TEXT, + app_id TEXT NOT NULL, + sequence_id INTEGER NOT NULL, + position TEXT NOT NULL, + channel TEXT NOT NULL, + schema_json JSONB, + metadata_json JSONB, + inputs_json JSONB, + state_json JSONB NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + resolved 
BOOLEAN NOT NULL DEFAULT false + )""" + ) + await conn.execute( + """ + CREATE TABLE IF NOT EXISTS burr_journal ( + partition_key TEXT, + app_id TEXT NOT NULL, + sequence_id INTEGER NOT NULL, + step_key TEXT NOT NULL, + call_index INTEGER NOT NULL, + result_json JSONB NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (partition_key, app_id, sequence_id, step_key) + )""" + ) + finally: + await self._release_connection(conn, acquired) + async def initialize(self): """Creates the table""" await self.create_table(self.table_name) + await self.create_durable_tables_if_not_exist() self._initialized = True async def is_initialized(self) -> bool: @@ -399,6 +440,169 @@ async def save( finally: await self._release_connection(conn, acquired) + async def save_suspension(self, record: SuspensionRecord) -> None: + """Persist a suspension record into the burr_suspensions table.""" + conn, acquired = await self._get_connection() + try: + # asyncpg requires datetime objects for TIMESTAMP columns; + # SuspensionRecord.created_at is typed as str so we parse it when needed. 
+ created_at = record.created_at + if isinstance(created_at, str): + created_at = datetime.fromisoformat(created_at) + await conn.execute( + """INSERT INTO burr_suspensions + (suspension_id, partition_key, app_id, sequence_id, position, + channel, schema_json, metadata_json, inputs_json, state_json, + created_at, resolved) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12) + ON CONFLICT (suspension_id) DO UPDATE SET + partition_key = EXCLUDED.partition_key, + app_id = EXCLUDED.app_id, + sequence_id = EXCLUDED.sequence_id, + position = EXCLUDED.position, + channel = EXCLUDED.channel, + schema_json = EXCLUDED.schema_json, + metadata_json = EXCLUDED.metadata_json, + inputs_json = EXCLUDED.inputs_json, + state_json = EXCLUDED.state_json, + created_at = EXCLUDED.created_at, + resolved = EXCLUDED.resolved""", + record.suspension_id, + record.partition_key, + record.app_id, + record.sequence_id, + record.position, + record.channel, + json.dumps(record.schema_json), + json.dumps(serde.serialize(record.metadata, **self.serde_kwargs)), + json.dumps(serde.serialize(record.inputs, **self.serde_kwargs)), + json.dumps(serde.serialize(record.state, **self.serde_kwargs)), + created_at, + record.resolved, + ) + finally: + await self._release_connection(conn, acquired) + + async def load_suspension( + self, partition_key: Optional[str], app_id: str, channel: str + ) -> Optional[SuspensionRecord]: + """Load the most recent suspension record for (partition_key, app_id, channel). + + Returns the record whether or not it is resolved; callers check + ``record.resolved`` for resume-once idempotency. Returns ``None`` + when no record exists for this combination. 
+ """ + conn, acquired = await self._get_connection() + try: + row = await conn.fetchrow( + """SELECT suspension_id, partition_key, app_id, sequence_id, position, + channel, schema_json, metadata_json, inputs_json, state_json, + created_at, resolved + FROM burr_suspensions + WHERE partition_key IS NOT DISTINCT FROM $1 AND app_id = $2 AND channel = $3 + ORDER BY created_at DESC LIMIT 1""", + partition_key, + app_id, + channel, + ) + if row is None: + return None + # asyncpg returns JSONB columns as strings — must json.loads() explicitly + # (unlike psycopg2 which auto-parses JSONB to Python objects). + schema = json.loads(row[6]) if row[6] is not None else None + metadata_raw = json.loads(row[7]) if row[7] is not None else None + inputs_raw = json.loads(row[8]) # asyncpg: JSONB is a string, must deserialize + state_raw = json.loads(row[9]) # asyncpg: JSONB is a string, must deserialize + return SuspensionRecord( + suspension_id=row[0], + partition_key=row[1], + app_id=row[2], + sequence_id=row[3], + position=row[4], + channel=row[5], + schema_json=schema, + metadata=serde.deserialize(metadata_raw, **self.serde_kwargs) + if metadata_raw is not None + else None, + inputs=serde.deserialize(inputs_raw, **self.serde_kwargs), + state=serde.deserialize(state_raw, **self.serde_kwargs), + # asyncpg returns TIMESTAMP as datetime; SuspensionRecord.created_at is str. + created_at=row[10].isoformat() if isinstance(row[10], datetime) else row[10], + resolved=bool(row[11]), + ) + finally: + await self._release_connection(conn, acquired) + + async def mark_suspension_resolved(self, suspension_id: str) -> bool: + """Mark a suspension consumed. Conditional UPDATE for resume-once idempotency. + + :return: True if a row was updated (first call), False if already resolved (no-op). 
+ """ + conn, acquired = await self._get_connection() + try: + status = await conn.execute( + "UPDATE burr_suspensions SET resolved = true " + "WHERE suspension_id = $1 AND resolved = false", + suspension_id, + ) + # asyncpg returns status string like 'UPDATE 1' or 'UPDATE 0' + return int(status.split()[-1]) > 0 + finally: + await self._release_connection(conn, acquired) + + async def save_journal_entry(self, entry: JournalEntry) -> None: + """Persist one memoized sub-step into the burr_journal table.""" + conn, acquired = await self._get_connection() + try: + await conn.execute( + """INSERT INTO burr_journal + (partition_key, app_id, sequence_id, step_key, call_index, result_json) + VALUES ($1, $2, $3, $4, $5, $6) + ON CONFLICT (partition_key, app_id, sequence_id, step_key) DO UPDATE SET + call_index = EXCLUDED.call_index, + result_json = EXCLUDED.result_json""", + entry.partition_key, + entry.app_id, + entry.sequence_id, + entry.step_key, + entry.call_index, + json.dumps(serde.serialize(entry.result, **self.serde_kwargs)), + ) + finally: + await self._release_connection(conn, acquired) + + async def load_journal( + self, partition_key: Optional[str], app_id: str, sequence_id: int + ) -> list[JournalEntry]: + """Load journal entries for a suspended action, ordered by call_index.""" + conn, acquired = await self._get_connection() + try: + rows = await conn.fetch( + """SELECT partition_key, app_id, sequence_id, step_key, call_index, + result_json + FROM burr_journal + WHERE partition_key IS NOT DISTINCT FROM $1 AND app_id = $2 AND sequence_id = $3 + ORDER BY call_index ASC""", + partition_key, + app_id, + sequence_id, + ) + # asyncpg returns JSONB columns as strings — must json.loads() explicitly + # (unlike psycopg2 which auto-parses JSONB to Python objects). 
+ return [ + JournalEntry( + partition_key=row[0], + app_id=row[1], + sequence_id=row[2], + step_key=row[3], + call_index=row[4], + result=serde.deserialize(json.loads(row[5]), **self.serde_kwargs), + ) + for row in rows + ] + finally: + await self._release_connection(conn, acquired) + async def cleanup(self): """Closes the connection to the database.""" if self.connection is not None: diff --git a/tests/core/test_durable_persisters.py b/tests/core/test_durable_persisters.py index 7b743baf2..f9892d13f 100644 --- a/tests/core/test_durable_persisters.py +++ b/tests/core/test_durable_persisters.py @@ -168,6 +168,92 @@ def test_postgres_journal_round_trip(pg_persister): assert [e.result for e in journal] == ["result-a", "result-b"] +# --------------------------------------------------------------------------- +# asyncpg durable storage tests — skipped unless BURR_CI_INTEGRATION_TESTS=true +# --------------------------------------------------------------------------- + +import pytest_asyncio + + +@pytest_asyncio.fixture +async def asyncpg_persister(): + from burr.integrations.persisters.b_asyncpg import AsyncPostgreSQLPersister + + persister = await AsyncPostgreSQLPersister.from_values( + db_name=os.environ.get("POSTGRES_DB", "postgres"), + user=os.environ.get("POSTGRES_USER", "postgres"), + password=os.environ.get("POSTGRES_PASSWORD", "postgres"), + host=os.environ.get("POSTGRES_HOST", "localhost"), + port=int(os.environ.get("POSTGRES_PORT", "5432")), + table_name="burr_state_asyncpg_durable_test", + ) + await persister.initialize() + yield persister + conn, acquired = await persister._get_connection() + try: + await conn.execute("DROP TABLE IF EXISTS burr_suspensions") + await conn.execute("DROP TABLE IF EXISTS burr_journal") + await conn.execute("DROP TABLE IF EXISTS burr_state_asyncpg_durable_test") + finally: + await persister._release_connection(conn, acquired) + await persister.cleanup() + + +@_pg_integration +@pytest.mark.asyncio +async def 
test_asyncpg_supports_durable_storage(asyncpg_persister): + assert supports_durable_storage(asyncpg_persister) is True + + +@_pg_integration +@pytest.mark.asyncio +async def test_asyncpg_suspension_round_trip(asyncpg_persister): + await asyncpg_persister.save_suspension(_record()) + loaded = await asyncpg_persister.load_suspension("pk", "app", "approval") + assert loaded.suspension_id == "sus-1" + assert loaded.state == {"draft": "d"} + assert loaded.inputs == {"x": 1} + assert loaded.schema_json == {"type": "object"} + assert loaded.resolved is False + + +@_pg_integration +@pytest.mark.asyncio +async def test_asyncpg_load_suspension_returns_resolved_record(asyncpg_persister): + # Contract: load_suspension returns the record whether or not it is + # resolved; the caller checks record.resolved for resume-once idempotency. + await asyncpg_persister.save_suspension(_record()) + await asyncpg_persister.mark_suspension_resolved("sus-1") + loaded = await asyncpg_persister.load_suspension("pk", "app", "approval") + assert loaded is not None + assert loaded.resolved is True + + +@_pg_integration +@pytest.mark.asyncio +async def test_asyncpg_mark_resolved_is_conditional(asyncpg_persister): + await asyncpg_persister.save_suspension(_record()) + first = await asyncpg_persister.mark_suspension_resolved("sus-1") + second = await asyncpg_persister.mark_suspension_resolved("sus-1") + # First call resolves a row; second call resolves nothing (resume-once). 
+ assert first is True + assert second is False + + +@_pg_integration +@pytest.mark.asyncio +async def test_asyncpg_journal_round_trip(asyncpg_persister): + await asyncpg_persister.save_journal_entry( + JournalEntry("pk", "app", 4, "summarize", 0, "result-a") + ) + await asyncpg_persister.save_journal_entry( + JournalEntry("pk", "app", 4, "translate", 1, "result-b") + ) + journal = await asyncpg_persister.load_journal("pk", "app", 4) + assert [e.call_index for e in journal] == [0, 1] + assert [e.result for e in journal] == ["result-a", "result-b"] + + def test_deprecated_postgresql_shim_inherits_durable_storage(): """The deprecated ``burr.integrations.persisters.postgresql.PostgreSQLPersister`` is a subclass of the canonical psycopg2 persister, so it must inherit the From eae5d9ae8a3fe33e2c44a38d2cf468e41424e40c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 19:36:32 -0300 Subject: [PATCH 44/57] feat: aiosqlite persister durable storage --- burr/integrations/persisters/b_aiosqlite.py | 157 +++++++++++++++++++- tests/core/test_durable_persisters.py | 67 +++++++++ 2 files changed, 223 insertions(+), 1 deletion(-) diff --git a/burr/integrations/persisters/b_aiosqlite.py b/burr/integrations/persisters/b_aiosqlite.py index 9ce3c4a5d..6cb0b0ac9 100644 --- a/burr/integrations/persisters/b_aiosqlite.py +++ b/burr/integrations/persisters/b_aiosqlite.py @@ -22,7 +22,8 @@ import aiosqlite from burr.common.types import BaseCopyable -from burr.core import State +from burr.core import State, serde +from burr.core.durable import JournalEntry, SuspensionRecord from burr.core.persistence import AsyncBaseStatePersister, PersistedStateData logger = logging.getLogger() @@ -147,10 +148,45 @@ async def create_table_if_not_exists(self, table_name: str): ) await self.connection.commit() + async def create_durable_tables_if_not_exist(self): + """Creates the durable-execution tables (suspensions + journal) if they don't exist.""" + await 
self.connection.execute( + """ + CREATE TABLE IF NOT EXISTS burr_suspensions ( + suspension_id TEXT PRIMARY KEY, + partition_key TEXT, + app_id TEXT NOT NULL, + sequence_id INTEGER NOT NULL, + position TEXT NOT NULL, + channel TEXT NOT NULL, + schema_json TEXT, + metadata_json TEXT, + inputs_json TEXT, + state_json TEXT NOT NULL, + created_at TEXT, + resolved INTEGER NOT NULL DEFAULT 0 + )""" + ) + await self.connection.execute( + """ + CREATE TABLE IF NOT EXISTS burr_journal ( + partition_key TEXT, + app_id TEXT NOT NULL, + sequence_id INTEGER NOT NULL, + step_key TEXT NOT NULL, + call_index INTEGER NOT NULL, + result_json TEXT NOT NULL, + created_at TEXT, + PRIMARY KEY (partition_key, app_id, sequence_id, step_key) + )""" + ) + await self.connection.commit() + async def initialize(self): """Asynchronously creates the table if it doesn't exist""" # Usage await self.create_table_if_not_exists(self.table_name) + await self.create_durable_tables_if_not_exist() self._initialized = True async def is_initialized(self) -> bool: @@ -294,6 +330,125 @@ async def save( ) await self.connection.commit() + async def save_suspension(self, record: SuspensionRecord) -> None: + """Persist a suspension record into the burr_suspensions table.""" + await self.connection.execute( + """INSERT OR REPLACE INTO burr_suspensions + (suspension_id, partition_key, app_id, sequence_id, position, + channel, schema_json, metadata_json, inputs_json, state_json, + created_at, resolved) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + ( + record.suspension_id, + record.partition_key, + record.app_id, + record.sequence_id, + record.position, + record.channel, + json.dumps(record.schema_json), + json.dumps(serde.serialize(record.metadata, **self.serde_kwargs)), + json.dumps(serde.serialize(record.inputs, **self.serde_kwargs)), + json.dumps(serde.serialize(record.state, **self.serde_kwargs)), + record.created_at, + 1 if record.resolved else 0, + ), + ) + await self.connection.commit() + + async def 
load_suspension( + self, partition_key: Optional[str], app_id: str, channel: str + ) -> Optional[SuspensionRecord]: + """Load the most recent suspension record for (partition_key, app_id, channel). + + Returns the record whether or not it is resolved; callers check + ``record.resolved`` for resume-once idempotency. Returns ``None`` + when no record exists for this combination. + """ + cursor = await self.connection.execute( + """SELECT suspension_id, partition_key, app_id, sequence_id, position, + channel, schema_json, metadata_json, inputs_json, state_json, + created_at, resolved + FROM burr_suspensions + WHERE partition_key IS ? AND app_id = ? AND channel = ? + ORDER BY created_at DESC LIMIT 1""", + (partition_key, app_id, channel), + ) + row = await cursor.fetchone() + if row is None: + return None + return SuspensionRecord( + suspension_id=row[0], + partition_key=row[1], + app_id=row[2], + sequence_id=row[3], + position=row[4], + channel=row[5], + schema_json=json.loads(row[6]) if row[6] is not None else None, + metadata=serde.deserialize(json.loads(row[7]), **self.serde_kwargs) + if row[7] is not None + else None, + inputs=serde.deserialize(json.loads(row[8]), **self.serde_kwargs), + state=serde.deserialize(json.loads(row[9]), **self.serde_kwargs), + created_at=row[10], + resolved=bool(row[11]), + ) + + async def mark_suspension_resolved(self, suspension_id: str) -> bool: + """Mark a suspension consumed. Conditional UPDATE for resume-once idempotency. + + :return: True if a row was updated (first call), False if already resolved (no-op). + """ + cursor = await self.connection.execute( + "UPDATE burr_suspensions SET resolved = 1 " + "WHERE suspension_id = ? 
AND resolved = 0", + (suspension_id,), + ) + await self.connection.commit() + return cursor.rowcount > 0 + + async def save_journal_entry(self, entry: JournalEntry) -> None: + """Persist one memoized sub-step into the burr_journal table.""" + await self.connection.execute( + """INSERT OR REPLACE INTO burr_journal + (partition_key, app_id, sequence_id, step_key, call_index, + result_json) + VALUES (?, ?, ?, ?, ?, ?)""", + ( + entry.partition_key, + entry.app_id, + entry.sequence_id, + entry.step_key, + entry.call_index, + json.dumps(serde.serialize(entry.result, **self.serde_kwargs)), + ), + ) + await self.connection.commit() + + async def load_journal( + self, partition_key: Optional[str], app_id: str, sequence_id: int + ) -> list[JournalEntry]: + """Load journal entries for a suspended action, ordered by call_index.""" + cursor = await self.connection.execute( + """SELECT partition_key, app_id, sequence_id, step_key, call_index, + result_json + FROM burr_journal + WHERE partition_key IS ? AND app_id = ? AND sequence_id = ? 
+ ORDER BY call_index ASC""", + (partition_key, app_id, sequence_id), + ) + rows = await cursor.fetchall() + return [ + JournalEntry( + partition_key=row[0], + app_id=row[1], + sequence_id=row[2], + step_key=row[3], + call_index=row[4], + result=serde.deserialize(json.loads(row[5]), **self.serde_kwargs), + ) + for row in rows + ] + async def cleanup(self): """Closes the connection to the database.""" await self.connection.close() diff --git a/tests/core/test_durable_persisters.py b/tests/core/test_durable_persisters.py index f9892d13f..69404540e 100644 --- a/tests/core/test_durable_persisters.py +++ b/tests/core/test_durable_persisters.py @@ -254,6 +254,73 @@ async def test_asyncpg_journal_round_trip(asyncpg_persister): assert [e.result for e in journal] == ["result-a", "result-b"] +# --------------------------------------------------------------------------- +# aiosqlite durable storage tests — no integration marker, uses :memory: DB +# --------------------------------------------------------------------------- + + +@pytest_asyncio.fixture +async def aiosqlite_persister(): + from burr.integrations.persisters.b_aiosqlite import AsyncSQLitePersister + + persister = await AsyncSQLitePersister.from_values(db_path=":memory:") + await persister.initialize() + yield persister + await persister.connection.close() + + +@pytest.mark.asyncio +async def test_aiosqlite_supports_durable_storage(aiosqlite_persister): + from burr.core.durable import supports_durable_storage + + assert supports_durable_storage(aiosqlite_persister) is True + + +@pytest.mark.asyncio +async def test_aiosqlite_suspension_round_trip(aiosqlite_persister): + await aiosqlite_persister.save_suspension(_record()) + loaded = await aiosqlite_persister.load_suspension("pk", "app", "approval") + assert loaded.suspension_id == "sus-1" + assert loaded.state == {"draft": "d"} + assert loaded.inputs == {"x": 1} + assert loaded.schema_json == {"type": "object"} + assert loaded.resolved is False + + 
+@pytest.mark.asyncio +async def test_aiosqlite_load_suspension_returns_resolved_record(aiosqlite_persister): + # Contract: load_suspension returns the record whether or not it is + # resolved; the caller checks record.resolved for resume-once idempotency. + await aiosqlite_persister.save_suspension(_record()) + await aiosqlite_persister.mark_suspension_resolved("sus-1") + loaded = await aiosqlite_persister.load_suspension("pk", "app", "approval") + assert loaded is not None + assert loaded.resolved is True + + +@pytest.mark.asyncio +async def test_aiosqlite_mark_resolved_is_conditional(aiosqlite_persister): + await aiosqlite_persister.save_suspension(_record()) + first = await aiosqlite_persister.mark_suspension_resolved("sus-1") + second = await aiosqlite_persister.mark_suspension_resolved("sus-1") + # First call resolves a row; second call resolves nothing (resume-once). + assert first is True + assert second is False + + +@pytest.mark.asyncio +async def test_aiosqlite_journal_round_trip(aiosqlite_persister): + await aiosqlite_persister.save_journal_entry( + JournalEntry("pk", "app", 4, "summarize", 0, "result-a") + ) + await aiosqlite_persister.save_journal_entry( + JournalEntry("pk", "app", 4, "translate", 1, "result-b") + ) + journal = await aiosqlite_persister.load_journal("pk", "app", 4) + assert [e.call_index for e in journal] == [0, 1] + assert [e.result for e in journal] == ["result-a", "result-b"] + + def test_deprecated_postgresql_shim_inherits_durable_storage(): """The deprecated ``burr.integrations.persisters.postgresql.PostgreSQLPersister`` is a subclass of the canonical psycopg2 persister, so it must inherit the From f1cd0ae9c9c700d7c48d48efab831db5f19e0e63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 19:40:53 -0300 Subject: [PATCH 45/57] feat: redis persister durable storage (sync and async) --- burr/integrations/persisters/b_redis.py | 325 +++++++++++++++++++++++- tests/core/test_durable_persisters.py | 
145 +++++++++++ 2 files changed, 469 insertions(+), 1 deletion(-) diff --git a/burr/integrations/persisters/b_redis.py b/burr/integrations/persisters/b_redis.py index 5f091dc43..517e8c227 100644 --- a/burr/integrations/persisters/b_redis.py +++ b/burr/integrations/persisters/b_redis.py @@ -29,7 +29,8 @@ from datetime import datetime, timezone from typing import Literal, Optional -from burr.core import persistence, state +from burr.core import persistence, serde, state +from burr.core.durable import JournalEntry, SuspensionRecord logger = logging.getLogger(__name__) @@ -191,6 +192,171 @@ def save( namespaced_partition_key = add_namespace_to_partition_key(partition_key, self.namespace) self.connection.zadd(namespaced_partition_key, {app_id: sequence_id}) + # ------------------------------------------------------------------ + # Durable-execution helpers + # ------------------------------------------------------------------ + + def _partition_key_safe(self, partition_key: Optional[str]) -> str: + """Return a Redis-key-safe representation of partition_key.""" + return "__none__" if partition_key is None else partition_key + + def _suspension_hash_key(self, partition_key: Optional[str], app_id: str, channel: str) -> str: + pk = self._partition_key_safe(partition_key) + return f"burr:suspension:{pk}:{app_id}:{channel}" + + def _journal_list_key( + self, partition_key: Optional[str], app_id: str, sequence_id: int + ) -> str: + pk = self._partition_key_safe(partition_key) + return f"burr:journal:{pk}:{app_id}:{sequence_id}" + + # ------------------------------------------------------------------ + # Durable-execution methods + # ------------------------------------------------------------------ + + def save_suspension(self, record: SuspensionRecord) -> None: + """Persist a SuspensionRecord to a Redis HASH. + + Also writes a secondary index key so ``mark_suspension_resolved`` + can locate the hash by ``suspension_id`` alone. 
+ + The hash ``resolved`` field stores a literal string and is updated + by ``mark_suspension_resolved``; callers must use ``load_suspension`` + to get the authoritative ``resolved`` state (backed by the SETNX key). + """ + hash_key = self._suspension_hash_key( + record.partition_key, record.app_id, record.channel + ) + self.connection.hset( + hash_key, + mapping={ + "suspension_id": record.suspension_id, + "partition_key": json.dumps(record.partition_key), + "app_id": record.app_id, + "sequence_id": str(record.sequence_id), + "position": record.position, + "channel": record.channel, + "schema_json": json.dumps(record.schema_json), + "metadata_json": json.dumps( + serde.serialize(record.metadata, **self.serde_kwargs) + ), + "inputs_json": json.dumps( + serde.serialize(record.inputs, **self.serde_kwargs) + ), + "state_json": json.dumps( + serde.serialize(record.state, **self.serde_kwargs) + ), + "created_at": record.created_at, + "resolved": "true" if record.resolved else "false", + }, + ) + # Secondary index: suspension_id -> hash key, for mark_suspension_resolved + self.connection.set(f"burr:suspension_id_idx:{record.suspension_id}", hash_key) + + def load_suspension( + self, partition_key: Optional[str], app_id: str, channel: str + ) -> Optional[SuspensionRecord]: + """Load the suspension record for (partition_key, app_id, channel). + + Returns the record whether or not it is resolved; callers check + ``record.resolved`` for resume-once idempotency. Returns ``None`` + when no record exists. + + The ``resolved`` flag is determined by the existence of the SETNX + key ``burr:resolved:{suspension_id}`` rather than the hash field. 
+ """ + hash_key = self._suspension_hash_key(partition_key, app_id, channel) + data = self.connection.hgetall(hash_key) + if not data: + return None + suspension_id = data[b"suspension_id"].decode() + resolved = bool(self.connection.exists(f"burr:resolved:{suspension_id}")) + return SuspensionRecord( + suspension_id=suspension_id, + partition_key=json.loads(data[b"partition_key"].decode()), + app_id=data[b"app_id"].decode(), + sequence_id=int(data[b"sequence_id"].decode()), + position=data[b"position"].decode(), + channel=data[b"channel"].decode(), + schema_json=json.loads(data[b"schema_json"].decode()), + metadata=serde.deserialize( + json.loads(data[b"metadata_json"].decode()), **self.serde_kwargs + ), + inputs=serde.deserialize( + json.loads(data[b"inputs_json"].decode()), **self.serde_kwargs + ), + state=serde.deserialize( + json.loads(data[b"state_json"].decode()), **self.serde_kwargs + ), + created_at=data[b"created_at"].decode(), + resolved=resolved, + ) + + def mark_suspension_resolved(self, suspension_id: str) -> bool: + """Mark a suspension consumed. Atomic SETNX for resume-once idempotency. + + :return: True if this call performed the first flip, False if already + resolved. An unknown suspension_id is not detected: it also returns True. + """ + if self.connection.setnx(f"burr:resolved:{suspension_id}", 1): + # Mirror the flag into the hash field for readers that inspect the hash + # directly; load_suspension itself derives ``resolved`` from the SETNX key. + hash_key_bytes = self.connection.get(f"burr:suspension_id_idx:{suspension_id}") + if hash_key_bytes is not None: + self.connection.hset(hash_key_bytes.decode(), "resolved", "true") + return True + return False + + def save_journal_entry(self, entry: JournalEntry) -> None: + """Persist one memoized sub-step to a Redis LIST. + + Upserts by step_key: scans for an existing entry with the same + step_key and replaces it via LSET if found; otherwise appends with + RPUSH. Journals are short so the linear scan is acceptable. 
+ """ + list_key = self._journal_list_key(entry.partition_key, entry.app_id, entry.sequence_id) + serialized = json.dumps( + { + "partition_key": json.dumps(entry.partition_key), + "app_id": entry.app_id, + "sequence_id": entry.sequence_id, + "step_key": entry.step_key, + "call_index": entry.call_index, + "result_json": json.dumps(serde.serialize(entry.result, **self.serde_kwargs)), + } + ) + existing = self.connection.lrange(list_key, 0, -1) + for idx, raw in enumerate(existing): + item = json.loads(raw.decode()) + if item.get("step_key") == entry.step_key: + self.connection.lset(list_key, idx, serialized) + return + self.connection.rpush(list_key, serialized) + + def load_journal( + self, partition_key: Optional[str], app_id: str, sequence_id: int + ) -> list[JournalEntry]: + """Load journal entries for a suspended action, sorted by call_index.""" + list_key = self._journal_list_key(partition_key, app_id, sequence_id) + raw_entries = self.connection.lrange(list_key, 0, -1) + entries = [] + for raw in raw_entries: + item = json.loads(raw.decode()) + entries.append( + JournalEntry( + partition_key=json.loads(item["partition_key"]), + app_id=item["app_id"], + sequence_id=item["sequence_id"], + step_key=item["step_key"], + call_index=item["call_index"], + result=serde.deserialize( + json.loads(item["result_json"]), **self.serde_kwargs + ), + ) + ) + entries.sort(key=lambda e: e.call_index) + return entries + def cleanup(self): """Closes the connection to the database.""" self.connection.close() @@ -372,6 +538,163 @@ async def save( namespaced_partition_key = add_namespace_to_partition_key(partition_key, self.namespace) await self.connection.zadd(namespaced_partition_key, {app_id: sequence_id}) + # ------------------------------------------------------------------ + # Durable-execution helpers (async) + # ------------------------------------------------------------------ + + def _partition_key_safe(self, partition_key: Optional[str]) -> str: + """Return a 
Redis-key-safe representation of partition_key.""" + return "__none__" if partition_key is None else partition_key + + def _suspension_hash_key(self, partition_key: Optional[str], app_id: str, channel: str) -> str: + pk = self._partition_key_safe(partition_key) + return f"burr:suspension:{pk}:{app_id}:{channel}" + + def _journal_list_key( + self, partition_key: Optional[str], app_id: str, sequence_id: int + ) -> str: + pk = self._partition_key_safe(partition_key) + return f"burr:journal:{pk}:{app_id}:{sequence_id}" + + # ------------------------------------------------------------------ + # Durable-execution methods (async) + # ------------------------------------------------------------------ + + async def save_suspension(self, record: SuspensionRecord) -> None: + """Persist a SuspensionRecord to a Redis HASH (async). + + Also writes a secondary index key so ``mark_suspension_resolved`` + can locate the hash by ``suspension_id`` alone. + """ + hash_key = self._suspension_hash_key( + record.partition_key, record.app_id, record.channel + ) + await self.connection.hset( + hash_key, + mapping={ + "suspension_id": record.suspension_id, + "partition_key": json.dumps(record.partition_key), + "app_id": record.app_id, + "sequence_id": str(record.sequence_id), + "position": record.position, + "channel": record.channel, + "schema_json": json.dumps(record.schema_json), + "metadata_json": json.dumps( + serde.serialize(record.metadata, **self.serde_kwargs) + ), + "inputs_json": json.dumps( + serde.serialize(record.inputs, **self.serde_kwargs) + ), + "state_json": json.dumps( + serde.serialize(record.state, **self.serde_kwargs) + ), + "created_at": record.created_at, + "resolved": "true" if record.resolved else "false", + }, + ) + await self.connection.set( + f"burr:suspension_id_idx:{record.suspension_id}", hash_key + ) + + async def load_suspension( + self, partition_key: Optional[str], app_id: str, channel: str + ) -> Optional[SuspensionRecord]: + """Load the suspension 
record for (partition_key, app_id, channel) (async). + + Returns the record whether or not it is resolved; callers check + ``record.resolved`` for resume-once idempotency. Returns ``None`` + when no record exists. + """ + hash_key = self._suspension_hash_key(partition_key, app_id, channel) + data = await self.connection.hgetall(hash_key) + if not data: + return None + suspension_id = data[b"suspension_id"].decode() + resolved = bool(await self.connection.exists(f"burr:resolved:{suspension_id}")) + return SuspensionRecord( + suspension_id=suspension_id, + partition_key=json.loads(data[b"partition_key"].decode()), + app_id=data[b"app_id"].decode(), + sequence_id=int(data[b"sequence_id"].decode()), + position=data[b"position"].decode(), + channel=data[b"channel"].decode(), + schema_json=json.loads(data[b"schema_json"].decode()), + metadata=serde.deserialize( + json.loads(data[b"metadata_json"].decode()), **self.serde_kwargs + ), + inputs=serde.deserialize( + json.loads(data[b"inputs_json"].decode()), **self.serde_kwargs + ), + state=serde.deserialize( + json.loads(data[b"state_json"].decode()), **self.serde_kwargs + ), + created_at=data[b"created_at"].decode(), + resolved=resolved, + ) + + async def mark_suspension_resolved(self, suspension_id: str) -> bool: + """Mark a suspension consumed. Atomic SETNX for resume-once idempotency (async). + + :return: True if this call performed the first flip, False if already resolved. + """ + if await self.connection.setnx(f"burr:resolved:{suspension_id}", 1): + hash_key_bytes = await self.connection.get( + f"burr:suspension_id_idx:{suspension_id}" + ) + if hash_key_bytes is not None: + await self.connection.hset(hash_key_bytes.decode(), "resolved", "true") + return True + return False + + async def save_journal_entry(self, entry: JournalEntry) -> None: + """Persist one memoized sub-step to a Redis LIST (async). 
+ + Upserts by step_key: scans for an existing entry with the same + step_key and replaces it via LSET if found; otherwise appends. + """ + list_key = self._journal_list_key(entry.partition_key, entry.app_id, entry.sequence_id) + serialized = json.dumps( + { + "partition_key": json.dumps(entry.partition_key), + "app_id": entry.app_id, + "sequence_id": entry.sequence_id, + "step_key": entry.step_key, + "call_index": entry.call_index, + "result_json": json.dumps(serde.serialize(entry.result, **self.serde_kwargs)), + } + ) + existing = await self.connection.lrange(list_key, 0, -1) + for idx, raw in enumerate(existing): + item = json.loads(raw.decode()) + if item.get("step_key") == entry.step_key: + await self.connection.lset(list_key, idx, serialized) + return + await self.connection.rpush(list_key, serialized) + + async def load_journal( + self, partition_key: Optional[str], app_id: str, sequence_id: int + ) -> list[JournalEntry]: + """Load journal entries for a suspended action, sorted by call_index (async).""" + list_key = self._journal_list_key(partition_key, app_id, sequence_id) + raw_entries = await self.connection.lrange(list_key, 0, -1) + entries = [] + for raw in raw_entries: + item = json.loads(raw.decode()) + entries.append( + JournalEntry( + partition_key=json.loads(item["partition_key"]), + app_id=item["app_id"], + sequence_id=item["sequence_id"], + step_key=item["step_key"], + call_index=item["call_index"], + result=serde.deserialize( + json.loads(item["result_json"]), **self.serde_kwargs + ), + ) + ) + entries.sort(key=lambda e: e.call_index) + return entries + async def cleanup(self): """Closes the connection to the database.""" await self.connection.aclose() diff --git a/tests/core/test_durable_persisters.py b/tests/core/test_durable_persisters.py index 69404540e..e38348167 100644 --- a/tests/core/test_durable_persisters.py +++ b/tests/core/test_durable_persisters.py @@ -321,6 +321,151 @@ async def 
test_aiosqlite_journal_round_trip(aiosqlite_persister): assert [e.result for e in journal] == ["result-a", "result-b"] +# --------------------------------------------------------------------------- +# Redis durable storage tests — skipped unless BURR_CI_INTEGRATION_TESTS=true +# --------------------------------------------------------------------------- + + +@pytest.fixture +def redis_persister(): + from burr.integrations.persisters.b_redis import RedisBasePersister + + persister = RedisBasePersister.from_values( + host=os.environ.get("REDIS_HOST", "localhost"), + port=int(os.environ.get("REDIS_PORT", "6379")), + db=int(os.environ.get("REDIS_DB", "15")), + ) + persister.connection.flushdb() + yield persister + persister.connection.flushdb() + persister.connection.close() + + +@_pg_integration +def test_redis_supports_durable_storage(redis_persister): + assert supports_durable_storage(redis_persister) is True + + +@_pg_integration +def test_redis_suspension_round_trip(redis_persister): + redis_persister.save_suspension(_record()) + loaded = redis_persister.load_suspension("pk", "app", "approval") + assert loaded.suspension_id == "sus-1" + assert loaded.state == {"draft": "d"} + assert loaded.inputs == {"x": 1} + assert loaded.schema_json == {"type": "object"} + assert loaded.resolved is False + + +@_pg_integration +def test_redis_load_suspension_returns_resolved_record(redis_persister): + # Contract: load_suspension returns the record whether or not it is + # resolved; the caller checks record.resolved for resume-once idempotency. 
+ redis_persister.save_suspension(_record()) + redis_persister.mark_suspension_resolved("sus-1") + loaded = redis_persister.load_suspension("pk", "app", "approval") + assert loaded is not None + assert loaded.resolved is True + + +@_pg_integration +def test_redis_mark_resolved_is_conditional(redis_persister): + redis_persister.save_suspension(_record()) + first = redis_persister.mark_suspension_resolved("sus-1") + second = redis_persister.mark_suspension_resolved("sus-1") + # First call resolves; second call is a no-op (resume-once). + assert first is True + assert second is False + + +@_pg_integration +def test_redis_journal_round_trip(redis_persister): + redis_persister.save_journal_entry( + JournalEntry("pk", "app", 4, "summarize", 0, "result-a") + ) + redis_persister.save_journal_entry( + JournalEntry("pk", "app", 4, "translate", 1, "result-b") + ) + journal = redis_persister.load_journal("pk", "app", 4) + assert [e.call_index for e in journal] == [0, 1] + assert [e.result for e in journal] == ["result-a", "result-b"] + + +# --------------------------------------------------------------------------- +# Async Redis durable storage tests — skipped unless BURR_CI_INTEGRATION_TESTS=true +# --------------------------------------------------------------------------- + + +@pytest_asyncio.fixture +async def async_redis_persister(): + from burr.integrations.persisters.b_redis import AsyncRedisBasePersister + + persister = AsyncRedisBasePersister.from_values( + host=os.environ.get("REDIS_HOST", "localhost"), + port=int(os.environ.get("REDIS_PORT", "6379")), + db=int(os.environ.get("REDIS_DB", "15")), + ) + await persister.connection.flushdb() + yield persister + await persister.connection.flushdb() + await persister.connection.aclose() + + +@_pg_integration +@pytest.mark.asyncio +async def test_async_redis_supports_durable_storage(async_redis_persister): + assert supports_durable_storage(async_redis_persister) is True + + +@_pg_integration +@pytest.mark.asyncio +async 
def test_async_redis_suspension_round_trip(async_redis_persister): + await async_redis_persister.save_suspension(_record()) + loaded = await async_redis_persister.load_suspension("pk", "app", "approval") + assert loaded.suspension_id == "sus-1" + assert loaded.state == {"draft": "d"} + assert loaded.inputs == {"x": 1} + assert loaded.schema_json == {"type": "object"} + assert loaded.resolved is False + + +@_pg_integration +@pytest.mark.asyncio +async def test_async_redis_load_suspension_returns_resolved_record(async_redis_persister): + # Contract: load_suspension returns the record whether or not it is + # resolved; the caller checks record.resolved for resume-once idempotency. + await async_redis_persister.save_suspension(_record()) + await async_redis_persister.mark_suspension_resolved("sus-1") + loaded = await async_redis_persister.load_suspension("pk", "app", "approval") + assert loaded is not None + assert loaded.resolved is True + + +@_pg_integration +@pytest.mark.asyncio +async def test_async_redis_mark_resolved_is_conditional(async_redis_persister): + await async_redis_persister.save_suspension(_record()) + first = await async_redis_persister.mark_suspension_resolved("sus-1") + second = await async_redis_persister.mark_suspension_resolved("sus-1") + # First call resolves; second call is a no-op (resume-once). 
+ assert first is True + assert second is False + + +@_pg_integration +@pytest.mark.asyncio +async def test_async_redis_journal_round_trip(async_redis_persister): + await async_redis_persister.save_journal_entry( + JournalEntry("pk", "app", 4, "summarize", 0, "result-a") + ) + await async_redis_persister.save_journal_entry( + JournalEntry("pk", "app", 4, "translate", 1, "result-b") + ) + journal = await async_redis_persister.load_journal("pk", "app", 4) + assert [e.call_index for e in journal] == [0, 1] + assert [e.result for e in journal] == ["result-a", "result-b"] + + def test_deprecated_postgresql_shim_inherits_durable_storage(): """The deprecated ``burr.integrations.persisters.postgresql.PostgreSQLPersister`` is a subclass of the canonical psycopg2 persister, so it must inherit the From 855279894ad66afeb7359c1f662a2d7d5a7914f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 19:43:55 -0300 Subject: [PATCH 46/57] feat: pymongo persister durable storage --- burr/integrations/persisters/b_pymongo.py | 138 +++++++++++++++++++++- tests/core/test_durable_persisters.py | 86 ++++++++++++++ 2 files changed, 222 insertions(+), 2 deletions(-) diff --git a/burr/integrations/persisters/b_pymongo.py b/burr/integrations/persisters/b_pymongo.py index ba02438e9..9a8dc6163 100644 --- a/burr/integrations/persisters/b_pymongo.py +++ b/burr/integrations/persisters/b_pymongo.py @@ -20,9 +20,10 @@ from datetime import datetime, timezone from typing import Literal, Optional -from pymongo import MongoClient +from pymongo import ASCENDING, DESCENDING, MongoClient -from burr.core import persistence, state +from burr.core import persistence, serde, state +from burr.core.durable import JournalEntry, SuspensionRecord logger = logging.getLogger(__name__) @@ -96,6 +97,139 @@ def __init__( self.collection = self.db[collection_name] self.serde_kwargs = serde_kwargs or {} + def initialize(self): + """Creates indexes for the two 
durable-execution + collections (``burr_suspensions`` and ``burr_journal``). + + Index creation in MongoDB is idempotent — calling this multiple times + is safe. + """ + self.db["burr_suspensions"].create_index( + [ + ("partition_key", ASCENDING), + ("app_id", ASCENDING), + ("channel", ASCENDING), + ("created_at", DESCENDING), + ] + ) + self.db["burr_journal"].create_index( + [ + ("partition_key", ASCENDING), + ("app_id", ASCENDING), + ("sequence_id", ASCENDING), + ("step_key", ASCENDING), + ], + unique=True, + ) + + def save_suspension(self, record: SuspensionRecord) -> None: + """Persist a suspension record into the ``burr_suspensions`` collection.""" + doc = { + "_id": record.suspension_id, + "suspension_id": record.suspension_id, + "partition_key": record.partition_key, + "app_id": record.app_id, + "sequence_id": record.sequence_id, + "position": record.position, + "channel": record.channel, + "schema_json": record.schema_json, + "metadata": serde.serialize(record.metadata, **self.serde_kwargs) + if record.metadata is not None + else None, + "inputs": serde.serialize(record.inputs, **self.serde_kwargs), + "state": serde.serialize(record.state, **self.serde_kwargs), + "created_at": record.created_at, + "resolved": record.resolved, + } + self.db["burr_suspensions"].update_one( + {"_id": record.suspension_id}, + {"$set": doc}, + upsert=True, + ) + + def load_suspension( + self, partition_key: Optional[str], app_id: str, channel: str + ) -> Optional[SuspensionRecord]: + """Load the most recent suspension record for (partition_key, app_id, channel). + + Returns the record whether or not it is resolved; callers check + ``record.resolved`` for resume-once idempotency. Returns ``None`` + when no record exists for this combination. 
+ """ + doc = self.db["burr_suspensions"].find_one( + {"partition_key": partition_key, "app_id": app_id, "channel": channel}, + sort=[("created_at", DESCENDING)], + ) + if doc is None: + return None + return SuspensionRecord( + suspension_id=doc["suspension_id"], + partition_key=doc["partition_key"], + app_id=doc["app_id"], + sequence_id=doc["sequence_id"], + position=doc["position"], + channel=doc["channel"], + schema_json=doc.get("schema_json"), + metadata=serde.deserialize(doc["metadata"], **self.serde_kwargs) + if doc.get("metadata") is not None + else None, + inputs=serde.deserialize(doc["inputs"], **self.serde_kwargs), + state=serde.deserialize(doc["state"], **self.serde_kwargs), + created_at=doc["created_at"], + resolved=bool(doc["resolved"]), + ) + + def mark_suspension_resolved(self, suspension_id: str) -> bool: + """Mark a suspension consumed. Conditional update for resume-once idempotency. + + :return: ``True`` if a document was updated (first call), ``False`` if + already resolved or not found (no-op). 
+ """ + result = self.db["burr_suspensions"].update_one( + {"_id": suspension_id, "resolved": False}, + {"$set": {"resolved": True}}, + ) + return result.modified_count == 1 + + def save_journal_entry(self, entry: JournalEntry) -> None: + """Persist one memoized sub-step into the ``burr_journal`` collection.""" + filter_doc = { + "partition_key": entry.partition_key, + "app_id": entry.app_id, + "sequence_id": entry.sequence_id, + "step_key": entry.step_key, + } + update_doc = { + "$set": { + "partition_key": entry.partition_key, + "app_id": entry.app_id, + "sequence_id": entry.sequence_id, + "step_key": entry.step_key, + "call_index": entry.call_index, + "result": serde.serialize(entry.result, **self.serde_kwargs), + } + } + self.db["burr_journal"].update_one(filter_doc, update_doc, upsert=True) + + def load_journal( + self, partition_key: Optional[str], app_id: str, sequence_id: int + ) -> list[JournalEntry]: + """Load journal entries for a suspended action, ordered by call_index.""" + cursor = self.db["burr_journal"].find( + {"partition_key": partition_key, "app_id": app_id, "sequence_id": sequence_id} + ).sort("call_index", ASCENDING) + return [ + JournalEntry( + partition_key=doc["partition_key"], + app_id=doc["app_id"], + sequence_id=doc["sequence_id"], + step_key=doc["step_key"], + call_index=doc["call_index"], + result=serde.deserialize(doc["result"], **self.serde_kwargs), + ) + for doc in cursor + ] + def __enter__(self): return self diff --git a/tests/core/test_durable_persisters.py b/tests/core/test_durable_persisters.py index e38348167..3bb8dc6e2 100644 --- a/tests/core/test_durable_persisters.py +++ b/tests/core/test_durable_persisters.py @@ -466,6 +466,92 @@ async def test_async_redis_journal_round_trip(async_redis_persister): assert [e.result for e in journal] == ["result-a", "result-b"] +# --------------------------------------------------------------------------- +# MongoDB (pymongo) durable storage tests — skipped unless 
BURR_CI_INTEGRATION_TESTS=true +# --------------------------------------------------------------------------- + + +@pytest.fixture +def mongo_persister(): + from burr.integrations.persisters.b_pymongo import MongoDBBasePersister + from pymongo import MongoClient + + client = MongoClient(os.environ.get("MONGO_URI", "mongodb://localhost:27017")) + db_name = os.environ.get("MONGO_DB", "burr_durable_test") + persister = MongoDBBasePersister( + client=client, db_name=db_name, collection_name="burr_state_durable_test" + ) + persister.initialize() + yield persister + client.drop_database(db_name) + client.close() + + +@_pg_integration +def test_pymongo_supports_durable_storage(mongo_persister): + assert supports_durable_storage(mongo_persister) is True + + +@_pg_integration +def test_pymongo_suspension_round_trip(mongo_persister): + mongo_persister.save_suspension(_record()) + loaded = mongo_persister.load_suspension("pk", "app", "approval") + assert loaded.suspension_id == "sus-1" + assert loaded.state == {"draft": "d"} + assert loaded.inputs == {"x": 1} + assert loaded.schema_json == {"type": "object"} + assert loaded.resolved is False + + +@_pg_integration +def test_pymongo_load_suspension_returns_resolved_record(mongo_persister): + # Contract: load_suspension returns the record whether or not it is + # resolved; the caller checks record.resolved for resume-once idempotency. + mongo_persister.save_suspension(_record()) + mongo_persister.mark_suspension_resolved("sus-1") + loaded = mongo_persister.load_suspension("pk", "app", "approval") + assert loaded is not None + assert loaded.resolved is True + + +@_pg_integration +def test_pymongo_mark_resolved_is_conditional(mongo_persister): + mongo_persister.save_suspension(_record()) + first = mongo_persister.mark_suspension_resolved("sus-1") + second = mongo_persister.mark_suspension_resolved("sus-1") + # First call resolves a row; second call resolves nothing (resume-once). 
+ assert first is True + assert second is False + + +@_pg_integration +def test_pymongo_journal_round_trip(mongo_persister): + mongo_persister.save_journal_entry( + JournalEntry("pk", "app", 4, "summarize", 0, "result-a") + ) + mongo_persister.save_journal_entry( + JournalEntry("pk", "app", 4, "translate", 1, "result-b") + ) + journal = mongo_persister.load_journal("pk", "app", 4) + assert [e.call_index for e in journal] == [0, 1] + assert [e.result for e in journal] == ["result-a", "result-b"] + + +def test_deprecated_mongodb_shim_inherits_durable_storage(): + """The deprecated ``burr.integrations.persisters.b_mongodb.MongoDBBasePersister`` + is a subclass of the canonical pymongo persister, so it must inherit the + durable-storage overrides without re-declaring them.""" + from unittest.mock import MagicMock + + from burr.integrations.persisters.b_mongodb import ( + MongoDBBasePersister as DeprecatedMongoShim, + ) + + client = MagicMock() + instance = DeprecatedMongoShim(client=client, db_name="x", collection_name="y") + assert supports_durable_storage(instance) is True + + def test_deprecated_postgresql_shim_inherits_durable_storage(): """The deprecated ``burr.integrations.persisters.postgresql.PostgreSQLPersister`` is a subclass of the canonical psycopg2 persister, so it must inherit the From 87d98445f54a1201f95fc48cb8a5b1e5feea9b83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 19:53:08 -0300 Subject: [PATCH 47/57] test: drop mongo test db on fixture setup to absorb stale state --- tests/core/test_durable_persisters.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/core/test_durable_persisters.py b/tests/core/test_durable_persisters.py index 3bb8dc6e2..d3fb53887 100644 --- a/tests/core/test_durable_persisters.py +++ b/tests/core/test_durable_persisters.py @@ -478,6 +478,10 @@ def mongo_persister(): client = MongoClient(os.environ.get("MONGO_URI", "mongodb://localhost:27017")) db_name = 
os.environ.get("MONGO_DB", "burr_durable_test") + # Drop on setup too: absorbs leftover state from a prior run that + # terminated before teardown (OOM, Ctrl-C). Otherwise a stale + # resolved=True row poisons mark_resolved_is_conditional. + client.drop_database(db_name) persister = MongoDBBasePersister( client=client, db_name=db_name, collection_name="burr_state_durable_test" ) From 0b55e6caaa4e0280aa2a961d918480de7197c892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 19:59:20 -0300 Subject: [PATCH 48/57] feat: async _ahandle_suspension wired into _astep and durable AsyncInMemoryPersister --- burr/core/application.py | 60 +++++++++++++++++++++++++++++++++++++- burr/core/persistence.py | 31 ++++++++++++++++++++ tests/core/test_durable.py | 41 ++++++++++++++++++++++++++ 3 files changed, 131 insertions(+), 1 deletion(-) diff --git a/burr/core/application.py b/burr/core/application.py index 5a416b1a9..29a9853b5 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -1161,6 +1161,64 @@ def _handle_suspension(self, action, action_inputs, suspended): pass self._suspended = record + async def _ahandle_suspension(self, action, action_inputs, suspended): + """Async sibling of _handle_suspension. 
Called from _astep when the action suspends.""" + from burr.core.durable import ( + SuspensionRecord, + supports_durable_storage, + write_journal_into_state, + write_suspension_into_state, + ) + + record = SuspensionRecord( + suspension_id=str(uuid.uuid4()), + partition_key=self._partition_key, + app_id=self._uid, + sequence_id=self.sequence_id, + position=action.name, + channel=suspended.channel, + schema_json=suspended.schema_json, + metadata=suspended.metadata, + inputs=action_inputs, + state=dict(self._state.get_all()), + created_at=system.now().isoformat(), + resolved=False, + ) + persister = self._state_persister + if persister is not None and supports_durable_storage(persister): + if persister.is_async(): + await persister.save_suspension(record) + for entry in self._journal_sink: + await persister.save_journal_entry(entry) + else: + persister.save_suspension(record) + for entry in self._journal_sink: + persister.save_journal_entry(entry) + elif persister is not None: + # In-state fallback: embed the record + journal into State only. + # We deliberately do NOT call persister.save here. The post_run_step + # lifecycle hook fires for this suspended step and PersisterHook.save + # persists the embedded State once. Saving here too would write the + # same (partition_key, app_id, sequence_id, position) row twice and + # break persisters with a UNIQUE constraint (e.g. SQLitePersister). + state = write_suspension_into_state(self._state, record) + state = write_journal_into_state(state, self._journal_sink) + self._set_state(state) + # NOTE: post_action_suspend is registered in Milestone 5. Guard it so it is a + # safe no-op until the hook is added to REGISTERED_SYNC_HOOKS. 
+ try: + await self._adapter_set.call_all_lifecycle_hooks_sync_and_async( + "post_action_suspend", + app_id=self._uid, + partition_key=self._partition_key, + action=action, + sequence_id=self.sequence_id, + suspension=record, + ) + except ValueError: + pass + self._suspended = record + @property def suspended(self): """The SuspensionRecord if the last run() suspended, else None.""" @@ -1316,7 +1374,7 @@ async def _astep(self, inputs: Optional[Dict[str, Any]], _run_hooks: bool = True except _Suspended as suspended: suspended_signal = suspended try: - self._handle_suspension(next_action, action_inputs, suspended) + await self._ahandle_suspension(next_action, action_inputs, suspended) except Exception as handler_exc: exc = handler_exc suspended_signal = None diff --git a/burr/core/persistence.py b/burr/core/persistence.py index 3a3fb17ac..c7c932983 100644 --- a/burr/core/persistence.py +++ b/burr/core/persistence.py @@ -925,6 +925,8 @@ class AsyncInMemoryPersister(AsyncBaseStatePersister): def __init__(self): self._storage = defaultdict(lambda: defaultdict(list)) + self._suspensions = {} + self._journal = {} async def load( self, partition_key: str, app_id: Optional[str], sequence_id: Optional[int] = None, **kwargs @@ -972,6 +974,35 @@ async def save( # Store the state self._storage[partition_key][app_id].append(persisted_state) + async def save_suspension(self, record: SuspensionRecord) -> None: + self._suspensions[(record.partition_key, record.app_id, record.channel)] = record + + async def load_suspension( + self, partition_key: Optional[str], app_id: str, channel: str + ) -> Optional[SuspensionRecord]: + return self._suspensions.get((partition_key, app_id, channel)) + + async def mark_suspension_resolved(self, suspension_id: str) -> bool: + for key, record in self._suspensions.items(): + if record.suspension_id == suspension_id: + if record.resolved: + return False + record.resolved = True + return True + return False + + async def save_journal_entry(self, entry: 
JournalEntry) -> None: + bucket = self._journal.setdefault( + (entry.partition_key, entry.app_id, entry.sequence_id), [] + ) + bucket.append(entry) + + async def load_journal( + self, partition_key: Optional[str], app_id: str, sequence_id: int + ) -> list[JournalEntry]: + bucket = self._journal.get((partition_key, app_id, sequence_id), []) + return sorted(bucket, key=lambda e: e.call_index) + SQLLitePersister = SQLitePersister diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index db5ced7ab..44b3e0ab1 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -842,3 +842,44 @@ async def async_side_effect(): assert len(journal) == 1 assert journal[0].step_key == "step" assert journal[0].result == "value" + + +@pytest.mark.asyncio +async def test_ahandle_suspension_persists_via_async_durable_persister(): + from burr.core import ApplicationBuilder, GraphBuilder, State, action + from burr.core.persistence import AsyncInMemoryPersister + + @action(reads=[], writes=["seen"]) + async def astart(state): + return state.update(seen=True) + + @action(reads=["seen"], writes=["done"]) + async def agate(state, __context): + decision = __context.suspend("approval") + return state.update(done=decision["approved"]) + + persister = AsyncInMemoryPersister() + graph = ( + GraphBuilder() + .with_actions(astart=astart, agate=agate) + .with_transitions(("astart", "agate")) + .build() + ) + app = await ( + ApplicationBuilder() + .with_graph(graph) + .with_entrypoint("astart") + .with_state(State({})) + .with_identifiers(app_id="arun1", partition_key="pk1") + .with_state_persister(persister) + .abuild() + ) + await app.arun(halt_after=["agate"]) + assert app.suspended is not None + + # The async durable path persists via await persister.save_suspension(...), + # so the record lives in persister._suspensions, NOT inside the State blob. 
+ record = await persister.load_suspension("pk1", "arun1", "approval") + assert record is not None + assert record.channel == "approval" + assert record.resolved is False From 30818966b97ecb4779e5a51294b325526de38a77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 20:02:51 -0300 Subject: [PATCH 49/57] feat: aresume() now supports async persisters end-to-end Remove the NotImplementedError guard for async persisters and add _aload_suspension, _aload_journal, _arebuild async helpers that handle all four combos (durable/non-durable x async/sync). aresume() now awaits async persister calls and branches to sync calls for sync persisters throughout the load/journal/rebuild/mark-resolved path. --- burr/core/resume.py | 71 +++++++++++++++++++------- tests/core/test_durable_integration.py | 58 +++++++++++++++------ 2 files changed, 96 insertions(+), 33 deletions(-) diff --git a/burr/core/resume.py b/burr/core/resume.py index 29bb25a27..85d184eb5 100644 --- a/burr/core/resume.py +++ b/burr/core/resume.py @@ -43,6 +43,44 @@ def _load_journal(persister, partition_key, app_id, sequence_id, state): return read_journal_from_state(state) +async def _aload_suspension(persister, partition_key, app_id, channel): + if supports_durable_storage(persister): + if persister.is_async(): + return await persister.load_suspension(partition_key, app_id, channel) + return persister.load_suspension(partition_key, app_id, channel) + if persister.is_async(): + loaded = await persister.load(partition_key, app_id) + else: + loaded = persister.load(partition_key, app_id) + if loaded is None: + return None + return read_suspension_from_state(loaded["state"], channel) + + +async def _aload_journal(persister, partition_key, app_id, sequence_id, state): + if supports_durable_storage(persister): + if persister.is_async(): + return await persister.load_journal(partition_key, app_id, sequence_id) + return persister.load_journal(partition_key, app_id, sequence_id) + return 
read_journal_from_state(state) + + +async def _arebuild(persister, graph, app_id, partition_key, record): + from burr.core.application import ApplicationBuilder + + builder = ( + ApplicationBuilder() + .with_graph(graph) + .with_identifiers(app_id=app_id, partition_key=partition_key) + .with_entrypoint(record.position) + .with_state(State(record.state)) + .with_state_persister(persister) + ) + if persister.is_async(): + return await builder.abuild() + return builder.build() + + def _validate_payload(schema_json, payload): """Validate *payload* against *schema_json* using jsonschema. @@ -142,13 +180,10 @@ async def aresume( """Resume a suspended run by delivering ``payload`` to ``channel``. Runs the async action loop (``await app.arun(...)``), so async actions are - fully supported. Requires a **sync** persister in this release; passing an - async persister raises :exc:`NotImplementedError`. Full async-persister - support is deferred to a later milestone. + fully supported. Both sync and async persisters are supported (durable or + non-durable). - :param persister: A sync state persister (durable or non-durable). Async - persisters are not supported in this release and raise - :exc:`NotImplementedError`. + :param persister: A state persister (sync or async, durable or non-durable). :param graph: The :class:`~burr.core.graph.Graph` to rebuild the application. :param app_id: Identifier of the application run to resume. :param partition_key: Partition key used when the run was persisted. @@ -159,32 +194,29 @@ async def aresume( * *Durable persisters* -- resuming an already-resolved suspension is an idempotent no-op: the call returns the latest persisted state unchanged. - * *Sync non-durable persisters* -- the suspension lives in + * *Non-durable persisters* -- the suspension lives in ``state['__burr_durable__']`` and is overwritten as the resumed run progresses. A second ``aresume()`` call after the first completes raises :exc:`ValueError`. 
""" - if persister.is_async(): - raise NotImplementedError( - "aresume() does not support async persisters in this release; " - "use a sync persister (durable or non-durable). Async actions are " - "still supported with a sync persister." - ) - record = _load_suspension(persister, partition_key, app_id, channel) + record = await _aload_suspension(persister, partition_key, app_id, channel) if record is None: raise ValueError( f"No suspension found for app_id={app_id!r} " f"(never suspended, or already resolved on a persister without durable storage)." ) if record.resolved: - loaded = persister.load(partition_key, app_id) + if persister.is_async(): + loaded = await persister.load(partition_key, app_id) + else: + loaded = persister.load(partition_key, app_id) return loaded["state"] if loaded else State(record.state) _validate_payload(record.schema_json, payload) - app = _rebuild(persister, graph, app_id, partition_key, record) + app = await _arebuild(persister, graph, app_id, partition_key, record) app._resume_signals = {channel: payload} - app._loaded_journal = _load_journal( + app._loaded_journal = await _aload_journal( persister, partition_key, app_id, record.sequence_id, record.state ) app._suspended = None @@ -194,6 +226,9 @@ async def aresume( # In-state fallback does not durably mark suspensions resolved; a second # resume will raise (see docstring). 
if supports_durable_storage(persister): - persister.mark_suspension_resolved(record.suspension_id) + if persister.is_async(): + await persister.mark_suspension_resolved(record.suspension_id) + else: + persister.mark_suspension_resolved(record.suspension_id) return app.state diff --git a/tests/core/test_durable_integration.py b/tests/core/test_durable_integration.py index 6eca3817f..9bd21b5dd 100644 --- a/tests/core/test_durable_integration.py +++ b/tests/core/test_durable_integration.py @@ -268,27 +268,55 @@ def test_resume_in_state_fallback_second_call_raises(): ) -async def test_aresume_async_persister_raises(): - """aresume() must raise NotImplementedError immediately for any async persister. +async def test_async_suspend_resume_with_async_durable_persister(): + """aresume() works end-to-end with an async durable persister. - In this release, aresume() rejects all async persisters regardless of whether - they implement durable storage. AsyncInMemoryPersister is used here as the - canonical async persister example. + Uses AsyncInMemoryPersister (async + durable storage) to exercise the full + async load/journal/rebuild path introduced in Task 4.6. 
""" from burr.core import aresume + @action(reads=[], writes=["seen"]) + async def astart(state): + return state.update(seen=True) + + @action(reads=["seen"], writes=["done"]) + async def agate(state, __context): + decision = __context.suspend("approval") + return state.update(done=decision["approved"]) + + graph = ( + GraphBuilder() + .with_actions(astart=astart, agate=agate) + .with_transitions(("astart", "agate")) + .build() + ) persister = AsyncInMemoryPersister() - graph = _graph() + app = await ( + ApplicationBuilder() + .with_graph(graph) + .with_entrypoint("astart") + .with_state(State({})) + .with_identifiers(app_id="async_durable_run1", partition_key="pk1") + .with_state_persister(persister) + .abuild() + ) + await app.arun(halt_after=["agate"]) + assert app.suspended is not None - with pytest.raises(NotImplementedError, match="does not support async persisters"): - await aresume( - persister=persister, - graph=graph, - app_id="dummy-run", - partition_key="pk1", - channel="approval", - payload={}, - ) + final_state = await aresume( + persister=persister, + graph=graph, + app_id="async_durable_run1", + partition_key="pk1", + channel="approval", + payload={"approved": True}, + ) + assert final_state["done"] is True + + # Confirm async mark_suspension_resolved was called. 
+ record = await persister.load_suspension("pk1", "async_durable_run1", "approval") + assert record.resolved is True # --- Task 3.6: durable side effect runs exactly once across suspend/resume ---- From 8a6177aeed0e6ad83fe99a46965289679f83efcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 20:06:24 -0300 Subject: [PATCH 50/57] docs: correct AsyncInMemoryPersister docstring (Sync -> Async) --- burr/core/persistence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/burr/core/persistence.py b/burr/core/persistence.py index c7c932983..4dfd311d9 100644 --- a/burr/core/persistence.py +++ b/burr/core/persistence.py @@ -921,7 +921,7 @@ def load_journal(self, partition_key: Optional[str], app_id: str, sequence_id: i class AsyncInMemoryPersister(AsyncBaseStatePersister): - """Sync in-memory persister for testing purposes. This is not recommended for production use.""" + """Async in-memory persister for testing purposes. This is not recommended for production use.""" def __init__(self): self._storage = defaultdict(lambda: defaultdict(list)) From 7c00c1a035efd12ca77ab899623b6fe78a74aa93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 20:11:45 -0300 Subject: [PATCH 51/57] feat: add post_action_suspend and pre_action_resume lifecycle hooks --- burr/lifecycle/__init__.py | 24 +++++--- burr/lifecycle/base.py | 95 +++++++++++++++++++++++++++++++ tests/lifecycle/test_lifecycle.py | 30 ++++++++++ 3 files changed, 141 insertions(+), 8 deletions(-) create mode 100644 tests/lifecycle/test_lifecycle.py diff --git a/burr/lifecycle/__init__.py b/burr/lifecycle/__init__.py index 4ae24073a..bd8f29537 100644 --- a/burr/lifecycle/__init__.py +++ b/burr/lifecycle/__init__.py @@ -17,12 +17,16 @@ from burr.lifecycle.base import ( LifecycleAdapter, + PostActionSuspendHook, + PostActionSuspendHookAsync, PostApplicationCreateHook, PostApplicationExecuteCallHook, PostApplicationExecuteCallHookAsync, 
PostEndSpanHook, PostRunStepHook, PostRunStepHookAsync, + PreActionResumeHook, + PreActionResumeHookAsync, PreApplicationExecuteCallHook, PreApplicationExecuteCallHookAsync, PreRunStepHook, @@ -32,17 +36,21 @@ from burr.lifecycle.default import StateAndResultsFullLogger __all__ = [ - "PreRunStepHook", - "PreRunStepHookAsync", + "LifecycleAdapter", + "PostActionSuspendHook", + "PostActionSuspendHookAsync", + "PostApplicationCreateHook", + "PostApplicationExecuteCallHook", + "PostApplicationExecuteCallHookAsync", + "PostEndSpanHook", "PostRunStepHook", "PostRunStepHookAsync", + "PreActionResumeHook", + "PreActionResumeHookAsync", "PreApplicationExecuteCallHook", "PreApplicationExecuteCallHookAsync", - "PostApplicationExecuteCallHook", - "PostApplicationExecuteCallHookAsync", - "LifecycleAdapter", - "StateAndResultsFullLogger", - "PostApplicationCreateHook", - "PostEndSpanHook", + "PreRunStepHook", + "PreRunStepHookAsync", "PreStartSpanHook", + "StateAndResultsFullLogger", ] diff --git a/burr/lifecycle/base.py b/burr/lifecycle/base.py index 66d8bd7e6..e2a754698 100644 --- a/burr/lifecycle/base.py +++ b/burr/lifecycle/base.py @@ -25,6 +25,7 @@ if TYPE_CHECKING: # type-checking-only for a circular import from burr.core import State, Action, ApplicationGraph + from burr.core.durable import SuspensionRecord from burr.visibility import ActionSpan from burr.lifecycle.internal import lifecycle @@ -142,6 +143,96 @@ async def post_run_step( pass +@lifecycle.base_hook("post_action_suspend") +class PostActionSuspendHook(abc.ABC): + """Hook that runs after an action suspends the run and the suspension is persisted.""" + + @abc.abstractmethod + def post_action_suspend( + self, + *, + app_id: str, + partition_key: Optional[str], + action: "Action", + sequence_id: int, + suspension: "SuspensionRecord", + **future_kwargs: Any, + ): + """Run after a step suspends. 
+ + :param app_id: Application ID + :param partition_key: Partition key of the run (may be None) + :param action: Action that suspended + :param sequence_id: Sequence ID of the suspended step + :param suspension: SuspensionRecord that was persisted + :param future_kwargs: Future keyword arguments + """ + pass + + +@lifecycle.base_hook("post_action_suspend") +class PostActionSuspendHookAsync(abc.ABC): + """Async hook that runs after an action suspends the run and the suspension is persisted.""" + + @abc.abstractmethod + async def post_action_suspend( + self, + *, + app_id: str, + partition_key: Optional[str], + action: "Action", + sequence_id: int, + suspension: "SuspensionRecord", + **future_kwargs: Any, + ): + pass + + +@lifecycle.base_hook("pre_action_resume") +class PreActionResumeHook(abc.ABC): + """Hook that runs just before a suspended action is re-executed on resume.""" + + @abc.abstractmethod + def pre_action_resume( + self, + *, + app_id: str, + partition_key: Optional[str], + action: "Action", + sequence_id: int, + channel: str, + **future_kwargs: Any, + ): + """Run just before a suspended action re-executes. 
+ + :param app_id: Application ID + :param partition_key: Partition key (may be None) + :param action: Action being re-entered + :param sequence_id: Sequence ID of the suspended step + :param channel: Suspension channel being resumed + :param future_kwargs: Future keyword arguments + """ + pass + + +@lifecycle.base_hook("pre_action_resume") +class PreActionResumeHookAsync(abc.ABC): + """Async hook that runs just before a suspended action is re-executed on resume.""" + + @abc.abstractmethod + async def pre_action_resume( + self, + *, + app_id: str, + partition_key: Optional[str], + action: "Action", + sequence_id: int, + channel: str, + **future_kwargs: Any, + ): + pass + + @lifecycle.base_hook("post_application_create") class PostApplicationCreateHook(abc.ABC): """Synchronous hook that runs post instantiation of an ``Application`` @@ -500,6 +591,10 @@ async def post_end_stream( PreRunStepHookAsync, PostRunStepHook, PostRunStepHookAsync, + PostActionSuspendHook, + PostActionSuspendHookAsync, + PreActionResumeHook, + PreActionResumeHookAsync, PreApplicationExecuteCallHook, PreApplicationExecuteCallHookAsync, PostApplicationExecuteCallHook, diff --git a/tests/lifecycle/test_lifecycle.py b/tests/lifecycle/test_lifecycle.py new file mode 100644 index 000000000..cbb39db7c --- /dev/null +++ b/tests/lifecycle/test_lifecycle.py @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +def test_suspend_resume_hooks_exist_and_are_exported(): + from burr.lifecycle import ( + PostActionSuspendHook, + PostActionSuspendHookAsync, + PreActionResumeHook, + PreActionResumeHookAsync, + ) + + assert hasattr(PostActionSuspendHook, "post_action_suspend") + assert hasattr(PostActionSuspendHookAsync, "post_action_suspend") + assert hasattr(PreActionResumeHook, "pre_action_resume") + assert hasattr(PreActionResumeHookAsync, "pre_action_resume") From 5374eae6239a123938e0eebd655f8eb52e2a96eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 20:18:22 -0300 Subject: [PATCH 52/57] feat: fire suspend/resume lifecycle hooks from the run loop Remove the temporary try/except ValueError guards around post_action_suspend in _handle_suspension and _ahandle_suspension now that the hooks are registered. Extend resume()/aresume() with an optional hooks parameter, thread it through _rebuild/_arebuild, and fire pre_action_resume before re-running the action. Covers sync post_suspend, sync pre_resume and async pre_resume with three new tests. 
--- burr/core/application.py | 42 +++++-------- burr/core/resume.py | 39 +++++++++--- tests/core/test_durable.py | 122 +++++++++++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 34 deletions(-) diff --git a/burr/core/application.py b/burr/core/application.py index 29a9853b5..7e0ee51c5 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -1146,19 +1146,14 @@ def _handle_suspension(self, action, action_inputs, suspended): state = write_suspension_into_state(self._state, record) state = write_journal_into_state(state, self._journal_sink) self._set_state(state) - # NOTE: post_action_suspend is registered in Milestone 5. Guard it so it is a - # safe no-op until the hook is added to REGISTERED_SYNC_HOOKS. - try: - self._adapter_set.call_all_lifecycle_hooks_sync( - "post_action_suspend", - app_id=self._uid, - partition_key=self._partition_key, - action=action, - sequence_id=self.sequence_id, - suspension=record, - ) - except ValueError: - pass + self._adapter_set.call_all_lifecycle_hooks_sync( + "post_action_suspend", + app_id=self._uid, + partition_key=self._partition_key, + action=action, + sequence_id=self.sequence_id, + suspension=record, + ) self._suspended = record async def _ahandle_suspension(self, action, action_inputs, suspended): @@ -1204,19 +1199,14 @@ async def _ahandle_suspension(self, action, action_inputs, suspended): state = write_suspension_into_state(self._state, record) state = write_journal_into_state(state, self._journal_sink) self._set_state(state) - # NOTE: post_action_suspend is registered in Milestone 5. Guard it so it is a - # safe no-op until the hook is added to REGISTERED_SYNC_HOOKS. 
- try: - await self._adapter_set.call_all_lifecycle_hooks_sync_and_async( - "post_action_suspend", - app_id=self._uid, - partition_key=self._partition_key, - action=action, - sequence_id=self.sequence_id, - suspension=record, - ) - except ValueError: - pass + await self._adapter_set.call_all_lifecycle_hooks_sync_and_async( + "post_action_suspend", + app_id=self._uid, + partition_key=self._partition_key, + action=action, + sequence_id=self.sequence_id, + suspension=record, + ) self._suspended = record @property diff --git a/burr/core/resume.py b/burr/core/resume.py index 85d184eb5..4ff463bcc 100644 --- a/burr/core/resume.py +++ b/burr/core/resume.py @@ -18,7 +18,7 @@ """Top-level resume helpers for durable execution.""" import warnings -from typing import Any, Optional +from typing import Any, List, Optional from burr.core.durable import ( read_journal_from_state, @@ -65,7 +65,7 @@ async def _aload_journal(persister, partition_key, app_id, sequence_id, state): return read_journal_from_state(state) -async def _arebuild(persister, graph, app_id, partition_key, record): +async def _arebuild(persister, graph, app_id, partition_key, record, hooks=None): from burr.core.application import ApplicationBuilder builder = ( @@ -76,6 +76,8 @@ async def _arebuild(persister, graph, app_id, partition_key, record): .with_state(State(record.state)) .with_state_persister(persister) ) + for hook in (hooks or []): + builder = builder.with_hooks(hook) if persister.is_async(): return await builder.abuild() return builder.build() @@ -101,19 +103,20 @@ def _validate_payload(schema_json, payload): jsonschema.validate(instance=payload, schema=schema_json) -def _rebuild(persister, graph, app_id, partition_key, record): +def _rebuild(persister, graph, app_id, partition_key, record, hooks=None): from burr.core.application import ApplicationBuilder - app = ( + builder = ( ApplicationBuilder() .with_graph(graph) .with_identifiers(app_id=app_id, partition_key=partition_key) 
.with_entrypoint(record.position) .with_state(State(record.state)) .with_state_persister(persister) - .build() ) - return app + for hook in (hooks or []): + builder = builder.with_hooks(hook) + return builder.build() def resume( @@ -124,6 +127,7 @@ def resume( partition_key: Optional[str], channel: str, payload: Any, + hooks: Optional[List] = None, ): """Resume a suspended run by delivering ``payload`` to ``channel``. @@ -151,13 +155,22 @@ def resume( _validate_payload(record.schema_json, payload) - app = _rebuild(persister, graph, app_id, partition_key, record) + app = _rebuild(persister, graph, app_id, partition_key, record, hooks=hooks) app._resume_signals = {channel: payload} app._loaded_journal = _load_journal( persister, partition_key, app_id, record.sequence_id, record.state ) app._suspended = None + app._adapter_set.call_all_lifecycle_hooks_sync( + "pre_action_resume", + app_id=app_id, + partition_key=partition_key, + action=graph.get_action(record.position), + sequence_id=record.sequence_id, + channel=channel, + ) + app.run(halt_after=[]) # run to completion or the next suspend # In-state fallback does not durably mark suspensions resolved; a second @@ -176,6 +189,7 @@ async def aresume( partition_key: Optional[str], channel: str, payload: Any, + hooks: Optional[List] = None, ): """Resume a suspended run by delivering ``payload`` to ``channel``. 
@@ -214,13 +228,22 @@ async def aresume( _validate_payload(record.schema_json, payload) - app = await _arebuild(persister, graph, app_id, partition_key, record) + app = await _arebuild(persister, graph, app_id, partition_key, record, hooks=hooks) app._resume_signals = {channel: payload} app._loaded_journal = await _aload_journal( persister, partition_key, app_id, record.sequence_id, record.state ) app._suspended = None + await app._adapter_set.call_all_lifecycle_hooks_sync_and_async( + "pre_action_resume", + app_id=app_id, + partition_key=partition_key, + action=graph.get_action(record.position), + sequence_id=record.sequence_id, + channel=channel, + ) + await app.arun(halt_after=[]) # In-state fallback does not durably mark suspensions resolved; a second diff --git a/tests/core/test_durable.py b/tests/core/test_durable.py index 44b3e0ab1..76881e6a2 100644 --- a/tests/core/test_durable.py +++ b/tests/core/test_durable.py @@ -883,3 +883,125 @@ async def agate(state, __context): assert record is not None assert record.channel == "approval" assert record.resolved is False + + +# --------------------------------------------------------------------------- +# Milestone 5.2: lifecycle hook integration tests +# --------------------------------------------------------------------------- + + +def test_post_action_suspend_hook_fires(): + from burr.core import ApplicationBuilder, State, action + from burr.core.persistence import InMemoryPersister + from burr.lifecycle import PostActionSuspendHook + + fired = [] + + class Recorder(PostActionSuspendHook): + def post_action_suspend(self, *, app_id, partition_key, action, + sequence_id, suspension, **kw): + fired.append((suspension.channel, app_id, partition_key)) + + @action(reads=[], writes=["done"]) + def gate(state, __context): + return state.update(done=__context.suspend("approval")) + + app = ( + ApplicationBuilder() + .with_actions(gate=gate) + .with_entrypoint("gate") + .with_state(State({})) + 
.with_identifiers(app_id="h1", partition_key="pk") + .with_state_persister(InMemoryPersister()) + .with_hooks(Recorder()) + .build() + ) + app.run(halt_after=["gate"]) + assert fired == [("approval", "h1", "pk")] + + +def test_pre_action_resume_hook_fires(): + from burr.core import ApplicationBuilder, GraphBuilder, State, action + from burr.core.graph import Graph + from burr.core.persistence import InMemoryPersister + from burr.core.resume import resume + from burr.lifecycle import PreActionResumeHook + + fired = [] + + class Recorder(PreActionResumeHook): + def pre_action_resume(self, *, app_id, partition_key, action, + sequence_id, channel, **kw): + fired.append((channel, app_id, action.name)) + + @action(reads=[], writes=["approved"]) + def gate(state, __context): + return state.update(approved=__context.suspend("approval")) + + persister = InMemoryPersister() + graph: Graph = ( + GraphBuilder() + .with_actions(gate=gate) + .with_transitions() + .build() + ) + app = ( + ApplicationBuilder() + .with_graph(graph) + .with_entrypoint("gate") + .with_state(State({})) + .with_identifiers(app_id="r1", partition_key="pk") + .with_state_persister(persister) + .build() + ) + app.run(halt_after=["gate"]) + + resume( + persister=persister, graph=graph, app_id="r1", partition_key="pk", + channel="approval", payload=True, hooks=[Recorder()], + ) + assert fired == [("approval", "r1", "gate")] + + +@pytest.mark.asyncio +async def test_pre_action_resume_hook_fires_async(): + from burr.core import ApplicationBuilder, GraphBuilder, State, action + from burr.core.graph import Graph + from burr.core.persistence import AsyncInMemoryPersister + from burr.core.resume import aresume + from burr.lifecycle import PreActionResumeHookAsync + + fired = [] + + class Recorder(PreActionResumeHookAsync): + async def pre_action_resume(self, *, app_id, partition_key, action, + sequence_id, channel, **kw): + fired.append((channel, app_id, action.name)) + + @action(reads=[], writes=["approved"]) + 
async def gate(state, __context): + return state.update(approved=__context.suspend("approval")) + + persister = AsyncInMemoryPersister() + graph: Graph = ( + GraphBuilder() + .with_actions(gate=gate) + .with_transitions() + .build() + ) + app = await ( + ApplicationBuilder() + .with_graph(graph) + .with_entrypoint("gate") + .with_state(State({})) + .with_identifiers(app_id="ar1", partition_key="pk") + .with_state_persister(persister) + .abuild() + ) + await app.arun(halt_after=["gate"]) + + await aresume( + persister=persister, graph=graph, app_id="ar1", partition_key="pk", + channel="approval", payload=True, hooks=[Recorder()], + ) + assert fired == [("approval", "ar1", "gate")] From a595d3dcdc5de14b63e2304b6b86731834fb2f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 20:21:53 -0300 Subject: [PATCH 53/57] refactor: type and document hooks parameter on resume/aresume --- burr/core/resume.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/burr/core/resume.py b/burr/core/resume.py index 4ff463bcc..78bb41fd2 100644 --- a/burr/core/resume.py +++ b/burr/core/resume.py @@ -26,6 +26,7 @@ supports_durable_storage, ) from burr.core.state import State +from burr.lifecycle.base import LifecycleAdapter def _load_suspension(persister, partition_key, app_id, channel): @@ -65,7 +66,7 @@ async def _aload_journal(persister, partition_key, app_id, sequence_id, state): return read_journal_from_state(state) -async def _arebuild(persister, graph, app_id, partition_key, record, hooks=None): +async def _arebuild(persister, graph, app_id, partition_key, record, hooks: Optional[List[LifecycleAdapter]] = None): from burr.core.application import ApplicationBuilder builder = ( @@ -103,7 +104,7 @@ def _validate_payload(schema_json, payload): jsonschema.validate(instance=payload, schema=schema_json) -def _rebuild(persister, graph, app_id, partition_key, record, hooks=None): +def _rebuild(persister, graph, app_id, 
partition_key, record, hooks: Optional[List[LifecycleAdapter]] = None): from burr.core.application import ApplicationBuilder builder = ( @@ -127,7 +128,7 @@ def resume( partition_key: Optional[str], channel: str, payload: Any, - hooks: Optional[List] = None, + hooks: Optional[List[LifecycleAdapter]] = None, ): """Resume a suspended run by delivering ``payload`` to ``channel``. @@ -142,6 +143,10 @@ def resume( durable storage, the suspension lives in ``state['__burr_durable__']`` and is overwritten as the resumed run progresses; a second ``resume()`` call after the first completes raises ``ValueError``. + + :param hooks: Optional lifecycle adapters to register on the rebuilt application + before firing ``pre_action_resume``. Pass any adapter implementing + :class:`~burr.lifecycle.PreActionResumeHook` (or its async variant) here. """ record = _load_suspension(persister, partition_key, app_id, channel) if record is None: @@ -189,7 +194,7 @@ async def aresume( partition_key: Optional[str], channel: str, payload: Any, - hooks: Optional[List] = None, + hooks: Optional[List[LifecycleAdapter]] = None, ): """Resume a suspended run by delivering ``payload`` to ``channel``. @@ -203,6 +208,9 @@ async def aresume( :param partition_key: Partition key used when the run was persisted. :param channel: Name of the suspension channel to deliver ``payload`` to. :param payload: Value returned by ``suspend(channel)`` inside the action. + :param hooks: Optional lifecycle adapters to register on the rebuilt application + before firing ``pre_action_resume``. Pass any adapter implementing + :class:`~burr.lifecycle.PreActionResumeHook` (or its async variant) here. 
**Idempotency:** From 8a98f21aba0a77e9d3c3e4b19e5aa427ef826bf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 20:26:27 -0300 Subject: [PATCH 54/57] feat: tracking client records suspended runs for the UI Adds SuspendEntryModel to the tracking models and implements PostActionSuspendHook on SyncTrackingClient so that a suspend_entry line is written to the JSONL log whenever an action suspends the run, enabling the Burr UI to render the suspension status. --- burr/tracking/client.py | 28 +++++++++++- burr/tracking/common/models.py | 12 +++++ tests/tracking/test_local_tracking_client.py | 46 ++++++++++++++++++++ 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/burr/tracking/client.py b/burr/tracking/client.py index 44919aed5..f70f0bb27 100644 --- a/burr/tracking/client.py +++ b/burr/tracking/client.py @@ -49,19 +49,23 @@ def flock(*args, **kwargs): import re import traceback from abc import ABC -from typing import Any, Dict, Optional, Tuple +from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple try: from typing import Self except ImportError: Self = "Self" +if TYPE_CHECKING: + from burr.core.durable import SuspensionRecord + from burr import system from burr.common import types as burr_types from burr.core import Action, ApplicationGraph, State, serde from burr.core.persistence import BaseStateLoader, PersistedStateData from burr.integrations.base import require_plugin from burr.lifecycle import ( + PostActionSuspendHook, PostApplicationCreateHook, PostEndSpanHook, PostRunStepHook, @@ -81,6 +85,7 @@ def flock(*args, **kwargs): FirstItemStreamModel, InitializeStreamModel, PointerModel, + SuspendEntryModel, ) from burr.visibility import ActionSpan @@ -131,6 +136,7 @@ class SyncTrackingClient( PostApplicationCreateHook, PreRunStepHook, PostRunStepHook, + PostActionSuspendHook, PreStartSpanHook, PostEndSpanHook, DoLogAttributeHook, @@ -478,6 +484,26 @@ def post_run_step( ) self._append_write_line(post_run_entry) + 
def post_action_suspend( + self, + *, + app_id: str, + partition_key: Optional[str], + action: Action, + sequence_id: int, + suspension: "SuspensionRecord", + **future_kwargs: Any, + ): + suspend_entry = SuspendEntryModel( + suspend_time=datetime.datetime.now(), + action=action.name, + sequence_id=sequence_id, + channel=suspension.channel, + metadata=suspension.metadata if suspension.metadata is not None else {}, + suspension_id=suspension.suspension_id, + ) + self._append_write_line(suspend_entry) + def pre_start_span( self, *, diff --git a/burr/tracking/common/models.py b/burr/tracking/common/models.py index 5980bf9df..9cc0c0193 100644 --- a/burr/tracking/common/models.py +++ b/burr/tracking/common/models.py @@ -180,6 +180,18 @@ class EndEntryModel(IdentifyingModel): type: str = "end_entry" +class SuspendEntryModel(IdentifyingModel): + """Pydantic model that represents a step that suspended the run.""" + + suspend_time: datetime.datetime + action: str + sequence_id: int + channel: str + metadata: Dict[str, Any] + suspension_id: str + type: str = "suspend_entry" + + class BeginSpanModel(IdentifyingModel): """Pydantic model that represents an entry for the beginning of a span""" diff --git a/tests/tracking/test_local_tracking_client.py b/tests/tracking/test_local_tracking_client.py index 7a8196c0e..8e9b088fd 100644 --- a/tests/tracking/test_local_tracking_client.py +++ b/tests/tracking/test_local_tracking_client.py @@ -494,3 +494,49 @@ def test_local_tracking_client_copy(): assert copy.project_id == tracking_client.project_id assert copy.serde_kwargs == tracking_client.serde_kwargs assert copy.storage_dir == tracking_client.storage_dir + + +def test_application_tracks_suspended_run(tmpdir: str): + """Tests that LocalTrackingClient writes a suspend_entry line when an action suspends.""" + from burr.core.persistence import InMemoryPersister + from burr.tracking.common.models import SuspendEntryModel + + app_id = str(uuid.uuid4()) + log_dir = os.path.join(tmpdir, 
"tracking") + project_name = "test_application_tracks_suspended_run" + + @action(reads=[], writes=[]) + def suspending_action(state: State, __context) -> State: + __context.suspend("approval", metadata={"reason": "needs review"}) + return state # never reached + + tracker = LocalTrackingClient(project=project_name, storage_dir=log_dir) + app = ( + ApplicationBuilder() + .with_actions(suspending_action) + .with_transitions(("suspending_action", "suspending_action", default)) + .with_entrypoint("suspending_action") + .with_state(State({})) + .with_identifiers(app_id=app_id) + .with_tracker(tracker) + .with_state_persister(InMemoryPersister()) + .build() + ) + app.run(halt_after=["suspending_action"]) + + results_dir = os.path.join(log_dir, project_name, app_id) + log_output = os.path.join(results_dir, LocalTrackingClient.LOG_FILENAME) + assert os.path.exists(log_output) + + with open(log_output) as f: + log_contents = [json.loads(line) for line in f.readlines()] + + suspend_entries = [ + SuspendEntryModel.model_validate(line) + for line in log_contents + if line["type"] == "suspend_entry" + ] + assert len(suspend_entries) >= 1 + entry = suspend_entries[0] + assert entry.channel == "approval" + assert entry.metadata == {"reason": "needs review"} From 094934b618d042cb64241cb75aa112e4c8b71120 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 20:35:35 -0300 Subject: [PATCH 55/57] docs: add human-in-the-loop durable execution example Adds examples/durable-execution/ with a draft-review-finalize workflow demonstrating suspend/resume and durable() journaling. Includes application.py, notebook.ipynb, README.md, requirements.txt, __init__.py, and a real statemachine.png generated by graphviz. Extends test_durable_integration.py with test_example_application_suspends_and_resumes which loads the example module and exercises the full suspend/resume path against a tmp_path SQLite DB. 
--- examples/durable-execution/.gitignore | 3 + examples/durable-execution/README.md | 51 +++++++ examples/durable-execution/__init__.py | 0 examples/durable-execution/application.py | 160 ++++++++++++++++++++ examples/durable-execution/notebook.ipynb | 80 ++++++++++ examples/durable-execution/requirements.txt | 1 + examples/durable-execution/statemachine.png | Bin 0 -> 7515 bytes tests/core/test_durable_integration.py | 32 ++++ 8 files changed, 327 insertions(+) create mode 100644 examples/durable-execution/.gitignore create mode 100644 examples/durable-execution/README.md create mode 100644 examples/durable-execution/__init__.py create mode 100644 examples/durable-execution/application.py create mode 100644 examples/durable-execution/notebook.ipynb create mode 100644 examples/durable-execution/requirements.txt create mode 100644 examples/durable-execution/statemachine.png diff --git a/examples/durable-execution/.gitignore b/examples/durable-execution/.gitignore new file mode 100644 index 000000000..141da33f0 --- /dev/null +++ b/examples/durable-execution/.gitignore @@ -0,0 +1,3 @@ +*.db +__pycache__/ +*.py[cod] diff --git a/examples/durable-execution/README.md b/examples/durable-execution/README.md new file mode 100644 index 000000000..2e6564d2e --- /dev/null +++ b/examples/durable-execution/README.md @@ -0,0 +1,51 @@ +# Durable Execution: Human-in-the-Loop + +This example demonstrates Burr's suspend/resume primitives through a three-step +draft-review-finalize workflow. The `review` action suspends the workflow and +waits for a human to approve or reject a draft. While suspended, the process can +die and restart without losing progress — the `durable()` call memoizes expensive +sub-steps (like an LLM summary) in a journal, so they are not re-executed on +resume. 
+ +The same pattern covers three production use-cases: human-in-the-loop approval +gates, waiting for an external event (webhook, queue message, IoT sensor), and +crash resilience where a long-running action is interrupted mid-flight. + +## How to run + +```bash +pip install burr +python application.py +``` + +The script runs the workflow to the `review` suspension, prints the suspended +channel and metadata, then immediately simulates the human responding with +`{"approved": True}` via `resume()` and prints the final state. + +## The `human_approval` channel + +In production you would expose the `resume()` call through a webhook or UI +button. When the workflow suspends, store the `app_id` and `partition_key` +alongside the suspension metadata (returned by `app.suspended.metadata`). Your +webhook handler then calls: + +```python +from burr.core import resume +from burr.core.persistence import SQLitePersister + +persister = SQLitePersister.from_values("durable.db") +persister.initialize() +final_state = resume( + persister=persister, + graph=graph, # same Graph object (or rebuild it) + app_id=app_id, + partition_key=partition_key, + channel="human_approval", + payload={"approved": True}, +) +``` + +## Further reading + +- [Durable Execution concepts](../../docs/concepts/durable-execution.rst) +- [Burr documentation](https://burr.dagworks.io) diff --git a/examples/durable-execution/__init__.py b/examples/durable-execution/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/durable-execution/application.py b/examples/durable-execution/application.py new file mode 100644 index 000000000..ce6360df5 --- /dev/null +++ b/examples/durable-execution/application.py @@ -0,0 +1,160 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Human-in-the-loop durable execution example. + +Demonstrates a draft -> review -> finalize workflow where the "review" step: +1. Uses ``durable()`` to memoize an expensive summary so it is computed once + even if the process is restarted before the human responds. +2. Calls ``suspend("human_approval")`` to pause the workflow and wait for an + external signal (webhook, UI button, etc.). +3. Continues to finalization once the human payload arrives via ``resume()``. +""" + +import pathlib +from typing import Optional, Tuple + +from burr.core import ApplicationBuilder, GraphBuilder, State, action, resume +from burr.core.application import Application +from burr.core.graph import Graph +from burr.core.persistence import SQLitePersister + +# --------------------------------------------------------------------------- +# Actions +# --------------------------------------------------------------------------- + + +@action(reads=[], writes=["draft"]) +def draft(state: State) -> State: + """Produce the initial draft content.""" + return state.update(draft="This is the initial draft content for review.") + + +@action(reads=["draft"], writes=["review_decision"]) +def review(state: State, __context) -> State: + """Memoize a summary, then suspend waiting for human approval. 
+ + The ``durable()`` call ensures the summarizer runs exactly once across + the suspend/resume boundary — the result is replayed from the journal on + resume instead of being recomputed. + """ + summary = __context.durable( + "summarize", + lambda d: f"SUMMARY: {d[:20]}...", + state["draft"], + ) + # Suspend until a human delivers a payload over the "human_approval" channel. + # The payload is expected to be a dict with key "approved" (bool). + payload = __context.suspend( + "human_approval", + metadata={"summary": summary}, + ) + return state.update(review_decision=payload) + + +@action(reads=["review_decision"], writes=["approved"]) +def finalize(state: State) -> State: + """Record the human's decision.""" + decision = state["review_decision"] + return state.update(approved=decision.get("approved", False)) + + +# --------------------------------------------------------------------------- +# Factory +# --------------------------------------------------------------------------- + + +def build_application( + app_id: str, + partition_key: Optional[str] = None, + db_path: Optional[str] = None, +) -> Tuple[Application, Graph, SQLitePersister]: + """Build and return the application, its graph, and the persister. + + :param app_id: Unique identifier for this run. + :param partition_key: Optional partition key (e.g. tenant / user id). + :param db_path: Path for the SQLite database. Defaults to a file next to + this script so re-runs pick up where they left off. + :return: Tuple of (application, graph, persister). 
+ """ + if db_path is None: + db_path = str(pathlib.Path(__file__).parent / "durable.db") + + persister = SQLitePersister.from_values(db_path) + persister.initialize() + + graph = ( + GraphBuilder() + .with_actions(draft=draft, review=review, finalize=finalize) + .with_transitions(("draft", "review"), ("review", "finalize")) + .build() + ) + + app = ( + ApplicationBuilder() + .with_graph(graph) + .with_entrypoint("draft") + .with_state(State({})) + .with_identifiers(app_id=app_id, partition_key=partition_key) + .with_state_persister(persister) + .build() + ) + + return app, graph, persister + + +# --------------------------------------------------------------------------- +# Main: run the full suspend/resume cycle for demonstration +# --------------------------------------------------------------------------- + +if __name__ == "__main__": + import os + + _DB_PATH = str(pathlib.Path(__file__).parent / "durable.db") + # Remove stale DB so the demo always starts fresh. + if os.path.exists(_DB_PATH): + os.remove(_DB_PATH) + + # --- First half: run until the workflow suspends at "review" --- + app, graph, persister = build_application(app_id="demo-1", db_path=_DB_PATH) + + # Generate the state-machine diagram (graphviz binary required). 
+ try: + app.visualize( + output_file_path=str(pathlib.Path(__file__).parent / "statemachine"), + include_conditions=False, + view=False, + format="png", + ) + print("State machine saved to statemachine.png") + except Exception as exc: + print(f"visualize skipped: {exc}") + + app.run(halt_after=["review"]) + print("Suspended on channel:", app.suspended.channel) + print("Suspension metadata:", app.suspended.metadata) + + # --- Second half: simulate the human approving the draft --- + final_state = resume( + persister=persister, + graph=graph, + app_id="demo-1", + partition_key=None, + channel="human_approval", + payload={"approved": True}, + ) + print("Final approved:", final_state["approved"]) diff --git a/examples/durable-execution/notebook.ipynb b/examples/durable-execution/notebook.ipynb new file mode 100644 index 000000000..0b29710ee --- /dev/null +++ b/examples/durable-execution/notebook.ipynb @@ -0,0 +1,80 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "a1b2c3d4e5f60001", + "metadata": {}, + "outputs": [], + "source": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements. See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership. The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License. You may obtain a copy of the License at\n#\n# http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied. See the License for the\n# specific language governing permissions and limitations\n# under the License." 
+ }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1b2c3d4e5f60002", + "metadata": {}, + "outputs": [], + "source": "!pip install burr" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1b2c3d4e5f60003", + "metadata": {}, + "outputs": [], + "source": [ + "# Build the durable-execution application and run until it suspends.\n", + "import sys, pathlib\n", + "sys.path.insert(0, str(pathlib.Path(\".\").resolve()))\n", + "\n", + "from application import build_application\n", + "from burr.core import resume\n", + "\n", + "app, graph, persister = build_application(app_id=\"notebook-demo-1\", db_path=\":memory:\")\n", + "app.run(halt_after=[\"review\"])\n", + "print(\"Suspended channel:\", app.suspended.channel)\n", + "print(\"Metadata:\", app.suspended.metadata)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1b2c3d4e5f60004", + "metadata": {}, + "outputs": [], + "source": [ + "# Simulate the human approving and resume the workflow.\n", + "final_state = resume(\n", + " persister=persister,\n", + " graph=graph,\n", + " app_id=\"notebook-demo-1\",\n", + " partition_key=None,\n", + " channel=\"human_approval\",\n", + " payload={\"approved\": True},\n", + ")\n", + "print(\"Approved:\", final_state[\"approved\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/durable-execution/requirements.txt b/examples/durable-execution/requirements.txt new file mode 100644 index 000000000..a78cac9dd --- /dev/null +++ b/examples/durable-execution/requirements.txt @@ -0,0 +1 @@ +burr diff --git 
a/examples/durable-execution/statemachine.png b/examples/durable-execution/statemachine.png new file mode 100644 index 0000000000000000000000000000000000000000..f03f9e03e69927d4c13ffdf1c0d27dfd99d83cd9 GIT binary patch literal 7515 zcmb7}Wl$VJxA!Ly+(~c<1b2dk;7(W|xCD2%;O_43ED{n3?kp~W;O@S-FA&@Ucb}(j zy`SDMx29&MrhB@kd%Dl*-}#>?WyQ}}=%nZX0APV+rBvZ}L-+|pMSx#5Sw&XiH)Ip} z&r-n4e~yBVk`w@-zy(Q(t9$0cz+OVL_b=U0ZqpB^MakqN^jdEvKvJp0X|(*&r}C+C zx=oc0`nrp>`pejB8R_W9to-s0&JKm-+P5D@WAFd{{R<}F<~GI7pK{Dg_}A;dCw%7R zm*;mn@%YcXknt`01}D1J55x@{Pxpj%mI9?k3`I>GOaCj3{4auo*)e`6m<$YXqM4IHJI zS(}oTW$oA{p{(v&9}sxzt48ndwt8Ua*hc1jI`?m71>*Q}IU#y~*YBQ7Q4fy5q4f6j zJ|bpYr1Vr#mi3ICqL%dbRY=T-ltvwH4OwilC>E zfxNuDf`yLQoxTWbdgCWN8t(TuKW@`)7pm+RYl4z)VhvdG1>DM7e$H_)+HIA+0!Hig zZ1!u1CeFlcZCPkwBz0=~`hM&mCg(=9B)suQ@Tg;RGB~=8;@=4m%bJFtyng+9aBzsj z@u+1(s`$ldq~kd~JIPx1Z3^|9HK^$A8SOJXihRsxQ$p&<@5wiBFp`c6C- z_Yq;6@SB7L7z{qQKUgVLW(2}a4Au1zqo4hLd*=U0DazyJn-#QQ8d_Z->zcKo`*WKz zbkyoHYk6EN*t!|p1uikar_8KZQ`0_eTTY*hLje|>y%dwu+9-T2Zke#x@s)mf;4@7n zg*vpS_Xw4+8CuVU9$a|qHE z7VgbVkaD}l5vgXVJv^R8{NuN+`m630#>c6mnqt1E&5HLH|C9#zf_vuZxsnzyYMQI= zp?J?ES+yfHuVgc07~=u}RPYv%UOPP@Ah^w?nQ`44SWPN%&&F0(@Z$+t!e-UeDOZlB z?tT(J7#@q?*8WR<_9+za5EgbckoGHo36?1Qy**PVAEV^$X#aRd)|f@5crcizD>|m! 
zMkMgf*UOm=Gp+|UCfcp35dk<2L(XD_CIq^Zxj?d3H1+D7}hYKd75 z{!}~JZZpAGEr*&k)+g$B*SI&64HJQXeS4Dm+oYnw!^(0QIwE&WFVRczI|-snZI5qj%uzs+WW2h?sMPGb9SHX2@rQBvc+A`H4g=Az*En{fWu!wEV0+b4&h#+4lMl zn7u+n-wugXs7cdZglut ztPYM`Ob9+T!wNVvL*8o0(jB66NL=N21G*p&$X+B`uZCWtUfIzKmecutke-abTa&QcT9iFDAlKcOl zMD`Q3v9W2?Ao!052%MMINE=V&eQ@6+B~9NkdohySih){2#mJ9law2w;_z8|=a5y^H zF|rJr8`=$~WZ|~CmJ+OaK(Am2M@KfcwgWamcKlD-pgJ#Y>Tm}*m)rYT(t9M#-hU5TIO ziBId58E0X-kOm%JoaV~W3C|I`TvV}xLy^M{x?Sm=5R_XIE$+QGWpeC{L|jMHj;F!& z#wibnZx5SqbbnRfyH2jvQgbjcaZdI3TPQ#&E`?hQ3O3dRJ^TfI?;Z|KeZcneF3^O= z2&a(AxGoY;=aM+2cpSJ_HMg@9ntzv>AEd6Tx&?!!q^4R~S=9CoX?S!pbrn##=+}Z< z1Iksu)4X`OW=PPZye401vL|zql+e!3M*#D`YDZNcvL(%Xt&%vwXmS2qmzAzt=)+D_xgBYad9y>H+OirEPbA^*iz)< z>_SX*6dDFMzuRYZDRmi(=%%KomX?;JB)srzFLu^(EHe)H63XB(lo5zNJI~Ag-BXW< zGJ#DlB>3(8r2GH;KqYj^$;EZC0y%S;^Y-ykQ&U6139hZJ&H9MdqW5Z0YtPAatP zivuP+m17wDtK{PFhob{>lKcBrLcVrudHYw{LdnihYdLOaCu6XLfRii5gDi9##5 zh1%FqQ;Yz5W&`8nin6j+YprgwC>Ip$bu_H3CmS0ZCnvG4R8V8*QKldrQnh-hnTOGigXFF;Zc?(grPDN%wpJJlBz>1%7~ zYfIWrt83@hpduPlXvy|j{e4__pBQJ(9Ai;FRC&!BwsS%7JcBrU$AghV_AMt4*s ztH(7DM3I_ZLYy??a}oJQW|`8WA;|3zI(%ksP}cDb~ytgaqXT~_A2+R*DX zbyIcn)FiEOo^W#$?#oYB6xOI9VH@7wQk0rKOijI#+q8<@GQW6 zd$_V-hK$dy+4C>;*uGpyq2vDUNCpc91s0*s-LsQFwF4K;kYc=O+k|all(qUS^gQ@r zk%^JovJZ$ad8n5k)p<+`t7~byeE3zgCQ0;3CANv(I!-fuXhpasp<*&J-B9L$uy?q z<#ihM1RHV~dG{BKi}&z|{#~rd#vXcnvN-m1cnc;=iZ=nO>U%Z5t@Y5A z-EmL9_l7nVfh&7(NbkB`!pux39B^?gvk+Wy_0E^5utZRl8@ACpcczN}3LwN* zw145<#~O~)$bRfBZ0a`&o2e1`d@S`<-L<%VU>Jl*k&yk_Mq;H`zF*o!(XbyCa; zW2t{Y*v+mQFR*bG1+|;qEqwvsLQ#M_bpv#u#`Wu$gMLf8XGCDoD`j?R-loO*&H)KJ zq>dylO)%Gf#W%Y)Y(DVLSVd)Uyp0d^eG6Y%1wqp&2iybdGIAbr^KeH1_Ski(nc0)@ zDbQe=zrTN%A?%HypMmvFuGa1Jwi!q$8F-h~weI~Wty^}9!>FHm1@Jp2V~w(YtyURN zItDX2qXU76JsAq=OVf6NyZd~NO$k1})h&f0f^$+%))(q>Yv*Se`sMn)68vM=j4I(U0fQXp9lQA@UryL#px_(6Ie?Ipy+C)XRFolV*mC}ytG)BHMI=q>us)&|e{3hn=RjEMYvs9Dg^BOhoR zpX@?CgYe(Ntd+4b=LOBOrR(Q`w56fH%JvJCd42mzsi|phZf}&gH}F@o;~xwba#65toq>c&Y#%ZV$Ao4fMB9e0}%Bf2}MmkKA#rR+M+YwU?Z8y;+|NH{%L3F54xr|Ie=h#LO>n)B$FO2nYHsf6 
zpk6ZPXf1qqzPMP{(9kzOt6f?*EegFdL+%+VoVu%-N%idke@$)DpW9)&wUi}dzf8Z9IVPpfMX;X zp9A?ZrckyRH>HNUdT3~9=4t3$$*G*2+~}8}3=?kOJkDq-OA@>0#k$@oKNMFlG_Z@NW6?dvy=UW>9)Qm+;4 z;ELAZf01uvgX)usJJ#z|E3Z`=QiR{`pehtz||M>-HI3O@00tEmfdbM?RY5ogwC}k)0 zHgnA6@w(1>E=d$vSXgkbbzo$c2^(S{hU+!=_xE4Ed^sUiLQBfa6LDM@;(1IClr&|T zo1L{7&Fqni@2*`cEicEw!3lUc(N-~j4*cHCtY930i>#|uhp{@(>K99D8B;YI{kW0)S#*;!apELzN#-KnXmg@lCz z_yf$Jx6HTk8BB1!TX9+ix9%qS&!7qIRNtNU&JGS1yP%FOBdwGMSgB2`9CyE!=z+H_;lJ&27aaR z(i1LMSlQT(4h|Bxe~Sd?<>j56uk*$yn2dHtn^u4C~Jvo}JAxL}CF-gU(8}UleIH2yoFEY>xBu~aI zaAlShmW%r2)i$R#wAU zxJs5rwDSu1PF;n8x~1oSO5z*aOho#_-ik-kcD^mOFT9o1iOE{>@(=!W$}hvS|DU`^ z6A-p+Y3z=i+T=$}+UnvOhvNK&sAHszb@;60s(fr3Tq>H8)@T|+age~U@OzRtc4S;Q zBmwVMA|y70^(FIVv#UQS237q1W|5YIHOXj*4Rik0`tl8w&&Cl>(KPTvGE%HZ0-OA> zC6ZY~^4Q*o%GQ1iK^1vn&v0~imN0kfDH{{CGNAkU z?-G4v!qMWGijC%xovyIBPH5|CiQ6MDIkIoYr^%xclC84>SLT!%|6TLubHCYgK*{I# zdAYX|EwQ)SIlAun$Qvf%<#cO-(`g$YMFMP$=E0=TxXcLxvQ6ty+)a!EIGtD05Aqlf2KNdYkE}IM8~pB)__YVYA@=V%XeY^TgqHnuI|-ZI;-;eDM7O_1>;Sc zfSAteNJDc*@3Lg0Ze~cmyG$DsC_es4UDL+MqSimZEB9hzl74&tXi`}h5&GV>dsrcI z&1q+6LjXg9`a1x`5GZn8^v0}A17;B(0?c&q#ZDqsQr7OtFSiuK3WWy2NHodv&^L&s1l zDY%I2GP5pr+kH2V{fZo>pl|8x(Jl{~%{5(|n-aO3Wv6Gdh8^)s+Gs9WjV02PfAaJ2lqJ?R>&4uwr=~0zaq>^v;v1_@_hg0+6!j=f#-R7a4V>vquFAuqME4jaVQ+?x@LL>}$thSyA8&bKm_adDIW^;M@4sPTO03Jtj6fs z;R%boHQSkfEHZ2g0UjN)<~C(S#E{WZO#DvM{Ca!qQ=jqIQ9!7})1M-g>by_RTer|^ zy%q%SvfyXdzhea6bwm8%I`p8&P zNz2H5#UbY5pV~71;h2<08aXWu^y86RoLl>Jrd)P9MK Date: Fri, 22 May 2026 20:40:56 -0300 Subject: [PATCH 56/57] docs: document durable execution (suspend, durable, resume) --- docs/concepts/durable-execution.rst | 321 ++++++++++++++++++++++++++++ docs/concepts/index.rst | 1 + 2 files changed, 322 insertions(+) create mode 100644 docs/concepts/durable-execution.rst diff --git a/docs/concepts/durable-execution.rst b/docs/concepts/durable-execution.rst new file mode 100644 index 000000000..bca371718 --- /dev/null +++ b/docs/concepts/durable-execution.rst @@ -0,0 +1,321 @@ +.. 
+ Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +================= +Durable Execution +================= + +.. _durable-execution: + +.. note:: + + Durable execution lets an action pause mid-run (``__context.suspend()``), + survive process restarts, and resume exactly where it stopped when an + external event arrives. Sub-steps memoized with ``__context.durable()`` + are replayed from a journal on re-run so they never execute twice. + +What is durable execution? +-------------------------- + +Some workflows cannot finish in a single continuous execution: + +* **Human-in-the-loop.** An action drafts content and waits for a human to + approve it before the workflow proceeds. The process may be restarted many + times while waiting. +* **External-event wait.** An action triggers a webhook and must wait for the + callback, which could arrive seconds or days later. +* **Crash resilience.** Long-running inference or IO inside an action can be + checkpointed so that a restart does not redo expensive work already completed + before the crash. 
+ +Burr addresses all three scenarios with two primitives on +:py:class:`ApplicationContext <burr.core.application.ApplicationContext>`: +``suspend()`` for pausing the run and ``durable()`` / ``adurable()`` for +memoizing sub-steps so they survive a suspend/resume boundary. + +Suspending a run: ``suspend()`` +-------------------------------- + +Call ``__context.suspend(channel)`` inside an action to pause the entire run +and wait for an external payload on the named *channel*. The call raises an +internal control-flow signal that the run loop catches; the run stops and a +:py:class:`SuspensionRecord <burr.core.durable.SuspensionRecord>` is persisted. + +On resume the action **re-runs from the top**. When execution reaches the same +``suspend(channel)`` call again, the already-delivered payload is returned +instead of raising the signal. + +.. code-block:: python + + from burr.core import action, State + + @action(reads=["draft"], writes=["review_decision"]) + def review(state: State, __context) -> State: + # Optional: memoize an expensive step before suspending (see below). + summary = __context.durable( + "summarize", + lambda d: f"SUMMARY: {d[:50]}...", + state["draft"], + ) + # Suspend until a human posts a payload to "human_approval". + payload = __context.suspend( + "human_approval", + metadata={"summary": summary}, + ) + return state.update(review_decision=payload) + +Signature:: + + __context.suspend( + channel: str, + *, + schema: Optional[type] = None, + metadata: Optional[dict] = None, + ) -> Any + +* ``channel`` -- a stable name for this suspension point. +* ``schema`` -- optional Pydantic model or dataclass; when supplied, a dict + payload is coerced via ``schema(**payload)`` before being returned. +* ``metadata`` -- free-form dict stored with the suspension record; useful for + surfacing context to the UI or a webhook handler. +* **Return value** -- the ``payload`` delivered by :py:func:`resume() + <burr.core.resume.resume>` when the run is resumed. + +.. warning:: + + ``_Suspended`` inherits from ``BaseException``, not ``Exception``.
Do not + wrap ``__context.suspend()`` calls inside ``asyncio.shield()``, + ``try/except BaseException``, or any other guard that catches + ``BaseException`` -- doing so will swallow the signal and prevent the run + from suspending correctly. + + +Memoizing sub-steps: ``durable()`` and ``adurable()`` +------------------------------------------------------ + +Because the action re-runs from the top on resume, any side-effectful or +expensive work executed *before* the ``suspend()`` call will execute again. +Use ``__context.durable(key, fn, *args, **kwargs)`` to memoize a sub-step so +it runs exactly once regardless of how many times the action is re-executed. + +On the first run ``fn`` is called and its result is written to an append-only +*journal*. On re-run the same ``key`` is looked up in the journal and the +cached result is returned without calling ``fn`` again. + +.. code-block:: python + + @action(reads=["content"], writes=["result"]) + def process(state: State, __context) -> State: + # Expensive LLM call -- runs once, replayed on resume. + summary = __context.durable( + "llm_summarize", + call_llm, # fn + state["content"], # *args + ) + + # Async variant -- use inside async actions. + # embedding = await __context.adurable("embed", fetch_embedding, summary) + + payload = __context.suspend("approval", metadata={"summary": summary}) + return state.update(result=payload) + +Signatures:: + + __context.durable(key: str, fn: Callable, *args, **kwargs) -> Any + await __context.adurable(key: str, fn: Callable, *args, **kwargs) -> Any + +* ``key`` -- a stable, unique identifier for this sub-step (see determinism + contract below). +* ``fn`` -- a callable (or coroutine function for ``adurable``) whose result + should be memoized. +* ``*args, **kwargs`` -- forwarded to ``fn`` on first execution only. + +.. note:: + + Do not call ``suspend()`` from inside a ``durable()`` fn. The fn must be + a pure computation that returns a value. 
+ + +The determinism contract +------------------------- + +The run loop identifies journal entries by position (call index) and key. For +replay to work correctly, every re-run of the same action invocation must call +``durable()`` / ``adurable()`` in **exactly the same order** with **exactly +the same keys**. Violations raise :py:exc:`DeterminismError +<burr.core.durable.DeterminismError>` immediately (fail-loud). + +Rules: + +1. **Stable key per call site.** Use a string literal, not a runtime value + that may change (e.g., a timestamp or UUID). + +2. **Stable call order.** The set and order of ``durable()`` calls must be + identical on every re-run of the same invocation. + +3. **No non-deterministic branching.** Do not gate a ``durable()`` call on + a condition that may differ between the first run and the re-run: + + .. code-block:: python + + # BAD -- the branch may not be taken on resume. + if random.random() > 0.5: + __context.durable("step", fn) + + # GOOD -- the call is unconditional. + result = __context.durable("step", fn) + +4. **No ``suspend()`` inside ``durable()`` fn.** The fn must return a plain + value; calling ``suspend()`` inside it raises ``_Suspended`` before the + result is recorded and corrupts the journal. + +5. **Mismatch raises ``DeterminismError``.** If ``key`` or call order differs + between runs, a :py:exc:`DeterminismError <burr.core.durable.DeterminismError>` + is raised, converting a silent footgun into a loud failure. + + +Resuming a suspended run: ``resume()`` +--------------------------------------- + +When the external event arrives (webhook, form POST, timer, etc.), call +:py:func:`resume() <burr.core.resume.resume>` (sync) or +:py:func:`aresume() <burr.core.resume.aresume>` (async). Both helpers +reload the suspension from the persister, rebuild the Application, set the +resume payload, and run the graph to the next halt, suspend, or completion. + +..
code-block:: python + + from burr.core import resume + + # Synchronous resume (e.g., inside a Flask route handler): + final_state = resume( + persister=persister, + graph=graph, + app_id="my-app-run-001", + partition_key=None, + channel="human_approval", + payload={"approved": True}, + ) + + # Asynchronous resume (e.g., inside a FastAPI route handler): + from burr.core.resume import aresume + + final_state = await aresume( + persister=persister, + graph=graph, + app_id="my-app-run-001", + partition_key=None, + channel="human_approval", + payload={"approved": True}, + ) + +Both functions return the :py:class:`State <burr.core.state.State>` reached when +the resumed run completes, halts, or reaches the next suspension. + +**Idempotency.** For persisters with durable storage (see below), resuming an +already-resolved suspension is an idempotent no-op: the call returns the +latest persisted state unchanged. The ``resolved`` flag on the +:py:class:`SuspensionRecord <burr.core.durable.SuspensionRecord>` prevents +double-execution. For custom persisters without durable storage, a second +``resume()`` call after the first completes raises ``ValueError``. + + +Persister support +----------------- + +First-party persisters ship with dedicated storage tables or collections for +suspension records and journal entries, providing strong resume-once semantics: + +..
list-table:: + :header-rows: 1 + :widths: 25 20 55 + + * - Backend + - Driver + - Class + * - SQLite (sync) + - sqlite3 + - :ref:`SQLitePersister ` + * - SQLite (async) + - aiosqlite + - :ref:`AsyncSQLitePersister ` + * - PostgreSQL (sync) + - psycopg2 + - :ref:`PostgreSQLPersister ` + * - PostgreSQL (async) + - asyncpg + - :ref:`AsyncPostgreSQLPersister ` + * - Redis (sync) + - redis + - :ref:`RedisBasePersister ` + * - Redis (async) + - redis.asyncio + - :ref:`AsyncRedisBasePersister ` + * - MongoDB + - pymongo + - :ref:`MongoDBBasePersister ` + +**Custom persisters** work transparently through an in-state fallback: the +:py:class:`SuspensionRecord <burr.core.durable.SuspensionRecord>` and journal +entries are embedded inside the reserved ``__burr_durable__`` key in +:py:class:`State <burr.core.state.State>`, which the existing persister hook +saves automatically. This is correct and requires no code changes, but it does +not provide the idempotency guarantees of the dedicated durable-storage +methods. + +To opt in to durable storage for a custom persister, override all five methods +on :py:class:`BaseStatePersister <burr.core.persistence.BaseStatePersister>`: +``save_suspension``, ``load_suspension``, ``save_journal_entry``, +``load_journal``, and ``mark_suspension_resolved``. + + +``_Suspended`` and ``BaseException`` +-------------------------------------- + +The internal control-flow signal ``_Suspended`` inherits from ``BaseException`` +so that a user ``try/except Exception`` block inside an action does not +accidentally catch it. The run loop catches it explicitly. It is never logged +as a failure. + +This means you must not wrap ``__context.suspend()`` calls in constructs that +catch ``BaseException``: + +.. code-block:: python + + # BAD -- a handler that catches BaseException (or a bare except:) swallows + # _Suspended, so the run never suspends. + try: + result = __context.suspend("ch") + except BaseException: + result = None + + # GOOD -- call suspend directly.
+ result = __context.suspend("ch") + + +Example +------- + +A complete human-in-the-loop draft-review-finalize workflow is available in +the ``examples/durable-execution/`` directory of the repository. It +demonstrates: + +* Using ``durable()`` to memoize an LLM summary before suspending. +* Calling ``suspend("human_approval")`` to pause the workflow. +* Using ``resume()`` to deliver the human's decision and finish the run. + +See :ref:`available persisters here ` for the full list of +backends that support the durable-storage APIs. diff --git a/docs/concepts/index.rst b/docs/concepts/index.rst index 3a58bd012..301698d79 100644 --- a/docs/concepts/index.rst +++ b/docs/concepts/index.rst @@ -36,6 +36,7 @@ Overview of the concepts -- read these to get a mental model for how Burr works. transitions tracking state-persistence + durable-execution serde streaming-actions state-typing From ececa4b9227e40f26afe0ed5b3792f7071cbd9b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Ahlert?= Date: Fri, 22 May 2026 20:48:02 -0300 Subject: [PATCH 57/57] chore: remove stale M5 milestone comment --- burr/core/application.py | 1 - 1 file changed, 1 deletion(-) diff --git a/burr/core/application.py b/burr/core/application.py index 7e0ee51c5..92718a4db 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -1142,7 +1142,6 @@ def _handle_suspension(self, action, action_inputs, suspended): # persists the embedded State once. Saving here too would write the # same (partition_key, app_id, sequence_id, position) row twice and # break persisters with a UNIQUE constraint (e.g. SQLitePersister). - # M5: suspended runs are persisted with status "completed"; a dedicated status + post_action_suspend hook lands in M5. state = write_suspension_into_state(self._state, record) state = write_journal_into_state(state, self._journal_sink) self._set_state(state)