From 238a7b3189eef62498c5dcdd83908f2d2d3c9e74 Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Mon, 25 May 2026 10:42:09 -0400 Subject: [PATCH 1/6] initial implementation for replacing except-pass --- .github/workflows/tests.yml | 2 +- api/logs.py | 32 +++++-- api/search.py | 63 ++++++++++--- api/workspaces.py | 7 +- services/workspace_listing.py | 60 ++++++++++-- services/workspace_resolver.py | 50 +++++++--- services/workspace_tabs.py | 37 ++++++-- tests/test_parse_failure_logging.py | 137 ++++++++++++++++++++++++++++ 8 files changed, 336 insertions(+), 52 deletions(-) create mode 100644 tests/test_parse_failure_logging.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b5cf093..29e917f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -116,7 +116,7 @@ jobs: # new endpoint file because `pytest tests/` would also re-collect the # 178 unittest.TestCase subclasses already run in the step above — # ~2× the CI minutes for zero extra signal. - run: python -m pytest tests/test_api_endpoints.py -v --tb=short + run: python -m pytest tests/test_api_endpoints.py tests/test_parse_failure_logging.py -v --tb=short # ── PyInstaller desktop build (Windows only, once per workflow) ──────── # Closes #44. Builds the onedir bundle and smoke-tests --help so the diff --git a/api/logs.py b/api/logs.py index a213cdc..6c15be3 100644 --- a/api/logs.py +++ b/api/logs.py @@ -48,8 +48,12 @@ def get_logs(): try: bubble = json.loads(row["value"]) chat_map.setdefault(chat_id, []).append(bubble) - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to decode bubble row %s: %s", + row["key"], + e, + ) for chat_id, bubbles in chat_map.items(): bubbles = [b for b in bubbles if isinstance(b, dict)] @@ -90,8 +94,12 @@ def get_logs(): with open(wj_path, "r", encoding="utf-8") as f: wd = json.load(f) workspace_folder = wd.get("folder") - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to read workspace.json for %s: %s", + name, + e, + ) try: # closing() guarantees .close() on scope exit (issue #17). @@ -130,10 +138,18 @@ def get_logs(): "type": "composer", "messageCount": len(c.get("conversation") or []), }) - except Exception: - pass - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to read logs from workspace %s: %s", + name, + e, + ) + except Exception as e: + _logger.warning( + "Failed to iterate workspaces under %s: %s", + workspace_path, + e, + ) logs.sort(key=lambda log: log.get("timestamp") or 0, reverse=True) return jsonify({"logs": logs}) diff --git a/api/search.py b/api/search.py index 79ed0c8..bc4e51a 100644 --- a/api/search.py +++ b/api/search.py @@ -114,10 +114,18 @@ def search(): fn = parts[-1] if parts else None if fn: ws_id_to_name[name] = _url_unquote(fn) - except Exception: - pass - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to read workspace.json for %s: %s", + name, + e, + ) + except Exception as e: + _logger.warning( + "Failed to list workspace entries under %s: %s", + workspace_path, + e, + ) # Build composer → workspace mapping composer_id_to_ws = {} @@ -139,8 +147,12 @@ def search(): cid = c.get("composerId") if isinstance(c, dict) else None if cid: composer_id_to_ws[cid] = entry["name"] - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to load composer mapping from workspace %s: %s", + entry["name"], + e, + ) # Load bubble text for searching bubble_map = {} @@ -261,8 +273,12 @@ def search(): "matchingText": matching_text, "type": "composer", }) - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to process Composer from composerData:%s during search: %s", + composer_id, + e, + ) except Exception: _logger.exception("Error searching global storage") @@ -288,8 +304,12 @@ def search(): with open(wj_path, "r", encoding="utf-8") as f: wd = json.load(f) workspace_folder = wd.get("folder") - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to read workspace.json for %s: %s", + name, + e, + ) workspace_name = _workspace_display_name_from_folder(workspace_folder, fallback=name) # try/finally guarantees .close() on every exit path (issue #17). @@ -362,13 +382,21 @@ def search(): "type": "chat", }) - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to search legacy workspace %s: %s", + name, + e, + ) finally: if conn is not None: conn.close() - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to iterate legacy workspaces under %s: %s", + workspace_path, + e, + ) # --------------------------------------------------------------- # Search Cursor CLI sessions (only for type=all) @@ -386,7 +414,12 @@ def search(): try: messages = traverse_blobs(session["db_path"]) - except Exception: + except Exception as e: + _logger.warning( + "Failed to traverse CLI session blobs for %s: %s", + session_id, + e, + ) continue bubbles = messages_to_bubbles(messages, created_ms) diff --git a/api/workspaces.py b/api/workspaces.py index e778993..091d03a 100644 --- a/api/workspaces.py +++ b/api/workspaces.py @@ -117,7 +117,12 @@ def get_workspace(workspace_id): inferred = _infer_workspace_name_from_context(workspace_path, workspace_id) if inferred: workspace_name = inferred - except Exception: + except Exception as e: + _logger.warning( + "Failed to read workspace.json for %s: %s", + workspace_id, + e, + ) inferred = _infer_workspace_name_from_context(workspace_path, workspace_id) if inferred: workspace_name = inferred diff --git a/services/workspace_listing.py b/services/workspace_listing.py index 228fad2..66d19d6 100644 --- a/services/workspace_listing.py +++ b/services/workspace_listing.py @@ -1,10 +1,13 @@ from __future__ import annotations import json +import logging import os import sqlite3 from datetime import datetime, timezone +_logger = logging.getLogger(__name__) + from utils.cli_chat_reader import list_cli_projects from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules from utils.path_helpers import ( @@ -14,6 +17,7 @@ ) from utils.workspace_descriptor import read_json_file from utils.workspace_path import get_cli_chats_path +from models import Composer, SchemaError from services.workspace_db import ( _build_composer_id_to_workspace_id, _collect_invalid_workspace_ids, @@ -72,7 +76,32 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: for row in composer_rows: cid = row["key"].split(":")[1] try: - cd = json.loads(row["value"]) + parsed = json.loads(row["value"]) + except (json.JSONDecodeError, TypeError, ValueError) as e: + _logger.warning( + "Failed to decode Composer from composerData:%s: %s", + cid, + e, + ) + continue + if not isinstance(parsed, dict): + _logger.warning( + "Failed to parse Composer from composerData:%s: expected object, got %s", + cid, + type(parsed).__name__, + ) + continue + try: + composer = Composer.from_dict(parsed, composer_id=cid) + except SchemaError as e: + _logger.warning( + "Failed to parse Composer from composerData:%s: %s", + cid, + e, + ) + continue + cd = composer.raw + try: pid = _determine_project_for_conversation( cd, cid, project_layouts_map, project_name_map, workspace_path_map, @@ -98,10 +127,14 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: "lastUpdatedAt": to_epoch_ms(cd.get("lastUpdatedAt")) or to_epoch_ms(cd.get("createdAt")) or 0, "createdAt": to_epoch_ms(cd.get("createdAt")) or 0, }) - except Exception: - pass - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to process Composer from composerData:%s: %s", + cid, + e, + ) + except Exception as e: + _logger.error("Failed to load composer rows from global storage: %s", e) # Group workspace entries by normalized folder path folder_to_entries: dict[str, list] = {} @@ -114,8 +147,12 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: first_folder = folders[0] if folders else None if first_folder: norm_folder = normalize_file_path(first_folder) - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to read workspace.json for %s: %s", + entry["name"], + e, + ) if not norm_folder: norm_folder = entry["name"] # fallback to workspace ID entry_folder_map[entry["name"]] = norm_folder @@ -139,7 +176,12 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: for e in group if os.path.isfile(os.path.join(workspace_path, e["name"], "state.vscdb")) ) - except Exception: + except Exception as e: + _logger.warning( + "Failed to resolve mtime for workspace folder %s: %s", + norm_folder, + e, + ) mtime = 0 workspace_name = _get_workspace_display_name(workspace_path, primary["name"]) @@ -238,7 +280,7 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: "source": "cli", }) except Exception as e: - print(f"Failed to load CLI projects: {e}") + _logger.warning("Failed to load CLI projects: %s", e) projects.sort(key=lambda p: p["lastModified"], reverse=True) return projects diff --git a/services/workspace_resolver.py b/services/workspace_resolver.py index c27da96..57d1930 100644 --- a/services/workspace_resolver.py +++ b/services/workspace_resolver.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import logging import os import re import sqlite3 @@ -8,6 +9,8 @@ from contextlib import closing from pathlib import Path +_logger = logging.getLogger(__name__) + from utils.path_helpers import ( get_workspace_display_name, get_workspace_folder_paths, @@ -28,8 +31,12 @@ def _get_workspace_display_name(workspace_path: str, workspace_id: str) -> str: name = get_workspace_display_name(workspace.raw) if name: return name - except (SchemaError, OSError, ValueError): - pass + except (SchemaError, OSError, ValueError) as e: + _logger.warning( + "Failed to parse Workspace from %s: %s", + workspace_id, + e, + ) return workspace_id @@ -131,8 +138,12 @@ def _get_project_from_file_path( if is_within_workspace and len(wp) > best_len: best_len = len(wp) best_match = entry["name"] - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to read workspace.json for %s: %s", + entry["name"], + e, + ) return best_match @@ -147,8 +158,12 @@ def _create_project_name_to_workspace_id_map(workspace_entries): folder_name = parts[-1] if parts else None if folder_name: mapping[folder_name] = entry["name"] - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to read workspace.json for %s: %s", + entry["name"], + e, + ) return mapping @@ -160,8 +175,12 @@ def _create_workspace_path_to_id_map(workspace_entries): for folder in get_workspace_folder_paths(wd): normalized = normalize_file_path(folder) out[normalized] = entry["name"] - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to read workspace.json for %s: %s", + entry["name"], + e, + ) return out @@ -274,8 +293,12 @@ def _determine_project_for_conversation( name = re.sub(r"^file://", "", folder).replace("\\", "/").split("/")[-1] if name: folder_name_to_ws.append({"name": name, "id": entry["name"]}) - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to read workspace.json for %s: %s", + entry["name"], + e, + ) best_id = None best_len = 0 @@ -312,7 +335,12 @@ def _infer_invalid_workspace_aliases( continue try: cd = json.loads(row["value"]) - except Exception: + except Exception as e: + _logger.warning( + "Failed to decode Composer from composerData:%s: %s", + cid, + e, + ) continue inferred = _determine_project_for_conversation( cd, diff --git a/services/workspace_tabs.py b/services/workspace_tabs.py index 42fa807..cbd396d 100644 --- a/services/workspace_tabs.py +++ b/services/workspace_tabs.py @@ -1,11 +1,14 @@ from __future__ import annotations import json +import logging import os import sqlite3 from datetime import datetime from typing import Any +_logger = logging.getLogger(__name__) + from utils.path_helpers import ( get_workspace_folder_paths, normalize_file_path, @@ -69,8 +72,12 @@ def assemble_workspace_tabs( first_folder = folders[0] if folders else None if first_folder: target_folder = normalize_file_path(first_folder) - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to read workspace.json for %s: %s", + workspace_id, + e, + ) if target_folder: for entry in workspace_entries: try: @@ -79,8 +86,12 @@ def assemble_workspace_tabs( f2 = folders2[0] if folders2 else None if f2 and normalize_file_path(f2) == target_folder: matching_ws_ids.add(entry["name"]) - except Exception: - pass + except Exception as e: + _logger.warning( + "Failed to read workspace.json for %s: %s", + entry["name"], + e, + ) bubble_map: dict[str, dict] = {} code_block_diff_map: dict[str, list] = {} @@ -113,7 +124,11 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: # Drift logged so the operator can chase disappearing # bubbles instead of guessing. Bad row still skipped so the # tabs endpoint can't 500 on one malformed bubble. - print(f"Schema drift in bubble {bid}: {e}") + _logger.warning( + "Failed to parse Bubble from bubbleId:%s: %s", + bid, + e, + ) # Load codeBlockDiffs code_block_diff_map = load_code_block_diff_map(global_db) @@ -179,7 +194,11 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: # Drift skipped + logged so the two primary conversation # paths (list_workspaces + get_workspace_tabs) agree on what # counts as a valid composer. - print(f"Schema drift in composer {composer_id}: {e}") + _logger.warning( + "Failed to parse Composer from composerData:%s: %s", + composer_id, + e, + ) continue try: cd = composer.raw @@ -497,7 +516,11 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: response["tabs"].append(tab) except Exception as e: - print(f"Error parsing composer data for {composer_id}: {e}") + _logger.warning( + "Failed to process Composer from composerData:%s: %s", + composer_id, + e, + ) # Sort tabs by timestamp descending (newest first) response["tabs"].sort(key=lambda t: t.get("timestamp") or 0, reverse=True) diff --git a/tests/test_parse_failure_logging.py b/tests/test_parse_failure_logging.py new file mode 100644 index 0000000..2299178 --- /dev/null +++ b/tests/test_parse_failure_logging.py @@ -0,0 +1,137 @@ +"""pytest caplog tests for structured logging at model parse sites (issue #66).""" + +from __future__ import annotations + +import json +import logging +import os +import sqlite3 +import sys +import tempfile + +import pytest + +REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if REPO_ROOT not in sys.path: + sys.path.insert(0, REPO_ROOT) + +from services.workspace_listing import list_workspace_projects +from services.workspace_tabs import assemble_workspace_tabs + + +def _seed_listing_with_drifted_composer(parent: str) -> str: + ws_root = os.path.join(parent, "workspaceStorage") + global_root = os.path.join(parent, "globalStorage") + os.makedirs(ws_root, exist_ok=True) + os.makedirs(global_root, exist_ok=True) + + ws_dir = os.path.join(ws_root, "ws-a") + os.makedirs(ws_dir, exist_ok=True) + target_folder = os.path.join(parent, "real-project") + os.makedirs(target_folder, exist_ok=True) + with open(os.path.join(ws_dir, "workspace.json"), "w", encoding="utf-8") as f: + json.dump({"folder": f"file://{target_folder}"}, f) + sqlite3.connect(os.path.join(ws_dir, "state.vscdb")).close() + + conn = sqlite3.connect(os.path.join(global_root, "state.vscdb")) + conn.execute("CREATE TABLE cursorDiskKV ([key] TEXT PRIMARY KEY, value TEXT)") + # Missing createdAt — Composer.from_dict raises SchemaError. + conn.execute( + "INSERT INTO cursorDiskKV VALUES (?, ?)", + ( + "composerData:cmp-drift", + json.dumps({ + "name": "Drifted composer", + "fullConversationHeadersOnly": [{"bubbleId": "b-1"}], + }), + ), + ) + conn.execute( + "INSERT INTO cursorDiskKV VALUES (?, ?)", + ("bubbleId:cmp-drift:b-1", json.dumps({"type": "user", "text": "hello"})), + ) + conn.commit() + conn.close() + return ws_root + + +def _seed_tabs_with_drifted_bubble(parent: str) -> str: + ws_root = os.path.join(parent, "workspaceStorage") + global_root = os.path.join(parent, "globalStorage") + os.makedirs(ws_root, exist_ok=True) + os.makedirs(global_root, exist_ok=True) + + ws_dir = os.path.join(ws_root, "ws-a") + os.makedirs(ws_dir, exist_ok=True) + with open(os.path.join(ws_dir, "workspace.json"), "w", encoding="utf-8") as f: + json.dump({"folder": "/tmp/proj"}, f) + sqlite3.connect(os.path.join(ws_dir, "state.vscdb")).close() + + conn = sqlite3.connect(os.path.join(global_root, "state.vscdb")) + conn.execute("CREATE TABLE cursorDiskKV ([key] TEXT PRIMARY KEY, value TEXT)") + conn.execute( + "INSERT INTO cursorDiskKV VALUES (?, ?)", + ( + "composerData:cmp-ok", + json.dumps({ + "name": "Good tab", + "createdAt": 1_715_000_000_000, + "lastUpdatedAt": 1_715_000_500_000, + "fullConversationHeadersOnly": [ + {"bubbleId": "b-bad", "type": 1}, + {"bubbleId": "b-good", "type": 1}, + ], + }), + ), + ) + # Non-dict bubble value trips Bubble.from_dict schema gate. + conn.execute( + "INSERT INTO cursorDiskKV VALUES (?, ?)", + ("bubbleId:cmp-ok:b-bad", json.dumps("not-a-dict")), + ) + conn.execute( + "INSERT INTO cursorDiskKV VALUES (?, ?)", + ("bubbleId:cmp-ok:b-good", json.dumps({"text": "hello"})), + ) + conn.commit() + conn.close() + return ws_root + + +@pytest.fixture +def caplog_at_warning(caplog: pytest.LogCaptureFixture) -> pytest.LogCaptureFixture: + caplog.set_level(logging.WARNING) + return caplog + + +def test_listing_logs_composer_schema_drift(caplog_at_warning: pytest.LogCaptureFixture) -> None: + with tempfile.TemporaryDirectory() as tmp: + ws_root = _seed_listing_with_drifted_composer(tmp) + with caplog_at_warning.at_level(logging.WARNING, logger="services.workspace_listing"): + list_workspace_projects(ws_root, rules=[]) + + messages = [r.getMessage() for r in caplog_at_warning.records] + assert any("Composer" in m and "cmp-drift" in m for m in messages), ( + f"expected Composer parse warning for cmp-drift, got: {messages}" + ) + + +def test_workspace_tabs_logs_bubble_schema_drift(caplog_at_warning: pytest.LogCaptureFixture) -> None: + from flask import Flask + + app = Flask(__name__) + app.config["TESTING"] = True + app.config["EXCLUSION_RULES"] = [] + + with tempfile.TemporaryDirectory() as tmp: + ws_root = _seed_tabs_with_drifted_bubble(tmp) + with caplog_at_warning.at_level(logging.WARNING, logger="services.workspace_tabs"): + with app.test_request_context("/api/workspaces/global/tabs"): + payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) + + assert status == 200 + assert "cmp-ok" in [t["id"] for t in payload.get("tabs", [])] + messages = [r.getMessage() for r in caplog_at_warning.records] + assert any("Bubble" in m and "b-bad" in m for m in messages), ( + f"expected Bubble parse warning for b-bad, got: {messages}" + ) From d55881f8c5ef89aa5e1a15acac6abb8f57eaf79f Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Mon, 25 May 2026 10:56:08 -0400 Subject: [PATCH 2/6] chore: refresh requirements-lock.txt for click 8.4.1 Linux CI pip-compile now resolves click==8.4.1 within the flask bound; the lock still pinned 8.4.0, which failed the lockfile freshness job. --- requirements-lock.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-lock.txt b/requirements-lock.txt index e1c0759..4a65662 100644 --- a/requirements-lock.txt +++ b/requirements-lock.txt @@ -6,7 +6,7 @@ # Lock is generated on Linux (CI / update-lock.yml). Windows-only transitives (e.g. # colorama via click) are omitted — pip still installs them on Windows when needed. blinker==1.9.0 # via flask -click==8.4.0 # via flask +click==8.4.1 # via flask defusedxml==0.7.1 # via fpdf2 flask==3.1.3 # via -r requirements.txt fonttools==4.63.0 # via fpdf2 From 4e7a23be99ff376635281b23da229cdac2c5e2c3 Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Mon, 25 May 2026 11:14:57 -0400 Subject: [PATCH 3/6] fix: code rabbitai comments --- services/workspace_resolver.py | 7 +++ services/workspace_tabs.py | 22 +++++++-- tests/test_invalid_workspace_aliases.py | 30 ++++++++++++ tests/test_parse_failure_logging.py | 63 +++++++++++++++++++++++++ 4 files changed, 118 insertions(+), 4 deletions(-) diff --git a/services/workspace_resolver.py b/services/workspace_resolver.py index 57d1930..50af03d 100644 --- a/services/workspace_resolver.py +++ b/services/workspace_resolver.py @@ -342,6 +342,13 @@ def _infer_invalid_workspace_aliases( e, ) continue + if not isinstance(cd, dict): + _logger.warning( + "Failed to parse Composer from composerData:%s: expected object, got %s", + cid, + type(cd).__name__, + ) + continue inferred = _determine_project_for_conversation( cd, cid, diff --git a/services/workspace_tabs.py b/services/workspace_tabs.py index cbd396d..53fbba7 100644 --- a/services/workspace_tabs.py +++ b/services/workspace_tabs.py @@ -114,8 +114,15 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: parts = row["key"].split(":") if len(parts) >= 3: bid = parts[2] - parsed = _try_loads_kv_value(row["value"]) - if parsed is None: + try: + parsed = json.loads(row["value"]) + except json.JSONDecodeError as e: + _logger.warning( + "Failed to decode Bubble from %s: %s (value: %r)", + row["key"], + e, + row["value"], + ) continue try: bubble_obj = Bubble.from_dict(parsed, bubble_id=bid) @@ -185,8 +192,15 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: for row in composer_rows: composer_id = row["key"].split(":")[1] - parsed = _try_loads_kv_value(row["value"]) - if parsed is None: + try: + parsed = json.loads(row["value"]) + except json.JSONDecodeError as e: + _logger.warning( + "Failed to decode Composer from composerData:%s: %s (value: %r)", + composer_id, + e, + row["value"], + ) continue try: composer = Composer.from_dict(parsed, composer_id=composer_id) diff --git a/tests/test_invalid_workspace_aliases.py b/tests/test_invalid_workspace_aliases.py index ae9ee81..1daa30a 100644 --- a/tests/test_invalid_workspace_aliases.py +++ b/tests/test_invalid_workspace_aliases.py @@ -87,6 +87,36 @@ def test_drifted_composer_does_not_skew_vote(self): # cid-3 is dropped (drift), so boost-ws wins 2-0 (not 2-1) self.assertEqual(aliases.get("invalid-ws"), "boost-ws") + def test_non_dict_composer_json_skipped_without_crash(self) -> None: + composer_rows = [ + {"key": "composerData:cid-1", "value": json.dumps({"createdAt": 1_715_000_000_000, "fullConversationHeadersOnly": []})}, + {"key": "composerData:cid-2", "value": json.dumps({"createdAt": 1_715_000_000_000, "fullConversationHeadersOnly": []})}, + {"key": "composerData:cid-bad", "value": json.dumps("not-a-dict")}, + ] + composer_id_to_ws = {"cid-1": "invalid-ws", "cid-2": "invalid-ws", "cid-bad": "invalid-ws"} + project_layouts_map = { + "cid-1": [normalize_file_path(r"d:\_Cpp_Digest\boostbacklog")], + "cid-2": [normalize_file_path(r"d:\_Cpp_Digest\boostbacklog")], + "cid-bad": [normalize_file_path(r"d:\_Cpp_Digest\team-brain")], + } + workspace_path_map = { + normalize_file_path(r"d:\_cpp_digest\boostbacklog"): "boost-ws", + normalize_file_path(r"d:\_cpp_digest\team-brain"): "team-ws", + } + + aliases = _infer_invalid_workspace_aliases( + composer_rows=composer_rows, + project_layouts_map=project_layouts_map, + project_name_map={}, + workspace_path_map=workspace_path_map, + workspace_entries=[], + bubble_map={}, + composer_id_to_ws=composer_id_to_ws, + invalid_workspace_ids={"invalid-ws"}, + ) + + self.assertEqual(aliases.get("invalid-ws"), "boost-ws") + if __name__ == "__main__": unittest.main() diff --git a/tests/test_parse_failure_logging.py b/tests/test_parse_failure_logging.py index 2299178..1563a9c 100644 --- a/tests/test_parse_failure_logging.py +++ b/tests/test_parse_failure_logging.py @@ -8,6 +8,7 @@ import sqlite3 import sys import tempfile +from contextlib import closing import pytest @@ -116,6 +117,68 @@ def test_listing_logs_composer_schema_drift(caplog_at_warning: pytest.LogCapture ) +def test_workspace_tabs_logs_bubble_json_decode_failure( + caplog_at_warning: pytest.LogCaptureFixture, +) -> None: + from flask import Flask + + app = Flask(__name__) + app.config["TESTING"] = True + app.config["EXCLUSION_RULES"] = [] + + with tempfile.TemporaryDirectory() as tmp: + ws_root = _seed_tabs_with_drifted_bubble(tmp) + global_db = os.path.join(tmp, "globalStorage", "state.vscdb") + with closing(sqlite3.connect(global_db)) as conn: + conn.execute( + "INSERT INTO cursorDiskKV ([key], value) VALUES (?, ?)", + ("bubbleId:cmp-ok:b-json", "{not valid json"), + ) + conn.commit() + with caplog_at_warning.at_level(logging.WARNING, logger="services.workspace_tabs"): + with app.test_request_context("/api/workspaces/global/tabs"): + payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) + + assert status == 200 + messages = [r.getMessage() for r in caplog_at_warning.records] + assert any("decode Bubble" in m and "b-json" in m for m in messages), ( + f"expected JSON decode warning for b-json, got: {messages}" + ) + + +def test_workspace_tabs_logs_composer_json_decode_failure( + caplog_at_warning: pytest.LogCaptureFixture, +) -> None: + from flask import Flask + + app = Flask(__name__) + app.config["TESTING"] = True + app.config["EXCLUSION_RULES"] = [] + + with tempfile.TemporaryDirectory() as tmp: + ws_root = _seed_tabs_with_drifted_bubble(tmp) + global_db = os.path.join(tmp, "globalStorage", "state.vscdb") + # Value must match composer_rows LIKE '%fullConversationHeadersOnly%' to reach parse. + bad_composer_value = ( + '{"fullConversationHeadersOnly": [{"bubbleId": "b1"}], "createdAt":' + ) + with closing(sqlite3.connect(global_db)) as conn: + conn.execute( + "INSERT INTO cursorDiskKV ([key], value) VALUES (?, ?)", + ("composerData:cmp-json", bad_composer_value), + ) + conn.commit() + with caplog_at_warning.at_level(logging.WARNING, logger="services.workspace_tabs"): + with app.test_request_context("/api/workspaces/global/tabs"): + payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) + + assert status == 200 + messages = [r.getMessage() for r in caplog_at_warning.records] + assert any("decode Composer" in m and "cmp-json" in m for m in messages), ( + f"expected JSON decode warning for cmp-json, got: {messages}" + ) + + def test_workspace_tabs_logs_bubble_schema_drift(caplog_at_warning: pytest.LogCaptureFixture) -> None: from flask import Flask From d23fe7188363aa8e0609bf0f5b431062f969efaf Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Mon, 25 May 2026 11:26:32 -0400 Subject: [PATCH 4/6] fix: test failure with bubble none and pytest missing --- .github/workflows/tests.yml | 2 +- services/workspace_tabs.py | 4 + tests/test_parse_failure_logging.py | 181 +++++++++++------------ tests/test_workspace_tabs_null_bubble.py | 4 +- 4 files changed, 93 insertions(+), 98 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 29e917f..b5cf093 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -116,7 +116,7 @@ jobs: # new endpoint file because `pytest tests/` would also re-collect the # 178 unittest.TestCase subclasses already run in the step above — # ~2× the CI minutes for zero extra signal. - run: python -m pytest tests/test_api_endpoints.py tests/test_parse_failure_logging.py -v --tb=short + run: python -m pytest tests/test_api_endpoints.py -v --tb=short # ── PyInstaller desktop build (Windows only, once per workflow) ──────── # Closes #44. Builds the onedir bundle and smoke-tests --help so the diff --git a/services/workspace_tabs.py b/services/workspace_tabs.py index 53fbba7..08f694c 100644 --- a/services/workspace_tabs.py +++ b/services/workspace_tabs.py @@ -114,6 +114,8 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: parts = row["key"].split(":") if len(parts) >= 3: bid = parts[2] + if row["value"] is None: + continue try: parsed = json.loads(row["value"]) except json.JSONDecodeError as e: @@ -192,6 +194,8 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: for row in composer_rows: composer_id = row["key"].split(":")[1] + if row["value"] is None: + continue try: parsed = json.loads(row["value"]) except json.JSONDecodeError as e: diff --git a/tests/test_parse_failure_logging.py b/tests/test_parse_failure_logging.py index 1563a9c..941835e 100644 --- a/tests/test_parse_failure_logging.py +++ b/tests/test_parse_failure_logging.py @@ -1,17 +1,15 @@ -"""pytest caplog tests for structured logging at model parse sites (issue #66).""" +"""Tests for structured logging at model parse sites (issue #66).""" from __future__ import annotations import json -import logging import os import sqlite3 import sys import tempfile +import unittest from contextlib import closing -import pytest - REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) if REPO_ROOT not in sys.path: sys.path.insert(0, REPO_ROOT) @@ -36,7 +34,6 @@ def _seed_listing_with_drifted_composer(parent: str) -> str: conn = sqlite3.connect(os.path.join(global_root, "state.vscdb")) conn.execute("CREATE TABLE cursorDiskKV ([key] TEXT PRIMARY KEY, value TEXT)") - # Missing createdAt — Composer.from_dict raises SchemaError. conn.execute( "INSERT INTO cursorDiskKV VALUES (?, ?)", ( @@ -85,7 +82,6 @@ def _seed_tabs_with_drifted_bubble(parent: str) -> str: }), ), ) - # Non-dict bubble value trips Bubble.from_dict schema gate. conn.execute( "INSERT INTO cursorDiskKV VALUES (?, ?)", ("bubbleId:cmp-ok:b-bad", json.dumps("not-a-dict")), @@ -99,102 +95,97 @@ def _seed_tabs_with_drifted_bubble(parent: str) -> str: return ws_root -@pytest.fixture -def caplog_at_warning(caplog: pytest.LogCaptureFixture) -> pytest.LogCaptureFixture: - caplog.set_level(logging.WARNING) - return caplog - - -def test_listing_logs_composer_schema_drift(caplog_at_warning: pytest.LogCaptureFixture) -> None: - with tempfile.TemporaryDirectory() as tmp: - ws_root = _seed_listing_with_drifted_composer(tmp) - with caplog_at_warning.at_level(logging.WARNING, logger="services.workspace_listing"): - list_workspace_projects(ws_root, rules=[]) +class TestParseFailureLogging(unittest.TestCase): + def test_listing_logs_composer_schema_drift(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + ws_root = _seed_listing_with_drifted_composer(tmp) + with self.assertLogs("services.workspace_listing", level="WARNING") as cm: + list_workspace_projects(ws_root, rules=[]) - messages = [r.getMessage() for r in caplog_at_warning.records] - assert any("Composer" in m and "cmp-drift" in m for m in messages), ( - f"expected Composer parse warning for cmp-drift, got: {messages}" - ) + messages = [r.getMessage() for r in cm.records] + self.assertTrue( + any("Composer" in m and "cmp-drift" in m for m in messages), + f"expected Composer parse warning for cmp-drift, got: {messages}", + ) + def test_workspace_tabs_logs_bubble_json_decode_failure(self) -> None: + from flask import Flask + + app = Flask(__name__) + app.config["TESTING"] = True + app.config["EXCLUSION_RULES"] = [] + + with tempfile.TemporaryDirectory() as tmp: + ws_root = _seed_tabs_with_drifted_bubble(tmp) + global_db = os.path.join(tmp, "globalStorage", "state.vscdb") + with closing(sqlite3.connect(global_db)) as conn: + conn.execute( + "INSERT INTO cursorDiskKV ([key], value) VALUES (?, ?)", + ("bubbleId:cmp-ok:b-json", "{not valid json"), + ) + conn.commit() + with self.assertLogs("services.workspace_tabs", level="WARNING") as cm: + with app.test_request_context("/api/workspaces/global/tabs"): + payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) + + self.assertEqual(status, 200) + messages = [r.getMessage() for r in cm.records] + self.assertTrue( + any("decode Bubble" in m and "b-json" in m for m in messages), + f"expected JSON decode warning for b-json, got: {messages}", + ) -def test_workspace_tabs_logs_bubble_json_decode_failure( - caplog_at_warning: pytest.LogCaptureFixture, -) -> None: - from flask import Flask + def test_workspace_tabs_logs_composer_json_decode_failure(self) -> None: + from flask import Flask - app = Flask(__name__) - app.config["TESTING"] = True - app.config["EXCLUSION_RULES"] = [] + app = Flask(__name__) + app.config["TESTING"] = True + app.config["EXCLUSION_RULES"] = [] - with tempfile.TemporaryDirectory() as tmp: - ws_root = _seed_tabs_with_drifted_bubble(tmp) - global_db = os.path.join(tmp, "globalStorage", "state.vscdb") - with closing(sqlite3.connect(global_db)) as conn: - conn.execute( - "INSERT INTO cursorDiskKV ([key], value) VALUES (?, ?)", - ("bubbleId:cmp-ok:b-json", "{not valid json"), + with tempfile.TemporaryDirectory() as tmp: + ws_root = _seed_tabs_with_drifted_bubble(tmp) + global_db = os.path.join(tmp, "globalStorage", "state.vscdb") + bad_composer_value = ( + '{"fullConversationHeadersOnly": [{"bubbleId": "b1"}], "createdAt":' ) - conn.commit() - with caplog_at_warning.at_level(logging.WARNING, logger="services.workspace_tabs"): - with app.test_request_context("/api/workspaces/global/tabs"): - payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) - - assert status == 200 - messages = [r.getMessage() for r in caplog_at_warning.records] - assert any("decode Bubble" in m and "b-json" in m for m in messages), ( - f"expected JSON decode warning for b-json, got: {messages}" - ) - - -def test_workspace_tabs_logs_composer_json_decode_failure( - caplog_at_warning: pytest.LogCaptureFixture, -) -> None: - from flask import Flask - - app = Flask(__name__) - app.config["TESTING"] = True - app.config["EXCLUSION_RULES"] = [] - - with tempfile.TemporaryDirectory() as tmp: - ws_root = _seed_tabs_with_drifted_bubble(tmp) - global_db = os.path.join(tmp, "globalStorage", "state.vscdb") - # Value must match composer_rows LIKE '%fullConversationHeadersOnly%' to reach parse. - bad_composer_value = ( - '{"fullConversationHeadersOnly": [{"bubbleId": "b1"}], "createdAt":' + with closing(sqlite3.connect(global_db)) as conn: + conn.execute( + "INSERT INTO cursorDiskKV ([key], value) VALUES (?, ?)", + ("composerData:cmp-json", bad_composer_value), + ) + conn.commit() + with self.assertLogs("services.workspace_tabs", level="WARNING") as cm: + with app.test_request_context("/api/workspaces/global/tabs"): + payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) + + self.assertEqual(status, 200) + messages = [r.getMessage() for r in cm.records] + self.assertTrue( + any("decode Composer" in m and "cmp-json" in m for m in messages), + f"expected JSON decode warning for cmp-json, got: {messages}", ) - with closing(sqlite3.connect(global_db)) as conn: - conn.execute( - "INSERT INTO cursorDiskKV ([key], value) VALUES (?, ?)", - ("composerData:cmp-json", bad_composer_value), - ) - conn.commit() - with caplog_at_warning.at_level(logging.WARNING, logger="services.workspace_tabs"): - with app.test_request_context("/api/workspaces/global/tabs"): - payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) - - assert status == 200 - messages = [r.getMessage() for r in caplog_at_warning.records] - assert any("decode Composer" in m and "cmp-json" in m for m in messages), ( - f"expected JSON decode warning for cmp-json, got: {messages}" - ) + def test_workspace_tabs_logs_bubble_schema_drift(self) -> None: + from flask import Flask + + app = Flask(__name__) + app.config["TESTING"] = True + app.config["EXCLUSION_RULES"] = [] + + with tempfile.TemporaryDirectory() as tmp: + ws_root = _seed_tabs_with_drifted_bubble(tmp) + with self.assertLogs("services.workspace_tabs", level="WARNING") as cm: + with app.test_request_context("/api/workspaces/global/tabs"): + payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) + + self.assertEqual(status, 200) + self.assertIn("cmp-ok", [t["id"] for t in payload.get("tabs", [])]) + messages = [r.getMessage() for r in cm.records] + self.assertTrue( + any("Bubble" in m and "b-bad" in m for m in messages), + f"expected Bubble parse warning for b-bad, got: {messages}", + ) -def test_workspace_tabs_logs_bubble_schema_drift(caplog_at_warning: pytest.LogCaptureFixture) -> None: - from flask import Flask - - app = Flask(__name__) - app.config["TESTING"] = True - app.config["EXCLUSION_RULES"] = [] - - with tempfile.TemporaryDirectory() as tmp: - ws_root = _seed_tabs_with_drifted_bubble(tmp) - with caplog_at_warning.at_level(logging.WARNING, logger="services.workspace_tabs"): - with app.test_request_context("/api/workspaces/global/tabs"): - payload, status = assemble_workspace_tabs("global", ws_root, rules=[]) - assert status == 200 - assert "cmp-ok" in [t["id"] for t in payload.get("tabs", [])] - messages = [r.getMessage() for r in caplog_at_warning.records] - assert any("Bubble" in m and "b-bad" in m for m in messages), ( - f"expected Bubble parse warning for b-bad, got: {messages}" - ) +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_workspace_tabs_null_bubble.py b/tests/test_workspace_tabs_null_bubble.py index 3ce69ce..b5aa840 100644 --- a/tests/test_workspace_tabs_null_bubble.py +++ b/tests/test_workspace_tabs_null_bubble.py @@ -2,8 +2,8 @@ A cursorDiskKV row with a NULL value column previously caused json.loads(None) -> TypeError, which propagated as a 500 response. -The fix uses ``_try_loads_kv_value`` in ``services/workspace_tabs.py`` so -NULL / unparseable cursorDiskKV values are skipped without raising. +Bubble rows with NULL or invalid JSON values are skipped in +``services/workspace_tabs.py`` without raising. """ import json From 2cc885f34eddcca3f72ddd9050ac3f4c4ea99670 Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Mon, 25 May 2026 11:47:06 -0400 Subject: [PATCH 5/6] fix: Log NULL KV rows instead of silently skipping them. --- services/workspace_tabs.py | 8 ++++++++ tests/test_workspace_tabs_null_bubble.py | 16 +++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/services/workspace_tabs.py b/services/workspace_tabs.py index 08f694c..80037ce 100644 --- a/services/workspace_tabs.py +++ b/services/workspace_tabs.py @@ -115,6 +115,10 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: if len(parts) >= 3: bid = parts[2] if row["value"] is None: + _logger.warning( + "Skipping Bubble cursorDiskKV row with NULL value: key=%r", + row["key"], + ) continue try: parsed = json.loads(row["value"]) @@ -195,6 +199,10 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: for row in composer_rows: composer_id = row["key"].split(":")[1] if row["value"] is None: + _logger.warning( + "Skipping Composer cursorDiskKV row with NULL value: key=%r", + row["key"], + ) continue try: parsed = json.loads(row["value"]) diff --git a/tests/test_workspace_tabs_null_bubble.py b/tests/test_workspace_tabs_null_bubble.py index b5aa840..3d83eea 100644 --- a/tests/test_workspace_tabs_null_bubble.py +++ b/tests/test_workspace_tabs_null_bubble.py @@ -72,15 +72,21 @@ def tearDown(self): def test_null_bubble_row_is_skipped_without_exception(self): """assemble_workspace_tabs must not raise when a bubble row has NULL value.""" try: - _payload, status = assemble_workspace_tabs( - workspace_id="global", - workspace_path=self.workspace_path, - rules=[], - ) + with self.assertLogs("services.workspace_tabs", level="WARNING") as cm: + _payload, status = assemble_workspace_tabs( + workspace_id="global", + workspace_path=self.workspace_path, + rules=[], + ) except TypeError as exc: self.fail(f"NULL bubble row raised TypeError: {exc}") self.assertEqual(status, 200, "NULL bubble row must not turn tabs load into an error response") + messages = [r.getMessage() for r in cm.records] + self.assertTrue( + any("NULL value" in m and "bubble-null" in m for m in messages), + f"expected NULL-value warning for bubble-null row, got: {messages}", + ) def test_healthy_bubbles_still_load_when_null_row_present(self): """The healthy bubble surfaces in a tab even when a NULL row is present.""" From af2fd94e87f1a28c379dc62ddde1dac85eeb8b6e Mon Sep 17 00:00:00 2001 From: bradjin8 Date: Mon, 25 May 2026 14:35:48 -0400 Subject: [PATCH 6/6] fix: resolve merge conflicts; apply review feedback on logging --- api/logs.py | 8 ++----- api/search.py | 14 +++--------- api/workspaces.py | 12 +++++------ services/workspace_listing.py | 11 ++++------ services/workspace_resolver.py | 25 +++++----------------- services/workspace_tabs.py | 39 ++++++++++++++++++++-------------- utils/path_helpers.py | 14 ++++++++++++ 7 files changed, 57 insertions(+), 66 deletions(-) diff --git a/api/logs.py b/api/logs.py index 6c15be3..f5607ea 100644 --- a/api/logs.py +++ b/api/logs.py @@ -14,7 +14,7 @@ from flask import Blueprint, jsonify from utils.workspace_path import resolve_workspace_path -from utils.path_helpers import to_epoch_ms +from utils.path_helpers import to_epoch_ms, warn_workspace_json_read bp = Blueprint("logs", __name__) _logger = logging.getLogger(__name__) @@ -95,11 +95,7 @@ def get_logs(): wd = json.load(f) workspace_folder = wd.get("folder") except Exception as e: - _logger.warning( - "Failed to read workspace.json for %s: %s", - name, - e, - ) + warn_workspace_json_read(_logger, name, e) try: # closing() guarantees .close() on scope exit (issue #17). diff --git a/api/search.py b/api/search.py index bc4e51a..35e511c 100644 --- a/api/search.py +++ b/api/search.py @@ -16,7 +16,7 @@ from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules from utils.workspace_path import resolve_workspace_path, get_cli_chats_path -from utils.path_helpers import to_epoch_ms +from utils.path_helpers import to_epoch_ms, warn_workspace_json_read from utils.text_extract import extract_text_from_bubble from utils.cli_chat_reader import list_cli_projects, traverse_blobs, messages_to_bubbles from models import Bubble, Composer, SchemaError @@ -115,11 +115,7 @@ def search(): if fn: ws_id_to_name[name] = _url_unquote(fn) except Exception as e: - _logger.warning( - "Failed to read workspace.json for %s: %s", - name, - e, - ) + warn_workspace_json_read(_logger, name, e) except Exception as e: _logger.warning( "Failed to list workspace entries under %s: %s", @@ -305,11 +301,7 @@ def search(): wd = json.load(f) workspace_folder = wd.get("folder") except Exception as e: - _logger.warning( - "Failed to read workspace.json for %s: %s", - name, - e, - ) + warn_workspace_json_read(_logger, name, e) workspace_name = _workspace_display_name_from_folder(workspace_folder, fallback=name) # try/finally guarantees .close() on every exit path (issue #17). diff --git a/api/workspaces.py b/api/workspaces.py index 091d03a..2efc2fd 100644 --- a/api/workspaces.py +++ b/api/workspaces.py @@ -15,7 +15,11 @@ from utils.workspace_path import resolve_workspace_path, get_cli_chats_path from utils.cli_chat_reader import list_cli_projects -from utils.path_helpers import get_workspace_folder_paths, get_workspace_display_name +from utils.path_helpers import ( + get_workspace_folder_paths, + get_workspace_display_name, + warn_workspace_json_read, +) from utils.workspace_descriptor import read_json_file from services.workspace_resolver import ( _infer_workspace_name_from_context, @@ -118,11 +122,7 @@ def get_workspace(workspace_id): if inferred: workspace_name = inferred except Exception as e: - _logger.warning( - "Failed to read workspace.json for %s: %s", - workspace_id, - e, - ) + warn_workspace_json_read(_logger, workspace_id, e) inferred = _infer_workspace_name_from_context(workspace_path, workspace_id) if inferred: workspace_name = inferred diff --git a/services/workspace_listing.py b/services/workspace_listing.py index 66d19d6..dafb9e0 100644 --- a/services/workspace_listing.py +++ b/services/workspace_listing.py @@ -14,6 +14,7 @@ get_workspace_folder_paths, normalize_file_path, to_epoch_ms, + warn_workspace_json_read, ) from utils.workspace_descriptor import read_json_file from utils.workspace_path import get_cli_chats_path @@ -133,8 +134,8 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: cid, e, ) - except Exception as e: - _logger.error("Failed to load composer rows from global storage: %s", e) + except Exception: + _logger.exception("Failed to load composer rows from global storage") # Group workspace entries by normalized folder path folder_to_entries: dict[str, list] = {} @@ -148,11 +149,7 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: if first_folder: norm_folder = normalize_file_path(first_folder) except Exception as e: - _logger.warning( - "Failed to read workspace.json for %s: %s", - entry["name"], - e, - ) + warn_workspace_json_read(_logger, entry["name"], e) if not norm_folder: norm_folder = entry["name"] # fallback to workspace ID entry_folder_map[entry["name"]] = norm_folder diff --git a/services/workspace_resolver.py b/services/workspace_resolver.py index 50af03d..cf34be1 100644 --- a/services/workspace_resolver.py +++ b/services/workspace_resolver.py @@ -15,6 +15,7 @@ get_workspace_display_name, get_workspace_folder_paths, normalize_file_path, + warn_workspace_json_read, ) from utils.workspace_descriptor import basename_from_pathish, read_json_file from services.workspace_db import _open_global_db @@ -139,11 +140,7 @@ def _get_project_from_file_path( best_len = len(wp) best_match = entry["name"] except Exception as e: - _logger.warning( - "Failed to read workspace.json for %s: %s", - entry["name"], - e, - ) + warn_workspace_json_read(_logger, entry["name"], e) return best_match @@ -159,11 +156,7 @@ def _create_project_name_to_workspace_id_map(workspace_entries): if folder_name: mapping[folder_name] = entry["name"] except Exception as e: - _logger.warning( - "Failed to read workspace.json for %s: %s", - entry["name"], - e, - ) + warn_workspace_json_read(_logger, entry["name"], e) return mapping @@ -176,11 +169,7 @@ def _create_workspace_path_to_id_map(workspace_entries): normalized = normalize_file_path(folder) out[normalized] = entry["name"] except Exception as e: - _logger.warning( - "Failed to read workspace.json for %s: %s", - entry["name"], - e, - ) + warn_workspace_json_read(_logger, entry["name"], e) return out @@ -294,11 +283,7 @@ def _determine_project_for_conversation( if name: folder_name_to_ws.append({"name": name, "id": entry["name"]}) except Exception as e: - _logger.warning( - "Failed to read workspace.json for %s: %s", - entry["name"], - e, - ) + warn_workspace_json_read(_logger, entry["name"], e) best_id = None best_len = 0 diff --git a/services/workspace_tabs.py b/services/workspace_tabs.py index 80037ce..36b7143 100644 --- a/services/workspace_tabs.py +++ b/services/workspace_tabs.py @@ -13,6 +13,7 @@ get_workspace_folder_paths, normalize_file_path, to_epoch_ms, + warn_workspace_json_read, ) from utils.exclusion_rules import build_searchable_text, is_excluded_by_rules from utils.text_extract import extract_text_from_bubble @@ -46,6 +47,19 @@ def _try_loads_kv_value(raw: str | None) -> Any | None: return None +_KV_VALUE_LOG_LIMIT = 200 + + +def _kv_value_log_preview(value: object | None, limit: int = _KV_VALUE_LOG_LIMIT) -> str: + """Truncated KV payload for warning logs (avoids multi-MB log lines on bad rows).""" + if value is None: + return "None" + text = value if isinstance(value, str) else str(value) + if len(text) > limit: + return text[:limit] + "..." + return text + + def assemble_workspace_tabs( workspace_id: str, workspace_path: str, @@ -73,11 +87,7 @@ def assemble_workspace_tabs( if first_folder: target_folder = normalize_file_path(first_folder) except Exception as e: - _logger.warning( - "Failed to read workspace.json for %s: %s", - workspace_id, - e, - ) + warn_workspace_json_read(_logger, workspace_id, e) if target_folder: for entry in workspace_entries: try: @@ -87,11 +97,7 @@ def assemble_workspace_tabs( if f2 and normalize_file_path(f2) == target_folder: matching_ws_ids.add(entry["name"]) except Exception as e: - _logger.warning( - "Failed to read workspace.json for %s: %s", - entry["name"], - e, - ) + warn_workspace_json_read(_logger, entry["name"], e) bubble_map: dict[str, dict] = {} code_block_diff_map: dict[str, list] = {} @@ -122,12 +128,12 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: continue try: parsed = json.loads(row["value"]) - except json.JSONDecodeError as e: + except (json.JSONDecodeError, TypeError, ValueError) as e: _logger.warning( - "Failed to decode Bubble from %s: %s (value: %r)", + "Failed to decode Bubble from %s: %s (value_preview=%r)", row["key"], e, - row["value"], + _kv_value_log_preview(row["value"]), ) continue try: @@ -206,12 +212,13 @@ def _safe_fetchall(query: str, params: tuple = ()) -> list: continue try: parsed = json.loads(row["value"]) - except json.JSONDecodeError as e: + except (json.JSONDecodeError, TypeError, ValueError) as e: _logger.warning( - "Failed to decode Composer from composerData:%s: %s (value: %r)", + "Failed to decode Composer from composerData:%s: %s (key=%s, value_preview=%r)", composer_id, e, - row["value"], + row["key"], + _kv_value_log_preview(row["value"]), ) continue try: diff --git a/utils/path_helpers.py b/utils/path_helpers.py index 3b4201d..55af44f 100644 --- a/utils/path_helpers.py +++ b/utils/path_helpers.py @@ -1,5 +1,6 @@ """Path utility functions mirroring src/utils/path.ts""" +import logging import os import sys from datetime import datetime @@ -142,3 +143,16 @@ def get_workspace_display_name(workspace_data: dict, fallback: str | None = None if decoded: return decoded return fallback or "" + + +def warn_workspace_json_read( + logger: logging.Logger, + workspace_id: str, + err: BaseException, +) -> None: + """Log a standard warning when workspace.json cannot be read (shared across services/api).""" + logger.warning( + "Failed to read workspace.json for %s: %s", + workspace_id, + err, + )