diff --git a/CHANGELOG.md b/CHANGELOG.md index d45fa42..89c5e78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,7 @@ Five-defect fix for the silently-broken session pipeline on default Compose depl - **E2E test — Anthropic API rejected every extraction request.** The structured-output API now requires `additionalProperties: false` on every `{"type": "object"}` node in the json_schema; without it the API returns 400 `invalid_request_error` ("output_config.format.schema: For 'object' type, 'additionalProperties' must be explicitly set to false"). Surfaced on a real BQ-backed deploy: every uploaded session jsonl failed verification-extraction in a tight retry loop. Fix: `connectors/llm/anthropic_provider.py` now wraps the caller-supplied schema through a recursive `_strict_json_schema()` walker that adds the field where missing (preserving any explicit operator override), then passes the strict variant to the API. Six unit tests in `tests/test_llm_connector.py::TestStrictJsonSchema` pin the recursion across nested objects, array items, and the no-mutation invariant. - **#179 review — `/api/admin/run-verification-detector` skipped audit on unhandled exceptions.** If `detector.run()` threw anything other than the already-translated `ValueError` (DuckDB lock, network blip, unexpected SDK error), the audit_log row was never written — the operator's only signal was `docker logs agnes-scheduler-1`. The endpoint now wraps `detector.run` in try/except, records the exception in `audit_params["unhandled_error"]`, then re-raises as 500 after audit. The `/admin/scheduler-runs` page surfaces the failure row with the error type and message. - **#179 review — `SCHEDULER_AUDIT_ACTIONS` listed action strings that don't actually appear in `audit_log`.** The list at `app/web/router.py:952` had `"marketplaces_sync_all"` (wrong — actual is `"marketplace.sync_all"`) plus `"data_refresh"` and `"scripts_run_due"` (which `app/api/sync.py` and `app/api/scripts.py` don't write). Corrected to the four actually-logged strings, with a comment pointing at the missing audit calls in sync/scripts as a follow-up. +- **#179 review — `/api/admin/run-corporate-memory` skipped audit on unhandled exceptions** (same gap as `run_verification_detector` from the previous round). Mirrored the same try/except + `unhandled_error` audit pattern, so a DuckDB lock or unexpected SDK error from `collect_all()` now produces an audit row with the error type+message before re-raising as 500. Regression test in `tests/test_admin_run_endpoints.py::TestRunCorporateMemory::test_unhandled_exception_still_audits`. ### Added diff --git a/app/api/admin.py b/app/api/admin.py index 31acaa2..8937ca5 100644 --- a/app/api/admin.py +++ b/app/api/admin.py @@ -2911,20 +2911,37 @@ def run_corporate_memory( # Fail-fast (#176): collect_all raises ValueError when no ai: block AND # no env keys are present. Surface the actionable factory message in a # 500 instead of letting it crash the request anonymously. + stats: dict = {} + job_error: Optional[Exception] = None try: stats = collect_all(dry_run=False) except ValueError as e: + # Already-translated misconfiguration → 500 with actionable message + # but no audit row (the request never reached the LLM stage). raise HTTPException(status_code=500, detail=str(e)) + except Exception as e: + # Mirror run_verification_detector (#179 review): capture any other + # unhandled error so audit_log + /admin/scheduler-runs reflect the + # failure. Re-raised below after audit. + job_error = e + + audit_params: dict = { + "items_new": stats.get("items_new", 0), + "items_filtered": stats.get("items_filtered", 0), + "errors": len(stats.get("errors", [])), + "skipped": stats.get("skipped", False), + } + if job_error is not None: + audit_params["unhandled_error"] = f"{type(job_error).__name__}: {job_error}" AuditRepository(conn).log( user_id=user.get("id"), action="run_corporate_memory", resource="job:corporate-memory", - params={ - "items_new": stats.get("items_new", 0), - "items_filtered": stats.get("items_filtered", 0), - "errors": len(stats.get("errors", [])), - "skipped": stats.get("skipped", False), - }, + params=audit_params, ) + + if job_error is not None: + raise HTTPException(status_code=500, detail=audit_params["unhandled_error"]) + return {"ok": not stats.get("errors"), "details": stats} diff --git a/tests/test_admin_run_endpoints.py b/tests/test_admin_run_endpoints.py index 1675df4..164a7ea 100644 --- a/tests/test_admin_run_endpoints.py +++ b/tests/test_admin_run_endpoints.py @@ -117,6 +117,36 @@ class TestRunCorporateMemory: resp = c.post("/api/admin/run-corporate-memory", headers=_auth(token)) assert resp.status_code == 403 + def test_unhandled_exception_still_audits(self, seeded_app): + """Devin Review on 4c4dfee8: run_corporate_memory must mirror + run_verification_detector — record the failure in audit_log even + when collect_all() raises something other than ValueError, so + the operator sees the failure on /admin/scheduler-runs instead + of only in docker logs.""" + from src.db import get_system_db + + c = seeded_app["client"] + token = seeded_app["admin_token"] + with patch( + "services.corporate_memory.collector.collect_all", + side_effect=RuntimeError("simulated DuckDB lock"), + ): + resp = c.post("/api/admin/run-corporate-memory", headers=_auth(token)) + assert resp.status_code == 500 + assert "RuntimeError" in resp.json()["detail"] + # The audit row must exist regardless of the 500. + conn = get_system_db() + try: + rows = conn.execute( + "SELECT params FROM audit_log WHERE action = 'run_corporate_memory' ORDER BY timestamp DESC LIMIT 1" + ).fetchall() + finally: + conn.close() + assert rows, "audit row missing on unhandled exception" + params_json = rows[0][0] + assert "unhandled_error" in params_json + assert "RuntimeError" in params_json + class TestSchedulerJobsWireUp: """The scheduler must drive all three new endpoints on a sensible cadence."""