fix(session-collector-api): mirror sibling endpoints' audit-on-exception (Devin Review on #179)
Devin flagged that run_session_collector still had the same audit-skip gap I fixed in run_verification_detector and run_corporate_memory in the previous two rounds — a PermissionError walking /home, an OSError on /data/user_sessions mkdir, or any other unhandled exception from collector.run() would skip the audit_log row and only show in docker logs. Same try/except + unhandled_error pattern as the sibling endpoints. All three LLM-pipeline run-* endpoints now record their failures the same way; /admin/scheduler-runs sees them. Regression test in tests/test_admin_run_endpoints.py::TestRunSessionCollector::test_unhandled_exception_still_audits.
This commit is contained in:
parent
9ebe991b55
commit
d878764ac1
3 changed files with 52 additions and 2 deletions
|
|
@ -48,6 +48,7 @@ Five-defect fix for the silently-broken session pipeline on default Compose depl
|
||||||
- **#179 review — `/api/admin/run-verification-detector` skipped audit on unhandled exceptions.** If `detector.run()` threw anything other than the already-translated `ValueError` (DuckDB lock, network blip, unexpected SDK error), the audit_log row was never written — the operator's only signal was `docker logs agnes-scheduler-1`. The endpoint now wraps `detector.run` in try/except, records the exception in `audit_params["unhandled_error"]`, then re-raises as 500 after audit. The `/admin/scheduler-runs` page surfaces the failure row with the error type and message.
|
- **#179 review — `/api/admin/run-verification-detector` skipped audit on unhandled exceptions.** If `detector.run()` threw anything other than the already-translated `ValueError` (DuckDB lock, network blip, unexpected SDK error), the audit_log row was never written — the operator's only signal was `docker logs agnes-scheduler-1`. The endpoint now wraps `detector.run` in try/except, records the exception in `audit_params["unhandled_error"]`, then re-raises as 500 after audit. The `/admin/scheduler-runs` page surfaces the failure row with the error type and message.
|
||||||
- **#179 review — `SCHEDULER_AUDIT_ACTIONS` listed action strings that don't actually appear in `audit_log`.** The list at `app/web/router.py:952` had `"marketplaces_sync_all"` (wrong — actual is `"marketplace.sync_all"`) plus `"data_refresh"` and `"scripts_run_due"` (which `app/api/sync.py` and `app/api/scripts.py` don't write). Corrected to the four actually-logged strings, with a comment pointing at the missing audit calls in sync/scripts as a follow-up.
|
- **#179 review — `SCHEDULER_AUDIT_ACTIONS` listed action strings that don't actually appear in `audit_log`.** The list at `app/web/router.py:952` had `"marketplaces_sync_all"` (wrong — actual is `"marketplace.sync_all"`) plus `"data_refresh"` and `"scripts_run_due"` (which `app/api/sync.py` and `app/api/scripts.py` don't write). Corrected to the four actually-logged strings, with a comment pointing at the missing audit calls in sync/scripts as a follow-up.
|
||||||
- **#179 review — `/api/admin/run-corporate-memory` skipped audit on unhandled exceptions** (same gap as `run_verification_detector` from the previous round). Mirrored the same try/except + `unhandled_error` audit pattern, so a DuckDB lock or unexpected SDK error from `collect_all()` now produces an audit row with the error type+message before re-raising as 500. Regression test in `tests/test_admin_run_endpoints.py::TestRunCorporateMemory::test_unhandled_exception_still_audits`.
|
- **#179 review — `/api/admin/run-corporate-memory` skipped audit on unhandled exceptions** (same gap as `run_verification_detector` from the previous round). Mirrored the same try/except + `unhandled_error` audit pattern, so a DuckDB lock or unexpected SDK error from `collect_all()` now produces an audit row with the error type+message before re-raising as 500. Regression test in `tests/test_admin_run_endpoints.py::TestRunCorporateMemory::test_unhandled_exception_still_audits`.
|
||||||
|
- **#179 review — `/api/admin/run-session-collector` skipped audit on unhandled exceptions** (third occurrence of the same pattern, completes the trilogy of LLM-pipeline endpoints). Mirrored the same try/except + `unhandled_error` audit pattern from the other two endpoints, so a `PermissionError` walking `/home`, an `OSError` on `/data/user_sessions` mkdir, or any other unhandled exception from `collector.run()` now produces an audit row before re-raising as 500. Regression test in `tests/test_admin_run_endpoints.py::TestRunSessionCollector::test_unhandled_exception_still_audits`.
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2818,13 +2818,33 @@ def run_session_collector(
|
||||||
|
|
||||||
# Call run() not main(): main() does argparse.parse_args() which would
|
# Call run() not main(): main() does argparse.parse_args() which would
|
||||||
# try to parse uvicorn's sys.argv and SystemExit(2) the worker.
|
# try to parse uvicorn's sys.argv and SystemExit(2) the worker.
|
||||||
rc, stats = collector.run(dry_run=False, verbose=False)
|
rc: int = 1
|
||||||
|
stats: dict = {}
|
||||||
|
job_error: Optional[Exception] = None
|
||||||
|
try:
|
||||||
|
rc, stats = collector.run(dry_run=False, verbose=False)
|
||||||
|
except Exception as e:
|
||||||
|
# Mirror run_verification_detector / run_corporate_memory
|
||||||
|
# (#179 review): capture any unhandled error so audit_log +
|
||||||
|
# /admin/scheduler-runs reflect the failure. Re-raised below
|
||||||
|
# after audit. Filesystem permission, OSError on /home walking,
|
||||||
|
# etc. are realistic failure modes worth surfacing.
|
||||||
|
job_error = e
|
||||||
|
|
||||||
|
audit_params: dict = {"rc": rc, **stats}
|
||||||
|
if job_error is not None:
|
||||||
|
audit_params["unhandled_error"] = f"{type(job_error).__name__}: {job_error}"
|
||||||
|
|
||||||
AuditRepository(conn).log(
|
AuditRepository(conn).log(
|
||||||
user_id=user.get("id"),
|
user_id=user.get("id"),
|
||||||
action="run_session_collector",
|
action="run_session_collector",
|
||||||
resource="job:session-collector",
|
resource="job:session-collector",
|
||||||
params={"rc": rc, **stats},
|
params=audit_params,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if job_error is not None:
|
||||||
|
raise HTTPException(status_code=500, detail=audit_params["unhandled_error"])
|
||||||
|
|
||||||
return {"ok": rc == 0, "details": {"rc": rc, **stats}}
|
return {"ok": rc == 0, "details": {"rc": rc, **stats}}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,35 @@ class TestRunSessionCollector:
|
||||||
resp = c.post("/api/admin/run-session-collector")
|
resp = c.post("/api/admin/run-session-collector")
|
||||||
assert resp.status_code == 401
|
assert resp.status_code == 401
|
||||||
|
|
||||||
|
def test_unhandled_exception_still_audits(self, seeded_app):
|
||||||
|
"""Devin Review on 9ebe991b: run_session_collector must mirror
|
||||||
|
run_verification_detector / run_corporate_memory — record the
|
||||||
|
failure in audit_log even when collector.run() raises (e.g.
|
||||||
|
permission error walking /home/), so /admin/scheduler-runs sees
|
||||||
|
the failure instead of only docker logs."""
|
||||||
|
from src.db import get_system_db
|
||||||
|
|
||||||
|
c = seeded_app["client"]
|
||||||
|
token = seeded_app["admin_token"]
|
||||||
|
with patch(
|
||||||
|
"services.session_collector.collector.run",
|
||||||
|
side_effect=PermissionError("simulated /home permission denied"),
|
||||||
|
):
|
||||||
|
resp = c.post("/api/admin/run-session-collector", headers=_auth(token))
|
||||||
|
assert resp.status_code == 500
|
||||||
|
assert "PermissionError" in resp.json()["detail"]
|
||||||
|
conn = get_system_db()
|
||||||
|
try:
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT params FROM audit_log WHERE action = 'run_session_collector' ORDER BY timestamp DESC LIMIT 1"
|
||||||
|
).fetchall()
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
assert rows, "audit row missing on unhandled exception"
|
||||||
|
params_json = rows[0][0]
|
||||||
|
assert "unhandled_error" in params_json
|
||||||
|
assert "PermissionError" in params_json
|
||||||
|
|
||||||
|
|
||||||
class TestRunVerificationDetector:
|
class TestRunVerificationDetector:
|
||||||
def test_admin_can_trigger_verification_detector(self, seeded_app, monkeypatch):
|
def test_admin_can_trigger_verification_detector(self, seeded_app, monkeypatch):
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue