From 4c4dfee8e6cae88efc8e4437f69200be9d4b9fa0 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Tue, 5 May 2026 08:57:35 +0200 Subject: [PATCH] feat(profile): /profile/sessions page + audit on detector exception + correct SCHEDULER_AUDIT_ACTIONS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three changes addressing user feedback during e2e test of #179 + Devin Review on e86dd5ed. 1) /profile/sessions — new self-service user page in the user menu. Lists all session jsonls the caller uploaded via `agnes push` joined against session_extraction_state. Each row shows uploaded_at, file size, status badge (pending/processed/extracted), processed_at, and items_extracted. The page docstring + help text explicitly call out that items_extracted=0 means the verification detector ran fine but the LLM found no claims to track — that's the documented "no items" outcome, not a broken pipeline. Closes the gap surfaced during the e2e test of #176 where a user could see their sessions on disk and process them through the LLM but had no UI to inspect what happened. 2) run_verification_detector audits unhandled exceptions (Devin #1). If detector.run() threw anything other than the already-translated ValueError, the audit_log row was never written. The endpoint now wraps detector.run in try/except, records the exception in audit_params["unhandled_error"], then re-raises as 500 after audit. The /admin/scheduler-runs page surfaces the failure row with the error type + message. 3) SCHEDULER_AUDIT_ACTIONS list corrected (Devin #2). Previous list had "marketplaces_sync_all" (wrong — actual is "marketplace.sync_all") plus "data_refresh" and "scripts_run_due" which app/api/sync.py and app/api/scripts.py don't write to audit_log. Fixed to the four actually-logged strings; comment points at the missing audit calls as a follow-up. Tests: tests/test_web_ui.py adds TestAdminRoleGuards::test_profile_sessions_page_no_admin_required and tightens test_admin_scheduler_runs_page_admin_only to assert the correct marketplace.sync_all string. --- CHANGELOG.md | 5 +- app/api/admin.py | 24 +++++- app/web/router.py | 81 +++++++++++++++++- app/web/templates/_app_header.html | 3 +- app/web/templates/profile_sessions.html | 109 ++++++++++++++++++++++++ tests/test_web_ui.py | 14 +++ 6 files changed, 226 insertions(+), 10 deletions(-) create mode 100644 app/web/templates/profile_sessions.html diff --git a/CHANGELOG.md b/CHANGELOG.md index b01bdd6..d45fa42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,10 +45,13 @@ Five-defect fix for the silently-broken session pipeline on default Compose depl - **#179 review — `/api/admin/run-session-collector` would SystemExit the worker.** The endpoint called `collector.main()`, whose `argparse.parse_args()` parsed uvicorn's `sys.argv` (`['app.main:app', '--host', …]`) and called `sys.exit(2)` on the unrecognised flags. `SystemExit` inherits from `BaseException`, escapes FastAPI's exception machinery, and propagates through the thread pool — every scheduler tick that fired the endpoint either 500-ed or risked killing the uvicorn worker. Fix: `services/session_collector/collector.py` now exposes an argv-free `run(dry_run, verbose) -> (rc, stats)` helper; `main()` is a thin CLI shim around it and the admin endpoint calls `run()` directly. Audit log now carries the per-run stats (`users_processed`, `files_copied`, `files_skipped`) instead of just the rc. Regression tests in `tests/test_session_collector.py::TestRunHelper`. - **#179 review — `python -m services.corporate_memory` crashed on missing LLM config instead of exiting cleanly.** The PR's fail-fast change made `collect_all()` raise `ValueError` when neither an `ai:` block nor `ANTHROPIC_API_KEY`/`LLM_API_KEY` was available. The `verification_detector` CLI was updated to catch it; the corporate-memory CLI was missed. Now also wrapped — operators get a one-line `Corporate Memory cannot run: ` on stderr and rc=1 instead of a raw traceback. Regression test in `tests/test_llm_connector.py::TestCorporateMemoryCollector::test_main_returns_1_on_no_ai_config_instead_of_traceback`. - **E2E test — Anthropic API rejected every extraction request.** The structured-output API now requires `additionalProperties: false` on every `{"type": "object"}` node in the json_schema; without it the API returns 400 `invalid_request_error` ("output_config.format.schema: For 'object' type, 'additionalProperties' must be explicitly set to false"). Surfaced on a real BQ-backed deploy: every uploaded session jsonl failed verification-extraction in a tight retry loop. Fix: `connectors/llm/anthropic_provider.py` now wraps the caller-supplied schema through a recursive `_strict_json_schema()` walker that adds the field where missing (preserving any explicit operator override), then passes the strict variant to the API. Six unit tests in `tests/test_llm_connector.py::TestStrictJsonSchema` pin the recursion across nested objects, array items, and the no-mutation invariant. +- **#179 review — `/api/admin/run-verification-detector` skipped audit on unhandled exceptions.** If `detector.run()` threw anything other than the already-translated `ValueError` (DuckDB lock, network blip, unexpected SDK error), the audit_log row was never written — the operator's only signal was `docker logs agnes-scheduler-1`. The endpoint now wraps `detector.run` in try/except, records the exception in `audit_params["unhandled_error"]`, then re-raises as 500 after audit. The `/admin/scheduler-runs` page surfaces the failure row with the error type and message. +- **#179 review — `SCHEDULER_AUDIT_ACTIONS` listed action strings that don't actually appear in `audit_log`.** The list at `app/web/router.py:952` had `"marketplaces_sync_all"` (wrong — actual is `"marketplace.sync_all"`) plus `"data_refresh"` and `"scripts_run_due"` (which `app/api/sync.py` and `app/api/scripts.py` don't write). Corrected to the four actually-logged strings, with a comment pointing at the missing audit calls in sync/scripts as a follow-up. ### Added -- `/admin/scheduler-runs` — read-only admin page showing the last 200 audit-log entries from scheduler-driven actions (`run_session_collector`, `run_verification_detector`, `run_corporate_memory`, `marketplaces_sync_all`, `data_refresh`, `scripts_run_due`). New `AuditRepository.query_actions(actions, limit)` query helper, new admin nav entry under the Admin dropdown. Failed scheduler ticks (HTTP 401, network errors) don't reach the audit_log — those still live only in `docker logs agnes-scheduler-1`; the page calls that out with a hint to set `SCHEDULER_API_TOKEN` if no rows show up. +- `/admin/scheduler-runs` — read-only admin page showing the last 200 audit-log entries from scheduler-driven actions (`run_session_collector`, `run_verification_detector`, `run_corporate_memory`, `marketplace.sync_all`). New `AuditRepository.query_actions(actions, limit)` query helper, new admin nav entry under the Admin dropdown. `data-refresh` (`POST /api/sync/trigger`) and `script-runner` (`POST /api/scripts/run-due`) are scheduler jobs but don't write to `audit_log` today, so they can't appear here yet. Failed scheduler ticks (HTTP 401, network errors) don't reach the audit_log either — those still live only in `docker logs agnes-scheduler-1`; the page calls that out with a hint to set `SCHEDULER_API_TOKEN` if no rows show up. +- `/profile/sessions` — self-service user page in the user menu, showing all session jsonls the caller uploaded via `agnes push` joined against `session_extraction_state`. Each row shows uploaded_at, file size, status badge (`pending` / `processed` / `extracted`), processed_at, and `items_extracted`. The page docstring explicitly calls out that `items_extracted = 0` means the verification detector ran successfully but the LLM found no claims worth tracking — that's the documented "no items" outcome, not a broken pipeline. Closes the gap surfaced during the e2e test of #176 where a user could see their sessions on disk and process them through the LLM but had no UI to inspect what happened. ### Internal diff --git a/app/api/admin.py b/app/api/admin.py index 2798854..31acaa2 100644 --- a/app/api/admin.py +++ b/app/api/admin.py @@ -2860,23 +2860,39 @@ def run_verification_detector( raise HTTPException(status_code=500, detail=str(e)) job_conn = get_system_db() + stats: dict = {} + job_error: Optional[Exception] = None try: stats = detector.run(job_conn, extractor, dry_run=False) + except Exception as e: + # Capture and re-raise after audit so an unhandled detector error + # (DuckDB lock, network blip, unexpected SDK type) still leaves a + # row in audit_log — the /admin/scheduler-runs page is the + # operator's only signal beyond docker logs. + job_error = e finally: try: job_conn.close() except Exception: pass + audit_params: dict = { + "items_created": stats.get("items_created", 0), + "errors": len(stats.get("errors", [])), + } + if job_error is not None: + audit_params["unhandled_error"] = f"{type(job_error).__name__}: {job_error}" + AuditRepository(conn).log( user_id=user.get("id"), action="run_verification_detector", resource="job:verification-detector", - params={ - "items_created": stats.get("items_created", 0), - "errors": len(stats.get("errors", [])), - }, + params=audit_params, ) + + if job_error is not None: + raise HTTPException(status_code=500, detail=audit_params["unhandled_error"]) + return {"ok": not stats.get("errors"), "details": stats} diff --git a/app/web/router.py b/app/web/router.py index 6279a7f..207fbeb 100644 --- a/app/web/router.py +++ b/app/web/router.py @@ -5,7 +5,7 @@ Replicates all Flask webapp routes with DuckDB-backed data. import logging import os -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from typing import Optional from urllib.parse import quote @@ -949,13 +949,16 @@ async def admin_marketplaces_page( # Scheduler-driven admin actions audited by app/api/admin.py and # app/api/marketplaces.py. Keep in sync with the JOBS list in # services/scheduler/__main__.py. +# +# `data-refresh` (POST /api/sync/trigger) and `script-runner` +# (POST /api/scripts/run-due) are scheduler jobs but they do NOT write +# audit_log today, so they can't appear here. If you add audit calls to +# those endpoints, add the matching action strings to this list. SCHEDULER_AUDIT_ACTIONS = [ "run_session_collector", "run_verification_detector", "run_corporate_memory", - "marketplaces_sync_all", - "data_refresh", - "scripts_run_due", + "marketplace.sync_all", ] @@ -1110,6 +1113,76 @@ async def profile_page( return templates.TemplateResponse(request, "profile.html", ctx) +@router.get("/profile/sessions", response_class=HTMLResponse) +async def profile_sessions_page( + request: Request, + user: dict = Depends(get_current_user), + conn: duckdb.DuckDBPyConnection = Depends(_get_db), +): + """User-self-view of own uploaded sessions and their extraction state. + + Walks `${DATA_DIR}/user_sessions//*.jsonl` for the caller's + own user_id, joins each file against `session_extraction_state` to + surface processed_at + items_extracted, and renders a table. + Items_extracted = 0 means the verification_detector ran but the LLM + found no claims worth tracking — that's the documented "no items" + outcome; it does NOT mean the pipeline is broken. + """ + import pathlib + user_id = user["id"] + data_dir = pathlib.Path(os.environ.get("DATA_DIR", "/data")) + user_sessions_dir = data_dir / "user_sessions" / user_id + + files = [] + if user_sessions_dir.is_dir(): + for jsonl in sorted(user_sessions_dir.glob("*.jsonl"), key=lambda p: p.stat().st_mtime, reverse=True): + try: + stat = jsonl.stat() + except OSError: + continue + files.append({ + "name": jsonl.name, + "size_bytes": stat.st_size, + "mtime": datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc), + }) + + state_map: dict = {} + if files: + keys = [f"{user_id}/{f['name']}" for f in files] + placeholders = ",".join("?" for _ in keys) + rows = conn.execute( + f"""SELECT session_file, processed_at, items_extracted, file_hash + FROM session_extraction_state + WHERE session_file IN ({placeholders})""", + keys, + ).fetchall() + cols = [d[0] for d in conn.description] + for row in rows: + d = dict(zip(cols, row)) + state_map[d["session_file"]] = d + + rows_view = [] + for f in files: + key = f"{user_id}/{f['name']}" + state = state_map.get(key) + rows_view.append({ + "name": f["name"], + "size_kb": round(f["size_bytes"] / 1024, 1), + "uploaded_at": f["mtime"], + "processed_at": state["processed_at"] if state else None, + "items_extracted": state["items_extracted"] if state else None, + "is_processed": state is not None, + }) + + ctx = _build_context( + request, + user=user, + sessions=rows_view, + user_id=user_id, + ) + return templates.TemplateResponse(request, "profile_sessions.html", ctx) + + @router.get("/_debug/throw/http/{code:int}", response_class=HTMLResponse, include_in_schema=False) async def _debug_throw_http(request: Request, code: int): """Dev helper — raise an HTTPException with the given status code. diff --git a/app/web/templates/_app_header.html b/app/web/templates/_app_header.html index 024f154..6c7c5f4 100644 --- a/app/web/templates/_app_header.html +++ b/app/web/templates/_app_header.html @@ -58,7 +58,8 @@
Admin
{% endif %} - Profile + Profile + My sessions My tokens {% if config.DEBUG_AUTH_ENABLED %} Auth debug diff --git a/app/web/templates/profile_sessions.html b/app/web/templates/profile_sessions.html new file mode 100644 index 0000000..d0d1cd6 --- /dev/null +++ b/app/web/templates/profile_sessions.html @@ -0,0 +1,109 @@ +{% extends "base.html" %} +{% block title %}My sessions — {{ config.INSTANCE_NAME }}{% endblock %} + +{% block content %} + + +
+

My sessions

+

+ Sessions you uploaded via agnes push from your Claude Code workspace, with + extraction status from session_extraction_state. +
+ Items extracted = 0 means the verification detector ran successfully + but the LLM didn't find anything worth tracking in that session — that's expected for + sessions that are mostly tool calls or coding without confident factual claims. +
+ Pending means the file is on disk but the scheduler hasn't processed it yet + (next verification-detector tick: every 15 min by default). +

+ +
+ {% if sessions %} + + + + + + + + + + + + + {% for s in sessions %} + + + + + + + + + {% endfor %} + +
Session fileUploadedSizeStatusProcessedItems extracted
{{ s.name }}{{ s.uploaded_at.strftime("%Y-%m-%d %H:%M:%S UTC") }}{{ s.size_kb }} kB + {% if not s.is_processed %} + pending + {% elif s.items_extracted and s.items_extracted > 0 %} + extracted + {% else %} + processed + {% endif %} + + {% if s.processed_at %}{{ s.processed_at.strftime("%Y-%m-%d %H:%M:%S UTC") }}{% endif %} + + {% if s.items_extracted is not none %}{{ s.items_extracted }}{% endif %} +
+ {% else %} +
+ No session files yet. Run agnes push from a Claude Code workspace + where agnes init installed the SessionEnd hook. +
+ {% endif %} +
+
+{% endblock %} diff --git a/tests/test_web_ui.py b/tests/test_web_ui.py index 70b825e..36450ac 100644 --- a/tests/test_web_ui.py +++ b/tests/test_web_ui.py @@ -338,6 +338,20 @@ class TestAdminRoleGuards: assert b"run_session_collector" in r.content assert b"run_verification_detector" in r.content assert b"run_corporate_memory" in r.content + # Devin Review on e86dd5ed: list must use the actual logged action + # string, not a guess. + assert b"marketplace.sync_all" in r.content + + def test_profile_sessions_page_no_admin_required(self, web_client, analyst_cookie, admin_cookie): + """The /profile/sessions page is gated by get_current_user, not require_admin — + every authenticated user views their own sessions.""" + r = web_client.get("/profile/sessions", follow_redirects=False) + assert r.status_code in (302, 401, 403) + r = web_client.get("/profile/sessions", cookies=analyst_cookie, follow_redirects=False) + assert r.status_code == 200 + assert b"My sessions" in r.content + r = web_client.get("/profile/sessions", cookies=admin_cookie, follow_redirects=False) + assert r.status_code == 200 class TestUnauthenticatedHtmlRedirects: