From 7fc536589177510ac408bb9c4cc4532bc2d4ec3c Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr <139972147+ZdenekSrotyr@users.noreply.github.com> Date: Thu, 7 May 2026 17:41:22 +0200 Subject: [PATCH] =?UTF-8?q?release:=200.46.3=20=E2=80=94=20self-heal=20ses?= =?UTF-8?q?sion=20pipeline=20+=20clearer=20diagnose=20(#220)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Verified against production: `claude -p` headless mode doesn't fire SessionEnd hooks (proven via `--output-format stream-json --include-hook-events`: zero `SessionEnd` events), so any session JSONLs from `-p` invocations stay orphaned locally and never reach the server. Fix: add `agnes push --quiet` as a third SessionStart entry — symmetric self-heal alongside the existing `agnes pull` entry. Existing workspaces pick this up on their next `agnes init` via the marker-based migration already in `cli/lib/hooks.py`. Separately: a colleague's fresh install showed `agnes diagnose` warning "uploads are not being processed", which led them to suspect their `agnes push` was broken. The warning is actually about the LLM-based `verification-detector` backlog (uploads themselves were arriving fine — confirmed by 23+3 JSONLs that landed on the server while the warning was firing). Reword the warning to "verification-detector backlog" + add `last_processed` to the diagnose dict so operators don't have to grep logs to confirm. ## Test plan - [x] `pytest tests/test_lib_hooks.py` — updated count + added `agnes push` in SessionStart assertion. - [x] `pytest tests/test_setup_hooks_template.py` — updated (note: this file does not appear in this patch's diffstat). - [x] `pytest tests/test_clean_install_integration.py` — updated (note: this file does not appear in this patch's diffstat). - [x] `pytest tests/test_health_session_pipeline.py` — updated warning text + asserted `last_processed` field. 
--- Open in Devin Review --- CHANGELOG.md | 10 ++++++ CLAUDE.md | 11 +++--- app/api/health.py | 8 +++-- cli/lib/hooks.py | 8 ++++- docs/setup/claude_settings.json | 8 +++++ pyproject.toml | 2 +- tests/test_health_session_pipeline.py | 18 +++++++--- tests/test_lib_hooks.py | 49 +++++++++++++++++---------- 8 files changed, 84 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 99878ff..5ebc7b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,16 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C ## [Unreleased] +## [0.46.3] — 2026-05-07 + +### Added + +- `agnes init` now installs a third SessionStart hook entry (`agnes push --quiet`) so orphan session JSONLs left behind by `claude -p` headless invocations (where Claude Code does NOT fire SessionEnd) or abnormal exits get uploaded on the next interactive session start. Symmetric self-healing alongside the existing `agnes pull` SessionStart entry. Existing workspaces pick up the third entry on their next `agnes init` invocation via the existing migration path in `cli/lib/hooks.py:_OUR_COMMAND_MARKERS`. + +### Fixed + +- `agnes diagnose` `session_pipeline` warning previously read "uploads are not being processed", which led users to suspect their `agnes push` uploads were failing. The warning now reads "verification-detector backlog" and includes `last_processed` so operators see at a glance that uploads are fine and only the LLM extraction step is behind. + ## [0.46.2] — 2026-05-07 ### Fixed diff --git a/CLAUDE.md b/CLAUDE.md index cb0d483..480a9c2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -153,12 +153,15 @@ docker compose up `agnes pull` is the canonical analyst-side distribution path: pulls the RBAC-filtered manifest from the server, downloads parquets whose MD5 changed (skipping `query_mode='remote'` rows), rebuilds local DuckDB views over them. `agnes push` mirrors it for the upload direction (sessions, CLAUDE.local.md). 
-`agnes init` writes two hooks into `<workspace>/.claude/settings.json`: +`agnes init` writes hooks into `<workspace>/.claude/settings.json`: -- `SessionStart` → `agnes pull --quiet` — pulls fresh parquets at the start of every Claude Code session -- `SessionEnd` → `agnes push --quiet` — uploads session jsonl + `CLAUDE.local.md` to the server +- `SessionStart` (3 entries): + 1. `agnes self-upgrade --quiet; agnes pull --quiet` — chained: bump CLI to the server-pinned version first, then pull fresh parquets / rebuild local DuckDB views. + 2. `agnes refresh-marketplace --quiet` — keep the per-user Claude Code marketplace clone in sync (separate entry so a fresh-workspace failure here doesn't suppress the data pull above). + 3. `agnes push --quiet` — self-heal: upload any orphan session JSONLs left behind by previous `claude -p` invocations (where Claude Code does NOT fire SessionEnd) or abnormal exits. Symmetric with `agnes pull`. +- `SessionEnd` (1 entry) → `agnes push --quiet` — upload this session's JSONL + `CLAUDE.local.md` to the server. -Both pass `--quiet` so they don't pollute Claude Code stdout, and trail with `|| true` so a server outage never blocks a session. Workspace-level (not user-home) so the hooks fire only when Claude Code opens this analyst workspace, not in unrelated sessions on the same machine. +All entries pass `--quiet` so they don't pollute Claude Code stdout, and trail with `|| true` so a server outage never blocks a session. Workspace-level (not user-home) so the hooks fire only when Claude Code opens this analyst workspace, not in unrelated sessions on the same machine. Admin RBAC for auto-sync: `query_mode IN ('local', 'materialized')` plus a `resource_grants` row for one of the analyst's groups → table appears in their manifest → `agnes pull` downloads it. No per-user sync config; the admin layer is the single source of truth. 
diff --git a/app/api/health.py b/app/api/health.py index e937bbd..1c2d324 100644 --- a/app/api/health.py +++ b/app/api/health.py @@ -186,12 +186,14 @@ def _check_session_pipeline(conn: duckdb.DuckDBPyConnection) -> dict: return { "status": "warning", "detail": ( - f"session jsonls newer than session_extraction_state by ~{lag_seconds}s " - f"(grace={grace_seconds}s). Check the verification-detector scheduler " - f"job — uploads are not being processed." + f"verification-detector backlog: ~{lag_seconds}s " + f"(grace={grace_seconds}s, last_processed={last_processed.isoformat()}). " + f"Uploads are unaffected — only the LLM extraction step is behind. " + f"Check the verification-detector scheduler job." ), "lag_seconds": lag_seconds, "session_files": len(session_files), + "last_processed": last_processed.isoformat(), } return {"status": "ok", "session_files": len(session_files)} diff --git a/cli/lib/hooks.py b/cli/lib/hooks.py index ecbfaf7..5fa82d4 100644 --- a/cli/lib/hooks.py +++ b/cli/lib/hooks.py @@ -13,7 +13,7 @@ Design notes: Third-party hooks (mixed entries, foreign commands) are left alone. - Uses `|| true` in the hook command so the hook never blocks a session on a transient sync error. -- SessionStart gets two entries: +- SessionStart gets three entries: 1. Chained `agnes self-upgrade; agnes pull` — self-upgrade runs first so any wire-protocol bump lands before pull tries to use the new CLI version. Both `|| true`-guarded so an upgrade failure doesn't @@ -21,6 +21,11 @@ Design notes: 2. `agnes refresh-marketplace` — independent entry so a fresh workspace (no marketplace cloned yet) failing this command doesn't suppress the data pull above. + 3. `agnes push` — uploads any session JSONLs that haven't reached the + server yet (orphans from `claude -p` headless mode where Claude Code + does NOT fire SessionEnd, or from abnormal session exits). Symmetric + with `agnes pull` so the workspace heals on the next interactive + session start. 
""" from __future__ import annotations @@ -96,6 +101,7 @@ def install_claude_hooks(workspace: Path) -> None: "agnes self-upgrade --quiet 2>/dev/null || true; " "agnes pull --quiet 2>/dev/null || true", 'bash -c "agnes refresh-marketplace --quiet 2>/dev/null || true"', + 'bash -c "agnes push --quiet 2>/dev/null || true"', ]) _replace_or_add("SessionEnd", [ "agnes push --quiet 2>/dev/null || true", diff --git a/docs/setup/claude_settings.json b/docs/setup/claude_settings.json index ddc64a6..1b64d40 100644 --- a/docs/setup/claude_settings.json +++ b/docs/setup/claude_settings.json @@ -8,6 +8,14 @@ "command": "agnes pull --quiet 2>/dev/null || true" } ] + }, + { + "hooks": [ + { + "type": "command", + "command": "bash -c \"agnes push --quiet 2>/dev/null || true\"" + } + ] } ], "SessionEnd": [ diff --git a/pyproject.toml b/pyproject.toml index d585a5b..e701db8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "agnes-the-ai-analyst" -version = "0.46.2" +version = "0.46.3" description = "Agnes — AI Data Analyst platform for AI analytical systems" requires-python = ">=3.11,<3.14" license = "MIT" diff --git a/tests/test_health_session_pipeline.py b/tests/test_health_session_pipeline.py index b3a103d..ac12377 100644 --- a/tests/test_health_session_pipeline.py +++ b/tests/test_health_session_pipeline.py @@ -89,10 +89,20 @@ class TestSessionPipelineHealthCheck: assert resp.status_code == 200 body = resp.json() services = body["services"] - assert services["session_pipeline"]["status"] == "warning" - # Actionable detail must point at the verification-detector job. 
- detail = services["session_pipeline"].get("detail", "") - assert "verification-detector" in detail or "session" in detail.lower() + entry = services["session_pipeline"] + assert entry["status"] == "warning" + # Actionable detail must read as "verification-detector backlog" so + # operators don't misread it as an upload failure (the previous text + # "uploads are not being processed" misled users on fresh installs + # whose `agnes push` was actually working fine). + detail = entry.get("detail", "") + assert "verification-detector backlog" in detail, detail + assert "uploads are unaffected" in detail.lower(), detail + # `last_processed` ISO timestamp must surface so operators see at a + # glance when extraction last succeeded — no log-grep required. + assert "last_processed" in entry, entry + # Round-trip parse: fromisoformat tolerates both naive and tz-aware ISO. + datetime.fromisoformat(entry["last_processed"]) # Warning bubbles up to overall status='degraded' (existing pattern). assert body["status"] == "degraded" diff --git a/tests/test_lib_hooks.py b/tests/test_lib_hooks.py index b64c759..168e956 100644 --- a/tests/test_lib_hooks.py +++ b/tests/test_lib_hooks.py @@ -27,12 +27,14 @@ def test_install_creates_settings_file(tmp_path): install_claude_hooks(tmp_path) cfg = _read_settings(tmp_path) starts = _commands_for(cfg, "SessionStart") - # SessionStart has two entries: (1) chained self-upgrade ; pull — + # SessionStart has three entries: (1) chained self-upgrade ; pull — # self-upgrade runs first so a wire-protocol bump lands before pull # tries to use the new CLI; (2) refresh-marketplace as a separate # entry so a failure (e.g. fresh workspace with no clone) doesn't - # suppress the data pull above. - assert len(starts) == 2 + # suppress the data pull above; (3) push as a self-heal for orphan + # session JSONLs from `claude -p` headless mode (where Claude Code + # does NOT fire SessionEnd) or abnormal exits. 
+ assert len(starts) == 3 chain = next( (c for c in starts if "agnes self-upgrade" in c and "agnes pull" in c), None, @@ -50,6 +52,14 @@ def test_install_creates_settings_file(tmp_path): assert refresh.startswith("bash -c "), ( f"refresh-marketplace hook must be wrapped in bash -c for Windows; got: {refresh!r}" ) + # The push self-heal entry is also bash-c-wrapped for Windows parity. + push_start = next((c for c in starts if "agnes push" in c), None) + assert push_start is not None, ( + "Expected SessionStart self-heal `agnes push` entry for orphan JSONLs" + ) + assert push_start.startswith("bash -c "), ( + f"push self-heal hook must be wrapped in bash -c for Windows; got: {push_start!r}" + ) ends = _commands_for(cfg, "SessionEnd") assert len(ends) == 1 assert "agnes push --quiet" in ends[0] @@ -59,9 +69,9 @@ def test_install_idempotent(tmp_path): install_claude_hooks(tmp_path) install_claude_hooks(tmp_path) cfg = _read_settings(tmp_path) - # Two SessionStart entries (pull + refresh-marketplace), one SessionEnd - # entry (push). Re-install must NOT duplicate them. - assert len(cfg["hooks"]["SessionStart"]) == 2 + # Three SessionStart entries (pull + refresh-marketplace + push self-heal), + # one SessionEnd entry (push). Re-install must NOT duplicate them. + assert len(cfg["hooks"]["SessionStart"]) == 3 assert len(cfg["hooks"]["SessionEnd"]) == 1 @@ -79,9 +89,10 @@ def test_install_replaces_old_da_sync_entries(tmp_path): install_claude_hooks(tmp_path) cfg = _read_settings(tmp_path) starts = _commands_for(cfg, "SessionStart") - assert len(starts) == 2 + assert len(starts) == 3 assert any("agnes pull" in c for c in starts) assert any("agnes refresh-marketplace" in c for c in starts) + assert any("agnes push" in c for c in starts) # Legacy command must be gone from BOTH starts. 
assert not any("da sync" in c for c in starts) @@ -89,8 +100,8 @@ def test_install_replaces_old_da_sync_entries(tmp_path): def test_install_replaces_prior_single_pull_entry(tmp_path): """Workspaces bootstrapped by a CLI version that only installed a single SessionStart entry (`agnes pull`, no refresh-marketplace) must - upgrade to the two-entry layout on the next install — not end up with - three entries (one old + two new).""" + upgrade to the three-entry layout on the next install — not end up + with four entries (one old + three new).""" settings_path = tmp_path / ".claude" / "settings.json" settings_path.parent.mkdir(parents=True) settings_path.write_text(json.dumps({ @@ -103,9 +114,10 @@ def test_install_replaces_prior_single_pull_entry(tmp_path): install_claude_hooks(tmp_path) cfg = _read_settings(tmp_path) starts = _commands_for(cfg, "SessionStart") - assert len(starts) == 2 + assert len(starts) == 3 assert any("agnes pull" in c for c in starts) assert any("agnes refresh-marketplace" in c for c in starts) + assert any("agnes push" in c for c in starts) def test_install_replaces_v0_43_chained_self_upgrade_pull_entry(tmp_path): @@ -134,14 +146,15 @@ def test_install_replaces_v0_43_chained_self_upgrade_pull_entry(tmp_path): install_claude_hooks(tmp_path) cfg = _read_settings(tmp_path) starts = _commands_for(cfg, "SessionStart") - # Exactly two entries — the v0.43 chained line was replaced, not stacked. - assert len(starts) == 2, starts + # Exactly three entries — the v0.43 chained line was replaced, not stacked. + assert len(starts) == 3, starts chain = next( (c for c in starts if "agnes self-upgrade" in c and "agnes pull" in c), None, ) assert chain is not None assert any("agnes refresh-marketplace" in c for c in starts) + assert any("agnes push" in c for c in starts) # SessionEnd untouched (single push entry). 
ends = _commands_for(cfg, "SessionEnd") assert len(ends) == 1 @@ -160,11 +173,12 @@ def test_install_preserves_third_party_hooks(tmp_path): install_claude_hooks(tmp_path) cfg = _read_settings(tmp_path) starts = _commands_for(cfg, "SessionStart") - # Third-party entry stays + both agnes entries get added. - assert len(starts) == 3 + # Third-party entry stays + all three agnes entries get added. + assert len(starts) == 4 assert any("echo hi from another tool" in c for c in starts) assert any("agnes pull" in c for c in starts) assert any("agnes refresh-marketplace" in c for c in starts) + assert any("agnes push" in c for c in starts) # Other event types untouched. assert cfg["hooks"]["PreToolUse"][0]["hooks"][0]["command"] == "echo pre" @@ -208,7 +222,8 @@ def test_install_idempotent_chained_entry(tmp_path): install_claude_hooks(tmp_path) install_claude_hooks(tmp_path) cfg = _read_settings(tmp_path) - # Two SessionStart entries (chained self-upgrade+pull plus refresh- - # marketplace) — re-install must not duplicate either. - assert len(cfg["hooks"]["SessionStart"]) == 2 + # Three SessionStart entries (chained self-upgrade+pull, refresh- + # marketplace, and the push self-heal) — re-install must not + # duplicate any of them. + assert len(cfg["hooks"]["SessionStart"]) == 3 assert len(cfg["hooks"]["SessionEnd"]) == 1