From 7fc536589177510ac408bb9c4cc4532bc2d4ec3c Mon Sep 17 00:00:00 2001
From: ZdenekSrotyr <139972147+ZdenekSrotyr@users.noreply.github.com>
Date: Thu, 7 May 2026 17:41:22 +0200
Subject: [PATCH] =?UTF-8?q?release:=200.46.3=20=E2=80=94=20self-heal=20ses?=
=?UTF-8?q?sion=20pipeline=20+=20clearer=20diagnose=20(#220)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
## Summary
Verified against production: `claude -p` headless mode doesn't fire SessionEnd hooks (proven via `--output-format stream-json --include-hook-events`: zero `SessionEnd` events), so any session JSONLs from `-p` invocations stay orphaned locally and never reach the server. Fix: add `agnes push --quiet` as a third SessionStart entry — symmetric self-heal alongside the existing `agnes pull` entry. Existing workspaces pick this up on their next `agnes init` via the marker-based migration already in `cli/lib/hooks.py`.
Separately: on a colleague's fresh install, `agnes diagnose` warned "uploads are not being processed", which led them to suspect that their `agnes push` was broken. The warning is actually about the LLM-based `verification-detector` backlog; the uploads themselves were arriving fine, as confirmed by 23+3 JSONLs landing on the server while the warning was firing. This change rewords the warning to "verification-detector backlog" and adds `last_processed` to the diagnose dict so operators don't have to grep logs to confirm.
## Test plan
- [x] `pytest tests/test_lib_hooks.py` — updated entry counts + added an assertion that `agnes push` is present in SessionStart.
- [x] `pytest tests/test_setup_hooks_template.py` — updated.
- [x] `pytest tests/test_clean_install_integration.py` — updated.
- [x] `pytest tests/test_health_session_pipeline.py` — updated warning text + asserted `last_processed` field.
---
---
CHANGELOG.md | 10 ++++++
CLAUDE.md | 11 +++---
app/api/health.py | 8 +++--
cli/lib/hooks.py | 8 ++++-
docs/setup/claude_settings.json | 8 +++++
pyproject.toml | 2 +-
tests/test_health_session_pipeline.py | 18 +++++++---
tests/test_lib_hooks.py | 49 +++++++++++++++++----------
8 files changed, 84 insertions(+), 30 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 99878ff..5ebc7b3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,16 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
## [Unreleased]
+## [0.46.3] — 2026-05-07
+
+### Added
+
+- `agnes init` now installs a third SessionStart hook entry (`agnes push --quiet`) so orphan session JSONLs left behind by `claude -p` headless invocations (where Claude Code does NOT fire SessionEnd) or abnormal exits get uploaded on the next interactive session start. Symmetric self-healing alongside the existing `agnes pull` SessionStart entry. Existing workspaces pick up the third entry on their next `agnes init` invocation via the existing migration path in `cli/lib/hooks.py:_OUR_COMMAND_MARKERS`.
+
+### Fixed
+
+- `agnes diagnose` `session_pipeline` warning previously read "uploads are not being processed", which led users to suspect their `agnes push` uploads were failing. The warning now reads "verification-detector backlog" and includes `last_processed` so operators see at a glance that uploads are fine and only the LLM extraction step is behind.
+
## [0.46.2] — 2026-05-07
### Fixed
diff --git a/CLAUDE.md b/CLAUDE.md
index cb0d483..480a9c2 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -153,12 +153,15 @@ docker compose up
`agnes pull` is the canonical analyst-side distribution path: pulls the RBAC-filtered manifest from the server, downloads parquets whose MD5 changed (skipping `query_mode='remote'` rows), rebuilds local DuckDB views over them. `agnes push` mirrors it for the upload direction (sessions, CLAUDE.local.md).
-`agnes init` writes two hooks into `/.claude/settings.json`:
+`agnes init` writes hooks into `<workspace>/.claude/settings.json`:
-- `SessionStart` → `agnes pull --quiet` — pulls fresh parquets at the start of every Claude Code session
-- `SessionEnd` → `agnes push --quiet` — uploads session jsonl + `CLAUDE.local.md` to the server
+- `SessionStart` (3 entries):
+ 1. `agnes self-upgrade --quiet; agnes pull --quiet` — chained: bump CLI to the server-pinned version first, then pull fresh parquets / rebuild local DuckDB views.
+ 2. `agnes refresh-marketplace --quiet` — keep the per-user Claude Code marketplace clone in sync (separate entry so a fresh-workspace failure here doesn't suppress the data pull above).
+ 3. `agnes push --quiet` — self-heal: upload any orphan session JSONLs left behind by previous `claude -p` invocations (where Claude Code does NOT fire SessionEnd) or abnormal exits. Symmetric with `agnes pull`.
+- `SessionEnd` (1 entry) → `agnes push --quiet` — upload this session's JSONL + `CLAUDE.local.md` to the server.
-Both pass `--quiet` so they don't pollute Claude Code stdout, and trail with `|| true` so a server outage never blocks a session. Workspace-level (not user-home) so the hooks fire only when Claude Code opens this analyst workspace, not in unrelated sessions on the same machine.
+All entries pass `--quiet` so they don't pollute Claude Code stdout, and trail with `|| true` so a server outage never blocks a session. Workspace-level (not user-home) so the hooks fire only when Claude Code opens this analyst workspace, not in unrelated sessions on the same machine.
Admin RBAC for auto-sync: `query_mode IN ('local', 'materialized')` plus a `resource_grants` row for one of the analyst's groups → table appears in their manifest → `agnes pull` downloads it. No per-user sync config; the admin layer is the single source of truth.
diff --git a/app/api/health.py b/app/api/health.py
index e937bbd..1c2d324 100644
--- a/app/api/health.py
+++ b/app/api/health.py
@@ -186,12 +186,14 @@ def _check_session_pipeline(conn: duckdb.DuckDBPyConnection) -> dict:
return {
"status": "warning",
"detail": (
- f"session jsonls newer than session_extraction_state by ~{lag_seconds}s "
- f"(grace={grace_seconds}s). Check the verification-detector scheduler "
- f"job — uploads are not being processed."
+ f"verification-detector backlog: ~{lag_seconds}s "
+ f"(grace={grace_seconds}s, last_processed={last_processed.isoformat()}). "
+ f"Uploads are unaffected — only the LLM extraction step is behind. "
+ f"Check the verification-detector scheduler job."
),
"lag_seconds": lag_seconds,
"session_files": len(session_files),
+ "last_processed": last_processed.isoformat(),
}
return {"status": "ok", "session_files": len(session_files)}
diff --git a/cli/lib/hooks.py b/cli/lib/hooks.py
index ecbfaf7..5fa82d4 100644
--- a/cli/lib/hooks.py
+++ b/cli/lib/hooks.py
@@ -13,7 +13,7 @@ Design notes:
Third-party hooks (mixed entries, foreign commands) are left alone.
- Uses `|| true` in the hook command so the hook never blocks a session on
a transient sync error.
-- SessionStart gets two entries:
+- SessionStart gets three entries:
1. Chained `agnes self-upgrade; agnes pull` — self-upgrade runs first
so any wire-protocol bump lands before pull tries to use the new
CLI version. Both `|| true`-guarded so an upgrade failure doesn't
@@ -21,6 +21,11 @@ Design notes:
2. `agnes refresh-marketplace` — independent entry so a fresh
workspace (no marketplace cloned yet) failing this command doesn't
suppress the data pull above.
+ 3. `agnes push` — uploads any session JSONLs that haven't reached the
+ server yet (orphans from `claude -p` headless mode where Claude Code
+ does NOT fire SessionEnd, or from abnormal session exits). Symmetric
+ with `agnes pull` so the workspace heals on the next interactive
+ session start.
"""
from __future__ import annotations
@@ -96,6 +101,7 @@ def install_claude_hooks(workspace: Path) -> None:
"agnes self-upgrade --quiet 2>/dev/null || true; "
"agnes pull --quiet 2>/dev/null || true",
'bash -c "agnes refresh-marketplace --quiet 2>/dev/null || true"',
+ 'bash -c "agnes push --quiet 2>/dev/null || true"',
])
_replace_or_add("SessionEnd", [
"agnes push --quiet 2>/dev/null || true",
diff --git a/docs/setup/claude_settings.json b/docs/setup/claude_settings.json
index ddc64a6..1b64d40 100644
--- a/docs/setup/claude_settings.json
+++ b/docs/setup/claude_settings.json
@@ -8,6 +8,14 @@
"command": "agnes pull --quiet 2>/dev/null || true"
}
]
+ },
+ {
+ "hooks": [
+ {
+ "type": "command",
+ "command": "bash -c \"agnes push --quiet 2>/dev/null || true\""
+ }
+ ]
}
],
"SessionEnd": [
diff --git a/pyproject.toml b/pyproject.toml
index d585a5b..e701db8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "agnes-the-ai-analyst"
-version = "0.46.2"
+version = "0.46.3"
description = "Agnes — AI Data Analyst platform for AI analytical systems"
requires-python = ">=3.11,<3.14"
license = "MIT"
diff --git a/tests/test_health_session_pipeline.py b/tests/test_health_session_pipeline.py
index b3a103d..ac12377 100644
--- a/tests/test_health_session_pipeline.py
+++ b/tests/test_health_session_pipeline.py
@@ -89,10 +89,20 @@ class TestSessionPipelineHealthCheck:
assert resp.status_code == 200
body = resp.json()
services = body["services"]
- assert services["session_pipeline"]["status"] == "warning"
- # Actionable detail must point at the verification-detector job.
- detail = services["session_pipeline"].get("detail", "")
- assert "verification-detector" in detail or "session" in detail.lower()
+ entry = services["session_pipeline"]
+ assert entry["status"] == "warning"
+ # Actionable detail must read as "verification-detector backlog" so
+ # operators don't misread it as an upload failure (the previous text
+ # "uploads are not being processed" misled users on fresh installs
+ # whose `agnes push` was actually working fine).
+ detail = entry.get("detail", "")
+ assert "verification-detector backlog" in detail, detail
+ assert "uploads are unaffected" in detail.lower(), detail
+ # `last_processed` ISO timestamp must surface so operators see at a
+ # glance when extraction last succeeded — no log-grep required.
+ assert "last_processed" in entry, entry
+ # Round-trip parse: fromisoformat tolerates both naive and tz-aware ISO.
+ datetime.fromisoformat(entry["last_processed"])
# Warning bubbles up to overall status='degraded' (existing pattern).
assert body["status"] == "degraded"
diff --git a/tests/test_lib_hooks.py b/tests/test_lib_hooks.py
index b64c759..168e956 100644
--- a/tests/test_lib_hooks.py
+++ b/tests/test_lib_hooks.py
@@ -27,12 +27,14 @@ def test_install_creates_settings_file(tmp_path):
install_claude_hooks(tmp_path)
cfg = _read_settings(tmp_path)
starts = _commands_for(cfg, "SessionStart")
- # SessionStart has two entries: (1) chained self-upgrade ; pull —
+ # SessionStart has three entries: (1) chained self-upgrade ; pull —
# self-upgrade runs first so a wire-protocol bump lands before pull
# tries to use the new CLI; (2) refresh-marketplace as a separate
# entry so a failure (e.g. fresh workspace with no clone) doesn't
- # suppress the data pull above.
- assert len(starts) == 2
+ # suppress the data pull above; (3) push as a self-heal for orphan
+ # session JSONLs from `claude -p` headless mode (where Claude Code
+ # does NOT fire SessionEnd) or abnormal exits.
+ assert len(starts) == 3
chain = next(
(c for c in starts if "agnes self-upgrade" in c and "agnes pull" in c),
None,
@@ -50,6 +52,14 @@ def test_install_creates_settings_file(tmp_path):
assert refresh.startswith("bash -c "), (
f"refresh-marketplace hook must be wrapped in bash -c for Windows; got: {refresh!r}"
)
+ # The push self-heal entry is also bash-c-wrapped for Windows parity.
+ push_start = next((c for c in starts if "agnes push" in c), None)
+ assert push_start is not None, (
+ "Expected SessionStart self-heal `agnes push` entry for orphan JSONLs"
+ )
+ assert push_start.startswith("bash -c "), (
+ f"push self-heal hook must be wrapped in bash -c for Windows; got: {push_start!r}"
+ )
ends = _commands_for(cfg, "SessionEnd")
assert len(ends) == 1
assert "agnes push --quiet" in ends[0]
@@ -59,9 +69,9 @@ def test_install_idempotent(tmp_path):
install_claude_hooks(tmp_path)
install_claude_hooks(tmp_path)
cfg = _read_settings(tmp_path)
- # Two SessionStart entries (pull + refresh-marketplace), one SessionEnd
- # entry (push). Re-install must NOT duplicate them.
- assert len(cfg["hooks"]["SessionStart"]) == 2
+ # Three SessionStart entries (pull + refresh-marketplace + push self-heal),
+ # one SessionEnd entry (push). Re-install must NOT duplicate them.
+ assert len(cfg["hooks"]["SessionStart"]) == 3
assert len(cfg["hooks"]["SessionEnd"]) == 1
@@ -79,9 +89,10 @@ def test_install_replaces_old_da_sync_entries(tmp_path):
install_claude_hooks(tmp_path)
cfg = _read_settings(tmp_path)
starts = _commands_for(cfg, "SessionStart")
- assert len(starts) == 2
+ assert len(starts) == 3
assert any("agnes pull" in c for c in starts)
assert any("agnes refresh-marketplace" in c for c in starts)
+ assert any("agnes push" in c for c in starts)
# Legacy command must be gone from BOTH starts.
assert not any("da sync" in c for c in starts)
@@ -89,8 +100,8 @@ def test_install_replaces_old_da_sync_entries(tmp_path):
def test_install_replaces_prior_single_pull_entry(tmp_path):
"""Workspaces bootstrapped by a CLI version that only installed a
single SessionStart entry (`agnes pull`, no refresh-marketplace) must
- upgrade to the two-entry layout on the next install — not end up with
- three entries (one old + two new)."""
+ upgrade to the three-entry layout on the next install — not end up
+ with four entries (one old + three new)."""
settings_path = tmp_path / ".claude" / "settings.json"
settings_path.parent.mkdir(parents=True)
settings_path.write_text(json.dumps({
@@ -103,9 +114,10 @@ def test_install_replaces_prior_single_pull_entry(tmp_path):
install_claude_hooks(tmp_path)
cfg = _read_settings(tmp_path)
starts = _commands_for(cfg, "SessionStart")
- assert len(starts) == 2
+ assert len(starts) == 3
assert any("agnes pull" in c for c in starts)
assert any("agnes refresh-marketplace" in c for c in starts)
+ assert any("agnes push" in c for c in starts)
def test_install_replaces_v0_43_chained_self_upgrade_pull_entry(tmp_path):
@@ -134,14 +146,15 @@ def test_install_replaces_v0_43_chained_self_upgrade_pull_entry(tmp_path):
install_claude_hooks(tmp_path)
cfg = _read_settings(tmp_path)
starts = _commands_for(cfg, "SessionStart")
- # Exactly two entries — the v0.43 chained line was replaced, not stacked.
- assert len(starts) == 2, starts
+ # Exactly three entries — the v0.43 chained line was replaced, not stacked.
+ assert len(starts) == 3, starts
chain = next(
(c for c in starts if "agnes self-upgrade" in c and "agnes pull" in c),
None,
)
assert chain is not None
assert any("agnes refresh-marketplace" in c for c in starts)
+ assert any("agnes push" in c for c in starts)
# SessionEnd untouched (single push entry).
ends = _commands_for(cfg, "SessionEnd")
assert len(ends) == 1
@@ -160,11 +173,12 @@ def test_install_preserves_third_party_hooks(tmp_path):
install_claude_hooks(tmp_path)
cfg = _read_settings(tmp_path)
starts = _commands_for(cfg, "SessionStart")
- # Third-party entry stays + both agnes entries get added.
- assert len(starts) == 3
+ # Third-party entry stays + all three agnes entries get added.
+ assert len(starts) == 4
assert any("echo hi from another tool" in c for c in starts)
assert any("agnes pull" in c for c in starts)
assert any("agnes refresh-marketplace" in c for c in starts)
+ assert any("agnes push" in c for c in starts)
# Other event types untouched.
assert cfg["hooks"]["PreToolUse"][0]["hooks"][0]["command"] == "echo pre"
@@ -208,7 +222,8 @@ def test_install_idempotent_chained_entry(tmp_path):
install_claude_hooks(tmp_path)
install_claude_hooks(tmp_path)
cfg = _read_settings(tmp_path)
- # Two SessionStart entries (chained self-upgrade+pull plus refresh-
- # marketplace) — re-install must not duplicate either.
- assert len(cfg["hooks"]["SessionStart"]) == 2
+ # Three SessionStart entries (chained self-upgrade+pull, refresh-
+ # marketplace, and the push self-heal) — re-install must not
+ # duplicate any of them.
+ assert len(cfg["hooks"]["SessionStart"]) == 3
assert len(cfg["hooks"]["SessionEnd"]) == 1