release: 0.46.4 — detach SessionEnd push so it survives claude -p SIGTERM (#222)
## Summary `claude -p` (headless mode) gives SessionEnd hook subprocesses ~1 second before SIGTERM, regardless of work in progress. `agnes push` for a typical workspace takes 5-30s. The current synchronous SessionEnd hook (`agnes push --quiet 2>/dev/null || true`) was therefore being killed mid-first-upload — `|| true` masks the SIGTERM as exit 0, so this regression was invisible until I traced it via a wrapper script and Claude's `~/.claude/debug/<sid>.txt` log. Fix: wrap SessionEnd push in `bash -c "( nohup agnes push --quiet </dev/null >/dev/null 2>&1 & ) ; true"`. The subshell exits immediately, orphaning the upload child to init so it survives the hook subprocess kill. Same `bash -c` pattern as the existing `refresh-marketplace` SessionStart entry (for Windows compatibility). End-to-end verified against production: claude exited in 5s, detached child completed the upload, file `491e3a23-...jsonl` landed on the server within 30s with mtime 14:30 UTC. ## Test plan - [x] `pytest tests/test_lib_hooks.py` — added `test_session_end_push_is_detached` regression test asserting `nohup`, `&`, `</dev/null` are all present. - [x] `pytest tests/test_setup_hooks_template.py` — assertions loosened from `==` to `in` where necessary. - [x] Verified end-to-end against production with the detached wrapper before opening this PR (manual probe). <!-- devin-review-badge-begin --> --- <a href="https://app.devin.ai/review/keboola/agnes-the-ai-analyst/pull/222" target="_blank"> <picture> <source media="(prefers-color-scheme: dark)" srcset="https://static.devin.ai/assets/gh-open-in-devin-review-dark.svg?v=1"> <img src="https://static.devin.ai/assets/gh-open-in-devin-review-light.svg?v=1" alt="Open in Devin Review"> </picture> </a> <!-- devin-review-badge-end -->
This commit is contained in:
parent
7fc5365891
commit
8d0bb43b06
5 changed files with 107 additions and 4 deletions
|
|
@ -10,6 +10,12 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
## [0.46.4] — 2026-05-07
|
||||
|
||||
### Fixed
|
||||
|
||||
- The SessionEnd `agnes push` hook previously ran synchronously in the foreground. Claude Code's `-p` (headless) mode terminates SessionEnd hook subprocesses after ~1 second regardless of work in progress, so the upload was killed mid-stream and most session JSONLs never reached the server. The hook is now wrapped in `bash -c "( nohup agnes push ... & ) ; true"` so the upload child detaches from the hook subprocess and survives Claude's aggressive shutdown. Existing workspaces pick up the detached form on their next `agnes init` invocation via the existing migration path. Verified end-to-end against production: `claude -p` exited in 5s, the detached child completed the upload, and the session JSONL landed on the server within 30s.
|
||||
|
||||
## [0.46.3] — 2026-05-07
|
||||
|
||||
### Added
|
||||
|
|
|
|||
|
|
@ -26,6 +26,19 @@ Design notes:
|
|||
does NOT fire SessionEnd, or from abnormal session exits). Symmetric
|
||||
with `agnes pull` so the workspace heals on the next interactive
|
||||
session start.
|
||||
|
||||
- SessionEnd gets one entry: `agnes push --quiet`, wrapped to detach into
|
||||
the background. Claude Code in `-p` (headless) mode terminates SessionEnd
|
||||
hook subprocesses after ~1 second regardless of work in progress, so a
|
||||
synchronous `agnes push` (which uploads N session JSONLs serially and
|
||||
typically takes 5-30s) gets killed mid-stream and most files never reach
|
||||
the server. The `( nohup ... & )` subshell orphans the upload child so
|
||||
it survives the Claude shutdown. Errors are routed to /dev/null — no
|
||||
worse than the previous `2>/dev/null` form. Operators who want visibility
|
||||
into push failures can manually run `agnes push --json`. The SessionStart
|
||||
entry (3) above remains the safety net for orphans from any prior session
|
||||
whose SessionEnd push didn't run at all (genuine crash, kill, terminal
|
||||
close).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -103,8 +116,18 @@ def install_claude_hooks(workspace: Path) -> None:
|
|||
'bash -c "agnes refresh-marketplace --quiet 2>/dev/null || true"',
|
||||
'bash -c "agnes push --quiet 2>/dev/null || true"',
|
||||
])
|
||||
# SessionEnd push must run detached. Claude Code in `-p` (headless) mode
|
||||
# SIGTERMs hook subprocesses after ~1 second regardless of work in
|
||||
# progress; a synchronous `agnes push` (5-30s for a typical workspace)
|
||||
# gets killed mid-first-upload and most session JSONLs never reach the
|
||||
# server. The subshell `( ... & )` backgrounds the child and exits
|
||||
# immediately, orphaning it to init/launchd so it survives the hook
|
||||
# subprocess kill. `bash -c` mirrors the refresh-marketplace pattern
|
||||
# for Windows compatibility (Claude Code on Windows runs hook commands
|
||||
# directly, no shell). `; true` keeps the line exit-0 like the old
|
||||
# `|| true` form.
|
||||
_replace_or_add("SessionEnd", [
|
||||
"agnes push --quiet 2>/dev/null || true",
|
||||
'bash -c "( nohup agnes push --quiet </dev/null >/dev/null 2>&1 & ) ; true"',
|
||||
])
|
||||
|
||||
settings_path.write_text(json.dumps(cfg, indent=2) + "\n", encoding="utf-8")
|
||||
|
|
|
|||
|
|
@ -5,7 +5,15 @@
|
|||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "agnes pull --quiet 2>/dev/null || true"
|
||||
"command": "agnes self-upgrade --quiet 2>/dev/null || true; agnes pull --quiet 2>/dev/null || true"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "bash -c \"agnes refresh-marketplace --quiet 2>/dev/null || true\""
|
||||
}
|
||||
]
|
||||
},
|
||||
|
|
@ -23,7 +31,7 @@
|
|||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "agnes push --quiet 2>/dev/null || true"
|
||||
"command": "bash -c \"( nohup agnes push --quiet </dev/null >/dev/null 2>&1 & ) ; true\""
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
[project]
|
||||
name = "agnes-the-ai-analyst"
|
||||
version = "0.46.3"
|
||||
version = "0.46.4"
|
||||
description = "Agnes — AI Data Analyst platform for AI analytical systems"
|
||||
requires-python = ">=3.11,<3.14"
|
||||
license = "MIT"
|
||||
|
|
|
|||
|
|
@ -227,3 +227,69 @@ def test_install_idempotent_chained_entry(tmp_path):
|
|||
# duplicate any of them.
|
||||
assert len(cfg["hooks"]["SessionStart"]) == 3
|
||||
assert len(cfg["hooks"]["SessionEnd"]) == 1
|
||||
|
||||
|
||||
def test_session_end_push_is_detached(tmp_path):
    """Regression test for the headless-mode SIGTERM bug.

    Claude Code in `-p` (headless) mode SIGTERMs SessionEnd hook
    subprocesses ~1s after launch, regardless of whether the hook is
    still working. `agnes push` for a typical workspace (10 session
    JSONLs) takes 5-30s, so a synchronous form gets killed mid-first-
    upload and most files never reach the server. The hook MUST run
    detached so the upload child survives the hook subprocess being
    torn down.

    This test pins the wrapper shape — `bash -c "( nohup ... & ) ; true"` —
    so a future refactor that re-introduces the synchronous form fails
    loudly here instead of silently regressing in production.
    """
    import re  # local import: only the backgrounding check below needs it

    install_claude_hooks(tmp_path)
    cfg = _read_settings(tmp_path)
    ends = _commands_for(cfg, "SessionEnd")
    assert len(ends) == 1
    cmd = ends[0]
    assert "agnes push" in cmd, f"SessionEnd must still call agnes push; got: {cmd!r}"

    # Detachment markers — every one of these is load-bearing:
    #   - `nohup` ignores SIGHUP if the controlling terminal disappears
    #   - `&` backgrounds the child inside the subshell
    #   - `</dev/null` decouples stdin so the parent doesn't wait on a pipe
    #   - `>/dev/null 2>&1` decouples stdout/stderr likewise
    assert "nohup" in cmd, f"SessionEnd push must use nohup for detachment; got: {cmd!r}"
    # A plain `"&" in cmd` check would be vacuous: the required `2>&1`
    # redirection already contains an ampersand, so the test would keep
    # passing after the backgrounding operator was dropped. Require a
    # standalone `&` control operator instead — one that is not part of a
    # `>&N` fd duplication, `&&`, or `&>` redirection.
    assert re.search(r"(?<![>&])&(?![&>\d])", cmd), (
        f"SessionEnd push must background with &; got: {cmd!r}"
    )
    assert "</dev/null" in cmd, (
        f"SessionEnd push must redirect stdin from /dev/null; got: {cmd!r}"
    )
    assert ">/dev/null 2>&1" in cmd, (
        f"SessionEnd push must redirect stdout/stderr to /dev/null; got: {cmd!r}"
    )
    # `bash -c` wrapping is required because Claude Code on Windows runs
    # hook commands directly (no shell), so the subshell + redirection
    # syntax wouldn't parse otherwise.
    assert cmd.startswith("bash -c "), (
        f"SessionEnd push must be wrapped in bash -c for Windows; got: {cmd!r}"
    )
|
||||
|
||||
|
||||
def test_install_replaces_old_synchronous_session_end_push(tmp_path):
    """Upgrade path: a pre-fix workspace gets its SessionEnd entry swapped.

    A workspace bootstrapped before the detachment fix carries the old
    synchronous `agnes push --quiet 2>/dev/null || true` SessionEnd entry.
    Re-running the installer must replace that entry with the detached
    (`nohup`) form rather than stacking a second entry alongside it.
    """
    # Seed settings.json with exactly one legacy, synchronous SessionEnd hook.
    legacy_entry = {
        "hooks": [
            {"type": "command", "command": "agnes push --quiet 2>/dev/null || true"},
        ],
    }
    claude_dir = tmp_path / ".claude"
    claude_dir.mkdir(parents=True)
    seeded = json.dumps({"hooks": {"SessionEnd": [legacy_entry]}})
    (claude_dir / "settings.json").write_text(seeded)

    install_claude_hooks(tmp_path)

    ends = _commands_for(_read_settings(tmp_path), "SessionEnd")
    # Still a single entry: replaced in place, not appended.
    assert len(ends) == 1, ends
    assert "nohup" in ends[0], (
        f"Old synchronous push entry must have been replaced with the detached form; got: {ends!r}"
    )
|
||||
|
|
|
|||
Loading…
Reference in a new issue