Merge pull request #209 from keboola/zs/cli-auto-upgrade-spec

feat: server-pinned CLI auto-upgrade (0.43.0)
This commit is contained in:
ZdenekSrotyr 2026-05-06 23:46:47 +02:00 committed by GitHub
commit d3113e7a31
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 2714 additions and 58 deletions

View file

@ -10,6 +10,14 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
## [Unreleased]
## [0.43.0] — 2026-05-06
### Added
- CLI auto-upgrade: `agnes self-upgrade` reinstalls the CLI from the server's currently-shipped wheel via `uv tool install --force`, falling back to `pip install --force-reinstall --no-deps` via `sys.executable` when uv is not on PATH. After install, the new binary is smoke-tested at the install-resolved path (`uv tool dir --bin` for uv, `<sys.executable parent>/agnes` for pip) — never via PATH lookup, to avoid stale-shadow false positives. Smoke failure triggers automatic rollback to the previously verified-good wheel (recorded in `~/.config/agnes/last_known_good.json`); rollback's exit code is captured and surfaced on stderr if it also fails. First-ever upgrade or unrecoverable rollback prints the canonical bootstrap recovery: `curl -fsSL <your-agnes-server>/cli/install.sh | bash`. The new command is wired into the SessionStart hook installed by `agnes init` as a chained shell entry (`agnes self-upgrade … || true; agnes pull … || true`) so an upgrade failure does not block the pull.
- Server: `/api/*` responses now carry `X-Agnes-Latest-Version` and `X-Agnes-Min-Version` headers. CLIs older than `X-Agnes-Min-Version` exit with **code 2** and a remediation message instead of failing on a wire-protocol mismatch. Day-one floor is `0.0.0` (no enforcement) — bump `MIN_COMPAT_CLI_VERSION` in `app/version.py` in the same PR that ships a deliberate wire break.
- CLI: `cli/update_check.py:check()` accepts a keyword-only `bypass_disabled=True` so explicit `agnes self-upgrade` invocations probe `/cli/latest` even when `AGNES_NO_UPDATE_CHECK=1` is set (which silences the implicit warning loop only).
## [0.42.0] — 2026-05-06
### Fixed
@ -99,6 +107,8 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C
the fix logs and continues — the next extractor pass creates the
file and the master view appears on the rebuild after that.
## [0.39.0] — 2026-05-06
### Performance
- **`/api/query` (and `agnes query --remote`) now rewrites user SQL referencing
`query_mode='remote'` BigQuery rows into a single `bigquery_query()` call

View file

@ -43,7 +43,7 @@ def _find_wheel() -> Path | None:
async def cli_latest():
"""Metadata for the currently-shipped CLI wheel.
Consumed by `da` CLI's auto-update check so it can warn when a newer
Consumed by `agnes` CLI's auto-update check so it can warn when a newer
version is on the server. Public + cacheable — no secrets here.
Returns `version=None` when the server has no wheel yet (dev image that
didn't run `uv build`).

View file

@ -23,8 +23,6 @@ except ImportError:
import logging
from contextlib import asynccontextmanager
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _pkg_version
from pathlib import Path
from urllib.parse import quote
@ -36,18 +34,7 @@ from app.logging_config import setup_logging
setup_logging("app")
def _app_version() -> str:
"""Product version for FastAPI title / OpenAPI schema.
Single source of truth is `pyproject.toml` `[project].version`; we read
it back via `importlib.metadata` at runtime so `/docs`, `/openapi.json`,
`/api/version`, `/cli/latest`, and `da --version` can never drift.
"""
try:
return _pkg_version("agnes-the-ai-analyst")
except PackageNotFoundError:
return "dev"
from app.version import APP_VERSION, MIN_COMPAT_CLI_VERSION
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
@ -183,7 +170,7 @@ def create_app() -> FastAPI:
app = FastAPI(
title="AI Data Analyst",
description="Data distribution platform for AI analytical systems",
version=_app_version(),
version=APP_VERSION,
lifespan=lifespan,
# Intentionally NOT debug=DEBUG: FastAPI's debug=True installs
# Starlette's ServerErrorMiddleware which intercepts unhandled
@ -195,6 +182,16 @@ def create_app() -> FastAPI:
debug=False,
)
@app.middleware("http")
async def _add_version_headers(request, call_next):
    """Stamp `/api/*` responses with the server's version headers.

    The downstream handler always runs first; the two advisory headers
    (`X-Agnes-Latest-Version`, `X-Agnes-Min-Version`) are attached only when
    the request path starts with `/api/`. Every other response is returned
    untouched.
    """
    response = await call_next(request)
    # /api/* only — the headers are advisory to the agnes CLI; UI/docs/
    # marketplace traffic doesn't consume them.
    if not request.url.path.startswith("/api/"):
        return response
    advertised = (
        ("X-Agnes-Latest-Version", APP_VERSION),
        ("X-Agnes-Min-Version", MIN_COMPAT_CLI_VERSION),
    )
    for header_name, header_value in advertised:
        response.headers[header_name] = header_value
    return response
# FastAPI debug toolbar — only when DEBUG=1 in env. Injects per-request
# HTML overlay (headers, routes, timer, profiling, logs) on any HTML
# response; harmless on JSON. Inner try/except is for the import only:

34
app/version.py Normal file
View file

@ -0,0 +1,34 @@
"""Version constants used by FastAPI's `version=` field and the
`X-Agnes-{Latest,Min}-Version` response-header middleware.
`APP_VERSION` reads from package metadata so it tracks `pyproject.toml`
without a manual literal to keep in sync. **This is not a project-wide
single source of truth** — `AGNES_VERSION` env var (set by CI/Docker
builds) continues to drive `/api/version`, `/cli/install.sh`, and the
admin UI. Those call sites pre-date `app/version.py` and are out of scope
for this change.
`MIN_COMPAT_CLI_VERSION` is the oldest CLI version the server advertises
as compatible on `/api/*` response headers. Enforcement lives in the
client: `cli/client.py:_check_version_headers` exits the CLI when its
local version is below this floor. The middleware itself does not reject
requests — older clients just get a header they're free to ignore (in
practice, only the agnes CLI inspects it).
Day-one value of `MIN_COMPAT_CLI_VERSION` is `0.0.0` (no enforcement);
bumped manually when shipping a wire-protocol break.
"""
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _pkg_version
def _read_app_version() -> str:
try:
return _pkg_version("agnes-the-ai-analyst")
except PackageNotFoundError:
return "0.0.0+dev"
# Resolved once, at import time, from the installed package metadata.
APP_VERSION = _read_app_version()
# Oldest CLI version advertised as compatible; "0.0.0" means no enforcement.
MIN_COMPAT_CLI_VERSION = "0.0.0"

View file

@ -3,7 +3,9 @@
import atexit
import glob
import os
import platform
import re
import sys
import threading
import time
import traceback
@ -15,6 +17,16 @@ from typing import Optional
import httpx
from cli.config import _config_dir, get_server_url, get_token
from cli.update_check import _installed_version, _version_lt
# User-Agent is invariant for the life of the process — installed
# version doesn't change, OS doesn't change. Cache it at import time so
# every `get_client()` call doesn't re-do the importlib.metadata lookup
# + `platform.system()` call. (Reviewer note: do NOT cache the
# `_installed_version` lookup inside `_check_version_headers` — tests
# patch `cli.client._installed_version` and a cached value would defeat
# the patch. The hook keeps calling it; network cost dwarfs the lookup.)
_USER_AGENT = f"agnes/{_installed_version()} ({platform.system().lower()})"
# PID-suffixed tmp / part files — see `_download_chunked` and
@ -213,6 +225,35 @@ def _translate_transport_error(
)
def _check_version_headers(response: "httpx.Response") -> None:
"""Hard-stop the CLI when the server reports we're below min_version.
Drift warnings (`local < latest`) are already printed by the
update_check root callback in cli/main.py no need to nag again on
every API call. This hook only enforces the hard floor.
"""
# Recursion barrier: `agnes self-upgrade` sets this for the duration
# of the upgrade. Without it, a /api/* call inside the install flow
# could exit 2 with "Run: agnes self-upgrade" — inside agnes
# self-upgrade. The sentinel is process-local and propagates to
# subprocesses via the explicit env= passed to the smoke test.
if os.environ.get("AGNES_SELF_UPGRADE_IN_PROGRESS") == "1":
return
latest = response.headers.get("X-Agnes-Latest-Version")
minv = response.headers.get("X-Agnes-Min-Version")
if not latest or not minv:
return
local = _installed_version()
if local == "unknown":
return
if _version_lt(local, minv):
sys.stderr.write(
f"error: agnes {local} is incompatible with server {latest} "
f"(min required: {minv}). Run: agnes self-upgrade\n"
)
sys.exit(2)
def get_client(timeout: float = 30.0) -> httpx.Client:
"""Get an authenticated httpx client.
@ -220,6 +261,13 @@ def get_client(timeout: float = 30.0) -> httpx.Client:
`api_*` helpers (one request, then close). The big-stream path
(`stream_download`) routes through `_get_shared_client()` to amortize
TLS handshakes and HTTP/2 multiplexing across N parquet downloads.
Wires `_check_version_headers` as a response event hook: every
metadata call sees the server's `X-Agnes-{Latest,Min}-Version`
headers and hard-stops if our local version is below the floor.
Hook is intentionally NOT wired on `_get_shared_client()` — that
client backs streaming parquet downloads where a `sys.exit(2)`
mid-stream would leak per-thread part files.
"""
token = get_token()
headers = {}
@ -227,8 +275,9 @@ def get_client(timeout: float = 30.0) -> httpx.Client:
headers["Authorization"] = f"Bearer {token}"
return httpx.Client(
base_url=get_server_url(),
headers=headers,
headers={**headers, "User-Agent": _USER_AGENT},
timeout=timeout,
event_hooks={"response": [_check_version_headers]},
)

View file

@ -0,0 +1,288 @@
"""`agnes self-upgrade` — pull the wheel from the server, reinstall, smoke-test,
roll back on failure."""
from __future__ import annotations
import json
import os
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import Optional, Union
import typer
from cli.config import _config_dir, get_server_url
from cli.update_check import UpdateInfo, check, format_outdated_notice
# Typer sub-application for the `self-upgrade` command. The command logic
# lives in the callback, so it is configured to run without a subcommand.
self_upgrade_app = typer.Typer(
    name="self-upgrade",
    help="Reinstall the CLI from the server's currently-shipped wheel.",
    invoke_without_command=True,
)
# Name of the environment variable set to "1" while an upgrade is running;
# it is also passed explicitly to the smoke-test subprocess.
_SENTINEL_ENV = "AGNES_SELF_UPGRADE_IN_PROGRESS"
class _Unreachable:
    """Marker type for "--force was given but the server probe failed".

    `_resolve_info` returns the module-level instance of this class so the
    caller can tell "an upgrade was explicitly requested and the server
    could not be reached" (exit 1, message on stderr) apart from "no upgrade
    needed" (exit 0, silent).
    """
# Single shared instance; callers test for it with isinstance().
_UNREACHABLE = _Unreachable()
def _invalidate_update_cache() -> None:
    """Delete update_check.json so the next CLI run re-probes /cli/latest.

    A cache file that is already absent is not an error.
    """
    cache_file = _config_dir() / "update_check.json"
    cache_file.unlink(missing_ok=True)
def _last_known_good_path() -> Path:
    """Return the location of last_known_good.json inside the config dir."""
    config_root = _config_dir()
    return config_root / "last_known_good.json"
def _read_last_known_good() -> Optional[str]:
p = _last_known_good_path()
if not p.exists():
return None
try:
return json.loads(p.read_text(encoding="utf-8")).get("download_url")
except (OSError, json.JSONDecodeError):
return None
def _record_last_known_good(download_url: str) -> None:
    """Persist `download_url` as the last verified-good wheel.

    Best-effort: the parent directory is created if needed, and any OSError
    while creating it or writing the file is swallowed so that a failure to
    record never breaks the upgrade flow.
    """
    target = _last_known_good_path()
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps({"download_url": download_url})
        target.write_text(serialized, encoding="utf-8")
    except OSError:
        return
def _uv_tool_bin_path() -> Optional[Path]:
"""Locate the agnes shim uv installed.
Tries `uv tool dir --bin` (uv >= 0.5). Falls back to uv's documented
default install location on older uv where `--bin` is rejected.
"""
bin_dir: Optional[Path] = None
try:
out = subprocess.run(
["uv", "tool", "dir", "--bin"], capture_output=True, text=True, timeout=5,
)
if out.returncode == 0:
bin_dir = Path(out.stdout.strip())
except (OSError, subprocess.TimeoutExpired):
bin_dir = None
if bin_dir is None:
if sys.platform == "win32":
appdata = os.environ.get("APPDATA")
if appdata:
bin_dir = Path(appdata) / "uv" / "tools" / "bin"
else:
bin_dir = Path.home() / ".local" / "bin"
if bin_dir is None or not bin_dir.exists():
return None
for name in ("agnes.exe", "agnes"):
candidate = bin_dir / name
if candidate.exists():
return candidate
return None
def _pip_bin_path() -> Optional[Path]:
"""`<venv>/bin/agnes` (POSIX) or `<venv>\\Scripts\\agnes.exe` (Windows)."""
parent = Path(sys.executable).parent
name = "agnes.exe" if sys.platform == "win32" else "agnes"
candidate = parent / name
return candidate if candidate.exists() else None
def _install_with_uv(download_url: str, *, quiet: bool) -> int:
    """Run `uv tool install --force <download_url>` and return its exit code.

    With `quiet=True` the child's stdout is discarded; stderr is always
    inherited.
    """
    command = ["uv", "tool", "install", "--force", download_url]
    stdout_target = subprocess.DEVNULL if quiet else None
    completed = subprocess.run(command, stdout=stdout_target)
    return completed.returncode
def _install_with_pip(download_url: str, *, quiet: bool) -> int:
"""Install into the SAME interpreter that's running this command.
sys.executable resolves to the venv that owns the live `agnes` binary.
`python3` would PATH-resolve to system python on macOS, landing the
wheel outside the agnes venv. `--user` is wrong inside a uv-tool venv
(targets ~/.local outside the venv).
"""
out = subprocess.DEVNULL if quiet else None
with tempfile.TemporaryDirectory(prefix="agnes_cli.") as td:
wheel_path = Path(td) / "agnes.whl"
rc = subprocess.run(
["curl", "-fsSL", "-o", str(wheel_path), download_url], stdout=out
).returncode
if rc != 0:
return rc
return subprocess.run(
[sys.executable, "-m", "pip", "install",
"--force-reinstall", "--no-deps", str(wheel_path)],
stdout=out,
).returncode
def _smoke_test_new_binary(install_method: str, expected_version: str) -> tuple[bool, str]:
    """Run the freshly installed `agnes --version` and verify what it prints.

    The binary is resolved at the install-method-specific path rather than
    via PATH — this defends against a stale shadow ahead of the freshly
    installed binary in $PATH.

    Returns:
        `(True, <stdout>)` when the binary exits 0 and the last token of
        its output equals `expected_version`; otherwise `(False, <reason>)`.
    """
    if install_method == "uv":
        binary = _uv_tool_bin_path()
    else:
        binary = _pip_bin_path()
    if binary is None:
        return False, f"agnes binary not found at expected {install_method} install path"

    try:
        child_env = dict(os.environ)
        child_env["AGNES_NO_UPDATE_CHECK"] = "1"
        child_env[_SENTINEL_ENV] = "1"
        proc = subprocess.run(
            [str(binary), "--version"],
            capture_output=True, text=True, timeout=10, env=child_env,
        )
        if proc.returncode != 0:
            return False, f"exit {proc.returncode}: {proc.stderr.strip()[:200]}"

        # Compare as Version objects (PEP 440-aware) so "0.40.0" doesn't
        # match "0.40.10".
        from packaging.version import InvalidVersion, Version

        printed = proc.stdout.strip()
        words = printed.split()
        actual_str = words[-1] if words else ""
        try:
            same_version = Version(actual_str) == Version(expected_version)
        except InvalidVersion:
            return False, f"unparseable version output: {printed[:80]}"
        if not same_version:
            return False, (
                f"version mismatch: expected {expected_version}, "
                f"got {actual_str}"
            )
        return True, printed
    except (subprocess.TimeoutExpired, OSError) as err:
        return False, f"{type(err).__name__}: {err}"
def _resolve_info(force: bool) -> Union[UpdateInfo, _Unreachable, None]:
    """Probe the server and decide whether there is a wheel to install.

    Returns:
        UpdateInfo    install this wheel
        _UNREACHABLE  --force specified, server probe failed
        None          nothing to do (current, or offline without --force)
    """
    if force:
        # Drop the cached probe first so the server's current wheel is seen.
        _invalidate_update_cache()

    probe = check(get_server_url(), bypass_disabled=True)
    if probe is None:
        if force:
            return _UNREACHABLE
        return None
    if not probe.download_url:
        return None

    install_wanted = force or probe.is_outdated()
    return probe if install_wanted else None
def _do_install_with_smoke_and_rollback(
    info: UpdateInfo, *, quiet: bool
) -> int:
    """Install `info`'s wheel, smoke-test it, and roll back if it is broken.

    Returns the exit code typer should use (0 success, 1 failure).
    """
    previous_url = _read_last_known_good()  # may be None on first upgrade

    # Prefer uv when it is on PATH; the same installer is reused for rollback.
    if shutil.which("uv"):
        method, install = "uv", _install_with_uv
    else:
        method, install = "pip", _install_with_pip

    install_rc = install(info.download_url, quiet=quiet)
    if install_rc != 0:
        sys.stderr.write(f"agnes self-upgrade: install failed with exit {install_rc}\n")
        return 1

    passed, detail = _smoke_test_new_binary(method, expected_version=info.latest)
    if passed:
        # Convention: record then invalidate. No correctness consequence either way.
        _record_last_known_good(info.download_url)
        _invalidate_update_cache()
        if not quiet:
            typer.echo(f"agnes self-upgrade: installed {info.latest}", err=True)
        return 0

    sys.stderr.write(
        f"agnes self-upgrade: new binary failed smoke test ({detail}).\n"
    )
    server = get_server_url().rstrip("/")
    recovery_hint = f" Manual recovery: curl -fsSL {server}/cli/install.sh | bash\n"

    rollback_possible = bool(previous_url) and previous_url != info.download_url
    if not rollback_possible:
        sys.stderr.write(
            " no prior wheel URL on record; rollback skipped.\n"
        )
        sys.stderr.write(recovery_hint)
        return 1

    sys.stderr.write(f" rolling back to {previous_url}\n")
    rollback_rc = install(previous_url, quiet=True)
    if rollback_rc != 0:
        sys.stderr.write(
            f" rollback ALSO failed (rc={rollback_rc}); CLI is in a broken state.\n"
        )
        sys.stderr.write(recovery_hint)
    return 1
@self_upgrade_app.callback()
def self_upgrade(
    quiet: bool = typer.Option(
        False, "--quiet",
        help="Suppress progress output. Failures still surface on stderr.",
    ),
    check_only: bool = typer.Option(
        False, "--check-only",
        help="Print status, don't install. Exit 1 if outdated.",
    ),
    force: bool = typer.Option(
        False, "--force",
        help="Reinstall the server's current wheel even when already on the latest version.",
    ),
) -> None:
    # Entry point for `agnes self-upgrade`: probe the server, then report
    # (--check-only) or install. Always finishes by raising typer.Exit.
    #
    # Remember any sentinel value that was already set so it is restored
    # (rather than destroyed) on the way out — we own the namespace but a
    # wrapper could legitimately set it.
    saved_sentinel = os.environ.get(_SENTINEL_ENV)
    os.environ[_SENTINEL_ENV] = "1"
    try:
        info = _resolve_info(force)
        unreachable = isinstance(info, _Unreachable)

        # --check-only is read-only intent — never exit non-zero on
        # transport errors. If unreachable, treat as "can't tell, current"
        # and exit 0 silently.
        if check_only:
            if not unreachable and info is not None and info.is_outdated():
                typer.echo(format_outdated_notice(info), err=True)
                raise typer.Exit(1)
            raise typer.Exit(0)

        if unreachable:
            sys.stderr.write(
                f"agnes self-upgrade: cannot reach {get_server_url()}/cli/latest\n"
            )
            raise typer.Exit(1)

        if info is None:
            raise typer.Exit(0)  # nothing to do, silent

        exit_code = _do_install_with_smoke_and_rollback(info, quiet=quiet)
        raise typer.Exit(exit_code)
    finally:
        if saved_sentinel is not None:
            os.environ[_SENTINEL_ENV] = saved_sentinel
        else:
            os.environ.pop(_SENTINEL_ENV, None)

View file

@ -7,8 +7,9 @@ without dragging in the deleted command module.
Design notes:
- Workspace-scoped (`<workspace>/.claude/settings.json`), NOT user-home.
The hooks fire only when Claude Code opens this workspace.
- Idempotent: second invocation drops a prior `agnes pull` / `da sync` /
`agnes push` entry (matched by command substring) and appends fresh entries.
- Idempotent: second invocation drops a prior `agnes self-upgrade` /
`agnes pull` / `da sync` / `agnes push` entry (matched by command substring)
and appends fresh entries.
Third-party hooks (mixed entries, foreign commands) are left alone.
- Uses `|| true` in the hook command so the hook never blocks a session on
a transient sync error.
@ -24,11 +25,11 @@ from pathlib import Path
# Substrings that identify "our" hook commands. Includes legacy `da sync`
# so a workspace bootstrapped by an older CLI gets cleanly upgraded on the
# next `agnes init` run.
_OUR_COMMAND_MARKERS = ("agnes pull", "agnes push", "da sync")
_OUR_COMMAND_MARKERS = ("agnes self-upgrade", "agnes pull", "agnes push", "da sync")
def install_claude_hooks(workspace: Path) -> None:
"""Install SessionStart->`agnes pull` and SessionEnd->`agnes push` hooks.
"""Install SessionStart->`agnes self-upgrade; agnes pull` and SessionEnd->`agnes push` hooks.
Idempotent. Workspace-scoped (writes `<workspace>/.claude/settings.json`).
Preserves third-party hooks and other event types.
@ -60,7 +61,11 @@ def install_claude_hooks(workspace: Path) -> None:
existing.remove(entry)
existing.append({"hooks": [{"type": "command", "command": command}]})
_replace_or_add("SessionStart", "agnes pull --quiet 2>/dev/null || true")
_replace_or_add(
"SessionStart",
"agnes self-upgrade --quiet 2>/dev/null || true; "
"agnes pull --quiet 2>/dev/null || true",
)
_replace_or_add("SessionEnd", "agnes push --quiet 2>/dev/null || true")
settings_path.write_text(json.dumps(cfg, indent=2) + "\n", encoding="utf-8")

View file

@ -33,6 +33,7 @@ from cli.commands.status import status_app
from cli.commands.admin import admin_app
from cli.commands.diagnose import diagnose_app
from cli.commands.skills import skills_app
from cli.commands.self_upgrade import self_upgrade_app
from cli.commands.setup import setup_app
from cli.commands.server import server_app
from cli.commands.explore import explore_app
@ -115,6 +116,7 @@ app.add_typer(status_app, name="status")
app.add_typer(admin_app, name="admin")
app.add_typer(diagnose_app, name="diagnose")
app.add_typer(skills_app, name="skills")
app.add_typer(self_upgrade_app, name="self-upgrade")
app.add_typer(setup_app, name="setup")
app.add_typer(server_app, name="server")
app.add_typer(explore_app, name="explore")

View file

@ -1,7 +1,7 @@
"""Auto-check for a newer CLI version on the configured server.
Runs in the root typer callback before subcommand dispatch. Failure is
silent — we never block a working `da` command on a best-effort version
silent — we never block a working `agnes` command on a best-effort version
probe. Result is cached in `$AGNES_CONFIG_DIR/update_check.json` for 24h so
we don't hammer the server on every invocation.
@ -23,7 +23,7 @@ _CACHE_FILENAME = "update_check.json"
_CACHE_TTL_SECONDS = 24 * 60 * 60 # 24h on a successful probe
_NEGATIVE_CACHE_TTL_SECONDS = 5 * 60 # 5min on a failed probe, to avoid
# re-probing 3s of silence (drop-packet networks: corporate firewall, VPN)
# on every `da` invocation.
# on every `agnes` invocation.
_REQUEST_TIMEOUT_SECONDS = 3.0 # keep startup snappy
@ -114,13 +114,23 @@ def _fetch_latest(server_url: str) -> Optional[dict]:
return None
def check(server_url: Optional[str]) -> Optional[UpdateInfo]:
def check(
server_url: Optional[str], *, bypass_disabled: bool = False
) -> Optional[UpdateInfo]:
"""Return UpdateInfo if a check ran (cached or fresh), else None.
Silent on every failure path: no server configured, CLI package not
installed, network down, malformed response, cache unreadable.
`bypass_disabled=True` ignores `AGNES_NO_UPDATE_CHECK`. The env var
silences the implicit warning loop in the root callback; an explicit
user-typed `agnes self-upgrade` is not the implicit loop and must
still probe. Default keeps existing call sites (root callback) silent
when the env var is set.
"""
if is_disabled() or not server_url:
if not bypass_disabled and is_disabled():
return None
if not server_url:
return None
installed = _installed_version()

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,241 @@
# CLI Auto-Upgrade — Server-Pinned Version
> **Status:** spec / design. Convert to an implementation plan in `docs/superpowers/plans/` once reviewed.
**Goal:** Keep an analyst's locally-installed `agnes` CLI in sync with the server it talks to. The server is the single source of truth for "what version should be running"; the CLI never asks PyPI, only the server.
**Why now:** today an analyst installs once via `uv tool install $SERVER/cli/wheel/<name>` and drifts arbitrarily. The CLI already prints a *warning* when out of date but never upgrades itself, and there's no hard-stop when a wire-protocol break ships — drifted clients fail with cryptic errors instead of being told to upgrade.
**Non-goal:** distributing the CLI through PyPI, GitHub releases, or any out-of-band channel. The wheel lives next to the server (`/app/dist/*.whl`) and is served by `app/api/cli_artifacts.py`.
---
## What already exists
The first half of this design **is already shipped**, just incomplete:
- **`GET /cli/latest`** (`app/api/cli_artifacts.py:42`) → `{version, wheel_filename, download_url_path}`. Public, no auth.
- **`GET /cli/wheel/{name}`** + `/cli/download` + `/cli/install.sh` for distribution.
- **`cli/update_check.py`** — polls `/cli/latest` on every CLI invocation from `cli/main.py:99-104`, caches result for 24h (positive) / 5min (negative), prints a stderr warning with a copy-paste `uv tool install --force <url>` command. Opt-out: `AGNES_NO_UPDATE_CHECK=1`.
- **`cli/client.py:216 get_client()`** — the shared `httpx.Client` factory. Single chokepoint for response-header inspection.
- **Hook installer** at `cli/lib/hooks.py:install_claude_hooks` writes:
- `SessionStart` → `agnes pull --quiet 2>/dev/null || true`
- `SessionEnd` → `agnes push --quiet 2>/dev/null || true`
What's missing:
1. The CLI prints a copy-paste command but never **executes** the upgrade.
2. No `min_version` floor — drift is unbounded; a wire break gives a cryptic 500 instead of a clear "you're too old, upgrade".
3. No SessionStart hook for proactive upgrade — analyst must notice the warning, copy, paste, run.
4. The server-side comment on `/cli/latest` (`app/api/cli_artifacts.py:47`) and the docstring in `cli/update_check.py` still reference the old `da` binary name; cleanup while we're in there.
---
## Design
Two layers, complementary, with different latencies and failure modes.
### Layer A — proactive auto-upgrade (SessionStart hook + new CLI command)
`agnes init` writes a **single** SessionStart hook entry that chains self-upgrade and pull with `;` so ordering is guaranteed by the shell, not by undocumented Claude Code hook-execution semantics:
```
SessionStart → agnes self-upgrade --quiet 2>/dev/null || true; agnes pull --quiet 2>/dev/null || true
SessionEnd → agnes push --quiet 2>/dev/null || true
```
The `;` runs both unconditionally; each `|| true` keeps a single failure from aborting the line. We lose nothing the design relied on (the *"upgrade fail does not block pull"* property is preserved by the second `|| true`), and we gain an ordering guarantee that holds across every Claude Code version.
`agnes self-upgrade [--quiet] [--check-only] [--force]`:
1. Set `AGNES_SELF_UPGRADE_IN_PROGRESS=1` in `os.environ` for the duration of the call. Layer B's header check reads this sentinel and *skips* the hard-stop while we're upgrading — without this, a later refactor that has `self-upgrade` calling `get_client()` (e.g. for auth) would loop: hit `< min`, exit 2 with *"Run: agnes self-upgrade"* — inside `agnes self-upgrade`. Sentinel propagates to subprocesses via the explicit `env=` we pass to the smoke test.
2. If `--force`, **invalidate** the `update_check.json` cache *before* probing, so we always pick up the server's current `download_url`.
3. Reuse `cli.update_check.check(server_url)` — same `/cli/latest` call, same cache, same version comparison. No second polling path.
4. If `info is None` (disabled / no server / unknown local version) or `(not force and not info.is_outdated())` → exit 0.
5. `--check-only` → print `format_outdated_notice(info)`, exit 1 if outdated, 0 if current.
6. Otherwise: snapshot `prior_url = _read_last_known_good()` (the URL of the version we last successfully smoke-tested into; may be `None` on first upgrade — best-effort rollback only). Then reinstall:
- `uv` available (`shutil.which("uv")`) → `uv tool install --force "<download_url>"`
- else → download wheel to `mktemp -d` (curl), then `[sys.executable, "-m", "pip", "install", "--force-reinstall", "--no-deps", <wheel>]`. **Crucially** uses `sys.executable` (the running CLI's interpreter) rather than `python3` (PATH-resolved system Python), and **does not** pass `--user` — both would land the wheel outside the uv-tool venv that owns the `agnes` binary, silently no-op'ing the upgrade.
7. **Smoke-test the new binary** before declaring success — but not via `shutil.which("agnes")`. PATH may shadow the just-installed binary with a stale `/usr/local/bin/agnes` from an old `pip install --user` or Homebrew shim, in which case `--version` would print the *old* version and report success. Instead, locate the binary deterministically:
- **uv path** → call `uv tool dir --bin` (one subprocess; uv's `--bin` flag returns the directory containing entrypoint shims, working transparently across POSIX/Windows). Look for `agnes` then `agnes.exe` in that directory.
- **pip path** → `<sys.executable parent>/agnes` (POSIX) or `<sys.executable parent>/agnes.exe` (Windows) — the sibling of the running interpreter, which is the venv pip just rewrote.
Then `subprocess.run([str(binary), "--version"], env={**os.environ, "AGNES_NO_UPDATE_CHECK": "1", "AGNES_SELF_UPGRADE_IN_PROGRESS": "1"}, timeout=10, capture_output=True)`. Smoke passes when returncode is 0 **and** the trailing token of stdout parses to a `packaging.version.Version` equal to `info.latest` — equality on `Version()` (not substring), so `0.40.0` does not falsely match `0.40.10` and PEP 440 local segments are handled.
8. On smoke fail: if `prior_url` is set and ≠ `info.download_url`, attempt a single rollback install of `prior_url` via the same uv/pip path. **Capture the rollback's return code** — if it's non-zero, the CLI is in a broken state, surface this on stderr alongside the bootstrap-recovery command. If `prior_url` is `None` (first-ever upgrade) or rollback also fails, stderr prints `Run: curl -fsSL <server>/cli/install.sh | bash` — the canonical bootstrap path that doesn't depend on local state. Either way `raise typer.Exit(1)`.
9. On smoke pass: `_record_last_known_good(info.download_url)` (writes `~/.config/agnes/last_known_good.json` — separate from `update_check.json`, updated only after a verified-good install) then `_invalidate_update_cache()`. Convention; no correctness consequence either way.
10. `--quiet` suppresses progress output; **stderr always passes through on install / smoke / rollback failures** — `--quiet` is for routine success runs (the SessionStart hook), not a gag on errors.
11. **`--force` + offline.** `--force` invalidates the cache before probing `/cli/latest`. If the probe fails (network down), `--force` raises `typer.Exit(1)` with `cannot reach <server>/cli/latest` on stderr — explicit destructive intent deserves explicit feedback. Without `--force`, an offline probe is silent (the implicit warning loop's contract).
12. **`--check-only` is read-only intent — exit 0 on transport errors.** Even with `--force`, when the probe is unreachable under `--check-only`, the command exits 0 silently rather than surfacing the error: `--check-only` should never produce a non-zero exit unless the CLI is *known* outdated. (`--force` semantics still apply to the actual install path; pairing `--check-only --force` is well-defined: it invalidates the cache, fresh-probes, prints status, never installs.)
13. **`AGNES_NO_UPDATE_CHECK=1`** silences the implicit warning loop only. Explicit `agnes self-upgrade` calls `check(server_url, bypass_disabled=True)` so the env var does not turn a user-typed upgrade command into a silent no-op.
**Platform support:** smoke test branches on `sys.platform == "win32"` for the `.exe` suffix; the rest of the flow is platform-neutral via uv. Windows is supported on a best-effort basis (analyst laptops are predominantly macOS/Linux).
Honors the existing `AGNES_NO_UPDATE_CHECK=1` opt-out — same flag, same intent. No new opt-out env var.
**Latency:** runs once at session start, blocks pull by ~3-10s on upgrade (install + ~1s smoke test), ~0.2s when in-sync (one cached HTTP roundtrip + early-out).
**Failure modes:** offline / server down → `|| true` → session continues on old version. Install succeeds but new wheel is broken → smoke test catches it, attempts rollback, prints recovery instructions. Layer B catches drift on the next API call.
### Layer B — reactive verification (response headers)
Every `/api/*` response includes two headers (FastAPI middleware):
- `X-Agnes-Latest-Version: 0.40.0` — `APP_VERSION`, same value the install script bakes in.
- `X-Agnes-Min-Version: 0.0.0` — oldest CLI version the server still accepts. Lives in a single Python constant. Bumped manually when a wire-protocol break ships. **Ships at `0.0.0` on day one** so rollout doesn't accidentally lock anyone out — first deliberate gate is the first time this gets bumped.
The shared HTTP client (`cli/client.py:216`) inspects these on every response:
| Local CLI version | Behavior |
|---|---|
| `>= latest` | nothing |
| `>= min` and `< latest` | nothing — Layer A's startup poll already prints the warning; no need to nag again on every API call |
| `< min` | print `error: agnes <local> is incompatible with server <latest> (min required: <min>). Run: agnes self-upgrade` and `sys.exit(2)`. **Operation is not performed.** |
**Recursion barrier:** `_check_version_headers` short-circuits (returns silently, no enforcement) when `os.environ.get("AGNES_SELF_UPGRADE_IN_PROGRESS") == "1"`. Set by Layer A's command for the duration of the upgrade so the in-flight `agnes self-upgrade` cannot be locked out from itself by a `< min` response on any internal `/api/*` call. The sentinel is process-local and propagates to the smoke-test subprocess via explicit `env=`.
The CLI also sends `User-Agent: agnes/<version> (<platform>)` so the server can audit drift in access logs.
**Day-one floor.** `MIN_COMPAT_CLI_VERSION = "0.0.0"` — no enforcement. The constant + middleware + CLI inspection are an opt-in mechanism for the future. When a wire break ships, the engineer bumps the constant in the same PR and adds a `**BREAKING**` CHANGELOG bullet — same review discipline as every other behavior change. No standalone CI gate, no doc, no PR-template checkbox: those would be theater that catches nothing real (an engineer can check a box without bumping a constant). The mechanism stays free-to-use; the policy is one constant change away when someone needs it.
### How the two layers compose
| Scenario | Layer A | Layer B | Outcome |
|---|---|---|---|
| Happy path | upgrade silent (already current) | headers OK | no output |
| Drift caught at session start | upgrades to latest | headers OK after upgrade | brief "installed: 0.40.0" line if not `--quiet` |
| Hook failed (offline at session start), online now | no-op | `< latest` ⇒ silent (warning still printed by `update_check` from main callback) | analyst sees one warning, runs `agnes self-upgrade` manually |
| Server shipped a wire break, analyst is `< min` | hook would have caught it, but maybe the analyst skipped Claude Code | hard-stop with remediation | exit 2, clear message |
| Headless / CI / ad-hoc terminal (no Claude Code) | hook never runs | warning + hard-stop still apply | covered |
---
## Server-side changes
### `app/version.py` (new — single source of truth)
```python
"""Single source of truth for app + CLI compat versions."""
import importlib.metadata

try:
    APP_VERSION = importlib.metadata.version("agnes-the-ai-analyst")
except importlib.metadata.PackageNotFoundError:
    # Source checkout without an installed distribution: report a PEP 440
    # local dev version instead of crashing at import time.
    APP_VERSION = "0.0.0+dev"

# Bump when shipping a wire-protocol break. Older CLIs are blocked at the
# response-header layer with exit 2 + remediation message. Day-one value
# of 0.0.0 means no enforcement — set the floor the first time a deliberate
# break ships.
MIN_COMPAT_CLI_VERSION = "0.0.0"
```
### `app/main.py` — middleware
```python
@app.middleware("http")
async def add_version_headers(request, call_next):
    """Stamp the CLI-compat version headers onto every ``/api/`` response.

    Responses for any other path are passed through untouched.
    """
    response = await call_next(request)
    if not request.url.path.startswith("/api/"):
        return response
    response.headers["X-Agnes-Latest-Version"] = APP_VERSION
    response.headers["X-Agnes-Min-Version"] = MIN_COMPAT_CLI_VERSION
    return response
```
Applied only to `/api/` so marketplace / wheel / web UI responses stay clean. Verify CORS `expose_headers` includes these (or `*`).
### `app/api/cli_artifacts.py` — fix stale `da` reference
Drive-by: line 47 still says *"Consumed by `da` CLI's auto-update check"*. Update to `agnes`. No behavior change.
`/cli/latest` itself stays as-is — pure metadata about the wheel on disk. `min_version` is a server-policy concern (per-request), not wheel metadata, so it lives on the headers and not in this payload.
---
## CLI-side changes
### `cli/commands/self_upgrade.py` (new)
Logic per Layer A above. ~80 lines including the install subprocess call. Reuses:
- `cli.update_check.check()` for the version probe (identical to what `cli/main.py:102` already calls)
- `cli.update_check.format_outdated_notice()` for `--check-only` output
- `cli.config.get_server_url()` for the server URL
- `shutil.which("uv")` to choose install path
- `subprocess.run` with `check=True` to surface install failures
Wire into `cli/main.py` near the existing typer registrations.
### `cli/client.py:get_client()` — header inspection
Wrap the returned `httpx.Client` so every response goes through one hook. Cleanest is `httpx.Client(event_hooks={"response": [_check_version_headers]})`:
```python
def _check_version_headers(response: httpx.Response) -> None:
    """Hard-stop the CLI when the server reports this version is too old.

    Intended as an ``httpx`` response event hook. Reads
    ``X-Agnes-Latest-Version`` / ``X-Agnes-Min-Version`` from *response* and
    exits the process with code 2 when the installed CLI is below the
    server's minimum. Returns silently (no enforcement) when:

    * ``AGNES_SELF_UPGRADE_IN_PROGRESS=1`` is set (recursion barrier),
    * either header is missing (older server),
    * the installed version is ``"unknown"`` (dev / editable install).

    Raises:
        SystemExit: with code 2 when the local version is below the minimum.
    """
    import os

    # Recursion barrier: an in-flight `agnes self-upgrade` must not be told
    # to "Run: agnes self-upgrade" by one of its own /api/* calls.
    if os.environ.get("AGNES_SELF_UPGRADE_IN_PROGRESS") == "1":
        return
    latest = response.headers.get("X-Agnes-Latest-Version")
    minv = response.headers.get("X-Agnes-Min-Version")
    if not latest or not minv:
        return  # talking to an older server; no enforcement
    local = _installed_version()  # reuse from update_check
    if local == "unknown":
        return  # dev install / editable; never block
    if _version_lt(local, minv):  # reuse update_check._version_lt
        sys.stderr.write(
            f"error: agnes {local} is incompatible with server {latest}"
            f" (min required: {minv}). Run: agnes self-upgrade\n"
        )
        sys.exit(2)
```
Only the hard-stop is enforced here — drift warnings are already handled by `update_check` in the root callback, no point doubling them on every API call.
`_version_lt` and `_installed_version` move from `cli/update_check.py` into `cli/_version_compat.py` (or stay in `update_check.py` and `client.py` imports them) — pick whichever keeps imports simple. Both files need them.
User-Agent: extend `get_client()` to set `headers={"User-Agent": f"agnes/{_installed_version()} ({platform.system().lower()})"}` (merge with caller-supplied headers).
### `cli/lib/hooks.py:install_claude_hooks` — chain self-upgrade ahead of pull
```python
# Substrings used to recognise hook commands as ours; "agnes self-upgrade"
# is included so the chained SessionStart line below is matched too.
_OUR_COMMAND_MARKERS = ("agnes self-upgrade", "agnes pull", "agnes push", "da sync")
# One chained SessionStart entry: the shell `;` runs self-upgrade before
# pull, and each `|| true` keeps a failing segment from aborting the line.
_replace_or_add(
"SessionStart",
"agnes self-upgrade --quiet 2>/dev/null || true; "
"agnes pull --quiet 2>/dev/null || true",
)
# SessionEnd keeps a single push command.
_replace_or_add("SessionEnd", "agnes push --quiet 2>/dev/null || true")
```
Single chained SessionStart entry. Shell `;` guarantees ordering (no reliance on Claude Code's undocumented multi-hook semantics); each `|| true` ensures one segment's failure does not abort the line. `_OUR_COMMAND_MARKERS` is extended so re-running `agnes init` recognises the chained line on substring match and replaces rather than duplicates.
### Drive-by cleanup
`cli/update_check.py` docstring (lines 1-9) still references `da` four times. Update to `agnes`. No behavior change.
---
## Tests
### Server
- New: `tests/test_version_headers_middleware.py` — `/api/sync/trigger` (or any cheap `/api/*`) returns both headers; `/web/*` and `/cli/*` do not.
- Existing `/cli/latest` tests already cover the wheel metadata path.
### CLI
- `tests/test_self_upgrade.py` — mock `update_check.check()`, mock `subprocess.run`, assert correct command shape (uv vs pip path), assert `--check-only` exits 1 when outdated and 0 when current, assert `--force` skips the `is_outdated()` short-circuit, assert success path invalidates the `update_check.json` cache.
- `tests/test_client_version_check.py` — fake response with `min > local` ⇒ `SystemExit(2)`. Fake response with `latest > local >= min` ⇒ no stderr, no exit. Local `unknown` ⇒ no enforcement. Missing headers (old server) ⇒ no enforcement.
- `tests/test_lib_hooks.py` — assert the chained command is the sole SessionStart entry, that `self-upgrade` precedes `pull`, that both segments end in `|| true`, and that re-running `install_claude_hooks` stays idempotent (length stays at 1).
---
## Migration / rollout
- Additive — no breaking change. Old CLIs (no header check, no self-upgrade command) keep working; old servers (no headers) make the new CLI silent (no enforcement, just the existing warning loop).
- Ship in one PR. CHANGELOG entry under `### Added`: "CLI now auto-upgrades from the server at session start (`agnes self-upgrade`) and hard-stops on incompatible-version mismatch via response headers."
- After merge, manually bump `MIN_COMPAT_CLI_VERSION` in the next PR that ships a wire-protocol break — that's the first time the hard-stop actually fires.
---
## Self-review
- **Spec coverage:** both layers (A/B), both directions (check + enforce), reuse of `update_check` to avoid two polling paths, hook idempotency, drive-by `da → agnes` cleanup. ✓
- **Resolved during review:** A (`cli/client.py:216` + `cli/main.py:99-104`), B (`MIN_COMPAT_CLI_VERSION = "0.0.0"` on day one), D (reuse `AGNES_NO_UPDATE_CHECK`, no new opt-out flag).
- **No placeholders:** every component has a concrete file path and existing-symbol reference.
- **Type/name consistency:** `APP_VERSION`, `MIN_COMPAT_CLI_VERSION`, `X-Agnes-Latest-Version`, `X-Agnes-Min-Version`, `agnes self-upgrade`, reused `update_check.check()` / `format_outdated_notice()` / `_version_lt()` / `_installed_version()` — consistent throughout.
- Spec, not plan: no per-step TDD breakdown. Convert to a plan once reviewed.

View file

@ -1,6 +1,6 @@
[project]
name = "agnes-the-ai-analyst"
version = "0.42.0"
version = "0.43.0"
description = "Agnes — AI Data Analyst platform for AI analytical systems"
requires-python = ">=3.11,<3.14"
license = "MIT"

View file

@ -1,36 +1,58 @@
"""Pin that the FastAPI `version=` is read dynamically from package metadata.
The OpenAPI schema (`/openapi.json`, `/docs`) advertises this version. A
hardcoded literal the previous state silently drifts from
`pyproject.toml` on every bump, leaving `/openapi.json` reporting a stale
version while `/api/version`, `/cli/latest`, and `da --version` all
report the bumped one.
"""
"""Pin that APP_VERSION reads from package metadata, not a hardcoded literal,
and that the FastAPI app's `version=` field surfaces it end-to-end."""
import importlib
from unittest.mock import patch
def test_app_version_reads_package_metadata():
"""`_app_version()` must call importlib.metadata.version with the
canonical package name, not return a hardcoded literal."""
with patch("app.main._pkg_version", return_value="9.9.9") as mock_pkg_ver:
from app.main import _app_version
assert _app_version() == "9.9.9"
mock_pkg_ver.assert_called_once_with("agnes-the-ai-analyst")
import pytest
def test_app_version_falls_back_to_dev_when_package_missing():
"""Source-checkout without install → report 'dev', not crash."""
@pytest.fixture
def _restore_app_modules():
"""Reload-with-real-metadata so subsequent tests see the genuine
APP_VERSION / FastAPI app instance, not the patched-in fake from this
file's tests."""
yield
import app.version
importlib.reload(app.version)
import app.main
importlib.reload(app.main)
def test_app_version_reads_package_metadata(_restore_app_modules):
# Patch the source `importlib.metadata.version` rather than the alias
# bound into app.version at import time — `importlib.reload(app.version)`
# re-runs the `from importlib.metadata import version as _pkg_version`
# line, which would otherwise re-fetch the unpatched original and
# silently neuter the test.
with patch("importlib.metadata.version", return_value="9.9.9") as mock_pkg_ver:
import app.version
importlib.reload(app.version)
assert app.version.APP_VERSION == "9.9.9"
# `assert_called_with` (not `assert_called_once_with`) — `import
# app.version` may have triggered an initial load before reload,
# giving two calls. We only care that the package name is canonical.
mock_pkg_ver.assert_called_with("agnes-the-ai-analyst")
def test_app_version_falls_back_when_package_missing(_restore_app_modules):
from importlib.metadata import PackageNotFoundError
with patch("app.main._pkg_version", side_effect=PackageNotFoundError):
from app.main import _app_version
assert _app_version() == "dev"
with patch("importlib.metadata.version", side_effect=PackageNotFoundError):
import app.version
importlib.reload(app.version)
assert app.version.APP_VERSION == "0.0.0+dev"
def test_fastapi_app_version_matches_package_metadata():
"""End-to-end: what FastAPI stores in `app.version` is whatever
`_app_version()` returned not a stale literal."""
with patch("app.main._pkg_version", return_value="7.7.7"):
from app.main import create_app
app = create_app()
assert app.version == "7.7.7"
def test_fastapi_app_version_matches_app_version_constant(_restore_app_modules):
"""End-to-end: FastAPI's app.version (consumed by /openapi.json and
/docs) must equal app.version.APP_VERSION. Guards the wiring at
`app/main.py:186 version=APP_VERSION` against accidental literal."""
import app.version
import app.main
# Reload both so we read post-patch values consistently.
with patch("importlib.metadata.version", return_value="7.7.7"):
importlib.reload(app.version)
importlib.reload(app.main)
assert app.main.app.version == "7.7.7"
assert app.main.app.version == app.version.APP_VERSION

View file

@ -35,6 +35,27 @@ def test_check_returns_none_when_server_url_missing(tmp_config):
assert update_check.check(None) is None # type: ignore[arg-type]
def test_check_bypass_disabled_overrides_env(monkeypatch, tmp_config):
    """`AGNES_NO_UPDATE_CHECK=1` only mutes the implicit check.

    With the variable set, a plain ``check()`` returns None, while a caller
    passing ``bypass_disabled=True`` (e.g. `agnes self-upgrade`) still gets
    the update info.
    """
    from cli import update_check

    monkeypatch.setenv("AGNES_NO_UPDATE_CHECK", "1")
    latest_payload = {
        "version": "9.9.9",
        "wheel_filename": "x.whl",
        "download_url_path": "/cli/wheel/x.whl",
    }
    with (
        patch("cli.update_check._installed_version", return_value="2.0.0"),
        patch("cli.update_check._fetch_latest", return_value=latest_payload),
    ):
        # Default call: the env var wins.
        assert update_check.check("http://server.test") is None
        # Explicit bypass: the env var is ignored.
        info = update_check.check("http://server.test", bypass_disabled=True)
        assert info is not None and info.latest == "9.9.9"
def test_check_returns_none_when_installed_version_unknown(tmp_config):
from cli import update_check
with patch("cli.update_check._installed_version", return_value="unknown"):

View file

@ -0,0 +1,76 @@
"""Verify cli/client.py:get_client() hard-stops on min_version mismatch."""
from unittest.mock import patch
import httpx
import pytest
def _fake_response(headers: dict) -> httpx.Response:
    """Build a canned 200 response carrying *headers* for the hook under test."""
    request = httpx.Request("GET", "http://x/")
    return httpx.Response(
        status_code=200,
        headers=headers,
        content=b"{}",
        request=request,
    )
def test_local_below_min_exits_with_code_2():
    """An installed CLI older than the server's minimum exits with code 2."""
    from cli.client import _check_version_headers

    server_headers = {
        "X-Agnes-Latest-Version": "0.40.0",
        "X-Agnes-Min-Version": "0.35.0",
    }
    with patch("cli.client._installed_version", return_value="0.30.0"):
        resp = _fake_response(server_headers)
        with pytest.raises(SystemExit) as exc:
            _check_version_headers(resp)
    assert exc.value.code == 2
def test_local_at_or_above_min_does_not_exit():
    """A CLI at the latest version (above the minimum) passes the hook."""
    from cli.client import _check_version_headers

    server_headers = {
        "X-Agnes-Latest-Version": "0.40.0",
        "X-Agnes-Min-Version": "0.35.0",
    }
    with patch("cli.client._installed_version", return_value="0.40.0"):
        # Must return normally; any SystemExit fails the test.
        _check_version_headers(_fake_response(server_headers))
def test_local_equal_to_min_does_not_exit():
    """Boundary case: a CLI exactly at the minimum is not "less than" it."""
    from cli.client import _check_version_headers

    server_headers = {
        "X-Agnes-Latest-Version": "0.40.0",
        "X-Agnes-Min-Version": "0.35.0",
    }
    with patch("cli.client._installed_version", return_value="0.35.0"):
        # Must return normally; any SystemExit fails the test.
        _check_version_headers(_fake_response(server_headers))
def test_missing_headers_no_enforcement():
    """A response without the version headers (older server) is a no-op."""
    from cli.client import _check_version_headers

    headerless = _fake_response({})
    with patch("cli.client._installed_version", return_value="0.10.0"):
        # Must return normally; any SystemExit fails the test.
        _check_version_headers(headerless)
def test_unknown_local_version_no_enforcement():
    """An installed version of "unknown" (source checkout) is never blocked."""
    from cli.client import _check_version_headers

    server_headers = {
        "X-Agnes-Latest-Version": "0.40.0",
        "X-Agnes-Min-Version": "0.35.0",
    }
    with patch("cli.client._installed_version", return_value="unknown"):
        # Must return normally; any SystemExit fails the test.
        _check_version_headers(_fake_response(server_headers))
def test_self_upgrade_in_progress_disables_enforcement(monkeypatch):
    """Recursion barrier: the sentinel env var switches enforcement off.

    With ``AGNES_SELF_UPGRADE_IN_PROGRESS=1`` set, even a CLI below the
    minimum must not exit, so an in-flight upgrade cannot be told to
    "Run: agnes self-upgrade" from inside itself.
    """
    from cli.client import _check_version_headers

    monkeypatch.setenv("AGNES_SELF_UPGRADE_IN_PROGRESS", "1")
    server_headers = {
        "X-Agnes-Latest-Version": "0.40.0",
        "X-Agnes-Min-Version": "0.35.0",
    }
    with patch("cli.client._installed_version", return_value="0.10.0"):
        # Must return normally; any SystemExit fails the test.
        _check_version_headers(_fake_response(server_headers))

View file

@ -14,9 +14,9 @@ def _read_settings(workspace: Path) -> dict:
def test_install_creates_settings_file(tmp_path):
install_claude_hooks(tmp_path)
cfg = _read_settings(tmp_path)
assert cfg["hooks"]["SessionStart"]
assert "agnes pull --quiet" in cfg["hooks"]["SessionStart"][0]["hooks"][0]["command"]
assert cfg["hooks"]["SessionEnd"]
cmd = cfg["hooks"]["SessionStart"][0]["hooks"][0]["command"]
assert "agnes self-upgrade --quiet" in cmd
assert "agnes pull --quiet" in cmd
assert "agnes push --quiet" in cfg["hooks"]["SessionEnd"][0]["hooks"][0]["command"]
@ -74,3 +74,25 @@ def test_install_handles_invalid_json(tmp_path, capsys):
install_claude_hooks(tmp_path)
captured = capsys.readouterr()
assert "not valid JSON" in captured.err or "warning" in captured.err.lower()
def test_install_chains_self_upgrade_then_pull_in_one_entry(tmp_path):
    """SessionStart holds one entry chaining self-upgrade ahead of pull."""
    install_claude_hooks(tmp_path)
    start_entries = _read_settings(tmp_path)["hooks"]["SessionStart"]
    assert len(start_entries) == 1, start_entries

    command = start_entries[0]["hooks"][0]["command"]
    for segment in ("agnes self-upgrade --quiet", "agnes pull --quiet"):
        assert segment in command
    # Ordering lives in the shell line itself: self-upgrade comes first.
    assert command.index("agnes self-upgrade") < command.index("agnes pull")
    # Each segment carries its own `|| true`.
    assert command.count("|| true") >= 2
def test_install_idempotent_chained_entry(tmp_path):
    """Installing the hooks twice leaves one entry per hook event."""
    for _ in range(2):
        install_claude_hooks(tmp_path)
    hooks = _read_settings(tmp_path)["hooks"]
    assert len(hooks["SessionStart"]) == 1
    assert len(hooks["SessionEnd"]) == 1

304
tests/test_self_upgrade.py Normal file
View file

@ -0,0 +1,304 @@
"""Tests for `agnes self-upgrade` — install path, smoke test, rollback
(with rc capture), recursion barrier, --force offline failure, AGNES_NO_UPDATE_CHECK
bypass for explicit upgrades, --quiet stderr behavior, version-mismatch
smoke detection."""
import os
import sys
from unittest.mock import patch, MagicMock
import pytest
from typer.testing import CliRunner
from cli.main import app
from cli.update_check import UpdateInfo
runner = CliRunner()
@pytest.fixture(autouse=True)
def _ensure_no_sentinel_leak(monkeypatch):
    """Start every test with the recursion sentinel unset.

    Test order is not guaranteed, so a value leaked by an earlier test could
    otherwise make a later 'cleared on exit' assertion pass or fail for the
    wrong reason.
    """
    monkeypatch.delenv("AGNES_SELF_UPGRADE_IN_PROGRESS", raising=False)
_OUTDATED_URL = "http://server.test/cli/wheel/agnes-0.40.0-py3-none-any.whl"
_PRIOR_URL = "http://server.test/cli/wheel/agnes-0.35.0-py3-none-any.whl"
def _outdated_info():
    """UpdateInfo for an installed 0.30.0 when 0.40.0 is the latest."""
    return UpdateInfo(
        installed="0.30.0",
        latest="0.40.0",
        download_url=_OUTDATED_URL,
    )
def _current_info():
    """UpdateInfo for an installed version equal to the latest (no URL)."""
    return UpdateInfo(
        installed="0.40.0",
        latest="0.40.0",
        download_url=None,
    )
def _smoke_pass():
return (True, "agnes 0.40.0")
def _smoke_fail():
return (False, "exit 1: ImportError: cannot import name 'foo'")
def test_check_only_when_outdated_exits_1():
    """``--check-only`` on an outdated CLI exits 1 with an "out of date" notice."""
    check_patch = patch(
        "cli.commands.self_upgrade.check", return_value=_outdated_info()
    )
    with check_patch:
        result = runner.invoke(app, ["self-upgrade", "--check-only"])
    assert result.exit_code == 1
    assert "out of date" in result.output
def test_check_only_when_current_exits_0():
    """``--check-only`` on an up-to-date CLI exits 0."""
    check_patch = patch(
        "cli.commands.self_upgrade.check", return_value=_current_info()
    )
    with check_patch:
        result = runner.invoke(app, ["self-upgrade", "--check-only"])
    assert result.exit_code == 0
def test_when_current_short_circuits_no_install():
    """An up-to-date CLI exits 0 without running any subprocess."""
    with (
        patch("cli.commands.self_upgrade.check", return_value=_current_info()),
        patch("cli.commands.self_upgrade.subprocess.run") as mock_run,
    ):
        result = runner.invoke(app, ["self-upgrade"])
    assert result.exit_code == 0
    mock_run.assert_not_called()
def test_uv_path_when_uv_available():
    """With uv on PATH, the first subprocess call is a forced `uv tool install`."""
    with (
        patch("cli.commands.self_upgrade.check", return_value=_outdated_info()),
        patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"),
        patch("cli.commands.self_upgrade.subprocess.run") as mock_run,
        patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_pass()),
        patch("cli.commands.self_upgrade._read_last_known_good", return_value=None),
        patch("cli.commands.self_upgrade._record_last_known_good"),
        patch("cli.commands.self_upgrade._invalidate_update_cache"),
    ):
        mock_run.return_value = MagicMock(returncode=0)
        result = runner.invoke(app, ["self-upgrade"])
    assert result.exit_code == 0

    first_cmd = mock_run.call_args_list[0].args[0]
    assert first_cmd[:3] == ["uv", "tool", "install"]
    assert "--force" in first_cmd
    assert _OUTDATED_URL in first_cmd
def test_pip_fallback_uses_sys_executable_not_user():
    """Without uv, pip runs via ``sys.executable`` and never with ``--user``.

    Also pins that a ``curl`` command is among the subprocess calls and that
    the pip command carries ``--force-reinstall``.
    """
    with (
        patch("cli.commands.self_upgrade.check", return_value=_outdated_info()),
        patch("cli.commands.self_upgrade.shutil.which", return_value=None),
        patch("cli.commands.self_upgrade.subprocess.run") as mock_run,
        patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_pass()),
        patch("cli.commands.self_upgrade._read_last_known_good", return_value=None),
        patch("cli.commands.self_upgrade._record_last_known_good"),
        patch("cli.commands.self_upgrade._invalidate_update_cache"),
    ):
        mock_run.return_value = MagicMock(returncode=0)
        result = runner.invoke(app, ["self-upgrade"])
    assert result.exit_code == 0

    cmds = [call.args[0] for call in mock_run.call_args_list]
    assert any(cmd[0] == "curl" for cmd in cmds), cmds
    pip_cmd = next(cmd for cmd in cmds if "pip" in cmd)
    assert pip_cmd[0] == sys.executable, pip_cmd
    assert "--force-reinstall" in pip_cmd
    assert "--user" not in pip_cmd
def test_force_invalidates_cache_before_check():
    """``--force`` must drop the cached download_url before probing /cli/latest.

    Pins that a forced run on an already-current CLI invalidates the cache
    twice and consults ``check`` exactly once.
    """
    fresh_current_with_url = UpdateInfo(
        installed="0.40.0",
        latest="0.40.0",
        download_url=_OUTDATED_URL,
    )
    with (
        patch("cli.commands.self_upgrade._invalidate_update_cache") as mock_invalidate,
        patch("cli.commands.self_upgrade.check", return_value=fresh_current_with_url) as mock_check,
        patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"),
        patch("cli.commands.self_upgrade.subprocess.run") as mock_run,
        patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_pass()),
        patch("cli.commands.self_upgrade._read_last_known_good", return_value=None),
        patch("cli.commands.self_upgrade._record_last_known_good"),
    ):
        mock_run.return_value = MagicMock(returncode=0)
        result = runner.invoke(app, ["self-upgrade", "--force"])
    assert result.exit_code == 0
    assert mock_invalidate.call_count == 2
    mock_check.assert_called_once()
def test_force_offline_exits_1_with_stderr():
    """``--force`` with no answer from the server exits 1 and says so on stderr."""
    with (
        patch("cli.commands.self_upgrade.check", return_value=None),
        patch("cli.commands.self_upgrade.get_server_url", return_value="http://server.test"),
        patch("cli.commands.self_upgrade._invalidate_update_cache"),
    ):
        result = runner.invoke(app, ["self-upgrade", "--force"])
    assert result.exit_code == 1
    for fragment in ("cannot reach", "server.test"):
        assert fragment in result.stderr
def test_offline_without_force_is_silent():
    """Without ``--force``, an unreachable server is not an error here.

    self-upgrade itself exits 0 and writes neither a `cannot reach` error
    nor any `self-upgrade:` line to stderr. The root callback's warning loop
    in cli/main.py may still emit `[update]` output to stderr; that is a
    separate code path and is not pinned by this test.
    """
    with (
        patch("cli.commands.self_upgrade.check", return_value=None),
        patch("cli.commands.self_upgrade._invalidate_update_cache"),
    ):
        result = runner.invoke(app, ["self-upgrade"])
    assert result.exit_code == 0
    for fragment in ("cannot reach", "self-upgrade:"):
        assert fragment not in result.stderr
def test_self_upgrade_passes_bypass_disabled_to_check():
    """The command calls ``check`` with ``bypass_disabled=True``.

    AGNES_NO_UPDATE_CHECK only silences the implicit warning loop; an
    explicit `agnes self-upgrade` must not turn into a silent no-op.
    """
    check_patch = patch(
        "cli.commands.self_upgrade.check", return_value=_current_info()
    )
    with check_patch as mock_check:
        result = runner.invoke(app, ["self-upgrade", "--check-only"])
    assert result.exit_code == 0
    assert mock_check.call_args.kwargs.get("bypass_disabled") is True
def test_quiet_does_not_suppress_install_failure_stderr():
    """``--quiet`` hides progress only: a failed install still exits 1 with stderr."""
    with (
        patch("cli.commands.self_upgrade.check", return_value=_outdated_info()),
        patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"),
        patch("cli.commands.self_upgrade.subprocess.run") as mock_run,
        patch("cli.commands.self_upgrade._read_last_known_good", return_value=None),
    ):
        # Non-zero return code from the install subprocess.
        mock_run.return_value = MagicMock(returncode=42)
        result = runner.invoke(app, ["self-upgrade", "--quiet"])
    assert result.exit_code == 1
    assert "install failed" in result.stderr
def test_smoke_fail_triggers_rollback_when_prior_url_known():
    """A failed smoke test reinstalls the last-known-good wheel and exits 1.

    Both the new wheel URL and the prior URL must show up in the subprocess
    calls, nothing is recorded as last-known-good, and stderr mentions the
    smoke test.
    """
    with (
        patch("cli.commands.self_upgrade.check", return_value=_outdated_info()),
        patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"),
        patch("cli.commands.self_upgrade.subprocess.run") as mock_run,
        patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_fail()),
        patch("cli.commands.self_upgrade._read_last_known_good", return_value=_PRIOR_URL),
        patch("cli.commands.self_upgrade._record_last_known_good") as mock_record,
    ):
        mock_run.return_value = MagicMock(returncode=0)
        result = runner.invoke(app, ["self-upgrade"])
    assert result.exit_code == 1

    # Collect every http(s) argument passed to any subprocess call.
    urls_installed = []
    for call in mock_run.call_args_list:
        for arg in call.args[0]:
            if isinstance(arg, str) and arg.startswith("http"):
                urls_installed.append(arg)
    assert _OUTDATED_URL in urls_installed
    assert _PRIOR_URL in urls_installed
    mock_record.assert_not_called()
    assert "smoke test" in result.stderr
def test_smoke_fail_with_rollback_failure_surfaces_rc():
    """Install ok, smoke fails, rollback fails too: stderr shows rc and recovery."""
    # First subprocess call succeeds, the second returns rc 99.
    run_outcomes = [MagicMock(returncode=0), MagicMock(returncode=99)]
    with (
        patch("cli.commands.self_upgrade.check", return_value=_outdated_info()),
        patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"),
        patch("cli.commands.self_upgrade.subprocess.run", side_effect=run_outcomes),
        patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_fail()),
        patch("cli.commands.self_upgrade._read_last_known_good", return_value=_PRIOR_URL),
        patch("cli.commands.self_upgrade.get_server_url", return_value="http://server.test"),
    ):
        result = runner.invoke(app, ["self-upgrade"])
    assert result.exit_code == 1
    for fragment in ("rollback ALSO failed", "rc=99", "/cli/install.sh"):
        assert fragment in result.stderr
def test_smoke_fail_no_prior_url_prints_install_sh_recovery():
    """Smoke failure with no rollback target points stderr at the bootstrap script."""
    with (
        patch("cli.commands.self_upgrade.check", return_value=_outdated_info()),
        patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"),
        patch("cli.commands.self_upgrade.subprocess.run") as mock_run,
        patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_fail()),
        patch("cli.commands.self_upgrade._read_last_known_good", return_value=None),
        patch("cli.commands.self_upgrade.get_server_url", return_value="http://server.test"),
    ):
        mock_run.return_value = MagicMock(returncode=0)
        result = runner.invoke(app, ["self-upgrade"])
    assert result.exit_code == 1
    for fragment in ("/cli/install.sh", "server.test"):
        assert fragment in result.stderr
def test_smoke_pass_records_last_known_good_then_invalidates_cache():
    """On success the new URL is recorded before the update cache is dropped."""
    call_order = []

    def _on_record(url):
        call_order.append(("record", url))

    def _on_invalidate():
        call_order.append(("invalidate", None))

    with (
        patch("cli.commands.self_upgrade.check", return_value=_outdated_info()),
        patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"),
        patch("cli.commands.self_upgrade.subprocess.run") as mock_run,
        patch("cli.commands.self_upgrade._smoke_test_new_binary", return_value=_smoke_pass()),
        patch("cli.commands.self_upgrade._read_last_known_good", return_value=None),
        patch("cli.commands.self_upgrade._record_last_known_good", side_effect=_on_record),
        patch("cli.commands.self_upgrade._invalidate_update_cache", side_effect=_on_invalidate),
    ):
        mock_run.return_value = MagicMock(returncode=0)
        result = runner.invoke(app, ["self-upgrade"])
    assert result.exit_code == 0

    # Position of the first event of each kind.
    kinds = [kind for kind, _ in call_order]
    record_idx = kinds.index("record")
    invalidate_idx = kinds.index("invalidate")
    assert record_idx < invalidate_idx, call_order
    assert call_order[record_idx] == ("record", _OUTDATED_URL)
def test_self_upgrade_propagates_sentinel_to_smoke_subprocess():
    """The sentinel is set in os.environ during the run and cleared in finally.

    The fake smoke test records the *actual* value of
    ``AGNES_SELF_UPGRADE_IN_PROGRESS`` in the process environment at the
    moment it is called. Building a dict that hardcodes the sentinel to "1"
    would make the assertion pass regardless of what the command does.
    """
    sentinel_during_smoke = []

    def _fake_smoke(method, expected_version):
        # Observe the live environment rather than constructing one.
        sentinel_during_smoke.append(
            os.environ.get("AGNES_SELF_UPGRADE_IN_PROGRESS")
        )
        return _smoke_pass()

    with (
        patch("cli.commands.self_upgrade.check", return_value=_outdated_info()),
        patch("cli.commands.self_upgrade.shutil.which", return_value="/usr/local/bin/uv"),
        patch("cli.commands.self_upgrade.subprocess.run", return_value=MagicMock(returncode=0)),
        patch("cli.commands.self_upgrade._smoke_test_new_binary", side_effect=_fake_smoke),
        patch("cli.commands.self_upgrade._read_last_known_good", return_value=None),
        patch("cli.commands.self_upgrade._record_last_known_good"),
        patch("cli.commands.self_upgrade._invalidate_update_cache"),
    ):
        result = runner.invoke(app, ["self-upgrade"])
    assert result.exit_code == 0
    # Set while the smoke test ran ...
    assert sentinel_during_smoke and sentinel_during_smoke[0] == "1"
    # ... and cleared again once the command returned.
    assert os.environ.get("AGNES_SELF_UPGRADE_IN_PROGRESS") is None
@pytest.mark.parametrize("install_method,patch_target", [
    ("uv", "_uv_tool_bin_path"),
    ("pip", "_pip_bin_path"),
])
def test_smoke_test_detects_version_mismatch(install_method, patch_target):
    """A binary reporting the wrong version fails the smoke test.

    Runs for both the uv and pip install methods. The binary path comes from
    the patched path helper, and the first element of the executed command
    must be that path.
    """
    from pathlib import Path
    from cli.commands import self_upgrade as su

    fake_bin = f"/fake/{install_method}/bin/agnes"
    stale_output = MagicMock(returncode=0, stdout="agnes 0.30.0\n", stderr="")
    with (
        patch.object(su, patch_target, return_value=Path(fake_bin)),
        patch.object(su.subprocess, "run") as mock_run,
    ):
        mock_run.return_value = stale_output
        ok, detail = su._smoke_test_new_binary(install_method, expected_version="0.40.0")
    assert ok is False
    assert "version mismatch" in detail
    assert "0.40.0" in detail and "0.30.0" in detail
    assert mock_run.call_args.args[0][0] == fake_bin
def test_smoke_test_passes_with_pep440_local_version():
    """Versions are compared as versions, not as substrings.

    A binary reporting 0.40.0 passes for expected 0.40.0, while one
    reporting 0.40.10 (which contains "0.40.1" and starts with "0.40.")
    is a mismatch.
    """
    from pathlib import Path
    from cli.commands import self_upgrade as su

    def _reports(stdout):
        return MagicMock(returncode=0, stdout=stdout, stderr="")

    with (
        patch.object(su, "_uv_tool_bin_path", return_value=Path("/fake/agnes")),
        patch.object(su.subprocess, "run") as mock_run,
    ):
        mock_run.return_value = _reports("agnes 0.40.0\n")
        ok, _ = su._smoke_test_new_binary("uv", expected_version="0.40.0")
        assert ok is True

        mock_run.return_value = _reports("agnes 0.40.10\n")
        ok, detail = su._smoke_test_new_binary("uv", expected_version="0.40.0")
        assert ok is False
        assert "version mismatch" in detail

View file

@ -0,0 +1,29 @@
"""Verify /api/* responses carry X-Agnes-Latest-Version + X-Agnes-Min-Version."""
from fastapi.testclient import TestClient
def test_api_response_carries_version_headers():
    """``/api/version`` carries both version headers with the app.version values."""
    from app.main import app
    from app.version import APP_VERSION, MIN_COMPAT_CLI_VERSION

    # /api/version is unauthenticated and cheap.
    resp = TestClient(app).get("/api/version")
    assert resp.status_code == 200

    # Exact equality with the constants, not merely "parseable". When the
    # floor is bumped deliberately, this test changes in the same PR.
    expected_headers = (
        ("X-Agnes-Latest-Version", APP_VERSION),
        ("X-Agnes-Min-Version", MIN_COMPAT_CLI_VERSION),
    )
    for header_name, expected_value in expected_headers:
        assert resp.headers[header_name] == expected_value
    # Day-one floor pin: drop or update this assertion when the floor moves.
    assert resp.headers["X-Agnes-Min-Version"] == "0.0.0"
def test_non_api_response_does_not_carry_version_headers():
    """``/cli/latest`` is outside ``/api`` and must not carry the version headers."""
    from app.main import app

    resp = TestClient(app).get("/cli/latest")
    assert resp.status_code == 200
    for header_name in ("X-Agnes-Latest-Version", "X-Agnes-Min-Version"):
        assert header_name not in resp.headers