From 50e0463501b4b10fa9668cd5e6546d82e6018b62 Mon Sep 17 00:00:00 2001 From: Minas Arustamyan Date: Thu, 7 May 2026 01:06:18 +0200 Subject: [PATCH 1/6] feat(marketplace): clone-based plugin setup + auto-refresh SessionStart hook Adds end-to-end flow for installing and keeping the per-user filtered Claude Code marketplace in sync with the user's Agnes stack (admin RBAC grants \ MyAIStack opt-outs U /store installs). Setup (one-liner in install prompt step 5): `agnes refresh-marketplace --bootstrap` clones the per-user marketplace bare repo to ~/.agnes/marketplace, strips PAT from the cloned origin URL, registers the local path with Claude Code, and installs every plugin in the served manifest at --scope project. Replaces a 15-line inline shell sequence that tripped Claude Code's agent-driven `rm -rf` permission gate. Auto-refresh (SessionStart hook installed by `agnes init`): `agnes refresh-marketplace --quiet` runs every Claude Code session, fetches+resets the clone (server rebuilds as orphan commits, so pull --ff-only is impossible), and version-aware reconciles: - missing in workspace -> claude plugin install @agnes --scope project - version differs -> claude plugin update @agnes - matches -> skip Don't auto-uninstall plugins that disappeared from the manifest -- a transient empty manifest from the server would wipe the stack. Hook output: when --quiet AND something actually changed, emits Claude Code hook JSON on stdout -- `systemMessage` (transient toast) and `hookSpecificOutput.additionalContext` (model-side system reminder), both carrying the change summary plus a "/exit + restart Claude Code" instruction (Claude only scans plugins at session start). Windows hook compatibility: the refresh-marketplace hook command is wrapped in `bash -c "..."` because Claude Code on Windows runs hook commands directly without invoking a shell, so `2>/dev/null || true` would otherwise be passed as literal argv tokens. Cross-cutting: - cli/lib/marketplace.py: shared CLONE_DIR + MARKETPLACE_NAME constants. - cli/lib/hooks.py: SessionStart now has two independent entries (pull + refresh-marketplace) so a failure in one doesn't suppress the other; legacy `da sync` and prior single-pull layouts upgrade cleanly on re-init. - PAT injection on every git fetch via per-invocation credential helper (token in \$AGNES_TOKEN env, never in argv or .git/config). - Pre-snapshot of installed plugins captured BEFORE `claude plugin marketplace update` so silent auto-applied version bumps still fire notifications. - scripts/dev/agnes-client-reset.sh: cleans ~/.claude/plugins/marketplaces/agnes, ~/.claude/plugins/cache/agnes, drops uv build cache, documents workspace-scoped residue that can't be enumerated from the script. - app/web/setup_instructions.py: legacy AGNES_DEBUG_AUTH path also uses clone (direct HTTPS marketplace add is broken end-to-end on every Claude Code distribution -- stores response as single file, plugin source paths then 404). 28 new tests (test_cli_refresh_marketplace.py) + extended hook + setup template tests cover bootstrap, fetch+reset ordering, version-aware reconcile, project-path filtering, hook JSON shape, and the bash-c Windows wrapper invariant. --- app/web/setup_instructions.py | 217 +++---- cli/commands/refresh_marketplace.py | 492 ++++++++++++++++ cli/lib/hooks.py | 55 +- cli/lib/marketplace.py | 33 ++ cli/main.py | 2 + scripts/dev/agnes-client-reset.sh | 57 +- tests/test_cli_refresh_marketplace.py | 820 ++++++++++++++++++++++++++ tests/test_lib_hooks.py | 116 +++- tests/test_setup_instructions.py | 261 ++++---- tests/test_setup_page_unified.py | 7 +- 10 files changed, 1734 insertions(+), 326 deletions(-) create mode 100644 cli/commands/refresh_marketplace.py create mode 100644 cli/lib/marketplace.py create mode 100644 tests/test_cli_refresh_marketplace.py diff --git a/app/web/setup_instructions.py b/app/web/setup_instructions.py index 0cb58f4..9969646 100644 --- a/app/web/setup_instructions.py +++ b/app/web/setup_instructions.py @@ -58,18 +58,31 @@ practice and the design here exists to dodge each one: `update-ca-trust`) doesn't fix it on Windows or macOS either — the binary's bundled CA list isn't refreshable from the OS store. - So the marketplace step branches on platform: - - Windows + macOS → straight to system-`git clone` fallback - (system git honors `GIT_SSL_CAINFO`, so the clone works). - - Linux → typically the node-based npm install where - `NODE_EXTRA_CA_CERTS` does take effect; try direct first, fall - back to git clone on failure. + So the marketplace step always uses system `git clone` regardless of + platform — system git honors `GIT_SSL_CAINFO` from the combined bundle + in step 0(d). We tried having Linux attempt direct HTTPS first (where + node-based claude DOES respect `NODE_EXTRA_CA_CERTS`), but `claude + plugin marketplace add ` is broken end-to-end on every + distribution: it does succeed at downloading the marketplace.json, but + stores it as a single file. The plugin entries' `source: "./plugins/"` + paths are then resolved as local filesystem paths against that file's + parent dir — and the plugin tree obviously isn't there. Only the clone + path produces a real directory tree that `plugin install` can read. The OS trust-store registration in (c) is still done on all three platforms because it's needed for *non-claude* native tools — e.g. the system git fetch path itself (Schannel on Windows, Security framework on macOS) trusts via the OS store, not via env vars. + Marketplace refresh: after the initial clone, `agnes refresh-marketplace` + incrementally `git pull`s against the same clone and runs `claude plugin + marketplace update agnes`. Credentials are injected per-pull via a + one-shot git credential helper (PAT from `~/.config/agnes/token.json`) + so the cloned repo's `origin` URL stays PAT-free at rest. The + SessionStart hook (installed by `agnes init`) calls refresh-marketplace + on every Claude Code session so changes server-side propagate + automatically. + ## Step ordering The numbered steps are arranged so that: @@ -401,10 +414,10 @@ def _finale_lines(*, confirm_step_num: str, has_ca: bool, has_marketplace: bool) only reference earlier steps that were actually emitted, otherwise the assistant either hallucinates an answer or asks the user about a non-existent step. The CA-bundle-source bullet only makes sense when - the trust block ran (`has_ca`); the marketplace direct-vs-clone bullet - only makes sense when the marketplace block ran (`has_marketplace`). - Init + catalog + diagnose + skills + version always render, so their - bullets are unconditional.""" + the trust block ran (`has_ca`); the marketplace bullet only makes + sense when the marketplace block ran (`has_marketplace`). Init + + catalog + diagnose + skills + version always render, so their bullets + are unconditional.""" bullets = [ " - `agnes --version` output", " - First few lines of `agnes catalog` (tables you can see)", @@ -420,8 +433,8 @@ def _finale_lines(*, confirm_step_num: str, has_ca: bool, has_marketplace: bool) ) if has_marketplace: bullets.append( - " - Whether the marketplace add went via direct HTTPS or via the " - "git-clone fallback (and on which platform)" + " - Confirmation that `~/.agnes/marketplace/.git/` exists " + "(the marketplace clone) and that all requested plugins installed" ) return [ f"{confirm_step_num}) Confirm:", @@ -489,132 +502,84 @@ def _marketplace_block( layouts (this block now runs before diagnose/skills, so it's step 5 instead of the old step 7). - With `has_ca=True`: the user has the trust block from step 0, so we know - the cert is in the OS store and our env vars are set. Strategy: - - Windows: claude.exe is a Bun-compiled binary that ignores both the - Windows trust store AND NODE_EXTRA_CA_CERTS for marketplace HTTPS. - Skip the direct attempt; system `git clone` honors GIT_SSL_CAINFO - (the combined bundle from step 0) and works. - - macOS: same story. `claude` on macOS arm64 ships as a Mach-O binary - with a `__BUN` segment (single-file Bun build); empirically it - ignores SSL_CERT_FILE / NODE_EXTRA_CA_CERTS / login keychain alike, - even though `strings` shows the binary recognizes those env-var - names. Go straight to git-clone on macOS too. - - Linux: still ships node-based claude on most distros (npm install - path), where NODE_EXTRA_CA_CERTS does take effect. Try direct - first, fall back to git clone on failure. + The whole block is one CLI invocation: ``agnes refresh-marketplace + --bootstrap``. The CLI handles clone + PAT-strip + chmod + register- + with-Claude + auto-install-from-manifest internally. This is what + used to be a 15-line shell sequence inline; pulling it into the CLI + bought: - Token hygiene: after the clone, we strip the PAT from the cloned repo's - `origin` URL (`git remote set-url`) and chmod ~/.agnes/marketplace tight. - Reason: `git clone https://x:@host/...` writes the URL verbatim - into `.git/config`, where it sits in plaintext for anything that reads - home (cloud sync, antivirus scanners, peer processes). claude's - marketplace registration uses the local FS path, not the remote URL, - so stripping the token after clone is harmless — to refresh later, the - user re-runs setup from the dashboard with a fresh PAT. + 1. **Claude Code permission gate friendliness.** The agent-driven + onboarding flow inside Claude Code denies ``rm -rf`` by default; + the inline script tripped on it. Wrapping the destructive prep + inside agnes lets the CLI's already-trusted permission grant + cover it (Python ``shutil.rmtree`` doesn't pattern-match the + shell ``rm -rf`` block). + 2. **Idempotence without inline ``rm``.** Re-running the install + prompt over an existing clone now does fetch+reset under the + hood (no destructive cleanup needed). The prompt's "safe to + re-run" promise holds without forcing the operator to delete + anything by hand. + 3. **One source of truth.** ``agnes refresh-marketplace`` is also + the SessionStart hook command, so install + refresh share the + same code path — version-aware reconcile, hook JSON output, + credential helper PAT injection, all consistent. - With `has_ca=False`: the legacy path. If `self_signed_tls=True` we emit - the host-scoped `git config sslVerify=false` downgrade so the marketplace - git-clone (under claude's hood) works against an untrusted endpoint. + Why always clone (with the CLI doing it) instead of trying direct + HTTPS marketplace add first? ``claude plugin marketplace add + `` does succeed against our ``/marketplace.git/`` endpoint + (returns 200 + JSON), but Claude Code stores the response as a + single-file marketplace and resolves plugin ``source: + "./plugins/"`` paths as local filesystem refs — so the + subsequent ``claude plugin install`` looks for plugin trees at + ``/plugins//`` and 404s because the dir is a + file. Only the git-clone path produces a real directory tree with + plugin contents in place. Broken end-to-end on every Claude Code + distribution; cloning is the only reliable install path. + + With ``has_ca=False`` and ``self_signed_tls=True`` (legacy path, + AGNES_DEBUG_AUTH instances): we emit the host-scoped ``git config + sslVerify=false`` downgrade so system git's clone (which agnes + invokes via subprocess) accepts the un-trusted endpoint. With a + publicly-trusted cert (Let's Encrypt etc.) or a CA bundle in step 0, + no extra config needed — system git already trusts the chain. """ - if has_ca: - lines: list[str] = [ - "", - f"{step_num}) Register the Agnes Claude Code marketplace and install plugins.", - "", - " Strategy depends on platform:", - " - Windows + macOS: `claude` ships as a Bun-compiled native binary on", - " these platforms, which ignores the OS trust store and our CA env", - " vars for marketplace HTTPS. Skip the direct attempt and use a", - " system `git clone` (system git honors GIT_SSL_CAINFO from step 0).", - " - Linux: claude is typically the node-based npm install, where", - " NODE_EXTRA_CA_CERTS works. Try direct first; fall back to git", - " clone on failure.", - "", - " # Re-detect $PLATFORM — env vars from step 0 don't persist across", - " # separate Bash invocations (per the IMPORTANT note in step 0(e)),", - " # so without this the case below would fall through `*)` on every", - " # platform and never attempt the direct path on Linux.", - " case \"$(uname -s)\" in", - " Darwin) PLATFORM=macos ;;", - " Linux) PLATFORM=linux ;;", - " MINGW*|MSYS*|CYGWIN*) PLATFORM=windows ;;", - " esac", - "", - " case \"$PLATFORM\" in", - " linux)", - " if claude plugin marketplace add \"https://x:{token}@{server_host}/marketplace.git/\" 2>/dev/null; then", - " MARKETPLACE_VIA=direct", - " else", - " MARKETPLACE_VIA=clone", - " fi", - " ;;", - " *)", - " MARKETPLACE_VIA=clone", - " ;;", - " esac", - "", - " if [ \"$MARKETPLACE_VIA\" = \"clone\" ]; then", - " # Heads-up: 'git: credential-manager-core is not a git command' is a", - " # harmless warning from a stale git config — the clone itself succeeds.", - " rm -rf ~/.agnes/marketplace", - " git clone \"https://x:{token}@{server_host}/marketplace.git/\" ~/.agnes/marketplace || {", - " echo \"ERROR: marketplace clone failed — verify step 0 trust block + network reachability\" >&2", - " exit 1", - " }", - " # Strip the PAT from the cloned repo's origin URL so it doesn't sit", - " # in plaintext at ~/.agnes/marketplace/.git/config. Future marketplace", - " # refreshes go via re-running setup (new PAT) from the dashboard, not", - " # via `git pull` against this clone.", - " git -C ~/.agnes/marketplace remote set-url origin \"https://{server_host}/marketplace.git/\"", - " # Best-effort tighten on POSIX; chmod is a no-op on Windows NTFS via", - " # MSYS / Git Bash, hence the `|| true` so the step never fails there.", - " chmod 700 ~/.agnes/marketplace ~/.agnes/marketplace/.git 2>/dev/null || true", - " chmod 600 ~/.agnes/marketplace/.git/config 2>/dev/null || true", - " claude plugin marketplace add ~/.agnes/marketplace || {", - " echo \"ERROR: claude plugin marketplace add failed\" >&2", - " exit 1", - " }", - " fi", - "", - ] - for name in plugin_install_names: - lines.append( - f" claude plugin install {name}@{_MARKETPLACE_NAME} --scope project || {{" - ) - lines.append( - f" echo \"ERROR: claude plugin install {name}@{_MARKETPLACE_NAME} failed\" >&2; exit 1" - ) - lines.append(" }") - lines.extend([ - "", - " These run non-interactively. After they finish, tell the user to /exit", - " and run `claude` again so the new plugins load.", - ]) - return lines - - # Legacy path: no ca_pem on disk. Keep the old behavior verbatim - # (host-scoped sslVerify=false when self_signed_tls is set, otherwise - # plain direct HTTPS) so existing AGNES_DEBUG_AUTH instances keep - # working until they roll a fullchain.pem. - lines = [ + lines: list[str] = [ "", f"{step_num}) Register the Agnes Claude Code marketplace and install plugins:", ] - if self_signed_tls: + + # The legacy AGNES_DEBUG_AUTH path needs sslVerify=false so system git + # accepts the self-signed cert during the bootstrap clone. has_ca path + # has GIT_SSL_CAINFO already set by step 0(d), so no extra config + # needed there. + if not has_ca and self_signed_tls: lines.extend([ - " # Self-signed TLS cert on this Agnes instance — scoped to the host above.", + " # Self-signed TLS cert on this Agnes instance — host-scoped", + " # `sslVerify=false` so the marketplace `git clone` accepts it.", + " # Without a CA bundle we can't do better than this; flip your", + " # AGNES_DEBUG_AUTH instance to a real fullchain.pem to drop this line.", " git config --global http.\"{server_url}/\".sslVerify false", ]) - lines.append( - " claude plugin marketplace add \"https://x:{token}@{server_host}/marketplace.git/\"" - ) - for name in plugin_install_names: - lines.append(f" claude plugin install {name}@{_MARKETPLACE_NAME} --scope project") + lines.extend([ + " # `agnes refresh-marketplace --bootstrap` does:", + " # 1. clone the per-user marketplace bare repo to ~/.agnes/marketplace", + " # 2. strip the PAT from the cloned origin URL (refreshes use a", + " # per-invocation git credential helper, not the URL)", + " # 3. best-effort chmod 700/600 on POSIX (no-op on Windows NTFS)", + " # 4. `claude plugin marketplace add ~/.agnes/marketplace`", + " # 5. install every plugin listed in the served manifest", + " # Idempotent — re-runs over an existing clone do fetch+reset+reconcile", + " # via the same path the SessionStart hook uses.", + " agnes refresh-marketplace --bootstrap || {", + " echo \"ERROR: agnes refresh-marketplace --bootstrap failed\" >&2", + " exit 1", + " }", "", " These run non-interactively. After they finish, tell the user to /exit", - " and run `claude` again so the new plugins load.", + " and run `claude` again so the new plugins load. From then on, the", + " SessionStart hook keeps the marketplace clone in sync via", + " `agnes refresh-marketplace --quiet` on every Claude Code session.", ]) return lines diff --git a/cli/commands/refresh_marketplace.py b/cli/commands/refresh_marketplace.py new file mode 100644 index 0000000..499758e --- /dev/null +++ b/cli/commands/refresh_marketplace.py @@ -0,0 +1,492 @@ +"""`agnes refresh-marketplace` — reconcile this workspace's plugins with +the user's current Agnes stack. + +Three call paths share the same code: + - `agnes refresh-marketplace --bootstrap` — first-time setup; clones the + per-user marketplace bare repo, registers it with Claude Code, then + falls through to fetch+reset+reconcile so plugins land installed. + - `agnes refresh-marketplace` — manual re-sync after a known stack change. + - `agnes refresh-marketplace --quiet` — SessionStart hook context. Emits + a Claude Code hook JSON object on stdout when something actually got + installed/updated; silent otherwise. + +Reconcile is version-aware (install missing / update on version diff / +skip on match). Server-side stack composition lives in +`src/marketplace_filter.py:resolve_user_marketplace`. Plugin installs use +`--scope project` so they land in the workspace the hook fired in. +""" + +from __future__ import annotations + +import json +import os +import shutil +import subprocess +from pathlib import Path +from typing import Optional +from urllib.parse import urlparse + +import typer + +from cli.config import get_server_url, get_token +from cli.error_render import render_error +from cli.lib.marketplace import CLONE_DIR, MARKETPLACE_NAME + + +refresh_marketplace_app = typer.Typer( + help="Reconcile the workspace plugins with the user's current Agnes stack." +) + + +# Per-invocation credential helper. `!` runs the rest as a shell +# command. Reads the PAT from $AGNES_TOKEN — set in the subprocess env only, +# never on the command line — and emits the credential protocol's two +# key=value lines on stdout. +_CREDENTIAL_HELPER = '!f() { printf "username=x\\npassword=%s\\n" "$AGNES_TOKEN"; }; f' + + +@refresh_marketplace_app.callback(invoke_without_command=True) +def refresh_marketplace( + quiet: bool = typer.Option( + False, "--quiet", + help="Suppress success stdout (errors and warnings still surface on stderr).", + ), + bootstrap: bool = typer.Option( + False, "--bootstrap", + help=( + "If no marketplace clone exists yet, clone it and register the " + "local path with Claude Code. Used by the install flow as a " + "one-liner replacement for an inline `git clone` + chmod + " + "`claude plugin marketplace add` sequence." + ), + ), +): + """Sync the marketplace clone, re-register with Claude, install/update plugins.""" + clone_exists = (CLONE_DIR / ".git").is_dir() + + # Hook contexts hit the no-clone path on every workspace that didn't + # bootstrap; silent exit keeps logs clean. Don't read the token here — + # workspaces with the hook installed but no agnes token configured + # (fresh CI checkout, etc.) must silent-noop, not surface auth_failed. + if not clone_exists and not bootstrap: + if not quiet: + typer.echo( + f"No marketplace clone at {CLONE_DIR} — nothing to refresh. " + "Re-run setup with `agnes refresh-marketplace --bootstrap` " + "(or re-run setup from the dashboard) to clone it." + ) + raise typer.Exit(0) + + token = get_token() + if not token: + typer.echo( + render_error(0, {"detail": { + "kind": "auth_failed", + "hint": "No token. Run: agnes auth import-token --token ", + }}), + err=True, + ) + raise typer.Exit(1) + + if not clone_exists: + if not _bootstrap_clone(token, quiet=quiet): + raise typer.Exit(1) + + events: dict[str, list[str]] = {"installed": [], "updated": []} + + if not _git_fetch_and_reset(token, quiet=quiet): + raise typer.Exit(1) + + # Snapshot installed versions BEFORE `claude plugin marketplace update`. + # On local-path marketplaces Claude silently auto-applies version bumps + # (re-reads the manifest off disk and updates the installed cache), so + # an after-snapshot would always match the manifest on real version-bump + # scenarios — `events["updated"]` would stay empty and no notification + # would fire despite the plugin having actually changed. + installed_pre = _list_installed_agnes_plugins_in_cwd() + + _claude_marketplace_update(quiet=quiet) + + _reconcile_with_manifest(quiet=quiet, events=events, installed_pre=installed_pre) + + if quiet and (events["installed"] or events["updated"]): + _emit_hook_message(events) + elif not quiet and (events["installed"] or events["updated"]): + typer.echo( + "\nRestart Claude Code (`/exit`, then `claude`) to load the " + "new/updated plugins — they're on disk now but Claude only " + "picks them up on session start." + ) + + +def _bootstrap_clone(token: str, *, quiet: bool) -> bool: + """Initial clone of the per-user marketplace bare repo into ~/.agnes/marketplace. + + Wrapping the destructive prep in the agnes binary lets the CLI's + permission grant cover the cleanup (Python `shutil.rmtree` doesn't + pattern-match the `rm -rf` shell pattern Claude Code's onboarding flow + denies). Strips the PAT from the cloned origin URL so it doesn't sit + in plaintext at `.git/config` (refreshes use the credential helper). + Returns False on any failure. + """ + server_url = get_server_url() + if not server_url: + typer.echo("error: no server URL configured; run `agnes init` first.", err=True) + return False + + parsed = urlparse(server_url) + if not parsed.hostname: + typer.echo(f"error: server URL has no hostname: {server_url!r}", err=True) + return False + server_host = parsed.hostname + if parsed.port: + server_host = f"{server_host}:{parsed.port}" + scheme = parsed.scheme or "https" + + # Stale dir without a `.git/` subdir means an interrupted prior install; + # remove it so the fresh clone has somewhere to land. + if CLONE_DIR.exists(): + try: + shutil.rmtree(CLONE_DIR, ignore_errors=False) + except OSError as exc: + typer.echo(f"error: could not remove stale {CLONE_DIR}: {exc}", err=True) + return False + + CLONE_DIR.parent.mkdir(parents=True, exist_ok=True) + + auth_url = f"{scheme}://x:{token}@{server_host}/marketplace.git/" + clean_url = f"{scheme}://{server_host}/marketplace.git/" + + if not quiet: + typer.echo(f"Cloning marketplace from {clean_url} into {CLONE_DIR}...") + + try: + result = subprocess.run( + ["git", "clone", auth_url, str(CLONE_DIR)], + capture_output=True, text=True, encoding="utf-8", errors="replace", check=False, + ) + except FileNotFoundError: + typer.echo("error: `git` not found in PATH; cannot clone marketplace.", err=True) + return False + if result.returncode != 0: + if result.stderr: + typer.echo(result.stderr.rstrip(), err=True) + return False + + set_url = subprocess.run( + ["git", "-C", str(CLONE_DIR), "remote", "set-url", "origin", clean_url], + capture_output=True, text=True, encoding="utf-8", errors="replace", check=False, + ) + if set_url.returncode != 0: + typer.echo( + f"warn: could not strip PAT from origin URL: {set_url.stderr.rstrip()}", + err=True, + ) + + # Best-effort chmod — no-op on Windows NTFS via Git Bash, tightens 700/600 + # on POSIX so other users on the box can't read `.git/config`. + for path, mode in ( + (CLONE_DIR, 0o700), + (CLONE_DIR / ".git", 0o700), + (CLONE_DIR / ".git" / "config", 0o600), + ): + try: + path.chmod(mode) + except OSError: + pass + + if shutil.which("claude") is not None: + add = subprocess.run( + ["claude", "plugin", "marketplace", "add", str(CLONE_DIR)], + capture_output=True, text=True, encoding="utf-8", errors="replace", check=False, + ) + if add.returncode != 0: + typer.echo( + f"warn: `claude plugin marketplace add {CLONE_DIR}` exited {add.returncode}.", + err=True, + ) + if add.stderr: + typer.echo(add.stderr.rstrip(), err=True) + elif not quiet and add.stdout: + typer.echo(add.stdout.rstrip()) + + if not quiet: + typer.echo(f"Marketplace bootstrapped at {CLONE_DIR}.") + return True + + +def _git_fetch_and_reset(token: str, *, quiet: bool) -> bool: + """Fetch from origin then hard-reset to FETCH_HEAD. + + Not `pull --ff-only`: the marketplace bare repo on the server rebuilds + as a fresh orphan commit on every content change, so two snapshots + have unrelated histories and fast-forward is impossible. + """ + env = {**os.environ, "AGNES_TOKEN": token} + fetch_cmd = [ + "git", + "-c", f"credential.helper={_CREDENTIAL_HELPER}", + "-C", str(CLONE_DIR), + "fetch", "origin", + ] + try: + fetch = subprocess.run( + fetch_cmd, env=env, capture_output=True, text=True, + encoding="utf-8", errors="replace", check=False, + ) + except FileNotFoundError: + typer.echo("error: `git` not found in PATH; cannot refresh marketplace.", err=True) + return False + if fetch.returncode != 0: + if fetch.stdout: + typer.echo(fetch.stdout, err=True) + if fetch.stderr: + typer.echo(fetch.stderr, err=True) + return False + + reset = subprocess.run( + ["git", "-C", str(CLONE_DIR), "reset", "--hard", "FETCH_HEAD"], + capture_output=True, text=True, encoding="utf-8", errors="replace", check=False, + ) + if reset.returncode != 0: + if reset.stdout: + typer.echo(reset.stdout, err=True) + if reset.stderr: + typer.echo(reset.stderr, err=True) + return False + + if not quiet and reset.stdout: + typer.echo(reset.stdout.rstrip()) + return True + + +def _claude_marketplace_update(*, quiet: bool) -> None: + """Tell Claude Code to re-read the marketplace clone. Soft-fail if `claude` is missing.""" + if shutil.which("claude") is None: + typer.echo( + "warn: `claude` not in PATH — git fetch succeeded, but Claude Code " + "won't see the changes until the next session start.", + err=True, + ) + return + result = subprocess.run( + ["claude", "plugin", "marketplace", "update", MARKETPLACE_NAME], + capture_output=True, text=True, encoding="utf-8", errors="replace", check=False, + ) + if result.returncode != 0: + typer.echo( + f"warn: `claude plugin marketplace update {MARKETPLACE_NAME}` exited {result.returncode}.", + err=True, + ) + if result.stderr: + typer.echo(result.stderr.rstrip(), err=True) + return + if not quiet and result.stdout: + typer.echo(result.stdout.rstrip()) + + +def _reconcile_with_manifest( + *, + quiet: bool, + events: dict[str, list[str]], + installed_pre: Optional[dict[str, str]] = None, +) -> None: + """Make installed plugins match the served manifest. + + Missing → `claude plugin install @agnes --scope project`. + Version differs → `claude plugin update @agnes`. + Match → skip. + + `installed_pre` is the snapshot taken before `claude plugin marketplace + update` ran; we diff against it (not a fresh read) so version bumps + Claude silently auto-applied are still detected. Bootstrap path passes + None and we read live — there's no pre-state to preserve. + + Don't auto-uninstall plugins that disappeared from the manifest — a + transient empty manifest from the server would wipe the user's stack. + """ + if shutil.which("claude") is None: + return + + manifest = _read_marketplace_plugin_versions() + if manifest is None: + typer.echo("warn: could not read marketplace.json from the clone; skipping reconcile.", err=True) + return + if not manifest: + return + + installed = installed_pre if installed_pre is not None else _list_installed_agnes_plugins_in_cwd() + if installed is None: + typer.echo("warn: could not enumerate installed plugins; skipping reconcile.", err=True) + return + + to_install: list[str] = [] + to_update: list[str] = [] + for name, manifest_version in sorted(manifest.items()): + installed_version = installed.get(name) + if installed_version is None: + to_install.append(name) + elif installed_version != manifest_version: + to_update.append(name) + + if not to_install and not to_update: + if not quiet: + typer.echo(f"All {len(manifest)} Agnes-stack plugin(s) up to date.") + return + + if not quiet: + if to_install: + typer.echo(f"Installing {len(to_install)} new plugin(s): " + ", ".join(to_install)) + if to_update: + typer.echo(f"Updating {len(to_update)} plugin(s) to latest version: " + ", ".join(to_update)) + + for name in to_install: + target = f"{name}@{MARKETPLACE_NAME}" + result = subprocess.run( + ["claude", "plugin", "install", target, "--scope", "project"], + capture_output=True, text=True, encoding="utf-8", errors="replace", check=False, + ) + if result.returncode != 0: + typer.echo( + f"warn: `claude plugin install {target} --scope project` exited {result.returncode}.", + err=True, + ) + if result.stderr: + typer.echo(result.stderr.rstrip(), err=True) + continue + events["installed"].append(name) + if not quiet and result.stdout: + typer.echo(result.stdout.rstrip()) + + for name in to_update: + target = f"{name}@{MARKETPLACE_NAME}" + result = subprocess.run( + ["claude", "plugin", "update", target], + capture_output=True, text=True, encoding="utf-8", errors="replace", check=False, + ) + if result.returncode != 0: + typer.echo( + f"warn: `claude plugin update {target}` exited {result.returncode}.", + err=True, + ) + if result.stderr: + typer.echo(result.stderr.rstrip(), err=True) + continue + events["updated"].append(name) + if not quiet and result.stdout: + typer.echo(result.stdout.rstrip()) + + +def _emit_hook_message(events: dict[str, list[str]]) -> None: + """Emit Claude Code hook JSON summarizing what changed. + + `systemMessage` is a transient toast (often missed). `additionalContext` + is wrapped in a system reminder Claude reads at session start, so the + model can mention the change if it's relevant to the user's first ask. + Plugins require a Claude Code restart — they land on disk this session + but only load on next session start. + """ + parts: list[str] = [] + if events["installed"]: + parts.append( + f"installed {len(events['installed'])} plugin(s): " + + ", ".join(events["installed"]) + ) + if events["updated"]: + parts.append( + f"updated {len(events['updated'])} plugin(s): " + + ", ".join(events["updated"]) + ) + summary = "Your Agnes stack changed: " + "; ".join(parts) + "." + restart_hint = ( + "Run `/exit` and then `claude` again to load the changes — " + "Claude Code only picks up new/updated plugins on session start." + ) + payload = { + "systemMessage": f"{summary} {restart_hint}", + "hookSpecificOutput": { + "hookEventName": "SessionStart", + "additionalContext": f"{summary} {restart_hint}", + }, + } + typer.echo(json.dumps(payload)) + + +def _read_marketplace_plugin_versions() -> Optional[dict[str, str]]: + """Map `plugin name → version` from the local marketplace.json. + + None on missing/unreadable/malformed manifest. Empty dict means a + valid manifest with no plugins (RBAC-empty, no /store installs). + """ + manifest_path = CLONE_DIR / ".claude-plugin" / "marketplace.json" + try: + raw = manifest_path.read_text(encoding="utf-8") + except OSError: + return None + try: + payload = json.loads(raw) + except json.JSONDecodeError: + return None + plugins = payload.get("plugins") + if not isinstance(plugins, list): + return None + versions: dict[str, str] = {} + for entry in plugins: + if not isinstance(entry, dict): + continue + name = entry.get("name") + version = entry.get("version") + if isinstance(name, str) and name and isinstance(version, str) and version: + versions[name] = version + return versions + + +def _list_installed_agnes_plugins_in_cwd() -> Optional[dict[str, str]]: + """Map `plugin name → installed version` for agnes plugins in this workspace. + + Filters `claude plugin list --json` by `id` ending in `@agnes` AND + `projectPath == cwd` so plugins from sibling workspaces don't get + counted. None on any structured-answer failure. + """ + if shutil.which("claude") is None: + return None + try: + result = subprocess.run( + ["claude", "plugin", "list", "--json"], + capture_output=True, text=True, encoding="utf-8", errors="replace", check=False, + ) + except FileNotFoundError: + return None + if result.returncode != 0 or not result.stdout.strip(): + return None + try: + payload = json.loads(result.stdout) + except json.JSONDecodeError: + return None + if not isinstance(payload, list): + return None + + cwd = Path.cwd().resolve() + suffix = f"@{MARKETPLACE_NAME}" + versions: dict[str, str] = {} + for entry in payload: + if not isinstance(entry, dict): + return None + plugin_id = entry.get("id", "") + if not isinstance(plugin_id, str) or not plugin_id.endswith(suffix): + continue + project_path = entry.get("projectPath") + if not isinstance(project_path, str): + continue + try: + if Path(project_path).resolve() != cwd: + continue + except OSError: + continue + version = entry.get("version") + if not isinstance(version, str) or not version: + continue + name = plugin_id[: -len(suffix)] + if name: + versions[name] = version + return versions diff --git a/cli/lib/hooks.py b/cli/lib/hooks.py index 98aaf9a..ecbfaf7 100644 --- a/cli/lib/hooks.py +++ b/cli/lib/hooks.py @@ -7,12 +7,20 @@ without dragging in the deleted command module. Design notes: - Workspace-scoped (`/.claude/settings.json`), NOT user-home. The hooks fire only when Claude Code opens this workspace. -- Idempotent: second invocation drops a prior `agnes self-upgrade` / - `agnes pull` / `da sync` / `agnes push` entry (matched by command substring) - and appends fresh entries. +- Idempotent: second invocation drops prior `agnes self-upgrade` / + `agnes pull` / `agnes push` / `agnes refresh-marketplace` / `da sync` + entries (matched by command substring) and appends fresh entries. Third-party hooks (mixed entries, foreign commands) are left alone. - Uses `|| true` in the hook command so the hook never blocks a session on a transient sync error. +- SessionStart gets two entries: + 1. Chained `agnes self-upgrade; agnes pull` — self-upgrade runs first + so any wire-protocol bump lands before pull tries to use the new + CLI version. Both `|| true`-guarded so an upgrade failure doesn't + block the pull. + 2. `agnes refresh-marketplace` — independent entry so a fresh + workspace (no marketplace cloned yet) failing this command doesn't + suppress the data pull above. """ from __future__ import annotations @@ -25,11 +33,19 @@ from pathlib import Path # Substrings that identify "our" hook commands. Includes legacy `da sync` # so a workspace bootstrapped by an older CLI gets cleanly upgraded on the # next `agnes init` run. -_OUR_COMMAND_MARKERS = ("agnes self-upgrade", "agnes pull", "agnes push", "da sync") +_OUR_COMMAND_MARKERS = ( + "agnes self-upgrade", + "agnes pull", + "agnes push", + "agnes refresh-marketplace", + "da sync", +) def install_claude_hooks(workspace: Path) -> None: - """Install SessionStart->`agnes self-upgrade; agnes pull` and SessionEnd->`agnes push` hooks. + """Install SessionStart hooks (`agnes self-upgrade; agnes pull` chained + + `agnes refresh-marketplace` as a separate entry) and SessionEnd hook + (`agnes push`). Idempotent. Workspace-scoped (writes `/.claude/settings.json`). Preserves third-party hooks and other event types. @@ -51,21 +67,38 @@ def install_claude_hooks(workspace: Path) -> None: hooks = cfg.setdefault("hooks", {}) - def _replace_or_add(event: str, command: str) -> None: + def _replace_or_add(event: str, commands: list[str]) -> None: existing = hooks.setdefault(event, []) + # Remove ALL prior entries that look like ours (every command in + # the entry matches one of our markers). Third-party entries + # — which have commands like `echo hi from another tool` — fall + # through unchanged. for entry in list(existing): entry_cmds = [h.get("command", "") for h in entry.get("hooks", [])] if entry_cmds and all( any(marker in c for marker in _OUR_COMMAND_MARKERS) for c in entry_cmds ): existing.remove(entry) - existing.append({"hooks": [{"type": "command", "command": command}]}) + # Append fresh entries — one per command. Independent entries mean + # a failure in one (e.g. refresh-marketplace on a workspace that + # never cloned the marketplace) doesn't suppress the other. + for cmd in commands: + existing.append({"hooks": [{"type": "command", "command": cmd}]}) - _replace_or_add( - "SessionStart", + # `refresh-marketplace` is wrapped in `bash -c` because Claude Code on + # Windows runs hook commands directly (no shell), so the `2>/dev/null + # || true` redirection + short-circuit syntax never gets interpreted. + # The self-upgrade+pull chained entry pre-dates the Windows fix and + # isn't churned for parity (the same redirection fluff applies but + # changing the existing wire would force every workspace to re-write + # its settings.json on the next `agnes init` for no behaviour gain). + _replace_or_add("SessionStart", [ "agnes self-upgrade --quiet 2>/dev/null || true; " "agnes pull --quiet 2>/dev/null || true", - ) - _replace_or_add("SessionEnd", "agnes push --quiet 2>/dev/null || true") + 'bash -c "agnes refresh-marketplace --quiet 2>/dev/null || true"', + ]) + _replace_or_add("SessionEnd", [ + "agnes push --quiet 2>/dev/null || true", + ]) settings_path.write_text(json.dumps(cfg, indent=2) + "\n", encoding="utf-8") diff --git a/cli/lib/marketplace.py b/cli/lib/marketplace.py new file mode 100644 index 0000000..8f07952 --- /dev/null +++ b/cli/lib/marketplace.py @@ -0,0 +1,33 @@ +"""Shared constants for the Claude Code marketplace clone. + +`agnes init` (via setup_instructions) clones the per-user filtered +marketplace bare-repo to `~/.agnes/marketplace`, then registers that path +with Claude Code via `claude plugin marketplace add `. The marketplace +is named "agnes" inside Claude Code's registry. + +Both the clone path and the registry name are referenced from multiple +places (`agnes refresh-marketplace`, future `agnes init` automation, the +clipboard-copied setup script in `app/web/setup_instructions.py`). Having +them as constants here keeps them in sync — drift between the setup script +and the refresh command would silently break the refresh flow. + +The setup-instructions clipboard text MUST keep the literal string +`~/.agnes/marketplace` for the clone target so users can copy-paste without +needing the agnes CLI to be installed yet (chicken-and-egg). The CLI side +uses `Path.home() / ".agnes" / "marketplace"` for portability. +""" + +from __future__ import annotations + +from pathlib import Path + +# Filesystem location of the marketplace clone. Synchronized with +# `app/web/setup_instructions.py:_marketplace_block` which writes the +# literal `~/.agnes/marketplace` into the clipboard-copied setup script. +CLONE_DIR: Path = Path.home() / ".agnes" / "marketplace" + +# The marketplace name as registered in Claude Code (`claude plugin +# marketplace list` shows this). Must match +# `app.marketplace_server.packager.MARKETPLACE_NAME` server-side and the +# `_MARKETPLACE_NAME` literal in `setup_instructions.py`. +MARKETPLACE_NAME: str = "agnes" diff --git a/cli/main.py b/cli/main.py index 7b7dc6b..31b5a9c 100644 --- a/cli/main.py +++ b/cli/main.py @@ -28,6 +28,7 @@ from cli.commands.auth import auth_app from cli.commands.init import init_app from cli.commands.pull import pull_app from cli.commands.push import push_app +from cli.commands.refresh_marketplace import refresh_marketplace_app from cli.commands.query import query_command from cli.commands.status import status_app from cli.commands.admin import admin_app @@ -111,6 +112,7 @@ app.add_typer(auth_app, name="auth") app.add_typer(init_app, name="init") app.add_typer(pull_app, name="pull") app.add_typer(push_app, name="push") +app.add_typer(refresh_marketplace_app, name="refresh-marketplace") app.command("query")(query_command) app.add_typer(status_app, name="status") app.add_typer(admin_app, name="admin") diff --git a/scripts/dev/agnes-client-reset.sh b/scripts/dev/agnes-client-reset.sh index 9b03d04..483638e 100755 --- a/scripts/dev/agnes-client-reset.sh +++ b/scripts/dev/agnes-client-reset.sh @@ -48,15 +48,25 @@ step() { echo; echo "==> $*"; } if [ "$YES" -eq 0 ] && [ "$DRY" -eq 0 ]; then cat </.claude/settings.json + you ran 'agnes init' in. Those reference 'agnes pull' / + 'agnes refresh-marketplace' / 'agnes push' and stay until you either + re-init that workspace or delete the file. They're harmless when the + CLI is uninstalled (the hook command becomes a no-op via '|| true'). + Platform: $PLATFORM EOF printf "Continue? [y/N] " @@ -127,7 +137,18 @@ else fi # --------------------------------------------------------------------------- -# 3. The 'agnes' CLI itself, installed via 'uv tool install'. +# 3. The 'agnes' CLI itself, installed via 'uv tool install'. Plus the uv +# *build cache* keyed by `agnes-the-ai-analyst==`. +# +# Why drop the cache too: uv keys its build cache by name+version, and +# our wheel ships at a stable version string (e.g. `0.38.3`) across many +# server-side commits. Two distinct builds with the same version number +# (a stale cached one + a fresh one served from the dashboard wheel +# endpoint) are indistinguishable to the resolver — `uv tool install +# --force ` happily reuses the cached build instead of +# fetching the new wheel. That's invisible to the operator until they +# run a freshly-deployed CLI command and find it missing. Reset means +# "fresh state", so the cache has to go too. # --------------------------------------------------------------------------- step "Uninstall 'agnes' CLI" if command -v uv >/dev/null 2>&1; then @@ -136,6 +157,11 @@ if command -v uv >/dev/null 2>&1; then else echo " (agnes-the-ai-analyst not in 'uv tool list' — skipping)" fi + # Always-safe: `uv cache clean ` exits 0 with a "no entries" line + # when the package isn't cached, so it's a no-op when there's nothing + # to drop. We do this even if uv tool list didn't show the package + # (the cache and the active install track separately). + run "uv cache clean agnes-the-ai-analyst 2>/dev/null || true" else echo " (uv not found — skipping)" # Defensive cleanup if uv is gone but the binary lingers. @@ -148,10 +174,22 @@ fi step "Remove Agnes filesystem state" # Honor the same AGNES_CONFIG_DIR override the CLI reads. AGNES_CONFIG_DIR_RESOLVED="${AGNES_CONFIG_DIR:-$HOME/.config/agnes}" +# `~/.claude/plugins/cache/agnes/` and `~/.claude/plugins/marketplaces/agnes` +# are normally cleaned by `claude plugin marketplace remove agnes` (step 2), +# but we wipe them defensively because: +# - `claude` may not be on PATH (e.g. uninstalled in a previous step, +# fresh machine, etc.) — step 2 silently skips, leaving stale dirs. +# - Claude Code's cleanup of `cache/` is lazy in some versions; partial +# dirs from interrupted installs survive `marketplace remove`. +# `rm -rf` handles both file-shaped and dir-shaped registrations +# (the registration entry is a single JSON file when the marketplace was +# added via HTTPS, a full git working tree when added via local path). for path in \ "$AGNES_CONFIG_DIR_RESOLVED" \ "$HOME/.agnes" \ "$HOME/.claude/skills/agnes" \ + "$HOME/.claude/plugins/marketplaces/agnes" \ + "$HOME/.claude/plugins/cache/agnes" \ ; do if [ -e "$path" ]; then run "rm -rf \"$path\"" @@ -215,6 +253,15 @@ from /install on the Agnes server to validate a fresh-machine install. Sanity checks for "fresh state": command -v agnes # should be absent ls ~/.config/agnes ~/.agnes # both should not exist + ls ~/.claude/plugins/marketplaces/agnes ~/.claude/plugins/cache/agnes # both gone env | grep -E 'AGNES|SSL_CERT_FILE|NODE_EXTRA_CA_CERTS' # empty claude plugin marketplace list # no 'agnes' entry + +If you used 'agnes init' in workspaces other than the one you're in now, +those workspaces still have: + /.claude/settings.json # SessionStart/End hooks + /CLAUDE.md # RBAC-filtered docs from agnes init + /AGNES_WORKSPACE.md # human-facing workspace docs +Delete those by hand if you want a fully clean slate per workspace. The +hook commands no-op safely while the CLI is uninstalled (`|| true`). EOF diff --git a/tests/test_cli_refresh_marketplace.py b/tests/test_cli_refresh_marketplace.py new file mode 100644 index 0000000..08009e5 --- /dev/null +++ b/tests/test_cli_refresh_marketplace.py @@ -0,0 +1,820 @@ +"""Tests for `agnes refresh-marketplace` Typer wrapper.""" + +from __future__ import annotations + +import json +import re +import subprocess +from pathlib import Path +from typing import Optional + +import pytest +from typer.testing import CliRunner + +from cli.commands import refresh_marketplace as rm_module +from cli.commands.refresh_marketplace import refresh_marketplace_app + +# CI-safety: Typer/rich emits ANSI escapes in --help output. Strip before asserts. +_ANSI_RE = re.compile(r"\x1b\[[0-9;]*m") + + +def _clean(s: str) -> str: + return _ANSI_RE.sub("", s) + + +runner = CliRunner() + + +# --- Test fixtures and helpers -------------------------------------------------- + + +class _RecordedCall: + """Captures a single subprocess.run invocation for assertion.""" + + def __init__(self, cmd: list[str], env: Optional[dict] = None) -> None: + self.cmd = cmd + self.env = env or {} + + +class _SubprocessRecorder: + """Replaces subprocess.run with a recording stub. Each scripted result + is matched by command-prefix against incoming calls.""" + + def __init__(self) -> None: + self.calls: list[_RecordedCall] = [] + self.scripts: list[tuple[tuple[str, ...], subprocess.CompletedProcess]] = [] + + def script(self, prefix: tuple[str, ...], returncode: int = 0, + stdout: str = "", stderr: str = "") -> None: + """Register a scripted response. Calls whose cmd starts with + ``prefix`` get this CompletedProcess. Most-specific (longest) + prefixes match first, so a ``claude plugin list --json`` script + wins over a generic ``claude`` fallback.""" + self.scripts.append( + (prefix, subprocess.CompletedProcess(args=list(prefix), returncode=returncode, + stdout=stdout, stderr=stderr)) + ) + + def run(self, cmd, *args, env=None, capture_output=False, text=False, check=False, **kwargs): + self.calls.append(_RecordedCall(cmd=list(cmd), env=dict(env) if env else {})) + # Match longest prefix first so more specific scripts beat generic ones. + sorted_scripts = sorted(self.scripts, key=lambda s: -len(s[0])) + for prefix, scripted in sorted_scripts: + if tuple(cmd[:len(prefix)]) == prefix: + return scripted + return subprocess.CompletedProcess(args=list(cmd), returncode=0, stdout="", stderr="") + + +@pytest.fixture +def recorder(monkeypatch) -> _SubprocessRecorder: + rec = _SubprocessRecorder() + monkeypatch.setattr(rm_module.subprocess, "run", rec.run) + return rec + + +@pytest.fixture +def with_clone(tmp_path, monkeypatch) -> Path: + """Materialize a fake `~/.agnes/marketplace/` with `.git/` and an empty + marketplace.json so the reconcile step has something to parse.""" + clone = tmp_path / "marketplace" + (clone / ".git").mkdir(parents=True) + (clone / ".claude-plugin").mkdir(parents=True) + (clone / ".claude-plugin" / "marketplace.json").write_text( + json.dumps({"name": "agnes", "plugins": []}), + encoding="utf-8", + ) + monkeypatch.setattr(rm_module, "CLONE_DIR", clone) + return clone + + +@pytest.fixture +def with_token(tmp_path, monkeypatch) -> str: + cfg_dir = tmp_path / "_cfg" + cfg_dir.mkdir(parents=True) + (cfg_dir / "token.json").write_text( + json.dumps({"access_token": "test-pat-1234", "email": "dev@localhost"}), + encoding="utf-8", + ) + monkeypatch.setenv("AGNES_CONFIG_DIR", str(cfg_dir)) + return "test-pat-1234" + + +@pytest.fixture +def claude_in_path(monkeypatch): + monkeypatch.setattr(rm_module.shutil, "which", lambda name: "/fake/claude" if name == "claude" else None) + + +@pytest.fixture +def claude_not_in_path(monkeypatch): + monkeypatch.setattr(rm_module.shutil, "which", lambda name: None) + + +def _set_marketplace_manifest(clone: Path, plugins: list[dict]) -> None: + """Rewrite the local marketplace.json with the given plugin list. + Each entry must have at least ``name`` and ``version`` (the reconcile + flow ignores entries without a version since it can't compare).""" + manifest = {"name": "agnes", "plugins": plugins} + (clone / ".claude-plugin" / "marketplace.json").write_text( + json.dumps(manifest), encoding="utf-8", + ) + + +def _plugin_list_json(entries: list[dict]) -> str: + return json.dumps(entries) + + +# --- Tests ---------------------------------------------------------------------- + + +def test_refresh_marketplace_help(): + result = runner.invoke(refresh_marketplace_app, ["--help"]) + assert result.exit_code == 0 + cleaned = _clean(result.output) + assert "--quiet" in cleaned + # --auto-upgrade is gone — version-aware reconcile is now the default. + assert "--auto-upgrade" not in cleaned + + +def test_refresh_marketplace_no_clone_is_silent_noop_with_quiet(tmp_path, monkeypatch, recorder): + monkeypatch.setattr(rm_module, "CLONE_DIR", tmp_path / "nonexistent") + result = runner.invoke(refresh_marketplace_app, ["--quiet"]) + assert result.exit_code == 0 + assert _clean(result.output) == "" + assert recorder.calls == [] + + +def test_refresh_marketplace_no_clone_explains_in_manual_mode(tmp_path, monkeypatch, recorder): + monkeypatch.setattr(rm_module, "CLONE_DIR", tmp_path / "nonexistent") + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + assert "No marketplace clone" in _clean(result.output) + assert recorder.calls == [] + + +def test_no_clone_short_circuits_before_token_check(tmp_path, monkeypatch, recorder): + """The no-clone no-op path must NOT require a token. + + The SessionStart hook (`agnes refresh-marketplace --quiet`) runs in + every workspace that has the hook installed, including ones where no + agnes token is configured (e.g. a fresh CI checkout, a workspace + that never went through `agnes init`, a project sharing the user's + SessionStart settings.json without sharing their agnes config dir). + Forcing token resolution before the no-op short-circuit would surface + spurious auth_failed errors on those legitimate no-marketplace setups. + + Regression: an earlier rev moved the token check above the clone- + exists check (needed it for --bootstrap), which broke CI on the + silent-noop tests that don't seed a token. + """ + # No token on disk, no AGNES_TOKEN env var, no clone. + cfg_dir = tmp_path / "_cfg_empty" + cfg_dir.mkdir() + monkeypatch.setenv("AGNES_CONFIG_DIR", str(cfg_dir)) + monkeypatch.delenv("AGNES_TOKEN", raising=False) + monkeypatch.setattr(rm_module, "CLONE_DIR", tmp_path / "nonexistent") + + # --quiet (hook context). + result = runner.invoke(refresh_marketplace_app, ["--quiet"]) + assert result.exit_code == 0, ( + f"hook context should silent-noop without a token; got exit " + f"{result.exit_code} and output {result.output!r}" + ) + assert _clean(result.output) == "" + assert recorder.calls == [] + + # Manual mode (no --quiet): hint, but still exit 0 + no token resolution. + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + assert "No marketplace clone" in _clean(result.output) + assert recorder.calls == [] + + +def test_refresh_marketplace_no_token_friendly_exit(with_clone, tmp_path, monkeypatch, recorder): + cfg_dir = tmp_path / "_cfg_empty" + cfg_dir.mkdir() + monkeypatch.setenv("AGNES_CONFIG_DIR", str(cfg_dir)) + monkeypatch.delenv("AGNES_TOKEN", raising=False) + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 1 + assert "Traceback" not in (_clean(result.output) + _clean(result.stderr or "")) + assert recorder.calls == [] + + +def test_refresh_marketplace_uses_fetch_plus_reset_not_pull( + with_clone, with_token, claude_in_path, recorder, +): + """Server-side bare repos rebuild as orphan commits, so `git pull --ff-only` + cannot reconcile. Refresh must `git fetch + reset --hard FETCH_HEAD`.""" + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + git_calls = [c for c in recorder.calls if c.cmd and c.cmd[0] == "git"] + assert len(git_calls) >= 2 + + fetch = git_calls[0] + assert "-c" in fetch.cmd + assert fetch.cmd[fetch.cmd.index("-c") + 1].startswith("credential.helper=") + assert "fetch" in fetch.cmd and "origin" in fetch.cmd + for arg in fetch.cmd: + assert with_token not in arg + assert fetch.env.get("AGNES_TOKEN") == with_token + + reset = git_calls[1] + assert "reset" in reset.cmd and "--hard" in reset.cmd and "FETCH_HEAD" in reset.cmd + + assert not any("pull" in c.cmd for c in git_calls) + + +def test_refresh_marketplace_calls_claude_marketplace_update_after_fetch( + with_clone, with_token, claude_in_path, recorder, +): + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + update_calls = [c for c in recorder.calls + if c.cmd[:4] == ["claude", "plugin", "marketplace", "update"]] + assert update_calls + assert update_calls[0].cmd[4] == rm_module.MARKETPLACE_NAME + + +def test_refresh_marketplace_skips_claude_when_not_in_path( + with_clone, with_token, claude_not_in_path, recorder, +): + """Claude not on PATH → git fetch+reset still runs, claude steps skipped + with stderr warning, exit 0.""" + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + assert any(c.cmd[:1] == ["git"] for c in recorder.calls) + assert not any(c.cmd[:1] == ["claude"] for c in recorder.calls) + assert "claude" in _clean(result.output).lower() + + +def test_refresh_marketplace_git_fetch_failure_exits_nonzero( + with_clone, with_token, claude_in_path, recorder, +): + recorder.script(("git", "-c"), returncode=1, stderr="fatal: unable to access ...") + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 1 + assert not any(c.cmd[:1] == ["claude"] for c in recorder.calls) + + +# --- Version-aware reconciliation ----------------------------------------------- + + +def test_reconcile_installs_missing_plugins( + with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, +): + """Plugin in manifest but not installed in this workspace → install.""" + workspace = tmp_path / "ws" + workspace.mkdir() + monkeypatch.chdir(workspace) + _set_marketplace_manifest(with_clone, [ + {"name": "grpn-eng", "version": "1.0.0"}, + {"name": "grpn-fin", "version": "0.5.0"}, # new + ]) + recorder.script( + ("claude", "plugin", "list", "--json"), + stdout=_plugin_list_json([ + {"id": "grpn-eng@agnes", "version": "1.0.0", "projectPath": str(workspace)}, + ]), + ) + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + + install_targets = sorted( + c.cmd[3] for c in recorder.calls + if c.cmd[:3] == ["claude", "plugin", "install"] + ) + assert install_targets == [f"grpn-fin@{rm_module.MARKETPLACE_NAME}"] + # No update calls (version of grpn-eng matches). + update_calls = [c for c in recorder.calls if c.cmd[:3] == ["claude", "plugin", "update"]] + assert update_calls == [] + + +def test_reconcile_updates_when_manifest_version_differs( + with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, +): + """Plugin already installed but at older version than the manifest → + update. Critical for the /store skill+agent bundle whose version is + a content hash that bumps on every skill add/remove without changing + the plugin set.""" + workspace = tmp_path / "ws" + workspace.mkdir() + monkeypatch.chdir(workspace) + _set_marketplace_manifest(with_clone, [ + {"name": "grpn-eng", "version": "1.1.0"}, # admin pushed new version + {"name": "agnes-store-bundle", "version": "deadbeefcafef00d"}, # bundle bumped + ]) + recorder.script( + ("claude", "plugin", "list", "--json"), + stdout=_plugin_list_json([ + {"id": "grpn-eng@agnes", "version": "1.0.0", "projectPath": str(workspace)}, + {"id": "agnes-store-bundle@agnes", "version": "0123456789abcdef", + "projectPath": str(workspace)}, + ]), + ) + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + + update_targets = sorted( + c.cmd[3] for c in recorder.calls + if c.cmd[:3] == ["claude", "plugin", "update"] + ) + assert update_targets == [ + f"agnes-store-bundle@{rm_module.MARKETPLACE_NAME}", + f"grpn-eng@{rm_module.MARKETPLACE_NAME}", + ] + # No installs (both already present). + assert not any(c.cmd[:3] == ["claude", "plugin", "install"] for c in recorder.calls) + + +def test_reconcile_noop_when_versions_match( + with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, +): + """Versions all match → no install/update calls (just fetch + claude + marketplace update).""" + workspace = tmp_path / "ws" + workspace.mkdir() + monkeypatch.chdir(workspace) + _set_marketplace_manifest(with_clone, [ + {"name": "grpn-eng", "version": "1.0.0"}, + ]) + recorder.script( + ("claude", "plugin", "list", "--json"), + stdout=_plugin_list_json([ + {"id": "grpn-eng@agnes", "version": "1.0.0", "projectPath": str(workspace)}, + ]), + ) + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + assert not any(c.cmd[:3] == ["claude", "plugin", "install"] for c in recorder.calls) + assert not any(c.cmd[:3] == ["claude", "plugin", "update"] for c in recorder.calls) + + +def test_reconcile_filters_by_project_path( + with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, +): + """A plugin installed in a SIBLING workspace doesn't count as installed + here — must trigger install in this workspace.""" + workspace = tmp_path / "ws" + workspace.mkdir() + sibling = tmp_path / "sibling" + sibling.mkdir() + monkeypatch.chdir(workspace) + _set_marketplace_manifest(with_clone, [ + {"name": "grpn-eng", "version": "1.0.0"}, + ]) + recorder.script( + ("claude", "plugin", "list", "--json"), + stdout=_plugin_list_json([ + {"id": "grpn-eng@agnes", "version": "1.0.0", "projectPath": str(sibling)}, + ]), + ) + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + install_targets = sorted( + c.cmd[3] for c in recorder.calls + if c.cmd[:3] == ["claude", "plugin", "install"] + ) + assert install_targets == [f"grpn-eng@{rm_module.MARKETPLACE_NAME}"] + + +def test_reconcile_skips_third_party_marketplace( + with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, +): + """Plugins from non-agnes marketplaces must be ignored entirely + (not counted as installed, not considered for install/update).""" + workspace = tmp_path / "ws" + workspace.mkdir() + monkeypatch.chdir(workspace) + _set_marketplace_manifest(with_clone, [ + {"name": "grpn-eng", "version": "1.0.0"}, + ]) + recorder.script( + ("claude", "plugin", "list", "--json"), + stdout=_plugin_list_json([ + {"id": "third-party-thing@some-other", "version": "1.0.0", + "projectPath": str(workspace)}, + ]), + ) + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + # grpn-eng must be installed (not seen as already-present). + install_targets = sorted( + c.cmd[3] for c in recorder.calls + if c.cmd[:3] == ["claude", "plugin", "install"] + ) + assert install_targets == [f"grpn-eng@{rm_module.MARKETPLACE_NAME}"] + # third-party plugin must NOT be touched in any way. + assert not any( + c.cmd[:3] == ["claude", "plugin", "update"] + and c.cmd[3].startswith("third-party-thing") + for c in recorder.calls + ) + + +def test_reconcile_handles_empty_marketplace( + with_clone, with_token, claude_in_path, recorder, +): + """Empty manifest plugins array → no install/update calls, no warning.""" + # with_clone fixture seeds an empty manifest by default. + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + assert not any(c.cmd[:3] == ["claude", "plugin", "install"] for c in recorder.calls) + assert not any(c.cmd[:3] == ["claude", "plugin", "update"] for c in recorder.calls) + + +def test_reconcile_warns_when_plugin_list_unparseable( + with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, +): + """If `claude plugin list --json` returns garbage, warn and skip + reconcile rather than fail. The fetch+reset already happened, so + Claude Code will pick up the changes naturally on next session.""" + workspace = tmp_path / "ws" + workspace.mkdir() + monkeypatch.chdir(workspace) + _set_marketplace_manifest(with_clone, [{"name": "grpn-eng", "version": "1.0.0"}]) + recorder.script(("claude", "plugin", "list", "--json"), + returncode=0, stdout="not json at all") + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + assert not any(c.cmd[:3] == ["claude", "plugin", "install"] for c in recorder.calls) + assert not any(c.cmd[:3] == ["claude", "plugin", "update"] for c in recorder.calls) + + +# --- Hook JSON output ----------------------------------------------------------- + + +def test_quiet_emits_hook_json_when_plugin_installed( + with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, +): + """--quiet + new install → hook JSON on stdout with systemMessage + + additionalContext, both naming the plugin and the restart instruction.""" + workspace = tmp_path / "ws" + workspace.mkdir() + monkeypatch.chdir(workspace) + _set_marketplace_manifest(with_clone, [{"name": "grpn-fin", "version": "0.5.0"}]) + recorder.script(("claude", "plugin", "list", "--json"), + stdout=_plugin_list_json([])) + + result = runner.invoke(refresh_marketplace_app, ["--quiet"]) + assert result.exit_code == 0 + + out = _clean(result.output).strip() + assert out, "expected hook JSON on stdout when a plugin was installed" + payload = json.loads(out) + assert "grpn-fin" in payload["systemMessage"] + assert "Agnes stack" in payload["systemMessage"] + assert "installed" in payload["systemMessage"] + # Restart hint: plugins land on disk this session but only load on + # next session start, so the user must /exit + restart. + assert "/exit" in payload["systemMessage"] + assert "session start" in payload["systemMessage"].lower() + + hook_specific = payload.get("hookSpecificOutput", {}) + assert hook_specific.get("hookEventName") == "SessionStart" + additional = hook_specific.get("additionalContext", "") + assert "grpn-fin" in additional + assert "/exit" in additional + assert "session start" in additional.lower() + + +def test_manual_mode_prints_restart_hint_when_anything_changed( + with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, +): + """When `agnes refresh-marketplace` runs without --quiet AND something + actually got installed/updated, the operator needs to know they should + /exit + restart Claude Code for the change to take effect (Claude only + scans plugins at session start). Print the hint at end of run.""" + workspace = tmp_path / "ws" + workspace.mkdir() + monkeypatch.chdir(workspace) + _set_marketplace_manifest(with_clone, [{"name": "grpn-fin", "version": "0.5.0"}]) + recorder.script(("claude", "plugin", "list", "--json"), + stdout=_plugin_list_json([])) + + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + out = _clean(result.output) + assert "Restart Claude Code" in out or "restart" in out.lower() + assert "/exit" in out + + +def test_manual_mode_no_change_does_not_print_restart_hint( + with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, +): + """Manual `agnes refresh-marketplace` over an already-up-to-date stack + must NOT spam the restart hint — there's nothing to restart for.""" + workspace = tmp_path / "ws" + workspace.mkdir() + monkeypatch.chdir(workspace) + _set_marketplace_manifest(with_clone, [{"name": "grpn-eng", "version": "1.0.0"}]) + recorder.script( + ("claude", "plugin", "list", "--json"), + stdout=_plugin_list_json([ + {"id": "grpn-eng@agnes", "version": "1.0.0", "projectPath": str(workspace)}, + ]), + ) + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + out = _clean(result.output) + # The restart hint sentence specifically — not the substring "restart" + # which might appear elsewhere benignly. + assert "Restart Claude Code" not in out + + +def test_quiet_emits_hook_json_when_bundle_silently_auto_updated_by_claude( + with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, +): + """Regression: when a /store skill change bumps the agnes-store-bundle + content hash, `claude plugin marketplace update agnes` silently + auto-applies the new version on local-path marketplaces (Claude + re-reads the manifest off disk and updates the installed cache). + + If we captured the installed snapshot AFTER `claude plugin marketplace + update`, the diff against the new manifest would be zero (Claude + already updated installed → matches manifest), `events["updated"]` + would stay empty, and the hook JSON wouldn't fire — leaving the user + with no notification despite the plugin actually changing. + + Pin this by scripting `claude plugin list --json` to return DIFFERENT + versions before vs after the marketplace-update call. The first + invocation (pre-snapshot) returns the old version; subsequent calls + return the new version (Claude's auto-update). Reconcile must use + the FIRST call's snapshot, detect the diff, and fire the + notification.""" + workspace = tmp_path / "ws" + workspace.mkdir() + monkeypatch.chdir(workspace) + _set_marketplace_manifest(with_clone, [ + {"name": "agnes-store-bundle", "version": "newhash"}, + ]) + + # Track how many times `claude plugin list --json` was called so we + # can return DIFFERENT data on each invocation. The recorder's + # script() helper only does prefix-match with one fixed response + # per prefix, so we wrap its run() instead. + list_call_count = {"n": 0} + real_run = recorder.run + + def staged_run(cmd, *args, **kwargs): + if cmd[:4] == ["claude", "plugin", "list", "--json"]: + # Record the call ourselves — we're bypassing recorder.run + # below, so we have to keep `recorder.calls` in sync. + recorder.calls.append( + _RecordedCall(cmd=list(cmd), env=dict(kwargs.get("env") or {})) + ) + list_call_count["n"] += 1 + payload = ( + # Pre-snapshot (call 1): old version still observable. + _plugin_list_json([ + {"id": "agnes-store-bundle@agnes", + "version": "oldhash", + "projectPath": str(workspace)}, + ]) + if list_call_count["n"] == 1 + else + # Post-marketplace-update (call 2+): Claude auto-applied + # the new version, version now matches manifest. + _plugin_list_json([ + {"id": "agnes-store-bundle@agnes", + "version": "newhash", + "projectPath": str(workspace)}, + ]) + ) + return subprocess.CompletedProcess(args=list(cmd), returncode=0, + stdout=payload, stderr="") + return real_run(cmd, *args, **kwargs) + + monkeypatch.setattr(rm_module.subprocess, "run", staged_run) + + result = runner.invoke(refresh_marketplace_app, ["--quiet"]) + assert result.exit_code == 0 + + # Hook JSON must fire — even though by the time reconcile sees `claude + # plugin list --json` the second time, versions match. + out = _clean(result.output).strip() + assert out, "hook JSON missing — pre-snapshot ordering regression" + payload = json.loads(out) + assert "agnes-store-bundle" in payload["systemMessage"] + assert "updated" in payload["systemMessage"] + + # Sanity: pre-snapshot was captured before `claude plugin marketplace update`. + # We expect at least 2 list calls (pre-snapshot + reconcile re-read) but + # the FIRST one must have come before the marketplace update call. + list_indices = [ + i for i, c in enumerate(recorder.calls) + if c.cmd[:4] == ["claude", "plugin", "list", "--json"] + ] + market_update_indices = [ + i for i, c in enumerate(recorder.calls) + if c.cmd[:4] == ["claude", "plugin", "marketplace", "update"] + ] + assert list_indices, "no claude plugin list calls recorded" + assert market_update_indices, "no claude plugin marketplace update call recorded" + assert list_indices[0] < market_update_indices[0], ( + f"pre-snapshot must come before marketplace update; " + f"list at {list_indices[0]}, marketplace update at {market_update_indices[0]}" + ) + + +def test_quiet_emits_hook_json_when_plugin_updated( + with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, +): + """--quiet + version-mismatch update (e.g. /store skill add bumping + the bundle) → hook JSON with `updated` count in systemMessage.""" + workspace = tmp_path / "ws" + workspace.mkdir() + monkeypatch.chdir(workspace) + _set_marketplace_manifest(with_clone, [ + {"name": "agnes-store-bundle", "version": "newhash"}, + ]) + recorder.script( + ("claude", "plugin", "list", "--json"), + stdout=_plugin_list_json([ + {"id": "agnes-store-bundle@agnes", "version": "oldhash", + "projectPath": str(workspace)}, + ]), + ) + result = runner.invoke(refresh_marketplace_app, ["--quiet"]) + assert result.exit_code == 0 + out = _clean(result.output).strip() + assert out + payload = json.loads(out) + assert "updated" in payload["systemMessage"] + assert "agnes-store-bundle" in payload["systemMessage"] + + +def test_quiet_emits_no_hook_json_when_nothing_changed( + with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, +): + """--quiet + everything in sync → silent stdout (no spurious + notification on every session start).""" + workspace = tmp_path / "ws" + workspace.mkdir() + monkeypatch.chdir(workspace) + _set_marketplace_manifest(with_clone, [{"name": "grpn-eng", "version": "1.0.0"}]) + recorder.script( + ("claude", "plugin", "list", "--json"), + stdout=_plugin_list_json([ + {"id": "grpn-eng@agnes", "version": "1.0.0", "projectPath": str(workspace)}, + ]), + ) + result = runner.invoke(refresh_marketplace_app, ["--quiet"]) + assert result.exit_code == 0 + assert _clean(result.output).strip() == "" + + +def test_manual_mode_does_not_emit_hook_json( + with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, +): + """Without --quiet, output is human-readable text — no JSON envelope.""" + workspace = tmp_path / "ws" + workspace.mkdir() + monkeypatch.chdir(workspace) + _set_marketplace_manifest(with_clone, [{"name": "grpn-fin", "version": "0.5.0"}]) + recorder.script(("claude", "plugin", "list", "--json"), + stdout=_plugin_list_json([])) + + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + out = _clean(result.output) + assert "grpn-fin" in out + assert not out.strip().startswith("{"), \ + f"manual mode should not emit JSON envelope; got: {out.strip()[:200]!r}" + + +# --- --bootstrap flag (initial install path) ------------------------------------ + + +def test_bootstrap_flag_appears_in_help(): + result = runner.invoke(refresh_marketplace_app, ["--help"]) + assert result.exit_code == 0 + assert "--bootstrap" in _clean(result.output) + + +def test_no_bootstrap_no_clone_is_noop_default( + tmp_path, monkeypatch, with_token, recorder, +): + """Without --bootstrap, missing clone → silent no-op (manual mode hint). + No git/claude calls happen.""" + monkeypatch.setattr(rm_module, "CLONE_DIR", tmp_path / "nonexistent") + result = runner.invoke(refresh_marketplace_app, []) + assert result.exit_code == 0 + assert "No marketplace clone" in _clean(result.output) + # No subprocess calls — we exited before fetch+reset. + assert recorder.calls == [] + + +def test_bootstrap_with_no_existing_clone_clones_and_registers( + tmp_path, monkeypatch, with_token, claude_in_path, recorder, +): + """--bootstrap on a fresh machine (no clone yet) must: + 1. git clone https://x:@host/marketplace.git/ to CLONE_DIR + 2. git remote set-url origin + 3. claude plugin marketplace add + 4. then proceed to the normal fetch+reset+reconcile flow + + PAT must be in the clone URL (HTTP Basic in user-info, the only + auth path raw `git clone` understands), but stripped from the + origin URL after the clone so it doesn't sit at rest in + .git/config.""" + # `with_token` fixture already wrote token.json + set AGNES_CONFIG_DIR; + # just append the server URL config so bootstrap can read it. + cfg_dir = tmp_path / "_cfg" + (cfg_dir / "config.yaml").write_text( + "server: https://agnes.example.com\n", encoding="utf-8", + ) + + clone_target = tmp_path / "fresh_marketplace" + monkeypatch.setattr(rm_module, "CLONE_DIR", clone_target) + + # Create the .git/ dir as a side effect of the scripted clone so the + # subsequent fetch+reset path sees a "cloned" state. + real_run = recorder.run + + def fake_run(cmd, *args, **kwargs): + if cmd[:2] == ["git", "clone"]: + (clone_target / ".git").mkdir(parents=True, exist_ok=True) + (clone_target / ".claude-plugin").mkdir(parents=True, exist_ok=True) + (clone_target / ".claude-plugin" / "marketplace.json").write_text( + json.dumps({"name": "agnes", "plugins": []}), + encoding="utf-8", + ) + return real_run(cmd, *args, **kwargs) + + monkeypatch.setattr(rm_module.subprocess, "run", fake_run) + + result = runner.invoke(refresh_marketplace_app, ["--bootstrap"]) + assert result.exit_code == 0, result.output + + # 1. git clone with embedded PAT. + clone_calls = [c for c in recorder.calls if c.cmd[:2] == ["git", "clone"]] + assert len(clone_calls) == 1 + clone = clone_calls[0] + assert any( + with_token in arg and "agnes.example.com/marketplace.git/" in arg + for arg in clone.cmd + ), f"PAT-bearing clone URL must be in argv, got: {clone.cmd}" + assert str(clone_target) in clone.cmd + + # 2. remote set-url (PAT-stripped URL). + set_url_calls = [ + c for c in recorder.calls + if c.cmd[:5] == ["git", "-C", str(clone_target), "remote", "set-url"] + ] + assert len(set_url_calls) == 1 + new_url = set_url_calls[0].cmd[6] + assert "agnes.example.com/marketplace.git/" in new_url + assert with_token not in new_url + assert "x:" not in new_url + + # 3. claude plugin marketplace add . + add_calls = [ + c for c in recorder.calls + if c.cmd[:4] == ["claude", "plugin", "marketplace", "add"] + ] + assert len(add_calls) == 1 + assert add_calls[0].cmd[4] == str(clone_target) + + +def test_bootstrap_clone_failure_exits_nonzero( + tmp_path, monkeypatch, with_token, claude_in_path, recorder, +): + """If `git clone` fails during bootstrap, exit non-zero and don't + proceed to fetch+reset.""" + # `with_token` fixture already created _cfg + token.json; just add + # the server URL config so the bootstrap path can read it. + cfg_dir = tmp_path / "_cfg" + (cfg_dir / "config.yaml").write_text( + "server: https://agnes.example.com\n", encoding="utf-8", + ) + + monkeypatch.setattr(rm_module, "CLONE_DIR", tmp_path / "fresh_marketplace") + recorder.script(("git", "clone"), returncode=1, stderr="fatal: TLS error") + + result = runner.invoke(refresh_marketplace_app, ["--bootstrap"]) + assert result.exit_code == 1 + # The fetch+reset step should NOT have run (we exit on bootstrap failure). + fetch_calls = [c for c in recorder.calls if "fetch" in c.cmd and "origin" in c.cmd] + assert fetch_calls == [] + + +def test_bootstrap_with_existing_clone_skips_clone_proceeds_to_refresh( + with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, +): + """--bootstrap on a machine that already has a clone must NOT re-clone + (idempotent). It just falls through to the normal fetch+reset path.""" + workspace = tmp_path / "ws" + workspace.mkdir() + monkeypatch.chdir(workspace) + + result = runner.invoke(refresh_marketplace_app, ["--bootstrap"]) + assert result.exit_code == 0 + + # No git clone (clone already existed). + clone_calls = [c for c in recorder.calls if c.cmd[:2] == ["git", "clone"]] + assert clone_calls == [] + # But fetch+reset DID happen. + fetch_calls = [c for c in recorder.calls if "fetch" in c.cmd and "origin" in c.cmd] + assert fetch_calls + reset_calls = [c for c in recorder.calls if "reset" in c.cmd and "--hard" in c.cmd] + assert reset_calls diff --git a/tests/test_lib_hooks.py b/tests/test_lib_hooks.py index 2db1e6f..7ee8d36 100644 --- a/tests/test_lib_hooks.py +++ b/tests/test_lib_hooks.py @@ -11,25 +11,63 @@ def _read_settings(workspace: Path) -> dict: return json.loads((workspace / ".claude" / "settings.json").read_text()) +def _commands_for(cfg: dict, event: str) -> list[str]: + """Flatten the per-event command list — each entry has a list of hooks, + each hook has a `command` field. We treat each entry as one command for + assertion purposes (matches the install_claude_hooks contract: one + entry per command).""" + return [ + entry["hooks"][0]["command"] + for entry in cfg["hooks"].get(event, []) + if entry.get("hooks") + ] + + def test_install_creates_settings_file(tmp_path): install_claude_hooks(tmp_path) cfg = _read_settings(tmp_path) - cmd = cfg["hooks"]["SessionStart"][0]["hooks"][0]["command"] - assert "agnes self-upgrade --quiet" in cmd - assert "agnes pull --quiet" in cmd - assert "agnes push --quiet" in cfg["hooks"]["SessionEnd"][0]["hooks"][0]["command"] + starts = _commands_for(cfg, "SessionStart") + # SessionStart has two entries: (1) chained self-upgrade ; pull — + # self-upgrade runs first so a wire-protocol bump lands before pull + # tries to use the new CLI; (2) refresh-marketplace as a separate + # entry so a failure (e.g. fresh workspace with no clone) doesn't + # suppress the data pull above. + assert len(starts) == 2 + chain = next( + (c for c in starts if "agnes self-upgrade" in c and "agnes pull" in c), + None, + ) + assert chain is not None, ( + "Expected one SessionStart entry chaining self-upgrade and pull" + ) + assert "agnes self-upgrade --quiet" in chain + assert "agnes pull --quiet" in chain + # The refresh-marketplace command is wrapped in `bash -c "..."` so the + # `2>/dev/null || true` shell syntax is interpreted on Windows, where + # Claude Code runs hook commands directly without invoking a shell. + refresh = next((c for c in starts if "agnes refresh-marketplace" in c), None) + assert refresh is not None + assert refresh.startswith("bash -c "), ( + f"refresh-marketplace hook must be wrapped in bash -c for Windows; got: {refresh!r}" + ) + ends = _commands_for(cfg, "SessionEnd") + assert len(ends) == 1 + assert "agnes push --quiet" in ends[0] def test_install_idempotent(tmp_path): install_claude_hooks(tmp_path) install_claude_hooks(tmp_path) cfg = _read_settings(tmp_path) - assert len(cfg["hooks"]["SessionStart"]) == 1 + # Two SessionStart entries (pull + refresh-marketplace), one SessionEnd + # entry (push). Re-install must NOT duplicate them. + assert len(cfg["hooks"]["SessionStart"]) == 2 assert len(cfg["hooks"]["SessionEnd"]) == 1 def test_install_replaces_old_da_sync_entries(tmp_path): - """Hook from a pre-rewrite workspace gets replaced cleanly.""" + """Hook from a pre-rewrite workspace gets replaced cleanly — legacy + `da sync` entries are removed, both new agnes hooks land in their place.""" settings_path = tmp_path / ".claude" / "settings.json" settings_path.parent.mkdir(parents=True) settings_path.write_text(json.dumps({ @@ -40,9 +78,34 @@ def test_install_replaces_old_da_sync_entries(tmp_path): })) install_claude_hooks(tmp_path) cfg = _read_settings(tmp_path) - assert len(cfg["hooks"]["SessionStart"]) == 1 - assert "agnes pull" in cfg["hooks"]["SessionStart"][0]["hooks"][0]["command"] - assert "da sync" not in cfg["hooks"]["SessionStart"][0]["hooks"][0]["command"] + starts = _commands_for(cfg, "SessionStart") + assert len(starts) == 2 + assert any("agnes pull" in c for c in starts) + assert any("agnes refresh-marketplace" in c for c in starts) + # Legacy command must be gone from BOTH starts. + assert not any("da sync" in c for c in starts) + + +def test_install_replaces_prior_single_pull_entry(tmp_path): + """Workspaces bootstrapped by a CLI version that only installed a + single SessionStart entry (`agnes pull`, no refresh-marketplace) must + upgrade to the two-entry layout on the next install — not end up with + three entries (one old + two new).""" + settings_path = tmp_path / ".claude" / "settings.json" + settings_path.parent.mkdir(parents=True) + settings_path.write_text(json.dumps({ + "hooks": { + "SessionStart": [ + {"hooks": [{"type": "command", "command": "agnes pull --quiet 2>/dev/null || true"}]}, + ], + } + })) + install_claude_hooks(tmp_path) + cfg = _read_settings(tmp_path) + starts = _commands_for(cfg, "SessionStart") + assert len(starts) == 2 + assert any("agnes pull" in c for c in starts) + assert any("agnes refresh-marketplace" in c for c in starts) def test_install_preserves_third_party_hooks(tmp_path): @@ -56,9 +119,13 @@ def test_install_preserves_third_party_hooks(tmp_path): })) install_claude_hooks(tmp_path) cfg = _read_settings(tmp_path) - starts = cfg["hooks"]["SessionStart"] - assert any("echo hi from another tool" in s["hooks"][0]["command"] for s in starts) - assert any("agnes pull" in s["hooks"][0]["command"] for s in starts) + starts = _commands_for(cfg, "SessionStart") + # Third-party entry stays + both agnes entries get added. + assert len(starts) == 3 + assert any("echo hi from another tool" in c for c in starts) + assert any("agnes pull" in c for c in starts) + assert any("agnes refresh-marketplace" in c for c in starts) + # Other event types untouched. assert cfg["hooks"]["PreToolUse"][0]["hooks"][0]["command"] == "echo pre" @@ -79,20 +146,29 @@ def test_install_handles_invalid_json(tmp_path, capsys): def test_install_chains_self_upgrade_then_pull_in_one_entry(tmp_path): install_claude_hooks(tmp_path) cfg = _read_settings(tmp_path) - session_start = cfg["hooks"]["SessionStart"] - assert len(session_start) == 1, session_start - cmd = session_start[0]["hooks"][0]["command"] - assert "agnes self-upgrade --quiet" in cmd - assert "agnes pull --quiet" in cmd + starts = _commands_for(cfg, "SessionStart") + # SessionStart has two entries: the chain (self-upgrade + pull) and + # the standalone refresh-marketplace. This test pins the chain + # invariant — order, both `|| true`-guarded — independent of the + # refresh-marketplace entry being present. + chain = next( + (c for c in starts if "agnes self-upgrade" in c and "agnes pull" in c), + None, + ) + assert chain is not None, starts + assert "agnes self-upgrade --quiet" in chain + assert "agnes pull --quiet" in chain # Order is encoded in the shell — self-upgrade must appear first - assert cmd.index("agnes self-upgrade") < cmd.index("agnes pull") + assert chain.index("agnes self-upgrade") < chain.index("agnes pull") # Both segments carry || true so neither failure aborts the line - assert cmd.count("|| true") >= 2 + assert chain.count("|| true") >= 2 def test_install_idempotent_chained_entry(tmp_path): install_claude_hooks(tmp_path) install_claude_hooks(tmp_path) cfg = _read_settings(tmp_path) - assert len(cfg["hooks"]["SessionStart"]) == 1 + # Two SessionStart entries (chained self-upgrade+pull plus refresh- + # marketplace) — re-install must not duplicate either. + assert len(cfg["hooks"]["SessionStart"]) == 2 assert len(cfg["hooks"]["SessionEnd"]) == 1 diff --git a/tests/test_setup_instructions.py b/tests/test_setup_instructions.py index f4ab548..7f2c884 100644 --- a/tests/test_setup_instructions.py +++ b/tests/test_setup_instructions.py @@ -78,7 +78,7 @@ def test_resolve_lines_no_plugins_unified_six_step_layout(): # report on phantom steps. assert "step 0(d)" not in joined assert "Which CA bundle source got picked" not in joined - assert "Whether the marketplace add went via direct HTTPS" not in joined + assert "~/.agnes/marketplace/.git/" not in joined # Legacy admin-only auth verbs are gone — `agnes init` subsumes them. assert "agnes auth import-token" not in joined assert "agnes auth whoami" not in joined @@ -109,7 +109,7 @@ def test_preamble_step_zero_d_reference_only_when_trust_block_emitted(): def test_finale_bullets_match_emitted_steps(): """The Confirm step's bullets must reference only steps that were actually emitted. CA bundle bullet only when has_ca=True; marketplace - direct-vs-clone bullet only when plugins are configured.""" + clone bullet only when plugins are configured.""" from app.web.setup_instructions import resolve_lines fake_ca = ( @@ -121,19 +121,19 @@ def test_finale_bullets_match_emitted_steps(): # No ca, no plugins: neither bullet present. plain = "\n".join(resolve_lines("agnes.whl")) assert "Which CA bundle source got picked" not in plain - assert "Whether the marketplace add went via direct HTTPS" not in plain + assert "~/.agnes/marketplace/.git/" not in plain # ca only: CA bullet yes, marketplace bullet no. ca_only = "\n".join(resolve_lines("agnes.whl", ca_pem=fake_ca)) assert "Which CA bundle source got picked" in ca_only - assert "Whether the marketplace add went via direct HTTPS" not in ca_only + assert "~/.agnes/marketplace/.git/" not in ca_only # plugins only: marketplace bullet yes, CA bullet no. pl_only = "\n".join( resolve_lines("agnes.whl", plugin_install_names=["foo"], server_host="h") ) assert "Which CA bundle source got picked" not in pl_only - assert "Whether the marketplace add went via direct HTTPS" in pl_only + assert "~/.agnes/marketplace/.git/" in pl_only # Both: both bullets present. both = "\n".join( @@ -145,46 +145,7 @@ def test_finale_bullets_match_emitted_steps(): ) ) assert "Which CA bundle source got picked" in both - assert "Whether the marketplace add went via direct HTTPS" in both - - -def test_marketplace_block_redetects_platform_for_self_containment(): - """Marketplace `case "$PLATFORM" in` would silently fall through to the - `*)` catch-all on every platform if `$PLATFORM` from step 0 isn't in - the current shell — which the prompt itself warns about - ("env vars do NOT persist between separate Bash invocations"). Linux - would then never get the direct-HTTPS attempt the comment promises. - The marketplace block must therefore re-detect $PLATFORM via uname - before its case statement, mirroring step 0(a).""" - from app.web.setup_instructions import resolve_lines - - fake_ca = ( - "-----BEGIN CERTIFICATE-----\n" - "FAKE\n" - "-----END CERTIFICATE-----\n" - ) - joined = "\n".join( - resolve_lines( - "agnes.whl", - plugin_install_names=["foo"], - server_host="agnes.example.com", - ca_pem=fake_ca, - ) - ) - # Locate the marketplace section. - section_idx = joined.index("Register the Agnes Claude Code marketplace") - section = joined[section_idx:] - - # Re-detection block must appear BEFORE the `case "$PLATFORM" in` - # check so the variable is set when the case runs. - redetect_idx = section.index('case "$(uname -s)" in') - platform_case_idx = section.index('case "$PLATFORM" in') - assert redetect_idx < platform_case_idx - # All three platform branches must be covered (same shape as step 0(a)). - redetect_block = section[redetect_idx:platform_case_idx] - assert "Darwin" in redetect_block and "PLATFORM=macos" in redetect_block - assert "Linux" in redetect_block and "PLATFORM=linux" in redetect_block - assert "MINGW*|MSYS*|CYGWIN*" in redetect_block and "PLATFORM=windows" in redetect_block + assert "~/.agnes/marketplace/.git/" in both def test_trust_block_rc_heredoc_writes_exactly_8_lines(): @@ -299,14 +260,25 @@ def test_resolve_lines_with_plugins_uses_install_first_diagnose_last_layout(): assert "brew install git" in joined assert "winget install --id Git.Git -e --source winget --silent" in joined assert "sudo apt-get install git" in joined or "sudo dnf install git" in joined - # Step 5 — marketplace + plugins. + # Step 5 — marketplace + plugins. Collapsed to a single CLI call: + # `agnes refresh-marketplace --bootstrap` does clone + PAT-strip + + # chmod + register-with-Claude + auto-install-from-manifest internally. + # Pulling that out of the inline shell script avoided Claude Code's + # agent-driven `rm -rf` permission gate that the old multi-line + # sequence tripped on. assert "5) Register the Agnes Claude Code marketplace and install plugins" in joined - assert ( - 'claude plugin marketplace add "https://x:{token}@agnes.example.com/marketplace.git/"' - in joined - ) - assert "claude plugin install foo@agnes --scope project" in joined - assert "claude plugin install bar@agnes --scope project" in joined + assert "agnes refresh-marketplace --bootstrap" in joined + # The destructive prep + per-plugin install commands are now inside + # the CLI; the prompt must not emit the inline shell forms in + # operator-runnable lines (comment lines documenting what the CLI + # does internally are fine — they're prose, not commands). + executable = _executable_lines(joined) + assert "rm -rf ~/.agnes/marketplace" not in executable + assert "git clone " not in executable + assert "git remote set-url origin" not in executable + assert "claude plugin marketplace add" not in executable + assert "claude plugin install foo@agnes" not in executable + assert "claude plugin install bar@agnes" not in executable # Step 6 — diagnose now AFTER marketplace (used to be step 4 right after whoami). assert "6) Run diagnostics:" in joined # Step 7 — skills, the last interactive step before Confirm. @@ -422,12 +394,17 @@ def test_render_setup_instructions_with_plugins_substitutes_all_placeholders(): assert "{token}" not in out assert "{wheel_filename}" not in out assert "{server_host}" not in out - # Token leaks into both the auth-import-token line and the marketplace URL. + # Token still appears for `agnes init` (step 2). The marketplace + # step uses `agnes refresh-marketplace --bootstrap` which reads the + # token from the agnes config that step 2 just wrote, so no token + # in any URL inside step 5. assert "T-XYZ" in out - assert "https://x:T-XYZ@agnes.example.com/marketplace.git/" in out + # Self-signed TLS line is host-scoped to server_url. assert 'git config --global http."https://agnes.example.com/".sslVerify false' in out - assert "claude plugin install foo@agnes --scope project" in out - assert "claude plugin install bar@agnes --scope project" in out + # Marketplace step is the one-liner; no per-plugin install lines. + assert "agnes refresh-marketplace --bootstrap" in out + assert "claude plugin install foo@agnes" not in out + assert "claude plugin install bar@agnes" not in out _FAKE_CA_PEM = ( @@ -541,101 +518,35 @@ def test_resolve_lines_with_ca_pem_switches_step_one_to_curl_then_local_install( assert "uv tool install --native-tls" not in joined_plain -def test_resolve_lines_with_ca_pem_marketplace_is_platform_aware(): - """When ca_pem is set + plugins requested, step 5 emits a platform branch: - Linux → try direct HTTPS first, fall back to git clone on failure - (node-based claude honors NODE_EXTRA_CA_CERTS); - Windows + macOS → straight to git-clone fallback (Bun-compiled claude - binary ignores OS trust store and CA env vars on both platforms).""" - from app.web.setup_instructions import resolve_lines - - joined = "\n".join( - resolve_lines( - "agnes.whl", - plugin_install_names=["foo"], - server_host="agnes.example.com", - ca_pem=_FAKE_CA_PEM, - ) - ) - # The platform branch + MARKETPLACE_VIA selector. - assert "MARKETPLACE_VIA=clone" in joined - assert "MARKETPLACE_VIA=direct" in joined - # Locate the marketplace step's case block specifically — there is - # ALSO a `case "$PLATFORM" in` block in step 0(c) (OS trust store - # registration), so we anchor on the marketplace section header to - # narrow the slice. - section_idx = joined.index("Register the Agnes Claude Code marketplace") - market_case_idx = joined.index('case "$PLATFORM" in', section_idx) - market_esac_idx = joined.index("esac", market_case_idx) - branch_block = joined[market_case_idx:market_esac_idx] - assert "linux)" in branch_block - # Direct attempt only in the linux branch. - assert ( - 'claude plugin marketplace add "https://x:{token}@agnes.example.com/marketplace.git/" 2>/dev/null' - in branch_block - ) - # The default `*)` branch must hard-set clone (no direct attempt). - star_idx = branch_block.index("*)") - star_branch = branch_block[star_idx:] - assert "MARKETPLACE_VIA=clone" in star_branch - assert "claude plugin marketplace add" not in star_branch - # Git-clone fallback writes to ~/.agnes/marketplace and adds it as a local path. - assert 'git clone "https://x:{token}@agnes.example.com/marketplace.git/" ~/.agnes/marketplace' in joined - assert "claude plugin marketplace add ~/.agnes/marketplace" in joined - # Harmless credential-manager-core warning is called out. - assert "credential-manager-core" in joined - # Plugin install line stays unchanged (errors checked in a sibling test). - assert "claude plugin install foo@agnes --scope project" in joined +def _executable_lines(section: str) -> str: + """Strip shell comment lines so 'not in' assertions match against + operator-runnable code, not the prose documentation we put in + comments. A line is a comment when its first non-whitespace character + is `#`.""" + out: list[str] = [] + for line in section.splitlines(): + if line.lstrip().startswith("#"): + continue + out.append(line) + return "\n".join(out) -def test_resolve_lines_with_ca_pem_marketplace_strips_pat_after_clone(): - """After `git clone https://x:@host/...`, the cloned repo's - `.git/config` holds the PAT in plaintext at `[remote "origin"] url`. - On default home setups that file syncs to iCloud/OneDrive and gets - read by antivirus / sync agents. The marketplace step must run - `git remote set-url origin ` after clone, plus a - best-effort chmod tighten. claude registers the *local path* (not the - remote URL), so stripping the token doesn't break marketplace - registration — refreshes go via re-running setup with a fresh PAT.""" - from app.web.setup_instructions import resolve_lines +def test_resolve_lines_with_ca_pem_marketplace_is_one_liner(): + """Step 5 collapses to a single CLI invocation: `agnes refresh-marketplace + --bootstrap`. The CLI does clone + PAT-strip + chmod + register-with-Claude + + auto-install internally so the prompt itself emits no `rm -rf`, no + `git clone`, no per-plugin install lines. - joined = "\n".join( - resolve_lines( - "agnes.whl", - plugin_install_names=["foo"], - server_host="agnes.example.com", - ca_pem=_FAKE_CA_PEM, - ) - ) - # Token-bearing clone line still exists (we need the token to authenticate - # the initial clone) but a token-less remote set-url line follows. - clone_idx = joined.index( - 'git clone "https://x:{token}@agnes.example.com/marketplace.git/"' - ) - set_url_idx = joined.index( - 'git -C ~/.agnes/marketplace remote set-url origin "https://agnes.example.com/marketplace.git/"' - ) - add_idx = joined.index("claude plugin marketplace add ~/.agnes/marketplace") - assert clone_idx < set_url_idx < add_idx - # Token-less URL must NOT contain the placeholder or `x:` prefix. - set_url_line_end = joined.index("\n", set_url_idx) - set_url_line = joined[set_url_idx:set_url_line_end] - assert "{token}" not in set_url_line - assert "x:" not in set_url_line + The motivation is the Claude Code agent permission gate: when a user + pastes the install prompt into a Claude Code session, the agent that + executes it is denied `rm -rf` by default. Pulling the destructive + prep into the agnes binary (which uses Python `shutil.rmtree`, not + the `rm -rf` shell pattern) lets the CLI's own permission grant cover + the cleanup — the prompt stays Claude-Code-friendly. - # Best-effort chmod tighten — wrapped in `|| true` so MSYS / Git Bash - # on Windows (where chmod is a no-op against NTFS ACLs) doesn't fail - # the step. - assert "chmod 700 ~/.agnes/marketplace ~/.agnes/marketplace/.git" in joined - assert "chmod 600 ~/.agnes/marketplace/.git/config" in joined - assert "|| true" in joined - - -def test_resolve_lines_with_ca_pem_marketplace_has_explicit_error_handling(): - """Each shell-out in the marketplace block must fail loudly with `exit 1` - on a non-zero exit, not silently fall through to the next step. Without - this, a failed `git clone` causes a confusing 'marketplace 'agnes' not - found' error from the subsequent `claude plugin install`.""" + Direct HTTPS via `claude plugin marketplace add ` is broken + end-to-end on every Claude Code distribution (see _marketplace_block + docstring), so we never emit it as an alternative.""" from app.web.setup_instructions import resolve_lines joined = "\n".join( @@ -646,19 +557,45 @@ def test_resolve_lines_with_ca_pem_marketplace_has_explicit_error_handling(): ca_pem=_FAKE_CA_PEM, ) ) - # git clone has an `|| { ... exit 1 }` guard. - assert ( - 'git clone "https://x:{token}@agnes.example.com/marketplace.git/" ' - '~/.agnes/marketplace || {' - ) in joined - # `claude plugin marketplace add ~/.agnes/marketplace` (the local path - # one — not the chmod best-effort lines) has its own guard. - assert "claude plugin marketplace add ~/.agnes/marketplace || {" in joined - # Each `claude plugin install @agnes` has its own guard so we know - # which plugin failed. - assert "claude plugin install foo@agnes --scope project || {" in joined - assert "claude plugin install bar@agnes --scope project || {" in joined - # Error messages are written to stderr, not stdout. + # The marketplace step contains the one-liner. + assert "agnes refresh-marketplace --bootstrap" in joined + # And nothing else relating to the marketplace install — the inline + # shell sequence has been pulled into the CLI. We strip comment lines + # before asserting because the prompt does include a comment block + # describing what the CLI does internally; that prose is documentation, + # not operator-runnable code. + section_idx = joined.index("Register the Agnes Claude Code marketplace") + section = _executable_lines(joined[section_idx:]) + assert "rm -rf ~/.agnes/marketplace" not in section + assert "git clone " not in section + assert "git -C ~/.agnes/marketplace remote set-url" not in section + assert "chmod 700 ~/.agnes/marketplace" not in section + assert "claude plugin marketplace add" not in section + assert "claude plugin install foo@agnes" not in section + assert "claude plugin install bar@agnes" not in section + # And no platform-aware switch in the marketplace section (there's + # still one in step 0(c) for OS trust-store registration; we anchored + # on the marketplace header above to narrow the slice). + assert 'case "$PLATFORM"' not in section + assert "MARKETPLACE_VIA=" not in section + + +def test_resolve_lines_with_ca_pem_marketplace_has_explicit_error_handling(): + """The marketplace one-liner must still fail loudly with `exit 1` on + a non-zero exit (so a CLI bootstrap failure blocks downstream steps + instead of letting them silently misbehave).""" + from app.web.setup_instructions import resolve_lines + + joined = "\n".join( + resolve_lines( + "agnes.whl", + plugin_install_names=["foo", "bar"], + server_host="agnes.example.com", + ca_pem=_FAKE_CA_PEM, + ) + ) + assert "agnes refresh-marketplace --bootstrap || {" in joined + # Error message goes to stderr. assert ">&2" in joined @@ -695,8 +632,8 @@ def test_resolve_lines_with_ca_pem_suppresses_legacy_sslverify_line(): ) # Legacy git-config sslVerify=false downgrade is suppressed when ca_pem is set. assert "git config --global" not in joined - # But the marketplace step itself still renders. - assert "claude plugin install foo@agnes --scope project" in joined + # But the marketplace step itself still renders (as the one-liner). + assert "agnes refresh-marketplace --bootstrap" in joined # And the trust block is present. assert "0) Trust the Agnes TLS certificate" in joined diff --git a/tests/test_setup_page_unified.py b/tests/test_setup_page_unified.py index 617da95..1b523d2 100644 --- a/tests/test_setup_page_unified.py +++ b/tests/test_setup_page_unified.py @@ -99,8 +99,11 @@ def test_setup_page_renders_marketplace_for_user_with_grants(client, monkeypatch assert resp.status_code == 200 text = resp.text - # Marketplace block markers. - assert "claude plugin install demo-plugin@agnes" in text + # Marketplace block marker. The per-plugin install lines moved inside + # `agnes refresh-marketplace --bootstrap`, so we check the section + # header + the one-liner instead of `claude plugin install @agnes`. + assert "Register the Agnes Claude Code marketplace" in text + assert "agnes refresh-marketplace --bootstrap" in text # Layout shift: Confirm is now step 8 (was 6 without marketplace). assert "8) Confirm:" in text # Pre-flight is in the rendered prompt at step 4. From 166c1c0752f564bc346eaacb3f9a0e2238d7cb66 Mon Sep 17 00:00:00 2001 From: Minas Arustamyan Date: Thu, 7 May 2026 01:48:37 +0200 Subject: [PATCH 2/6] fix(refresh-marketplace): pass --scope project to `claude plugin update` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without `--scope project`, `claude plugin update @agnes` operated at user scope (the default) instead of updating the project-scoped install — so version bumps in the served manifest never propagated to the workspace, even though `claude plugin install` correctly used `--scope project` for the missing-plugin path. Mirrors the install line in the same function. Any change refresh- marketplace makes to a plugin must now stay in project scope — consistent with the SessionStart hook firing per-workspace. --- cli/commands/refresh_marketplace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/commands/refresh_marketplace.py b/cli/commands/refresh_marketplace.py index 499758e..ea7d152 100644 --- a/cli/commands/refresh_marketplace.py +++ b/cli/commands/refresh_marketplace.py @@ -361,7 +361,7 @@ def _reconcile_with_manifest( for name in to_update: target = f"{name}@{MARKETPLACE_NAME}" result = subprocess.run( - ["claude", "plugin", "update", target], + ["claude", "plugin", "update", target, "--scope", "project"], capture_output=True, text=True, encoding="utf-8", errors="replace", check=False, ) if result.returncode != 0: From 3aeb0f2fbdda9e81f7c3cfea5a36e3e05a63abd6 Mon Sep 17 00:00:00 2001 From: Minas Arustamyan Date: Thu, 7 May 2026 02:28:34 +0200 Subject: [PATCH 3/6] fix(refresh-marketplace): use /reload-plugins instead of /exit + restart Claude Code's `/reload-plugins` slash command picks up newly installed plugins into the running session without forcing the user to /exit and restart Claude Code. The hook JSON `systemMessage` and `additionalContext` both now point at it. Tests updated to pin the new hint shape. --- cli/commands/refresh_marketplace.py | 8 ++++---- tests/test_cli_refresh_marketplace.py | 10 ++++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/cli/commands/refresh_marketplace.py b/cli/commands/refresh_marketplace.py index ea7d152..8245904 100644 --- a/cli/commands/refresh_marketplace.py +++ b/cli/commands/refresh_marketplace.py @@ -383,8 +383,8 @@ def _emit_hook_message(events: dict[str, list[str]]) -> None: `systemMessage` is a transient toast (often missed). `additionalContext` is wrapped in a system reminder Claude reads at session start, so the model can mention the change if it's relevant to the user's first ask. - Plugins require a Claude Code restart — they land on disk this session - but only load on next session start. + Plugins land on disk during the hook; `/reload-plugins` loads them into + the running session without a restart. """ parts: list[str] = [] if events["installed"]: @@ -399,8 +399,8 @@ def _emit_hook_message(events: dict[str, list[str]]) -> None: ) summary = "Your Agnes stack changed: " + "; ".join(parts) + "." restart_hint = ( - "Run `/exit` and then `claude` again to load the changes — " - "Claude Code only picks up new/updated plugins on session start." + "Run `/reload-plugins` to load the changes into this session — " + "no restart needed." ) payload = { "systemMessage": f"{summary} {restart_hint}", diff --git a/tests/test_cli_refresh_marketplace.py b/tests/test_cli_refresh_marketplace.py index 08009e5..53ab1e1 100644 --- a/tests/test_cli_refresh_marketplace.py +++ b/tests/test_cli_refresh_marketplace.py @@ -464,17 +464,15 @@ def test_quiet_emits_hook_json_when_plugin_installed( assert "grpn-fin" in payload["systemMessage"] assert "Agnes stack" in payload["systemMessage"] assert "installed" in payload["systemMessage"] - # Restart hint: plugins land on disk this session but only load on - # next session start, so the user must /exit + restart. - assert "/exit" in payload["systemMessage"] - assert "session start" in payload["systemMessage"].lower() + # Reload hint: `/reload-plugins` loads the on-disk plugins into the + # running Claude Code session without a full restart. + assert "/reload-plugins" in payload["systemMessage"] hook_specific = payload.get("hookSpecificOutput", {}) assert hook_specific.get("hookEventName") == "SessionStart" additional = hook_specific.get("additionalContext", "") assert "grpn-fin" in additional - assert "/exit" in additional - assert "session start" in additional.lower() + assert "/reload-plugins" in additional def test_manual_mode_prints_restart_hint_when_anything_changed( From cd10aefdbdafa206715aa577040adf3c08cfeb1a Mon Sep 17 00:00:00 2001 From: Minas Arustamyan Date: Thu, 7 May 2026 02:29:58 +0200 Subject: [PATCH 4/6] fix(refresh-marketplace): align manual-mode hint with hook JSON Hook JSON path uses /reload-plugins (no restart needed); manual-mode echo path was still telling the operator to /exit + restart. Both now say /reload-plugins. Tests renamed to *_reload_hint_* to match the new wording. --- cli/commands/refresh_marketplace.py | 5 ++--- tests/test_cli_refresh_marketplace.py | 17 +++++++---------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/cli/commands/refresh_marketplace.py b/cli/commands/refresh_marketplace.py index 8245904..c8cb180 100644 --- a/cli/commands/refresh_marketplace.py +++ b/cli/commands/refresh_marketplace.py @@ -113,9 +113,8 @@ def refresh_marketplace( _emit_hook_message(events) elif not quiet and (events["installed"] or events["updated"]): typer.echo( - "\nRestart Claude Code (`/exit`, then `claude`) to load the " - "new/updated plugins — they're on disk now but Claude only " - "picks them up on session start." + "\nRun `/reload-plugins` in Claude Code to load the " + "new/updated plugins into the running session — no restart needed." ) diff --git a/tests/test_cli_refresh_marketplace.py b/tests/test_cli_refresh_marketplace.py index 53ab1e1..73e27c0 100644 --- a/tests/test_cli_refresh_marketplace.py +++ b/tests/test_cli_refresh_marketplace.py @@ -475,13 +475,13 @@ def test_quiet_emits_hook_json_when_plugin_installed( assert "/reload-plugins" in additional -def test_manual_mode_prints_restart_hint_when_anything_changed( +def test_manual_mode_prints_reload_hint_when_anything_changed( with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, ): """When `agnes refresh-marketplace` runs without --quiet AND something actually got installed/updated, the operator needs to know they should - /exit + restart Claude Code for the change to take effect (Claude only - scans plugins at session start). Print the hint at end of run.""" + `/reload-plugins` in Claude Code to pick up the change. Print the hint + at end of run.""" workspace = tmp_path / "ws" workspace.mkdir() monkeypatch.chdir(workspace) @@ -492,15 +492,14 @@ def test_manual_mode_prints_restart_hint_when_anything_changed( result = runner.invoke(refresh_marketplace_app, []) assert result.exit_code == 0 out = _clean(result.output) - assert "Restart Claude Code" in out or "restart" in out.lower() - assert "/exit" in out + assert "/reload-plugins" in out -def test_manual_mode_no_change_does_not_print_restart_hint( +def test_manual_mode_no_change_does_not_print_reload_hint( with_clone, with_token, claude_in_path, recorder, monkeypatch, tmp_path, ): """Manual `agnes refresh-marketplace` over an already-up-to-date stack - must NOT spam the restart hint — there's nothing to restart for.""" + must NOT spam the reload hint — there's nothing to reload for.""" workspace = tmp_path / "ws" workspace.mkdir() monkeypatch.chdir(workspace) @@ -514,9 +513,7 @@ def test_manual_mode_no_change_does_not_print_restart_hint( result = runner.invoke(refresh_marketplace_app, []) assert result.exit_code == 0 out = _clean(result.output) - # The restart hint sentence specifically — not the substring "restart" - # which might appear elsewhere benignly. - assert "Restart Claude Code" not in out + assert "/reload-plugins" not in out def test_quiet_emits_hook_json_when_bundle_silently_auto_updated_by_claude( From bb36a69b1e71b8c334f9e5260d9fffc65cdd7b24 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Thu, 7 May 2026 06:22:56 +0200 Subject: [PATCH 5/6] release: 0.44.0 --- CHANGELOG.md | 40 ++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f43ff6..9f7f01e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,46 @@ CalVer image tags (`stable-YYYY.MM.N`, `dev-YYYY.MM.N`) are produced for every C ## [Unreleased] +## [0.44.0] — 2026-05-07 + +### Added +- `agnes refresh-marketplace` — single CLI command that owns the per-user + filtered Claude Code marketplace lifecycle. `--bootstrap` does the + first-time setup: clones the per-user marketplace bare repo to + `~/.agnes/marketplace`, strips the PAT from the cloned origin URL so it + doesn't sit in plaintext at rest, registers the local path with Claude + Code, and installs every plugin in the served manifest at + `--scope project`. Without `--bootstrap` it does an incremental refresh: + fetch + reset to the remote, then version-aware reconcile (install missing + plugins, update on version diff, skip on match). Plugins removed from the + manifest are deliberately NOT auto-uninstalled — a transient empty manifest + from the server would otherwise wipe the user's stack. +- `agnes init` now installs a SessionStart hook that runs + `agnes refresh-marketplace --quiet` on every Claude Code session, + alongside the existing chained `agnes self-upgrade; agnes pull` entry. + The marketplace refresh runs as a *separate* hook entry (not chained) + so a failure (e.g. fresh workspace with no clone yet) doesn't suppress + the data pull. The refresh command is wrapped in `bash -c "..."` + because Claude Code on Windows runs hook commands directly without a + shell, which would otherwise leave the `2>/dev/null || true` syntax + uninterpreted. +- When `agnes refresh-marketplace` detects an actual change, it emits + Claude Code hook JSON on stdout — `systemMessage` (transient toast) + and `additionalContext` (model-side system reminder) — both pointing + at `/reload-plugins` so the running session loads new plugins without + a restart. + +### Changed +- Install-prompt step 5 (in the dashboard-served setup payload) collapses + from a 15-line inline shell sequence — `rm -rf` + `git clone` + per-plugin + `claude plugin install` calls — to a single `agnes refresh-marketplace + --bootstrap` invocation. The old inline form tripped Claude Code's agent + `rm -rf` permission gate on first run. +- `scripts/dev/agnes-client-reset.sh`: now cleans + `~/.claude/plugins/{marketplaces,cache}/agnes`, drops the uv build cache, + and documents workspace-scoped residue that can't be enumerated from a + user-level reset. + ### Internal - `infra/modules/customer-instance` (tag `infra-v1.7.0`): `google_compute_instance.vm` now sets `allow_stopping_for_update = true`. Without it, changing `machine_type` (or any other field GCP will only mutate on a stopped VM) caused Terraform to fall back to a destroy + recreate, churning VM-local state for what should be an in-place resize. Consumers do not need to update — the field is provider-side only — but bumping the module ref to `infra-v1.7.0` enables in-place machine-type bumps. diff --git a/pyproject.toml b/pyproject.toml index d74f25b..7267555 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "agnes-the-ai-analyst" -version = "0.43.0" +version = "0.44.0" description = "Agnes — AI Data Analyst platform for AI analytical systems" requires-python = ">=3.11,<3.14" license = "MIT" From d3e8d29cfb8aea478ef6fc8b609ceb1661100909 Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Thu, 7 May 2026 06:42:48 +0200 Subject: [PATCH 6/6] =?UTF-8?q?test(hooks):=20pin=20v0.43.0=20chained-entr?= =?UTF-8?q?y=20=E2=86=92=20v0.44.0=20two-entry=20upgrade=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_lib_hooks.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/test_lib_hooks.py b/tests/test_lib_hooks.py index 7ee8d36..b64c759 100644 --- a/tests/test_lib_hooks.py +++ b/tests/test_lib_hooks.py @@ -108,6 +108,46 @@ def test_install_replaces_prior_single_pull_entry(tmp_path): assert any("agnes refresh-marketplace" in c for c in starts) +def test_install_replaces_v0_43_chained_self_upgrade_pull_entry(tmp_path): + """Workspaces bootstrapped on v0.43.0 had a single SessionStart entry + chaining `agnes self-upgrade; agnes pull` in one shell line. Upgrading + those workspaces to v0.44.0+ must collapse that entry and re-install + the new two-entry layout — not stack the v0.44 entries on top of the + v0.43 chained one (which would re-run self-upgrade twice on every + session and leave the old format around forever). + """ + settings_path = tmp_path / ".claude" / "settings.json" + settings_path.parent.mkdir(parents=True) + settings_path.write_text(json.dumps({ + "hooks": { + "SessionStart": [ + {"hooks": [{"type": "command", "command": ( + "agnes self-upgrade --quiet 2>/dev/null || true; " + "agnes pull --quiet 2>/dev/null || true" + )}]}, + ], + "SessionEnd": [ + {"hooks": [{"type": "command", "command": "agnes push --quiet 2>/dev/null || true"}]}, + ], + } + })) + install_claude_hooks(tmp_path) + cfg = _read_settings(tmp_path) + starts = _commands_for(cfg, "SessionStart") + # Exactly two entries — the v0.43 chained line was replaced, not stacked. + assert len(starts) == 2, starts + chain = next( + (c for c in starts if "agnes self-upgrade" in c and "agnes pull" in c), + None, + ) + assert chain is not None + assert any("agnes refresh-marketplace" in c for c in starts) + # SessionEnd untouched (single push entry). + ends = _commands_for(cfg, "SessionEnd") + assert len(ends) == 1 + assert "agnes push --quiet" in ends[0] + + def test_install_preserves_third_party_hooks(tmp_path): settings_path = tmp_path / ".claude" / "settings.json" settings_path.parent.mkdir(parents=True)