From 9b70ca3069d6df6fe14150c9aeba668daafbe23e Mon Sep 17 00:00:00 2001 From: ZdenekSrotyr Date: Mon, 4 May 2026 18:15:08 +0200 Subject: [PATCH] feat(cli): agnes init orchestrator + AGNES_WORKSPACE.md template --- cli/commands/init.py | 218 ++++++++++++++++++++++++++++ config/agnes_workspace_template.txt | 111 ++++++++++++++ tests/test_cli_init.py | 133 +++++++++++++++++ 3 files changed, 462 insertions(+) create mode 100644 cli/commands/init.py create mode 100644 config/agnes_workspace_template.txt create mode 100644 tests/test_cli_init.py diff --git a/cli/commands/init.py b/cli/commands/init.py new file mode 100644 index 0000000..2aa650f --- /dev/null +++ b/cli/commands/init.py @@ -0,0 +1,218 @@ +"""`agnes init` — bootstrap an analyst workspace. + +Single-paste flow: web user clicks "Generate prompt" on /setup?role=analyst, +pastes into Claude Code in an empty folder; Claude runs `agnes init` (among +other steps). Non-interactive: --token + --server-url required. + +Steps in order: +1. Detect existing workspace (`CLAUDE.md` containing the init marker) — exit 1 + unless --force, with a typed `partial_state` error. +2. Verify the PAT via `GET /api/catalog/tables` — typed `auth_failed` on 401, + `server_unreachable` on network error. +3. Persist server URL + PAT to `~/.config/agnes/` so subsequent `agnes pull` / + `agnes push` invocations (including the SessionStart/End hooks installed + below) inherit the credentials without env vars. +4. Fetch the rendered CLAUDE.md from `GET /api/welcome` (server-rendered, + RBAC-filtered, role-aware). +5. Seed `.claude/settings.json` with default model + permissions, then call + `cli.lib.hooks.install_claude_hooks` to merge in the SessionStart/End hook + commands. Idempotent on re-run. +6. Write the `.claude/CLAUDE.local.md` stub only when absent — `--force` + regenerates CLAUDE.md but **never** clobbers the operator-edited + CLAUDE.local.md. +7. 
Run the first `cli.lib.pull.run_pull` so the workspace ships with current + parquets, DuckDB views, and the corporate-memory bundle. +8. Render `AGNES_WORKSPACE.md` from `config/agnes_workspace_template.txt` — + client-side template, three placeholders. + +Errors render via `cli/error_render.py:render_error` with typed `kind` values +(`auth_failed`, `server_unreachable`, `partial_state`, `manifest_unauthorized`) +matching the rest of the CLI surface. + +Task 18 will register `init_app` on the root Typer app. +""" + +from __future__ import annotations + +import json +import os +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional + +import typer + +from cli.client import api_get +from cli.config import save_config, save_token +from cli.error_render import render_error +from cli.lib.hooks import install_claude_hooks +from cli.lib.pull import PullResult, _override_server_env, run_pull + + +# Substring that flags an already-bootstrapped workspace. The current default +# CLAUDE.md template renders `# {{ instance.name }} — AI Data Analyst` so this +# appears in every server-rendered CLAUDE.md. Operators who use a custom admin +# template can override this via the `--force` flag. 
# Substring whose presence in an existing CLAUDE.md marks the workspace as
# already bootstrapped (checked in step 1 of `init`).
_INIT_MARKER = "AI Data Analyst"

# Used only when the shipped template file cannot be read. Carries the same
# three placeholders that `_render_workspace_md` substitutes.
_FALLBACK_WORKSPACE_TEMPLATE = (
    "# Agnes workspace\n\n"
    "Created: {created_at}\n"
    "Server: {server_url}\n"
    "Workspace: {workspace_path}\n"
)


init_app = typer.Typer(help="Bootstrap an analyst workspace in this directory")


def _fail(kind: str, hint: str, *, status: int = 0, message: Optional[str] = None) -> None:
    """Print a typed error to stderr and abort the command with exit code 1.

    Args:
        kind: Typed error kind placed in the ``detail`` payload.
        hint: Human-readable remediation hint.
        status: First argument passed to ``render_error`` (0 when no HTTP
            status applies).
        message: Optional underlying error text; the ``message`` key is only
            added to the payload when this is given.

    Raises:
        typer.Exit: Always, with code 1.
    """
    detail = {"kind": kind, "hint": hint}
    if message is not None:
        detail["message"] = message
    typer.echo(render_error(status, {"detail": detail}), err=True)
    raise typer.Exit(1)


def _is_initialized(claude_md: Path) -> bool:
    """Return True when *claude_md* is readable and contains the init marker."""
    try:
        # errors="replace" keeps a non-UTF-8 file from raising
        # UnicodeDecodeError; the marker is plain ASCII so detection still works.
        existing = claude_md.read_text(encoding="utf-8", errors="replace")
    except OSError:
        # Missing or unreadable file: treated as "not initialized".
        return False
    return _INIT_MARKER in existing


def _verify_token(server_url: str, token: str) -> None:
    """Check the PAT with ``GET /api/catalog/tables``; exit with a typed error on failure."""
    try:
        # `_override_server_env` makes the explicit server/token arguments
        # visible to `api_get` for the duration of the call only.
        with _override_server_env(server_url, token):
            resp = api_get("/api/catalog/tables")
            if resp.status_code == 401:
                _fail(
                    "auth_failed",
                    f"Token expired or invalid — get a fresh one at {server_url}/setup?role=analyst",
                    status=401,
                )
            resp.raise_for_status()
    except typer.Exit:
        # Let the typed exit raised above (or by the callee) pass through
        # instead of being re-labelled as a connectivity problem.
        raise
    except Exception as exc:
        _fail(
            "server_unreachable",
            f"Cannot reach {server_url} — check network or server status",
            message=str(exc),
        )


def _fetch_claude_md(server_url: str, token: str) -> str:
    """Return the server-rendered CLAUDE.md text from ``GET /api/welcome``."""
    try:
        with _override_server_env(server_url, token):
            welcome_resp = api_get("/api/welcome", params={"server_url": server_url})
            welcome_resp.raise_for_status()
        # Decoding happens inside the try so a malformed or non-object payload
        # is reported as a typed error rather than a traceback.
        content = welcome_resp.json().get("content", "")
        if not isinstance(content, str):
            raise TypeError(f"unexpected 'content' type: {type(content).__name__}")
    except Exception as exc:
        _fail(
            "server_unreachable",
            "Failed to fetch CLAUDE.md from /api/welcome",
            message=str(exc),
        )
    return content


def _seed_settings_and_hooks(workspace: Path) -> None:
    """Write default ``.claude/settings.json`` when absent, then install the hooks."""
    settings_path = workspace / ".claude" / "settings.json"
    if not settings_path.exists():
        settings_path.parent.mkdir(parents=True, exist_ok=True)
        settings_path.write_text(
            json.dumps(
                {"model": "sonnet", "permissions": {"allow": ["Read", "Bash", "Grep", "Glob"]}},
                indent=2,
            ),
            encoding="utf-8",
        )
    install_claude_hooks(workspace)


def _ensure_local_notes(workspace: Path) -> None:
    """Create the ``.claude/CLAUDE.local.md`` stub unless the file already exists."""
    local_md = workspace / ".claude" / "CLAUDE.local.md"
    if local_md.exists():
        # Never overwritten, not even with --force.
        return
    local_md.parent.mkdir(parents=True, exist_ok=True)
    local_md.write_text(
        "# My Notes\n\nPersonal notes for this workspace. Uploaded on `agnes push`.\n",
        encoding="utf-8",
    )


def _render_workspace_md(server_url: str, workspace: Path) -> str:
    """Fill the three placeholders of the AGNES_WORKSPACE.md template."""
    template_path = (
        Path(__file__).parent.parent.parent / "config" / "agnes_workspace_template.txt"
    )
    try:
        template = template_path.read_text(encoding="utf-8")
    except OSError:
        # Template file missing or unreadable: use the built-in fallback
        # rather than crashing at the very last step.
        template = _FALLBACK_WORKSPACE_TEMPLATE
    return (
        template
        .replace("{created_at}", datetime.now(timezone.utc).isoformat())
        .replace("{server_url}", server_url)
        .replace("{workspace_path}", str(workspace))
    )


@init_app.callback(invoke_without_command=True)
def init(
    server_url: str = typer.Option(..., "--server-url", help="Agnes server URL"),
    token: str = typer.Option(..., "--token", help="Personal access token"),
    force: bool = typer.Option(False, "--force", help="Re-initialize an existing workspace"),
    workspace_str: Optional[str] = typer.Option(None, "--workspace", help="Target dir (default: cwd)"),
):
    """Bootstrap workspace: auth, CLAUDE.md, hooks, first pull, AGNES_WORKSPACE.md."""
    # The docstring above is kept to one line on purpose: Typer shows it as the
    # command's --help text. Step-by-step detail lives in the comments below.
    workspace = Path(workspace_str).resolve() if workspace_str else Path.cwd()
    server_url = server_url.rstrip("/")
    claude_md = workspace / "CLAUDE.md"

    # Step 1: refuse to touch an already-initialized workspace unless --force.
    if not force and _is_initialized(claude_md):
        _fail("partial_state", "Workspace already initialized. Re-run with --force to redo.")

    # Step 2: verify the PAT before anything is written.
    _verify_token(server_url, token)

    # Step 3: persist server URL + token so later `agnes` invocations
    # (including the installed hooks) pick them up by default. `email=""`
    # because the token is not decoded on the client.
    save_config({"server": server_url})
    save_token(token, email="")

    # Step 4: fetch the rendered CLAUDE.md and write it into the workspace.
    workspace.mkdir(parents=True, exist_ok=True)
    claude_md.write_text(_fetch_claude_md(server_url, token), encoding="utf-8")

    # Step 5: default settings.json (only when absent) + hook installation.
    _seed_settings_and_hooks(workspace)

    # Step 6: CLAUDE.local.md stub, only when absent.
    _ensure_local_notes(workspace)

    # Step 7: first pull. Any exception escaping `run_pull` aborts init with a
    # typed error; the workspace is left partially set up.
    try:
        result: PullResult = run_pull(server_url, token, workspace)
    except Exception as exc:
        _fail(
            "manifest_unauthorized",
            "Initial pull failed — workspace partially set up",
            message=str(exc),
        )

    # Step 8: render AGNES_WORKSPACE.md from the client-side template.
    (workspace / "AGNES_WORKSPACE.md").write_text(
        _render_workspace_md(server_url, workspace), encoding="utf-8"
    )

    # Surface per-stage pull failures instead of silently reporting success.
    # NOTE(review): assumes `PullResult.errors` is falsy when nothing failed —
    # confirm against cli.lib.pull.
    pull_errors = getattr(result, "errors", None)
    if pull_errors:
        typer.echo(f"Warning: first pull reported problems: {pull_errors}", err=True)

    # Final: human-readable summary.
    typer.echo("Workspace ready.")
    typer.echo(f" Server : {server_url}")
    typer.echo(f" Tables : {result.tables_updated} synced ({result.parquets_total} total)")
    typer.echo(f" Rules : {result.rules_count}")
    typer.echo(f" Workspace: {workspace}")
    typer.echo("")
    typer.echo("Try: agnes catalog")
notes (uploaded on session end) | +| `./.claude/rules/km_*.md` | Server-pushed corporate-knowledge rules (only when granted) | +| `./server/parquet/*.parquet` | Synced data — RBAC-filtered subset (only when grants exist) | +| `./user/duckdb/analytics.duckdb` | DuckDB views over the parquets — what `agnes query` reads | +| `./user/snapshots/*.parquet` | Ad-hoc materialized snapshots from `agnes snapshot create` | +| `./user/sessions/*.jsonl` | Captured Claude Code sessions (uploaded on session end) | + +Some folders only exist when they have content — `agnes pull` and `agnes push` +only create them when there's something to write. + +--- + +## How it stays fresh + +Two hooks in `./.claude/settings.json` keep this workspace in sync without +you doing anything: + +- **SessionStart** → `agnes pull --quiet` — new parquets, schema changes, and + updated rules pull down before Claude Code answers. Failure is silent; + your session continues with the last-known data. +- **SessionEnd** → `agnes push --quiet` — your session transcript and + `CLAUDE.local.md` ship to the server. + +Both are workspace-scoped — they only run when Claude Code opens this folder. 
"""Tests for `agnes init` orchestrator command."""

from typer.testing import CliRunner

from cli.commands.init import init_app

runner = CliRunner()


def _make_api_get():
    """Build a stub ``api_get`` returning canned 200 responses.

    The returned closure answers every endpoint that `agnes init` and the
    inner `run_pull` request, so it can be monkeypatched over both
    `cli.commands.init.api_get` and `cli.lib.pull.api_get`.
    """
    from unittest.mock import MagicMock

    def _payload_for(path):
        # A fresh object per call so one caller cannot mutate another's data.
        if path == "/api/catalog/tables":
            return []
        if path == "/api/welcome":
            return {"content": "# Test CLAUDE.md\n\nUse `agnes pull`.\n"}
        if path == "/api/sync/manifest":
            return {"tables": {}}
        if path == "/api/memory/bundle":
            return {"mandatory": [], "approved": []}
        return {}

    def _api_get(path, *args, **kwargs):
        resp = MagicMock()
        resp.status_code = 200
        resp.json.return_value = _payload_for(path)
        # raise_for_status stays a no-op MagicMock, which is right for a 200.
        return resp

    return _api_get


def _stub_network(monkeypatch, tmp_path):
    """Point the CLI config dir at *tmp_path* and replace ``api_get`` with the stub."""
    monkeypatch.setenv("AGNES_CONFIG_DIR", str(tmp_path / "_cfg"))
    api_get = _make_api_get()
    monkeypatch.setattr("cli.commands.init.api_get", api_get, raising=False)
    monkeypatch.setattr("cli.lib.pull.api_get", api_get, raising=False)


def _run_init(workspace, *extra, server_url="http://x", token="t"):
    """Invoke `agnes init` against *workspace* and return the CliRunner result."""
    return runner.invoke(init_app, [
        "--server-url", server_url,
        "--token", token,
        "--workspace", str(workspace),
        *extra,
    ])


def test_init_help():
    result = runner.invoke(init_app, ["--help"])
    assert result.exit_code == 0
    for flag in ("--server-url", "--token", "--force", "--workspace"):
        assert flag in result.output


def test_init_writes_expected_files(tmp_path, monkeypatch):
    """Mocked end-to-end: init writes CLAUDE.md, settings.json, AGNES_WORKSPACE.md."""
    _stub_network(monkeypatch, tmp_path)

    result = _run_init(tmp_path, server_url="http://test.example.com", token="test-pat")

    assert result.exit_code == 0, result.output
    assert (tmp_path / "CLAUDE.md").exists()
    assert "agnes pull" in (tmp_path / "CLAUDE.md").read_text()
    assert (tmp_path / ".claude" / "settings.json").exists()
    assert (tmp_path / ".claude" / "CLAUDE.local.md").exists()
    # The workspace doc must have its placeholders substituted, not copied raw.
    workspace_md = (tmp_path / "AGNES_WORKSPACE.md").read_text(encoding="utf-8")
    assert "http://test.example.com" in workspace_md
    assert "{server_url}" not in workspace_md
    # run_pull always creates the analytics.duckdb file (load-bearing).
    assert (tmp_path / "user" / "duckdb" / "analytics.duckdb").exists()


def test_init_no_dead_dirs_zero_grants(tmp_path, monkeypatch):
    """Zero grants -> no .claude/rules, no server/parquet, no user/sessions."""
    _stub_network(monkeypatch, tmp_path)

    result = _run_init(tmp_path)

    # Without this check the test passes vacuously when init aborts early.
    assert result.exit_code == 0, result.output
    for forbidden in [
        "data/parquet", "data/duckdb", "data/metadata",
        "user/artifacts", "user/sessions",
        "server/parquet", ".claude/rules",
    ]:
        assert not (tmp_path / forbidden).exists(), f"forbidden created: {forbidden}"


def test_init_force_preserves_local_md(tmp_path, monkeypatch):
    """--force regenerates CLAUDE.md but never touches CLAUDE.local.md."""
    _stub_network(monkeypatch, tmp_path)

    # First init seeds the workspace + writes the default CLAUDE.local.md stub.
    r1 = _run_init(tmp_path)
    assert r1.exit_code == 0, r1.output
    (tmp_path / ".claude" / "CLAUDE.local.md").write_text("# my notes")

    # The stubbed CLAUDE.md lacks the init marker, so plant a stale file that
    # carries it: the second run then really needs --force to get past step 1.
    (tmp_path / "CLAUDE.md").write_text("# Acme AI Data Analyst\n\nstale\n", encoding="utf-8")

    r2 = _run_init(tmp_path, "--force")
    assert r2.exit_code == 0, r2.output

    regenerated = (tmp_path / "CLAUDE.md").read_text(encoding="utf-8")
    assert "agnes pull" in regenerated
    assert "stale" not in regenerated
    assert "my notes" in (tmp_path / ".claude" / "CLAUDE.local.md").read_text()


def test_init_partial_state_friendly_exit(tmp_path, monkeypatch):
    """CLAUDE.md exists with marker but no settings.json -> friendly hint, exit 1."""
    monkeypatch.setenv("AGNES_CONFIG_DIR", str(tmp_path / "_cfg"))
    workspace = tmp_path
    (workspace / "CLAUDE.md").write_text("# AI Data Analyst\n")

    # Without --force, init should refuse and print a hint.
    result = _run_init(workspace)

    assert result.exit_code == 1
    # Result.stderr raises ValueError on Click versions where stderr is mixed
    # into stdout; in that case result.output already holds everything.
    try:
        stderr = result.stderr or ""
    except ValueError:
        stderr = ""
    assert "Traceback" not in (result.output + stderr)
    # The refusal must leave the existing file untouched.
    assert (workspace / "CLAUDE.md").read_text() == "# AI Data Analyst\n"