agnes-the-ai-analyst/cli/commands/init.py
ZdenekSrotyr c97fd504c5 release: 0.45.0 — easy-wins bundle (#84 #164 #177 #178 #203 #204)
Operator-and-analyst quality bundle: a security fix for the optional
Telegram bot, two CLI gaps closed, and three rounds of UX polish on
`agnes diagnose` and `agnes pull` so non-TTY consumers (CI runners,
Claude Code SessionStart hooks, sub-agent watchdogs) get readable,
actionable signal.

- Pairing-code RNG: random.choices -> secrets.choice (CSPRNG).
- Telegram script runner: refuse out-of-shape usernames before sudo -u.

CLAUDE.md.bak.<ISO-timestamp> before regenerating.

- agnes admin unregister-table <id> -> DELETE /api/admin/registry/{id}
- agnes admin update-table <id> --field=value ...  -> PUT /api/admin/registry/{id}

response but never promotes the headline. BQ billing-equals-data check
downgraded warning -> info.

default (5 s / 1 MiB vs 30 s / 10%) so sub-agent watchdogs don't kill
the pull as a hung process. New env knobs:
AGNES_PULL_PROGRESS_INTERVAL_{SECONDS,BYTES}.

--include-schema (or ?include=schema) to opt back in.

Tests: 120 passed across the touched modules, including new tests for
each fix. Pre-existing failures on main (DB migration v1->v9, binary
rename) are unrelated and not introduced here.
2026-05-07 11:43:16 +02:00

274 lines
13 KiB
Python

"""`agnes init` — bootstrap an analyst workspace.
Single-paste flow: web user clicks "Generate prompt" on /setup?role=analyst,
pastes into Claude Code in an empty folder; Claude runs `agnes init` (among
other steps). Non-interactive: --token + --server-url required.
Steps in order:
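1. Detect existing workspace (`CLAUDE.md` containing the init marker) — exit 1
unless --force, with a typed `partial_state` error. With --force, the existing
`CLAUDE.md` is first copied to `CLAUDE.md.bak.<UTC timestamp>` (best effort).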
2. Verify the PAT via `GET /api/catalog/tables` — typed `auth_failed` on 401,
`server_unreachable` on network error.
3. Persist server URL + PAT to `~/.config/agnes/` so subsequent `agnes pull` /
`agnes push` invocations (including the SessionStart/End hooks installed
below) inherit the credentials without env vars.
4. Fetch the rendered CLAUDE.md from `GET /api/welcome` (server-rendered,
RBAC-filtered, role-aware).
5. Seed `.claude/settings.json` with default model + permissions, then call
`cli.lib.hooks.install_claude_hooks` to merge in the SessionStart/End hook
commands. Idempotent on re-run.
6. Write the `.claude/CLAUDE.local.md` stub only when absent — `--force`
regenerates CLAUDE.md but **never** clobbers the operator-edited
CLAUDE.local.md.
7. Run the first `cli.lib.pull.run_pull` so the workspace ships with current
parquets, DuckDB views, and the corporate-memory bundle.
8. Render `AGNES_WORKSPACE.md` from `config/agnes_workspace_template.txt` —
client-side template, three placeholders.
Errors render via `cli/error_render.py:render_error` with typed `kind` values
(`auth_failed`, `server_unreachable`, `partial_state`, `manifest_unauthorized`)
matching the rest of the CLI surface.
Task 18 will register `init_app` on the root Typer app.
"""
from __future__ import annotations
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
import typer
from cli.client import api_get
from cli.config import save_config, save_token
from cli.error_render import render_error
from cli.lib.hooks import install_claude_hooks
from cli.lib.pull import PullResult, _override_server_env, run_pull
# Substring that flags an already-bootstrapped workspace. `init` treats an
# existing CLAUDE.md as "already initialized" only when its text contains this
# marker (plain substring test). The current default CLAUDE.md template renders
# `# {{ instance.name }} — AI Data Analyst` so this appears in every
# CLAUDE.md rendered from that template. A custom admin template that drops
# the phrase never trips the guard; one that keeps it can be re-initialized by
# passing the `--force` flag.
_INIT_MARKER = "AI Data Analyst"
# Typer sub-application for `agnes init`. The callback registered below uses
# `invoke_without_command=True`, so it runs when no subcommand is given.
init_app = typer.Typer(help="Bootstrap an analyst workspace in this directory")
@init_app.callback(invoke_without_command=True)
def init(
    server_url: str = typer.Option(..., "--server-url", help="Agnes server URL"),
    token: str = typer.Option(..., "--token", help="Personal access token"),
    force: bool = typer.Option(False, "--force", help="Re-initialize an existing workspace"),
    workspace_str: Optional[str] = typer.Option(None, "--workspace", help="Target dir (default: cwd)"),
    skip_materialize: bool = typer.Option(
        False, "--skip-materialize",
        help=(
            "Skip materialized-mode tables on the first pull. The first "
            "init can otherwise spend tens of minutes silently downloading "
            "a single multi-GB scheduled-query parquet. Materialized rows "
            "are still discoverable via `agnes catalog`; rerun `agnes pull` "
            "without this flag once you actually need them locally."
        ),
    ),
) -> None:
    """Bootstrap workspace: auth, CLAUDE.md, hooks, first pull, AGNES_WORKSPACE.md."""
    # NOTE: the docstring above is deliberately kept to one line — Typer may
    # surface a callback docstring as CLI help text. Details live here.
    #
    # Parameters:
    #   server_url       Base URL of the Agnes server; trailing "/" is removed.
    #   token            Personal access token used for every request below.
    #   force            Bypass the "already initialized" gate; the existing
    #                    CLAUDE.md is backed up (best effort) then regenerated.
    #   workspace_str    Target directory; defaults to the current directory.
    #   skip_materialize Forwarded to `run_pull` for the first pull.
    #
    # Raises `typer.Exit(1)` after printing a typed error (`partial_state`,
    # `auth_failed`, `server_unreachable`, `manifest_unauthorized`) to stderr
    # whenever a step fails. Returns None on success.
    workspace = Path(workspace_str).resolve() if workspace_str else Path.cwd()
    server_url = server_url.rstrip("/")

    # ------------------------------------------------------------------
    # Step 1: detect an existing workspace.
    # ------------------------------------------------------------------
    claude_md = workspace / "CLAUDE.md"
    if claude_md.exists() and not force:
        try:
            # errors="replace": a CLAUDE.md holding invalid UTF-8 must not
            # crash the gate with UnicodeDecodeError. The marker is plain
            # ASCII, so it is still found in a lossy decode.
            existing = claude_md.read_text(encoding="utf-8", errors="replace")
        except OSError:
            # Unreadable file: treat it as "not one of ours" and carry on.
            existing = ""
        if _INIT_MARKER in existing:
            typer.echo(render_error(0, {"detail": {
                "kind": "partial_state",
                "hint": "Workspace already initialized. Re-run with --force to redo.",
            }}), err=True)
            raise typer.Exit(1)

    # On --force, snapshot the existing CLAUDE.md before regenerating it
    # so an operator who edited it can recover their notes (issue #164).
    # The backup name carries a UTC timestamp so `--force` runs at different
    # times do not clobber each other. The backup is written *after* the
    # existing-workspace gate above so the un-forced path is unchanged.
    if claude_md.exists() and force:
        try:
            ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
            backup_path = workspace / f"CLAUDE.md.bak.{ts}"
            backup_path.write_bytes(claude_md.read_bytes())
            typer.echo(f"Backed up existing CLAUDE.md → {backup_path.name}")
        except OSError as exc:
            # A filesystem error on the backup should not abort the init.
            # Surface it so the operator knows the pre-existing CLAUDE.md
            # is about to be overwritten without a recoverable copy on
            # disk, then proceed.
            typer.echo(
                f"Warning: could not write CLAUDE.md backup ({exc}); "
                f"continuing with --force overwrite",
                err=True,
            )

    # ------------------------------------------------------------------
    # Step 2: verify the PAT via /api/catalog/tables.
    #
    # `api_get` reads server URL + token from env vars (`AGNES_SERVER`,
    # `AGNES_TOKEN`) via `cli.config`. Wrap the call in
    # `_override_server_env` so the explicit args take effect without
    # mutating the caller's environment permanently. Same mechanism as
    # `cli.lib.pull.run_pull`.
    # ------------------------------------------------------------------
    try:
        with _override_server_env(server_url, token):
            resp = api_get("/api/catalog/tables")
        if resp.status_code == 401:
            typer.echo(render_error(401, {"detail": {
                "kind": "auth_failed",
                "hint": f"Token expired or invalid — get a fresh one at {server_url}/setup",
            }}), err=True)
            raise typer.Exit(1)
        resp.raise_for_status()
    except typer.Exit:
        # Let the typed auth failure through; the handler below would
        # otherwise relabel it as `server_unreachable`.
        raise
    except Exception as exc:
        typer.echo(render_error(0, {"detail": {
            "kind": "server_unreachable",
            "hint": f"Cannot reach {server_url} — check network or server status",
            "message": str(exc),
        }}), err=True)
        raise typer.Exit(1)

    # ------------------------------------------------------------------
    # Step 3: save server URL + token to ~/.config/agnes/ so subsequent
    # invocations (including the SessionStart hook) read them by default.
    # `email=""` because the JWT carries it server-side; we don't decode
    # the token on the client.
    # ------------------------------------------------------------------
    save_config({"server": server_url})
    save_token(token, email="")

    # ------------------------------------------------------------------
    # Step 4: fetch the rendered CLAUDE.md from /api/welcome.
    #
    # Decoding and shape validation happen inside the `try` so that a
    # non-JSON body, a non-object payload, or a non-string `content`
    # produces the same typed error as a transport failure instead of an
    # unhandled traceback.
    # ------------------------------------------------------------------
    workspace.mkdir(parents=True, exist_ok=True)
    try:
        with _override_server_env(server_url, token):
            welcome_resp = api_get("/api/welcome", params={"server_url": server_url})
        welcome_resp.raise_for_status()
        welcome_payload = welcome_resp.json()
        if not isinstance(welcome_payload, dict):
            raise ValueError(
                "unexpected /api/welcome payload: expected a JSON object, "
                f"got {type(welcome_payload).__name__}"
            )
        welcome_content = welcome_payload.get("content", "")
        if not isinstance(welcome_content, str):
            raise ValueError(
                "unexpected /api/welcome payload: `content` must be a string, "
                f"got {type(welcome_content).__name__}"
            )
    except Exception as exc:
        typer.echo(render_error(0, {"detail": {
            "kind": "server_unreachable",
            "hint": "Failed to fetch CLAUDE.md from /api/welcome",
            "message": str(exc),
        }}), err=True)
        raise typer.Exit(1)
    claude_md.write_text(welcome_content, encoding="utf-8")

    # ------------------------------------------------------------------
    # Step 5: default settings.json + install hooks.
    #
    # Seed first-run model + permissions only when the file is absent;
    # `install_claude_hooks` then merges SessionStart/End on top, leaving
    # any third-party keys/hooks intact. Re-running init (with or without
    # --force) is idempotent on settings.json.
    # ------------------------------------------------------------------
    settings_path = workspace / ".claude" / "settings.json"
    if not settings_path.exists():
        settings_path.parent.mkdir(parents=True, exist_ok=True)
        settings_path.write_text(json.dumps(
            {"model": "sonnet", "permissions": {"allow": ["Read", "Bash", "Grep", "Glob"]}},
            indent=2,
        ), encoding="utf-8")
    install_claude_hooks(workspace)

    # ------------------------------------------------------------------
    # Step 6: CLAUDE.local.md stub — only when absent. `--force` does NOT
    # overwrite; the operator's notes survive a re-init.
    # ------------------------------------------------------------------
    local_md = workspace / ".claude" / "CLAUDE.local.md"
    if not local_md.exists():
        local_md.parent.mkdir(parents=True, exist_ok=True)
        local_md.write_text(
            "# My Notes\n\nPersonal notes for this workspace. Uploaded on `agnes push`.\n",
            encoding="utf-8",
        )

    # ------------------------------------------------------------------
    # Step 7: first pull. `run_pull` records per-stage failures inside
    # `result.errors` rather than raising for transient issues, so any
    # exception escaping here is a programming error worth surfacing.
    # ------------------------------------------------------------------
    try:
        # Progress output is always enabled for the first pull: issue #185
        # (Phase 1) was a 44-minute download with no output at all on the
        # very first install. Pass it through to run_pull.
        result: PullResult = run_pull(
            server_url, token, workspace,
            skip_materialize=skip_materialize,
            show_progress=True,
        )
    except Exception as exc:
        typer.echo(render_error(0, {"detail": {
            "kind": "manifest_unauthorized",
            "hint": "Initial pull failed — workspace partially set up",
            "message": str(exc),
        }}), err=True)
        raise typer.Exit(1)

    # A manifest-stage failure here means the analyst has a saved token +
    # saved server URL but no parquets and no DuckDB views — surface a
    # typed error so the operator knows the workspace is not actually
    # queryable. Common cause: PAT validates against /api/catalog/tables
    # but lacks resource_grants for any tables.
    manifest_err = next((e for e in result.errors if e.get("stage") == "manifest"), None)
    if manifest_err:
        typer.echo(render_error(0, {"detail": {
            "kind": "manifest_unauthorized",
            "hint": "Manifest fetch failed — workspace partially set up. "
                    "Check that the PAT has resource_grants for at least one table.",
            "message": manifest_err.get("error", ""),
        }}), err=True)
        raise typer.Exit(1)

    # ------------------------------------------------------------------
    # Step 8: render AGNES_WORKSPACE.md from the static client-side
    # template. Three placeholders: created_at, server_url, workspace_path.
    # ------------------------------------------------------------------
    here = Path(__file__).parent
    template_path = here.parent.parent / "config" / "agnes_workspace_template.txt"
    if template_path.exists():
        template = template_path.read_text(encoding="utf-8")
    else:
        # Defensive fallback — the template ships with the repo so this
        # branch only fires on a broken install. Better than crashing.
        template = "# Agnes workspace\n\nCreated: {created_at}\nServer: {server_url}\n"
    workspace_md = (
        template
        .replace("{created_at}", datetime.now(timezone.utc).isoformat())
        .replace("{server_url}", server_url)
        .replace("{workspace_path}", str(workspace))
    )
    (workspace / "AGNES_WORKSPACE.md").write_text(workspace_md, encoding="utf-8")

    # ------------------------------------------------------------------
    # Final: human-readable summary.
    # ------------------------------------------------------------------
    typer.echo("Workspace ready.")
    typer.echo(f" Server : {server_url}")
    typer.echo(f" Tables : {result.tables_updated} synced ({result.parquets_total} total)")
    typer.echo(f" Rules : {result.rules_count}")
    typer.echo(f" Workspace: {workspace}")
    typer.echo("")
    typer.echo("Try: agnes catalog")