Three first-try-failure-surface fixes from Pavel's #185 trace + the template guidance question, all under PR #188's umbrella so they land together with the file_server / parallel pull / Tier 1 work. 1. CLI clean-error wrapper — new AgnesTransportError raised by the api_*/stream_download helpers when httpx times out / drops / refuses, plus a top-level Typer wrapper (cli/main.py) that prints one-line "Error: …" + actionable hint and exits non-zero. Full traceback goes to ~/.config/agnes/last-error.log for support forwarding. Unhandled Exceptions are caught at the same boundary so no Python traceback ever leaks to the analyst's terminal. Pavel's #185 Phase 3B: a 30-frame httpx traceback from a slow BQ --remote query made it look like a CLI bug. Now: clean message + hint pointing at `agnes snapshot create` / partition-column guidance. Entry point in pyproject.toml flipped from `cli.main:app` → `cli.main:_run_with_clean_errors` so the wrapper actually runs under the installed `agnes` binary. 2. agnes init / agnes pull --skip-materialize + progress bar. --skip-materialize omits query_mode='materialized' rows from the download set so a first init doesn't spend 44 minutes silently pulling a single 6 GB parquet (Pavel's #185 Phase 1). Rich-driven per-file progress bar with label/bytes/rate/ETA renders to stderr when not --quiet and not --json. Aggregates across the parallel ThreadPoolExecutor workers added earlier in this PR. 3. config/claude_md_template.txt: explicit one-line snippet pointing at `agnes catalog --json | jq '.tables[] | select(.id=="<id>")'` for per-table descriptions + restated invariant: "the description field on each catalog row is the authoritative business-rules text — re-read live, never copy into this file." Resolves the regression-or-feature debate between Pavel (wants annotations) and the user feedback that landed in the prior commit (don't embed table-specific content; tables change). Catalog command stays the source of truth.
132 lines
5.2 KiB
Python
132 lines
5.2 KiB
Python
"""`agnes pull` — refresh registered data into the workspace.
|
|
|
|
Thin Typer wrapper around `cli/lib/pull.py:run_pull`. Used by:
|
|
- Manual invocation: analyst types `agnes pull` to force a refresh.
|
|
- SessionStart hook: `agnes pull --quiet 2>/dev/null || true` runs at the start
|
|
of every Claude Code session in this workspace.
|
|
|
|
Errors render via `cli/error_render.py:render_error()` for typed-error
|
|
shape consistency with other CLI commands. The wrapper intentionally does
|
|
no I/O of its own — config lookup, manifest fetch, parquet download, view
|
|
rebuild, and rules-bundle write all live in `run_pull`. This keeps the
|
|
command code trivially testable and the data-refresh primitive reusable
|
|
from other entrypoints (init, analyst setup, future MCP tools).
|
|
|
|
Task 18 will register `pull_app` on the root Typer app and delete the
|
|
legacy `agnes sync` command. Until then this module is callable only via
|
|
direct import (which is exactly what the test does).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
|
|
import typer
|
|
|
|
from cli.config import get_server_url, get_token
|
|
from cli.error_render import render_error
|
|
from cli.lib.pull import PullResult, run_pull
|
|
|
|
|
|
pull_app = typer.Typer(help="Refresh registered data from the server")
|
|
|
|
|
|
@pull_app.callback(invoke_without_command=True)
def pull(
    quiet: bool = typer.Option(False, "--quiet", help="Suppress success stdout (errors still surface on stderr)"),
    as_json: bool = typer.Option(False, "--json", help="Emit a single JSON object summarizing the pull"),
    dry_run: bool = typer.Option(False, "--dry-run", help="Compute the delta without writing anything to disk"),
    skip_materialize: bool = typer.Option(
        False, "--skip-materialize",
        help=(
            "Skip materialized-mode tables (server-side scheduled BQ "
            "scan results, often multi-GB). Their data is still discoverable "
            "via `agnes catalog` and remote-mode tables still pull. Useful "
            "for a fast first init when an analyst only needs --remote access."
        ),
    ),
):
    """Refresh data from the server into ./server/parquet + ./user/duckdb."""
    # NOTE(review): the docstring above is deliberately left as a one-liner;
    # Typer may surface it as the command's --help text, so the longer notes
    # live in comments instead.
    #
    # Flow:
    #   1. Resolve server URL and token; exit 1 with a typed error if either
    #      is missing.
    #   2. Delegate all real work to `run_pull`.
    #   3. Report the result as JSON (--json), as warnings only (--quiet),
    #      or as a two-line human summary followed by warnings.
    #
    # Exit status: 1 on missing config or when `run_pull` raises; otherwise
    # the function returns normally even when `errors` is non-empty.

    def _report(kind, hint, **extra):
        # Emit one typed error on stderr via the shared renderer. The status
        # argument is always 0 here because no HTTP response is involved.
        detail = {"kind": kind, "hint": hint, **extra}
        typer.echo(render_error(0, {"detail": detail}), err=True)

    server_url = get_server_url()
    if not server_url:
        # Defensive guard: only reachable when `get_server_url()` hands back
        # a falsy value. Fail with a setup hint rather than part-way through
        # the pull.
        _report(
            "server_unreachable",
            "No server configured. Run: agnes init --server-url <URL> --token <PAT>",
        )
        raise typer.Exit(1)

    token = get_token()
    if not token:
        _report(
            "auth_failed",
            "No token. Run: agnes auth import-token --token <PAT>",
        )
        raise typer.Exit(1)

    # Workspace root: $AGNES_LOCAL_DIR when set, else the current directory.
    workspace = Path(os.getenv("AGNES_LOCAL_DIR", ".")).resolve()

    # Progress output is suppressed for --quiet and --json, so only the plain
    # interactive invocation asks `run_pull` to show it.
    show_progress = not quiet and not as_json

    try:
        outcome: PullResult = run_pull(
            server_url,
            token,
            workspace,
            dry_run=dry_run,
            skip_materialize=skip_materialize,
            show_progress=show_progress,
        )
    except Exception as exc:
        # Anything raised by `run_pull` is rendered through the same
        # typed-error path as the guards above, then the command exits 1.
        # NOTE(review): the kind "manifest_unauthorized" is applied to every
        # exception type here, not only authorization failures — confirm
        # whether `render_error` has a more general kind to use.
        _report(
            "manifest_unauthorized",
            f"Pull failed: {exc}",
            message=str(exc),
        )
        raise typer.Exit(1)

    if as_json:
        # Machine-readable mode: exactly one JSON object on stdout; errors
        # are carried inside the payload instead of being echoed separately.
        payload = {
            "tables_updated": outcome.tables_updated,
            "parquets_total": outcome.parquets_total,
            "rules_count": outcome.rules_count,
            "duration_s": round(outcome.duration_s, 3),
            "errors": outcome.errors,
        }
        typer.echo(json.dumps(payload))
        return

    if not quiet:
        # Human summary on stdout; skipped entirely in quiet mode so a
        # successful quiet run prints nothing.
        typer.echo(f"Updated {outcome.tables_updated} tables ({outcome.parquets_total} total).")
        typer.echo(f"Rules: {outcome.rules_count}.")

    # Recorded errors are surfaced as warnings on stderr in both quiet and
    # normal mode.
    for problem in outcome.errors or ():
        typer.echo(f"warn: {problem}", err=True)
|