agnes-the-ai-analyst/cli/commands/catalog.py
ZdenekSrotyr 5458ccc41b
hygiene: BQ error hint dispatch + catalog ENTITY column (#274)
Two analyst-UX papercuts surfaced by the v0.53.4 onboarding smoke test.

1) /api/query remote_estimate_failed hint now branches on the BigQuery
   error class instead of always claiming a column doesn't exist. The
   previous hardcoded "Most often this means a column referenced …
   doesn't exist" misled analysts whenever BigQuery actually rejected
   on syntax — concretely, `SELECT COUNT(*) AS rows FROM …` fails with
   `Syntax error: Unexpected keyword ROWS at [1:20]` (`rows` is a BQ
   reserved word) and the hint pointed at non-existent columns.

   New _hint_for_bq_bad_request() helper dispatches:
   - "Syntax error" / "Unexpected keyword" → reserved-keyword alias hint
     with `AS row_count` workaround
   - "Unrecognized name" / "not found inside" → `agnes schema <id>`
   - "Table not found" → `agnes catalog`
   - fallback → enumerate all three

   4 unit tests in TestHintForBqBadRequest pin each branch. Existing
   guardrail tests (test_fallback_fails_fast_on_pure_duckdb_syntax,
   test_remote_estimate_failed_surfaces_first_error_when_attempts_differ)
   continue to pass — both hint substrings they assert on still appear in
   the relevant branches.

2) `agnes catalog` replaces the FLAVOR column with ENTITY. FLAVOR
   rendered t['sql_flavor'] which duplicated SOURCE for any catalog
   dominated by one source type — analysts saw `SOURCE=bigquery
   FLAVOR=bigquery` on every row. ENTITY instead surfaces the upstream
   BigQuery entity_type (BASE TABLE / VIEW / MATERIALIZED_VIEW) for
   remote rows; non-remote rows render `-`. The distinction matters
   operationally: views don't support predicate pushdown, so `agnes
   query --remote` against a view trips the cost guardrail where the
   same query against a BASE TABLE pushes down cleanly. The
   entity_type field has been in the v2 catalog response since 0.51.0;
   this PR just stops hiding it behind a column header that conveyed
   no information.

JSON output (`agnes catalog --json`) is unchanged — only the human-
readable column changed. No DB migration; no API change.

Verified: 4161 tests pass locally; 25 in test_api_query_guardrail.py
green; the 4 new TestHintForBqBadRequest cases pin each branch.
2026-05-12 18:32:29 +00:00

140 lines
4.8 KiB
Python

"""`agnes catalog` — list registered tables and metric definitions (spec §4.1)."""
import json as json_lib
from typing import Optional
import typer
from cli.client import api_get
from cli.v2_client import api_get_json, V2ClientError
# Typer application for `agnes catalog`. The callback registered on it below
# uses invoke_without_command=True, so it runs when no subcommand is given.
catalog_app = typer.Typer(help="List tables (and metrics, with --metrics) visible to you")
@catalog_app.callback(invoke_without_command=True)
def catalog(
    ctx: typer.Context,
    json: bool = typer.Option(False, "--json", help="Emit raw JSON"),
    refresh: bool = typer.Option(False, "--refresh", help="Bypass client-side cache"),
    metrics: bool = typer.Option(
        False,
        "--metrics",
        help="List metric definitions instead of tables. Combine with --show <id> for details.",
    ),
    show: Optional[str] = typer.Option(
        None,
        "--show",
        help="With --metrics: show details for one metric id (e.g. revenue/mrr).",
    ),
):
    """List tables visible to you (RBAC-filtered).

    With ``--metrics`` lists registered metric definitions; pair with
    ``--show <id>`` to dump one definition.
    """
    # NOTE: the docstring above is kept short on purpose — Typer surfaces a
    # callback's docstring as help text, so per-option documentation lives in
    # the typer.Option(help=...) strings instead.

    # A subcommand was requested; let it handle the invocation.
    if ctx.invoked_subcommand is not None:
        return

    # Metrics mode: --show selects a single definition, otherwise list all.
    # (--show without --metrics is not consulted and falls through to tables.)
    if metrics:
        if show:
            _show_one_metric(show, as_json=json)
        else:
            _list_metrics(as_json=json)
        return

    try:
        data = api_get_json("/api/v2/catalog", refresh=int(refresh))
    except V2ClientError as e:
        typer.echo(f"Error: catalog fetch failed: {e}", err=True)
        raise typer.Exit(5)

    if json:
        # Raw passthrough of the server payload; no reshaping.
        typer.echo(json_lib.dumps(data, indent=2))
        return

    # Human-readable table.
    # ENTITY column shows the upstream entity_type for remote BigQuery rows
    # (BASE TABLE / VIEW / MATERIALIZED_VIEW) — matters because views don't
    # support predicate pushdown, so an analyst should reach for `agnes
    # snapshot create` rather than `agnes query --remote` on a view.
    # For non-remote rows (local / materialized) entity_type is NULL upstream
    # and we render a dash — those modes don't have an analogous distinction.
    typer.echo(f"{'ID':30s} {'SOURCE':10s} {'MODE':8s} {'ENTITY':18s} NAME")

    # `or []` also covers an explicit `"tables": null` in the payload, which
    # `.get("tables", [])` would hand to the loop as None.
    for t in data.get("tables") or []:
        # Optional fields: a JSON null must not render as the text "None".
        entity = t.get("entity_type") or "-"
        name = t.get("name") or ""
        # str() keeps the `s` format spec from raising TypeError should the
        # server send a non-string value (e.g. a numeric id). Missing required
        # keys still raise KeyError, exactly as before.
        typer.echo(
            f"{str(t['id']):30s} {str(t['source_type']):10s} {str(t['query_mode']):8s} "
            f"{str(entity):18s} {name}"
        )
def _list_metrics(as_json: bool, category: Optional[str] = None) -> None:
    """List metric definitions from the server.

    Args:
        as_json: When true, print the metric list as indented JSON instead of
            the grouped human-readable listing.
        category: Optional category filter, forwarded to ``/api/metrics`` as
            the ``category`` query parameter when non-empty.

    Raises:
        typer.Exit: With code 1 when the server answers with a non-200 status.
    """
    params = {"category": category} if category else {}
    resp = api_get("/api/metrics", params=params)

    if resp.status_code != 200:
        # The error body is not guaranteed to be a JSON object, so fall back
        # to the raw text rather than crashing with a traceback.
        # Assumes resp.json() raises a ValueError subclass on invalid JSON
        # (true for requests and httpx) — confirm against cli.client.
        try:
            body = resp.json()
        except ValueError:
            body = None
        detail = body.get("detail", resp.text) if isinstance(body, dict) else resp.text
        typer.echo(f"Failed: {detail}", err=True)
        raise typer.Exit(1)

    data = resp.json()
    # The payload is either a bare list or an object with a "metrics" key.
    metrics = data if isinstance(data, list) else data.get("metrics", [])

    if as_json:
        typer.echo(json_lib.dumps(metrics, indent=2, default=str))
        return

    if not metrics:
        typer.echo("No metrics found.")
        return

    # Group by category for display. `or` (rather than a .get default) also
    # maps an explicit null category to "uncategorized"; a None key would make
    # the sorted() call below raise TypeError when mixed with string keys.
    by_category: dict = {}
    for m in metrics:
        cat = m.get("category") or "uncategorized"
        by_category.setdefault(cat, []).append(m)

    for cat, items in sorted(by_category.items()):
        typer.echo(f"\n[{cat}]")
        for m in items:
            # Fall back name -> id -> "?"; str() keeps the `:30s` format spec
            # valid even when the chosen value is not a string.
            name = str(m.get("name") or m.get("id") or "?")
            display = m.get("display_name") or name
            unit = m.get("unit") or ""
            unit_str = f" ({unit})" if unit else ""
            typer.echo(f" {name:30s} {display}{unit_str}")
def _show_one_metric(metric_id: str, as_json: bool) -> None:
    """Show details for a single metric.

    Args:
        metric_id: Metric identifier, interpolated verbatim into the
            ``/api/metrics/<id>`` request path.
        as_json: When true, print the metric as indented JSON instead of the
            labelled field listing.

    Raises:
        typer.Exit: With code 1 when the metric is not found (404) or the
            server answers with any other non-200 status.
    """
    resp = api_get(f"/api/metrics/{metric_id}")

    if resp.status_code == 404:
        typer.echo(f"Metric not found: {metric_id}", err=True)
        raise typer.Exit(1)

    if resp.status_code != 200:
        # The error body is not guaranteed to be a JSON object, so fall back
        # to the raw text rather than crashing with a traceback.
        # Assumes resp.json() raises a ValueError subclass on invalid JSON
        # (true for requests and httpx) — confirm against cli.client.
        try:
            body = resp.json()
        except ValueError:
            body = None
        detail = body.get("detail", resp.text) if isinstance(body, dict) else resp.text
        typer.echo(f"Failed: {detail}", err=True)
        raise typer.Exit(1)

    m = resp.json()
    if as_json:
        typer.echo(json_lib.dumps(m, indent=2, default=str))
        return

    # Always-present header fields; a missing key prints as an empty value
    # (ID falls back to the id the caller asked for).
    typer.echo(f"ID: {m.get('id', metric_id)}")
    typer.echo(f"Name: {m.get('name', '')}")
    typer.echo(f"Display Name: {m.get('display_name', '')}")
    typer.echo(f"Category: {m.get('category', '')}")
    typer.echo(f"Type: {m.get('type', '')}")

    # Optional fields are printed only when present and truthy.
    optional_fields = (
        ("Unit", "unit"),
        ("Grain", "grain"),
        ("Table", "table_name"),
        ("Description", "description"),
    )
    for label, key in optional_fields:
        if m.get(key):
            typer.echo(f"{label}: {m[key]}")

    if m.get("sql"):
        typer.echo(f"SQL:\n {m['sql']}")

    if m.get("synonyms"):
        # str() each element: str.join raises TypeError on non-string items.
        typer.echo(f"Synonyms: {', '.join(str(s) for s in m['synonyms'])}")