agnes-the-ai-analyst/cli/commands/query.py
ZdenekSrotyr 917f9aaef0
release: 0.47.2 — restore #218 + #219 fixes silently reverted by #217 (#225)
## Summary

Smoke-testing the just-shipped 0.47.1 against production exposed two regressions:

1. `agnes query --remote "SELECT * FROM unit_economics WHERE bad_col=1"` returned `Table "unit_economics" must be qualified` (the OLD error) instead of `Unrecognized name: bad_col` (the #218 fix's intended behavior).
2. `agnes query "DESCRIBE unit_economics"` showed only DuckDB's misleading `Did you mean order_economics?` with no Agnes hint paragraph (the #219 fix is missing).

Root cause: PR #217's squash merge (`506a378c`) carried stale snapshots of `app/api/query.py` and `cli/commands/query.py` from before #218 and #219 merged. The rebase-and-merge auto-merged those files cleanly (no conflict markers) but the result silently reverted both fixes.

Restore the two changes verbatim. Tests for both fixes are already on main and continue to pass against the restored code.

## Test plan

- [x] `pytest tests/test_api_query_guardrail.py tests/test_cli_query.py` — clean
- [x] Manual repro against prod after deploy: both flows now surface the intended diagnostic.
<!-- devin-review-badge-begin -->

---

<a href="https://app.devin.ai/review/keboola/agnes-the-ai-analyst/pull/225" target="_blank">
  <picture>
    <source media="(prefers-color-scheme: dark)" srcset="https://static.devin.ai/assets/gh-open-in-devin-review-dark.svg?v=1">
    <img src="https://static.devin.ai/assets/gh-open-in-devin-review-light.svg?v=1" alt="Open in Devin Review">
  </picture>
</a>
<!-- devin-review-badge-end -->
2026-05-07 19:57:18 +02:00

224 lines
8.7 KiB
Python

"""Query commands — agnes query."""
import json
import os
import re
import sys
from pathlib import Path
from typing import List, Optional
import typer
def query_command(
    sql: Optional[str] = typer.Argument(None, help="SQL query to execute (positional)"),
    sql_opt: Optional[str] = typer.Option(None, "--sql", help="SQL query to execute (named option)"),
    remote: bool = typer.Option(False, "--remote", help="Execute on server instead of locally"),
    fmt: str = typer.Option("table", "--format", "-f", help="Output format: table, json, csv"),
    limit: int = typer.Option(1000, "--limit", help="Max rows to return"),
    register_bq: Optional[List[str]] = typer.Option(
        None,
        "--register-bq",
        help="Register a BigQuery result as a DuckDB view. Format: alias=BQ_SQL. Can be repeated.",
    ),
    stdin: bool = typer.Option(False, "--stdin", help="Read SQL from stdin as JSON {\"sql\": \"...\"}"),
):
    """Execute SQL query against DuckDB."""
    # NOTE(review): the docstring above is intentionally left as a single
    # line — if this function is registered as a Typer command, Typer shows
    # the docstring as the command's help text, so details live in comments.
    #
    # Flow:
    #   1. Resolve the SQL text from exactly one of: positional argument,
    #      --sql, or --stdin (a JSON object with a "sql" key and an optional
    #      "register_bq" mapping of alias -> BigQuery SQL).
    #   2. Dispatch: any --register-bq spec selects the hybrid path, otherwise
    #      --remote selects the server path, otherwise the local path is used.
    # Every usage error is written to stderr and exits with status 1.

    # Booleans sum as 0/1, so this counts how many SQL sources were supplied.
    sources_provided = sum([
        sql is not None,
        sql_opt is not None,
        stdin,
    ])
    if sources_provided == 0:
        typer.echo("Error: provide SQL as a positional argument, --sql option, or --stdin flag.", err=True)
        raise typer.Exit(1)
    if sources_provided > 1:
        typer.echo("Error: only one of positional SQL, --sql, or --stdin may be used at a time.", err=True)
        raise typer.Exit(1)

    if stdin:
        raw = sys.stdin.read()
        try:
            payload = json.loads(raw)
            # Valid JSON is not necessarily an object: `[]`, `"x"`, `42` or
            # `null` would otherwise blow up on the subscript / .get() below
            # with an uncaught TypeError/AttributeError and a raw traceback.
            if not isinstance(payload, dict):
                raise TypeError(
                    f"expected a JSON object, got {type(payload).__name__}"
                )
            resolved_sql = payload["sql"]
            if not isinstance(resolved_sql, str):
                raise TypeError(
                    f"\"sql\" must be a string, got {type(resolved_sql).__name__}"
                )
            # A "register_bq" mapping in the stdin payload replaces any
            # --register-bq values given on the command line.
            stdin_bq = payload.get("register_bq", {})
            if stdin_bq and isinstance(stdin_bq, dict):
                register_bq = [f"{k}={v}" for k, v in stdin_bq.items()]
        except (json.JSONDecodeError, KeyError, TypeError) as exc:
            typer.echo(f"Error: failed to parse stdin JSON: {exc}", err=True)
            raise typer.Exit(1)
    elif sql_opt is not None:
        resolved_sql = sql_opt
    else:
        resolved_sql = sql

    # Hybrid takes precedence over --remote: registering BigQuery results as
    # views only makes sense against the local database.
    if register_bq:
        _query_hybrid(resolved_sql, fmt, limit, register_bq)
    elif remote:
        _query_remote(resolved_sql, fmt, limit)
    else:
        _query_local(resolved_sql, fmt, limit)
def _query_local(sql: str, fmt: str, limit: int):
    """Execute ``sql`` on the local DuckDB file and print up to ``limit`` rows.

    The database is looked up at ``<AGNES_LOCAL_DIR>/user/duckdb/analytics.duckdb``
    (``AGNES_LOCAL_DIR`` defaults to the current directory) and opened
    read-only. A missing file or any error while running or printing the
    query is reported on stderr and ends the command with exit status 1.
    """
    import duckdb

    base_dir = Path(os.environ.get("AGNES_LOCAL_DIR", "."))
    duckdb_file = base_dir / "user" / "duckdb" / "analytics.duckdb"
    if not duckdb_file.exists():
        typer.echo("Local DuckDB not found. Run: agnes pull", err=True)
        raise typer.Exit(1)

    connection = duckdb.connect(str(duckdb_file), read_only=True)
    try:
        fetched = connection.execute(sql).fetchmany(limit)
        description = connection.description
        if description:
            header = [column[0] for column in description]
        else:
            header = []
        _output(header, fetched, fmt)
    except Exception as err:
        typer.echo(f"Query error: {err}", err=True)
        # An unknown table name may belong to a `query_mode='remote'` table,
        # which has no local view by design; DuckDB's own "Did you mean ..."
        # suggestion is misleading in that case. Nothing is checked against
        # the remote registry here, so the hint is worded as a possibility
        # and stays harmless when the name was simply mistyped.
        missing = re.search(
            r"Table with name ([A-Za-z_][A-Za-z0-9_]*) does not exist", str(err)
        )
        if missing is not None:
            table_name = missing.group(1)
            hint = (
                f"Note: `{table_name}` might be a `query_mode='remote'` table. Local "
                "DuckDB only holds views for `local` and `materialized` tables — "
                "`remote` ones live on BigQuery and are not synced.\n"
                " - List all registered tables: agnes catalog\n"
                " - Inspect column schema: agnes schema <name>\n"
                " - Run a query against BigQuery: agnes query --remote \"<SQL>\""
            )
            typer.echo("", err=True)
            typer.echo(hint, err=True)
        raise typer.Exit(1)
    finally:
        connection.close()
def _query_remote(sql: str, fmt: str, limit: int):
    """POST ``sql`` and ``limit`` to the server's ``/api/query`` and print the rows.

    A non-200 response is rendered through the shared error renderer on
    stderr and ends the command with exit status 1. On success, a note is
    written to stderr when the response marks the result as truncated.
    """
    from cli.client import QUERY_TIMEOUT_S, api_post
    from cli.error_render import render_error

    request_body = {"sql": sql, "limit": limit}
    response = api_post("/api/query", json=request_body, timeout=QUERY_TIMEOUT_S)

    if response.status_code == 200:
        payload = response.json()
        _output(payload["columns"], payload["rows"], fmt)
        if payload.get("truncated"):
            typer.echo(f"(truncated at {limit} rows)", err=True)
        return

    # Prefer the decoded JSON body so the shared renderer can pretty-print
    # typed BQ errors (cross_project_forbidden, remote_scan_too_large,
    # bq_path_not_registered); fall back to the raw text when the body is
    # not JSON.
    try:
        error_body = response.json()
    except Exception:
        error_body = response.text
    typer.echo(render_error(response.status_code, error_body), err=True)
    raise typer.Exit(1)
def _query_hybrid(sql: str, fmt: str, limit: int, register_bq_specs: List[str]):
    """Register BigQuery results as DuckDB views, then run ``sql`` locally.

    Each entry of ``register_bq_specs`` has the form ``alias=BQ_SQL``; it is
    split on the first ``=`` and both halves are stripped before being handed
    to the engine. A missing local database, a malformed spec, or a
    ``RemoteQueryError`` from the engine is reported on stderr and ends the
    command with exit status 1.
    """
    import duckdb
    from src.remote_query import RemoteQueryEngine, RemoteQueryError, load_config

    def _abort(error, alias=None):
        # Render a RemoteQueryError through the shared renderer so typed
        # details (carried via RemoteQueryError.details) surface as a
        # multi-line block, then stop the command. Key order matches the
        # payload shape: kind, [alias], message, then the error's details.
        from cli.error_render import render_error

        detail = {"kind": error.error_type}
        if alias is not None:
            detail["alias"] = alias
        detail["message"] = str(error)
        detail.update(error.details or {})
        typer.echo(render_error(400, {"detail": detail}), err=True)
        raise typer.Exit(1)

    base_dir = Path(os.environ.get("AGNES_LOCAL_DIR", "."))
    duckdb_file = base_dir / "user" / "duckdb" / "analytics.duckdb"
    if not duckdb_file.exists():
        typer.echo("Local DuckDB not found. Run: agnes pull", err=True)
        raise typer.Exit(1)

    connection = duckdb.connect(str(duckdb_file), read_only=True)
    try:
        # Forward only the settings the engine is given here.
        forwarded = (
            "max_bq_registration_rows", "max_memory_mb", "max_result_rows", "timeout_seconds"
        )
        settings = {
            name: value
            for name, value in load_config().items()
            if name in forwarded
        }
        # The CLI --limit flag wins over the configured max_result_rows.
        settings["max_result_rows"] = limit
        engine = RemoteQueryEngine(connection, **settings)

        for spec in register_bq_specs:
            if "=" not in spec:
                typer.echo(
                    f"Error: --register-bq spec must be 'alias=BQ_SQL', got: {spec!r}",
                    err=True,
                )
                raise typer.Exit(1)
            raw_alias, raw_sql = spec.split("=", 1)
            alias = raw_alias.strip()
            bq_sql = raw_sql.strip()
            try:
                info = engine.register_bq(alias, bq_sql)
                typer.echo(
                    f"Registered BQ alias '{alias}': {info['rows']:,} rows, "
                    f"{info['memory_mb']:.1f} MiB",
                    err=True,
                )
            except RemoteQueryError as exc:
                _abort(exc, alias)

        try:
            outcome = engine.execute(sql)
        except RemoteQueryError as exc:
            _abort(exc)

        _output(outcome["columns"], outcome["rows"], fmt)
        if outcome.get("truncated"):
            typer.echo(f"(truncated at {outcome['row_count']} rows)", err=True)
    finally:
        connection.close()
def _output(columns: list, rows: list, fmt: str):
    """Print a result set to stdout in the requested format.

    Args:
        columns: Column names, in result order.
        rows: Row sequences whose values line up with ``columns``.
        fmt: ``"json"`` for an indented JSON array of objects, ``"csv"`` for
            CSV with a header row; any other value renders a rich table.

    ``None`` values become empty fields in the CSV and table formats. In JSON
    they stay ``null``, and values JSON cannot encode are passed through
    ``str``.
    """
    if fmt == "json":
        output = [dict(zip(columns, row)) for row in rows]
        typer.echo(json.dumps(output, indent=2, default=str))
    elif fmt == "csv":
        # Function-scope imports, matching how this module pulls in duckdb
        # and rich only where they are needed.
        import csv
        import io

        # Use the csv module rather than a bare ",".join so that fields
        # containing commas, double quotes or newlines are quoted/escaped and
        # the output stays parseable. "\n" keeps the line endings of plain
        # values identical to one echo per line. Note the csv module writes a
        # row holding a single empty field as `""` to tell it apart from an
        # empty row.
        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator="\n")
        writer.writerow(columns)
        writer.writerows(
            ["" if value is None else str(value) for value in row]
            for row in rows
        )
        # The buffer already ends with a newline.
        typer.echo(buffer.getvalue(), nl=False)
    else:
        # Table format using rich
        from rich.console import Console
        from rich.table import Table

        console = Console()
        table = Table()
        for col in columns:
            table.add_column(col)
        for row in rows:
            table.add_row(*(str(v) if v is not None else "" for v in row))
        console.print(table)