feat(cli): #160 shared structured error renderer for BQ-typed responses

The reporter (#160) saw `USER_PROJECT_DENIED` raw in the CLI because
all three CLI error-rendering paths flatten typed BqAccessError /
guardrail / RBAC dicts to a truncated single-line string, hiding the
structured `hint` field that explains how to fix the misconfig.

Fix: shared `cli/error_render.py:render_error(status_code, body)` that
recognizes the canonical typed shapes and pretty-prints them. Falls
back to truncated-and-flattened form for unrecognized bodies, so the
renderer never makes worse-than-status-quo output.

Recognized shapes:
- {detail: {kind: ..., hint?, billing_project?, data_project?}}
  — typed BqAccessError responses from /api/v2/scan, /sample, /schema,
  /api/query (when /api/query escalates a BQ failure)
- {detail: {reason: 'remote_scan_too_large', scan_bytes, limit_bytes,
  tables, suggestion}} — new /api/query cost-guardrail rejection
- {detail: {reason: 'bq_path_not_registered'/'bq_path_access_denied',
  path, hint?, registered_as?}} — new /api/query RBAC patch
- {detail: '...'} — string detail (legacy endpoints)

Wired through 3 CLI paths:
- cli/v2_client.py: V2ClientError.__str__ delegates to render_error;
  pre-truncation removed from V2ClientError.message (was hiding hints
  past 200 chars).
- cli/commands/query.py:_query_remote: parse JSON body, call renderer
  on error.
- cli/commands/query.py:_query_hybrid: catch RemoteQueryError, build
  synthetic `{detail: {kind: error_type, **details}}` payload, render.

tests/test_cli_query.py:test_remote_query_failure: assertion updated
from `"Query failed"` (no longer printed) to `HTTP 400` + `bad SQL`
(what the renderer surfaces for string detail).

Sample output for cross_project_forbidden:

  Error: cross_project_forbidden (HTTP 502)
    billing_project: (empty)
    data_project: prj-example-data-001
    message: USER_PROJECT_DENIED on bigquery.googleapis.com
    hint: Set data_source.bigquery.billing_project in
        /admin/server-config to a project where the SA has
        serviceusage.services.use, or grant the SA that role on the
        data project.

19 tests pass — 10 from T4a now GREEN + 3 prior cli_query tests still
green + 6 ancillary.
This commit is contained in:
ZdenekSrotyr 2026-05-03 19:41:27 +02:00
parent 77cdb65f76
commit 57482be263
5 changed files with 173 additions and 16 deletions

View file

@ -86,10 +86,19 @@ def _query_local(sql: str, fmt: str, limit: int):
def _query_remote(sql: str, fmt: str, limit: int):
"""Run query against server DuckDB via API."""
from cli.client import api_post
from cli.error_render import render_error
resp = api_post("/api/query", json={"sql": sql, "limit": limit})
if resp.status_code != 200:
typer.echo(f"Query failed: {resp.json().get('detail', resp.text)}", err=True)
# Parse JSON body if possible, fall back to text. The shared
# renderer pretty-prints typed BQ errors (cross_project_forbidden,
# remote_scan_too_large, bq_path_not_registered) instead of
# flattening the structured detail to a single truncated line.
try:
body = resp.json()
except Exception:
body = resp.text
typer.echo(render_error(resp.status_code, body), err=True)
raise typer.Exit(1)
data = resp.json()
@ -137,13 +146,29 @@ def _query_hybrid(sql: str, fmt: str, limit: int, register_bq_specs: List[str]):
err=True,
)
except RemoteQueryError as exc:
typer.echo(f"BQ registration failed for '{alias}': {exc}", err=True)
# Use the shared renderer so typed BqAccessError details
# (carried via RemoteQueryError.details) surface as a
# multi-line block with the operator-facing hint.
from cli.error_render import render_error
synthetic = {"detail": {
"kind": exc.error_type,
"alias": alias,
"message": str(exc),
**(exc.details or {}),
}}
typer.echo(render_error(400, synthetic), err=True)
raise typer.Exit(1)
try:
result = engine.execute(sql)
except RemoteQueryError as exc:
typer.echo(f"Query error: {exc}", err=True)
from cli.error_render import render_error
synthetic = {"detail": {
"kind": exc.error_type,
"message": str(exc),
**(exc.details or {}),
}}
typer.echo(render_error(400, synthetic), err=True)
raise typer.Exit(1)
_output(result["columns"], result["rows"], fmt)

111
cli/error_render.py Normal file
View file

@ -0,0 +1,111 @@
"""Shared CLI renderer for HTTP error responses.
Three CLI paths surface BigQuery / guardrail / RBAC typed errors today:
- ``da query --remote`` (POST /api/query)
- ``da query --register-bq`` (RemoteQueryEngine wrapping BqAccessError)
- ``da fetch`` / ``da schema`` etc. (cli.v2_client wrappers around v2 endpoints)
All three previously flattened the structured ``detail`` JSON to a
truncated single-line string, hiding the operator-facing hint that
explains how to fix ``USER_PROJECT_DENIED`` / cost-cap rejection /
unregistered ``bq.*`` paths. This module recognizes a few canonical
shapes and pretty-prints them; falls back to truncated form for
anything unrecognized so the renderer never makes a worse-than-
status-quo error message.
Closes #160 §4.7.
"""
from __future__ import annotations
import json
from textwrap import fill
from typing import Any
# Keys that hold long human-readable text — wrap separately so the line
# break is at a word boundary, not mid-key.
_WRAP_KEYS = ("hint", "suggestion")
# Keys to render first in the key/value block (when present); other keys
# follow in declaration order so a future server-side detail addition
# surfaces automatically without a renderer change.
_PRIORITY_KEYS = ("kind", "reason", "path", "registered_as",
"billing_project", "data_project", "scan_bytes",
"limit_bytes", "tables", "current", "limit",
"retry_after_seconds")
def render_error(status_code: int, body: Any) -> str:
"""Format an HTTP error body for stderr.
Recognized shapes (pretty-printed):
- ``{"detail": {"kind": str, ...}}`` typed BqAccessError
- ``{"detail": {"reason": str, ...}}`` guardrail / RBAC dicts
Anything else: fallback ``f"HTTP {status_code}: {str(body)[:500]}"``.
"""
detail = _detail_dict(body)
if detail is not None and ("kind" in detail or "reason" in detail):
return _format_dict(status_code, detail)
if isinstance(body, dict) and isinstance(body.get("detail"), str):
return f"HTTP {status_code}: {body['detail']}"
text = str(body) if not isinstance(body, str) else body
if len(text) > 500:
text = text[:497] + "..."
return f"HTTP {status_code}: {text}"
def _detail_dict(body: Any) -> dict | None:
"""Return ``body['detail']`` when it's a dict, else None."""
if isinstance(body, dict):
d = body.get("detail")
if isinstance(d, dict):
return d
return None
def _format_dict(status_code: int, detail: dict) -> str:
"""Multi-line render of a recognized typed-error dict."""
label = detail.get("kind") or detail.get("reason") or "error"
lines: list[str] = [f"Error: {label} (HTTP {status_code})"]
seen: set[str] = {"kind", "reason"} # already in the label line
# Priority keys first
for key in _PRIORITY_KEYS:
if key in seen:
continue
if key in detail and detail[key] not in (None, ""):
lines.append(_kv_line(key, detail[key]))
seen.add(key)
# Anything else not already shown and not a wrap key
for key, value in detail.items():
if key in seen or key in _WRAP_KEYS:
continue
if value not in (None, ""):
lines.append(_kv_line(key, value))
seen.add(key)
# Wrap keys last — they're the long human-readable explanation
for key in _WRAP_KEYS:
if key in detail and detail[key]:
wrapped = fill(
str(detail[key]),
width=80,
initial_indent=f" {key}: ",
subsequent_indent=" ",
)
lines.append(wrapped)
return "\n".join(lines)
def _kv_line(key: str, value: Any) -> str:
"""Format one `` key: value`` line. Lists join with comma; dicts
json-encode (rare but defensive)."""
if isinstance(value, list):
rendered = ", ".join(str(v) for v in value) if value else "(empty)"
elif isinstance(value, dict):
rendered = json.dumps(value, default=str)
elif value == "":
rendered = "(empty)"
else:
rendered = str(value)
return f" {key}: {rendered}"

View file

@ -1,7 +1,7 @@
"""HTTP client helpers for /api/v2/* endpoints (CLI side)."""
from __future__ import annotations
from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import Any
import io
@ -9,16 +9,24 @@ import httpx
import pyarrow as pa
from cli.config import get_server_url, get_token
from cli.error_render import render_error
@dataclass
class V2ClientError(Exception):
status_code: int
body: Any
# `message` retained for backwards compat with any existing caller
# that reads `.message`. Renderer is the canonical str path now.
message: str = ""
def __str__(self) -> str:
return f"HTTP {self.status_code}: {self.message or self.body}"
# Prefer the structured renderer — it pretty-prints typed BQ errors
# (cross_project_forbidden, remote_scan_too_large, etc.) instead
# of the historical truncate-and-flatten form. Falls back to
# truncated form for unrecognized bodies, so we never make output
# WORSE than the status-quo (#160 §4.7).
return render_error(self.status_code, self.body)
def _headers() -> dict:
@ -26,12 +34,20 @@ def _headers() -> dict:
return {"Authorization": f"Bearer {token}"} if token else {}
def _parse_error_body(r: httpx.Response) -> Any:
if "json" in r.headers.get("content-type", ""):
try:
return r.json()
except Exception:
return r.text
return r.text
def api_get_json(path: str, **params) -> dict:
url = f"{get_server_url().rstrip('/')}{path}"
r = httpx.get(url, headers=_headers(), params=params or None, timeout=30)
if r.status_code >= 400:
body = r.json() if "json" in r.headers.get("content-type", "") else r.text
raise V2ClientError(status_code=r.status_code, body=body, message=str(body)[:200])
raise V2ClientError(status_code=r.status_code, body=_parse_error_body(r))
return r.json()
@ -39,8 +55,7 @@ def api_post_json(path: str, payload: dict) -> dict:
url = f"{get_server_url().rstrip('/')}{path}"
r = httpx.post(url, json=payload, headers=_headers(), timeout=120)
if r.status_code >= 400:
body = r.json() if "json" in r.headers.get("content-type", "") else r.text
raise V2ClientError(status_code=r.status_code, body=body, message=str(body)[:200])
raise V2ClientError(status_code=r.status_code, body=_parse_error_body(r))
return r.json()
@ -49,7 +64,6 @@ def api_post_arrow(path: str, payload: dict) -> pa.Table:
url = f"{get_server_url().rstrip('/')}{path}"
r = httpx.post(url, json=payload, headers=_headers(), timeout=600)
if r.status_code >= 400:
body = r.json() if "json" in r.headers.get("content-type", "") else r.text
raise V2ClientError(status_code=r.status_code, body=body, message=str(body)[:200])
raise V2ClientError(status_code=r.status_code, body=_parse_error_body(r))
reader = pa.ipc.open_stream(io.BytesIO(r.content))
return reader.read_all()

View file

@ -37,11 +37,15 @@ class TestRemoteQuery:
assert result.exit_code == 0
def test_remote_query_failure(self):
"""--remote prints error message on API failure."""
"""--remote prints error message on API failure (#160 §4.7: shared
renderer surfaces the detail; the prior `Query failed: ...` prefix
was dropped in favor of HTTP-status + structured detail)."""
with patch("cli.client.api_post", return_value=_resp(400, {"detail": "bad SQL"})):
result = runner.invoke(app, ["query", "SELECT bad", "--remote"])
assert result.exit_code == 1
assert "Query failed" in result.output
# Renderer formats string-detail as `HTTP 400: bad SQL`
assert "HTTP 400" in result.output
assert "bad SQL" in result.output
def test_remote_query_truncated(self):
"""Truncated result shows warning."""

View file

@ -25,12 +25,15 @@ def test_blocked_keyword_carries_keyword_in_details():
try:
engine.execute("DROP TABLE foo")
except RemoteQueryError as exc:
assert exc.error_type == "blocked_keyword", exc.error_type
# 'drop' is the keyword that triggers the block.
# Blocked-keyword raise lives at src/remote_query.py:134-138 with
# error_type="query_error" (not "blocked_keyword" — that's the
# keyword name in details, not the type).
assert exc.error_type == "query_error", exc.error_type
assert exc.details, "blocked_keyword raise must populate details"
assert "blocked_keyword" in exc.details
assert exc.details["blocked_keyword"] == "drop "
else:
raise AssertionError("expected blocked_keyword RemoteQueryError")
raise AssertionError("expected RemoteQueryError")
def test_query_must_be_select_carries_no_unnecessary_details():