agnes-the-ai-analyst/tests/test_cli_admin_materialized.py
ZdenekSrotyr 36012e0833 fix(admin): register-table real-world UX gaps for materialized BQ
Three items from operator feedback after running the actual flow:

(1) Help docstring lied: "--bucket / --source-table ignored" for
materialized rows. Reality: --bucket is load-bearing because
`agnes schema <name>` builds the BQ identifier as
`bq.<bucket>.<source_table>`. An empty bucket registered the row but
broke schema/describe with HTTP 400 "unsafe BQ identifier in
registry". Fix: docstring rewritten to reflect reality, plus
client-side validation rejects materialized + empty bucket with a
clear error pointing at the right knob.

(2) Post-register UX cliff: `agnes pull` after register-table reports
"Updated 0 tables (1 total)" because registration adds a registry
row but does NOT trigger a parquet build. Operators routinely
assume something's broken when they need to run
`agnes setup first-sync` to kick off the materialization. Hint
emitted on success now points at first-sync.

(3) RBAC gotcha: `agnes catalog` is RBAC-filtered via
`resource_grants`, so non-admin users don't see freshly-registered
rows until a grant is created. Hint emitted on success now points at
`agnes admin grant create <group> table <name>`.

Tests: 8/8 in test_cli_admin_materialized.py, including two new
regression tests for the validation + the hint output.
2026-05-04 23:06:17 +02:00

227 lines
7.6 KiB
Python

"""`agnes admin register-table --query-mode materialized --query @file.sql`
sends source_query in the payload; existing local/remote paths still work
unchanged."""
from typer.testing import CliRunner
from unittest.mock import MagicMock
from cli.main import app
def _fake_resp(status_code, body=None):
resp = MagicMock()
resp.status_code = status_code
resp.json = lambda: body or {"id": "x", "name": "x", "status": "registered"}
return resp
def test_register_materialized_with_inline_query(monkeypatch):
captured = {}
def fake_post(path, json):
captured["path"] = path
captured["json"] = json
return _fake_resp(201)
monkeypatch.setattr("cli.commands.admin.api_post", fake_post)
runner = CliRunner()
result = runner.invoke(app, [
"admin", "register-table", "orders_90d",
"--source-type", "bigquery",
"--query-mode", "materialized",
"--bucket", "fin",
"--query", "SELECT date FROM `prj.ds.orders`",
"--sync-schedule", "every 6h",
])
assert result.exit_code == 0, result.stdout
assert captured["path"] == "/api/admin/register-table"
assert captured["json"]["query_mode"] == "materialized"
assert captured["json"]["source_query"] == "SELECT date FROM `prj.ds.orders`"
assert captured["json"]["sync_schedule"] == "every 6h"
def test_register_materialized_reads_query_from_file(tmp_path, monkeypatch):
sql_file = tmp_path / "orders.sql"
sql_file.write_text(
"SELECT date, SUM(revenue) FROM `prj.ds.orders` GROUP BY 1\n"
)
captured = {}
def fake_post(path, json):
captured["json"] = json
return _fake_resp(201)
monkeypatch.setattr("cli.commands.admin.api_post", fake_post)
runner = CliRunner()
result = runner.invoke(app, [
"admin", "register-table", "orders_90d",
"--source-type", "bigquery",
"--query-mode", "materialized",
"--bucket", "fin",
"--query", f"@{sql_file}",
"--sync-schedule", "daily 03:00",
])
assert result.exit_code == 0, result.stdout
assert "SELECT date, SUM(revenue)" in captured["json"]["source_query"]
assert not captured["json"]["source_query"].endswith("\n")
def test_register_materialized_without_query_fails(monkeypatch):
"""--query-mode materialized without --query is a client-side error,
no API call made."""
called = {"count": 0}
def fake_post(*args, **kwargs):
called["count"] += 1
return _fake_resp(201)
monkeypatch.setattr("cli.commands.admin.api_post", fake_post)
runner = CliRunner()
result = runner.invoke(app, [
"admin", "register-table", "orders_90d",
"--source-type", "bigquery",
"--query-mode", "materialized",
])
assert result.exit_code != 0
assert called["count"] == 0
combined = result.stdout + (result.stderr or "")
assert "--query" in combined
def test_register_local_mode_does_not_send_source_query(monkeypatch):
"""Default local mode shouldn't send source_query — server-side
validator forbids it on local."""
captured = {}
def fake_post(path, json):
captured["json"] = json
return _fake_resp(201)
monkeypatch.setattr("cli.commands.admin.api_post", fake_post)
runner = CliRunner()
result = runner.invoke(app, [
"admin", "register-table", "kbc_orders",
"--source-type", "keboola",
"--bucket", "in.c-crm",
])
assert result.exit_code == 0
assert "source_query" not in captured["json"]
assert "sync_schedule" not in captured["json"]
def test_register_query_at_path_missing_file_fails(monkeypatch):
"""@file.sql where the file doesn't exist surfaces a clear error."""
monkeypatch.setattr(
"cli.commands.admin.api_post", lambda *a, **kw: _fake_resp(201),
)
runner = CliRunner()
result = runner.invoke(app, [
"admin", "register-table", "x",
"--source-type", "bigquery",
"--query-mode", "materialized",
"--query", "@/tmp/definitely-does-not-exist-9b4f7e2c.sql",
])
assert result.exit_code != 0
def test_register_remote_path_unchanged(monkeypatch):
"""The pre-existing --bucket / --source-table / --query-mode remote
flow still works without --query."""
captured = {}
def fake_post(path, json):
captured["json"] = json
return _fake_resp(200)
monkeypatch.setattr("cli.commands.admin.api_post", fake_post)
runner = CliRunner()
result = runner.invoke(app, [
"admin", "register-table", "live_orders",
"--source-type", "bigquery",
"--bucket", "analytics",
"--source-table", "orders",
"--query-mode", "remote",
])
assert result.exit_code == 0
assert captured["json"]["query_mode"] == "remote"
assert "source_query" not in captured["json"]
assert captured["json"]["bucket"] == "analytics"
assert captured["json"]["source_table"] == "orders"
def test_register_materialized_without_bucket_fails_with_clear_error(monkeypatch):
"""`--query-mode materialized` without `--bucket` is a client-side
error. Pre-fix the CLI sent `bucket=""` to the server; registration
succeeded but `agnes schema <name>` later 400ed with "unsafe BQ
identifier in registry" because the schema endpoint built
`bq.\"\".\"<src>\"` from the empty bucket. Catching this at register
time gives operators a clear pointer at the right knob instead of
accept-then-fail-later UX."""
called = {"count": 0}
def fake_post(*args, **kwargs):
called["count"] += 1
return _fake_resp(201)
monkeypatch.setattr("cli.commands.admin.api_post", fake_post)
runner = CliRunner()
result = runner.invoke(app, [
"admin", "register-table", "category_summary",
"--source-type", "bigquery",
"--query-mode", "materialized",
"--query", "SELECT 1",
# No --bucket on purpose.
])
assert result.exit_code != 0
# API never called — fail fast on client side.
assert called["count"] == 0
combined = result.stdout + (result.stderr or "")
assert "--bucket" in combined
# The error must explain WHY it's required, not just say "missing".
assert "schema" in combined.lower() or "identifier" in combined.lower()
def test_register_table_emits_first_sync_and_grant_hints(monkeypatch):
"""After a successful register-table for a materialized row, the CLI
output must point operators at:
(a) `agnes setup first-sync` — registration adds a registry row but
does NOT trigger a parquet build, and `agnes pull` then reports
"Updated 0 tables (1 total)" until the next scheduler tick.
(b) `agnes admin grant create <group> table <id>` — `agnes catalog`
is RBAC-filtered, so non-admin users won't see the new row
until a grant is created.
Without these hints operators bounce between symptoms and assume
something's broken when it's just unstated post-register UX."""
monkeypatch.setattr(
"cli.commands.admin.api_post",
lambda *a, **kw: _fake_resp(201),
)
runner = CliRunner()
result = runner.invoke(app, [
"admin", "register-table", "category_summary",
"--source-type", "bigquery",
"--query-mode", "materialized",
"--bucket", "analytics",
"--query", "SELECT category, SUM(rev) FROM `prj.ds.tx` GROUP BY 1",
])
assert result.exit_code == 0, result.stdout
out = result.stdout
assert "agnes setup first-sync" in out
assert "agnes admin grant create" in out
# The grant hint should mention the registered name so operators can
# copy-paste the next command verbatim.
assert "category_summary" in out