Replaces the BigQuery wrap-view pattern with a discovery + scoped-fetch toolkit driven by the analyst's Claude session. Adds /api/v2/{catalog,schema,sample,scan,scan/estimate}, da catalog/schema/describe/fetch/snapshot/disk-info CLI commands, sqlglot-backed WHERE validator, process-local quota tracker, agent rails skill (cli/skills/agnes-data-querying.md). BREAKING: BQ wrap views off by default — set data_source.bigquery.legacy_wrap_views=true for one cycle. Backward-compat field_validator on primary_key. Catalog cache now matches documented 300s TTL with RBAC fresh per request. Cuts release v0.14.0.
115 lines
4.3 KiB
Python
115 lines
4.3 KiB
Python
import importlib
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture
def reload_db(tmp_path, monkeypatch):
    """Yield the ``src.db`` module, reloaded after ``DATA_DIR`` is redirected.

    ``DATA_DIR`` is pointed at pytest's per-test ``tmp_path`` *before* the
    reload, so whatever ``src.db`` evaluates at import time is re-run with
    the temporary directory in the environment.
    NOTE(review): presumably ``src.db`` derives its storage paths from
    ``DATA_DIR`` at import time -- confirm against that module.
    """
    monkeypatch.setenv("DATA_DIR", str(tmp_path))

    # Resolve the module first, then force re-execution of its top level.
    db_module = importlib.import_module("src.db")
    importlib.reload(db_module)

    yield db_module
|
|
|
|
|
|
def _seed_two_tables(conn):
    """Register the two public fixture tables used by the catalog tests.

    One row is a Keboola table queried locally (``orders``), the other a
    BigQuery table queried remotely (``bq_view``). For both rows ``id``,
    ``name`` and ``source_table`` share the same value.

    Args:
        conn: connection handed to ``TableRegistryRepository``.
    """
    from src.repositories.table_registry import TableRegistryRepository

    registry = TableRegistryRepository(conn)

    # (table id, source_type, bucket, query_mode) -- registered in this order.
    fixture_rows = (
        ("orders", "keboola", "sales", "local"),
        ("bq_view", "bigquery", "ds", "remote"),
    )
    for table_id, source_type, bucket, query_mode in fixture_rows:
        registry.register(
            id=table_id,
            name=table_id,
            source_type=source_type,
            bucket=bucket,
            source_table=table_id,
            query_mode=query_mode,
            is_public=True,
        )
|
|
|
|
|
|
class TestCatalogShape:
    """Checks on the payload ``build_catalog`` returns to an admin user."""

    @staticmethod
    def _entry(catalog, table_id):
        """Return the first entry of ``catalog["tables"]`` with the given id."""
        return next(
            entry for entry in catalog["tables"] if entry["id"] == table_id
        )

    def test_admin_sees_both_tables(self, reload_db):
        """Both seeded table ids appear in the admin's catalog."""
        from app.api.v2_catalog import build_catalog

        system_db = reload_db.get_system_db()
        try:
            _seed_two_tables(system_db)
            catalog = build_catalog(
                system_db, {"role": "admin", "email": "a@x.com"}
            )
            listed_ids = {entry["id"] for entry in catalog["tables"]}
            assert listed_ids >= {"orders", "bq_view"}
        finally:
            system_db.close()

    def test_local_table_has_duckdb_flavor(self, reload_db):
        """The locally-queried table is reported with the duckdb SQL flavor."""
        from app.api.v2_catalog import build_catalog

        system_db = reload_db.get_system_db()
        try:
            _seed_two_tables(system_db)
            catalog = build_catalog(
                system_db, {"role": "admin", "email": "a@x.com"}
            )
            orders = self._entry(catalog, "orders")
            assert orders["sql_flavor"] == "duckdb"
            assert orders["query_mode"] == "local"
        finally:
            system_db.close()

    def test_bq_table_has_bigquery_flavor(self, reload_db):
        """The remote table reports the bigquery flavor plus fetch hints."""
        from app.api.v2_catalog import build_catalog

        system_db = reload_db.get_system_db()
        try:
            _seed_two_tables(system_db)
            catalog = build_catalog(
                system_db, {"role": "admin", "email": "a@x.com"}
            )
            bq_view = self._entry(catalog, "bq_view")
            assert bq_view["sql_flavor"] == "bigquery"
            assert bq_view["query_mode"] == "remote"
            # Remote entries must also carry these two guidance keys.
            for required_key in ("where_examples", "fetch_via"):
                assert required_key in bq_view
        finally:
            system_db.close()
|
|
|
|
|
|
class TestCatalogCacheRbac:
    """Regression guard: cached catalog data must not freeze RBAC decisions.

    A per-user payload cache used to let revoked users keep seeing tables
    for up to the cache TTL. The intended design caches the underlying rows
    globally and enforces RBAC fresh on every request -- the same pattern as
    v2_schema.py / v2_sample.py.
    """

    def test_rbac_decision_is_fresh_per_call_not_cached(self, reload_db, monkeypatch):
        """A table denied on the second call disappears from the catalog."""
        from app.api import v2_catalog

        conn = reload_db.get_system_db()
        try:
            _seed_two_tables(conn)
            analyst = {"role": "analyst", "email": "u@x.com"}

            # Every RBAC invocation is logged here as (tag, table_id).
            calls = []

            def recording_rbac(tag, allows):
                """Build a ``can_access_table`` stand-in that logs each call."""

                def fake_can_access(_user, table_id, _conn):
                    calls.append((tag, table_id))
                    return allows(table_id)

                return fake_can_access

            # Round one: access is granted to everything, so both seeded
            # tables have to be listed.
            monkeypatch.setattr(
                v2_catalog,
                "can_access_table",
                recording_rbac("grant", lambda _table_id: True),
            )
            first_payload = v2_catalog.build_catalog(conn, analyst)
            ids1 = {entry["id"] for entry in first_payload["tables"]}
            assert ids1 >= {"orders", "bq_view"}

            # Round two: the raw rows are expected to come from the cache by
            # now, but access to `orders` is revoked. The revocation must be
            # honoured immediately.
            monkeypatch.setattr(
                v2_catalog,
                "can_access_table",
                recording_rbac("eval", lambda table_id: table_id != "orders"),
            )
            second_payload = v2_catalog.build_catalog(conn, analyst)
            ids2 = {entry["id"] for entry in second_payload["tables"]}
            assert "orders" not in ids2, \
                f"revoked table 'orders' still visible — cache leaked stale RBAC: {ids2}"
            assert "bq_view" in ids2

            # The second-round fake must actually have been consulted.
            assert any(tag == "eval" for tag, _ in calls), \
                "RBAC was not re-evaluated on cached call"
        finally:
            conn.close()
            # Drop cached rows so they do not outlive this test; skipped when
            # the cache object exposes no ``clear`` method.
            rows_cache = v2_catalog._table_rows_cache
            if hasattr(rows_cache, "clear"):
                rows_cache.clear()
|