Replaces the BigQuery wrap-view pattern with a discovery + scoped-fetch toolkit driven by the analyst's Claude session. Adds /api/v2/{catalog,schema,sample,scan,scan/estimate}, da catalog/schema/describe/fetch/snapshot/disk-info CLI commands, sqlglot-backed WHERE validator, process-local quota tracker, agent rails skill (cli/skills/agnes-data-querying.md). BREAKING: BQ wrap views off by default — set data_source.bigquery.legacy_wrap_views=true for one cycle. Backward-compat field_validator on primary_key. Catalog cache now matches documented 300s TTL with RBAC fresh per request. Cuts release v0.14.0.
88 lines
3.6 KiB
Python
88 lines
3.6 KiB
Python
# tests/test_v2_schema.py
|
|
import importlib
|
|
from unittest.mock import patch, MagicMock
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture
def reload_db(tmp_path, monkeypatch):
    """Yield ``src.db`` after reloading it with ``DATA_DIR`` set to ``tmp_path``.

    The environment variable is set through ``monkeypatch`` before the module
    is reloaded.
    NOTE(review): presumably ``src.db`` reads ``DATA_DIR`` at import time,
    which is why a reload is needed — confirm against ``src/db.py``.
    """
    data_dir = str(tmp_path)
    monkeypatch.setenv("DATA_DIR", data_dir)

    from src import db as database

    importlib.reload(database)
    yield database
|
|
|
|
|
|
def _seed_bq_table(conn, *, is_public=True):
    """Register the ``bq_view`` BigQuery table through ``TableRegistryRepository``.

    Args:
        conn: Connection handed to ``TableRegistryRepository``.
        is_public: Forwarded unchanged as the ``is_public`` registration field.
    """
    from src.repositories.table_registry import TableRegistryRepository

    registration = {
        "id": "bq_view",
        "name": "bq_view",
        "source_type": "bigquery",
        "bucket": "ds",
        "source_table": "bq_view",
        "query_mode": "remote",
        "is_public": is_public,
    }
    registry = TableRegistryRepository(conn)
    registry.register(**registration)
|
|
|
|
|
|
class TestSchemaEndpoint:
    """Tests for ``app.api.v2_schema.build_schema`` with BigQuery calls stubbed."""

    def test_bq_table_returns_columns_and_dialect_hints(self, reload_db, monkeypatch):
        """A registered BigQuery table yields its columns, flavor and partition info."""
        from app.api import v2_schema

        # Replace both BigQuery lookups so the test never talks to real BQ.
        def fake_schema(project, dataset, table):
            return [
                {"name": "event_date", "type": "DATE", "nullable": False, "description": ""},
                {"name": "country_code", "type": "STRING", "nullable": True, "description": ""},
            ]

        def fake_table_options(*a):
            return {"partition_by": "event_date", "clustered_by": []}

        monkeypatch.setattr(v2_schema, "_fetch_bq_schema", fake_schema)
        monkeypatch.setattr(v2_schema, "_fetch_bq_table_options", fake_table_options)

        system_db = reload_db.get_system_db()
        try:
            _seed_bq_table(system_db)
            requester = {"role": "admin", "email": "a@x.com"}
            payload = v2_schema.build_schema(
                system_db, requester, "bq_view", project_id="my-proj"
            )
        finally:
            system_db.close()

        assert payload["table_id"] == "bq_view"
        assert payload["sql_flavor"] == "bigquery"
        column_names = {column["name"] for column in payload["columns"]}
        assert column_names == {"event_date", "country_code"}
        assert "where_dialect_hints" in payload
        assert payload["partition_by"] == "event_date"

    def test_unknown_table_raises_404(self, reload_db):
        """Asking for a table id that was never registered raises ``NotFound``."""
        from app.api.v2_schema import build_schema, NotFound

        system_db = reload_db.get_system_db()
        try:
            requester = {"role": "admin", "email": "a@x.com"}
            with pytest.raises(NotFound):
                build_schema(system_db, requester, "missing", project_id="my-proj")
        finally:
            system_db.close()

    def test_rbac_check_runs_before_cache(self, reload_db, monkeypatch):
        """Regression: the cache used to be consulted before the RBAC check.

        The cache key carried no user component, so a user without access
        could be served a schema that an authorized user had already
        fetched. The fix performs the RBAC check first.
        """
        from app.api import v2_schema

        def fake_schema(*a, **kw):
            return [{"name": "x", "type": "STRING", "nullable": True, "description": ""}]

        def fake_table_options(*a):
            return {}

        def deny_access(user, tid, conn):
            return False

        monkeypatch.setattr(v2_schema, "_fetch_bq_schema", fake_schema)
        monkeypatch.setattr(v2_schema, "_fetch_bq_table_options", fake_table_options)
        # Every check routed through can_access_table is refused.
        # NOTE(review): the stub ignores the user's role, so the admin warm-up
        # below only succeeds if build_schema lets admins through without
        # calling can_access_table — confirm against app/api/v2_schema.py.
        monkeypatch.setattr("app.api.v2_schema.can_access_table", deny_access)

        system_db = reload_db.get_system_db()
        try:
            # The table is registered as non-public so access has to be gated.
            _seed_bq_table(system_db, is_public=False)

            # First call, as admin, is meant to populate the schema cache.
            admin_user = {"role": "admin", "email": "admin@x.com"}
            v2_schema.build_schema(system_db, admin_user, "bq_view", project_id="p")

            # Second call, as a viewer, must be rejected rather than served
            # from whatever the first call cached.
            viewer_user = {"role": "viewer", "email": "viewer@x.com"}
            with pytest.raises(PermissionError):
                v2_schema.build_schema(system_db, viewer_user, "bq_view", project_id="p")
        finally:
            system_db.close()
|