# agnes-the-ai-analyst/tests/test_v2_schema.py

# tests/test_v2_schema.py
import importlib
from unittest.mock import patch, MagicMock
import pytest


@pytest.fixture
def reload_db(tmp_path, monkeypatch):
    """Yield the ``src.db`` module, reloaded with ``DATA_DIR`` set to ``tmp_path``.

    The environment variable is set before the reload; presumably ``src.db``
    reads ``DATA_DIR`` when it is (re)imported, so the reload makes it pick up
    the per-test directory -- confirm against ``src.db``.
    """
    data_dir = str(tmp_path)
    monkeypatch.setenv("DATA_DIR", data_dir)
    db = importlib.import_module("src.db")
    importlib.reload(db)
    yield db


def _seed_bq_table(conn, *, is_public=True):
    """Register the ``bq_view`` BigQuery table in the registry behind ``conn``.

    ``is_public`` is forwarded unchanged to the registry entry, letting a test
    choose whether the table is registered as public.
    """
    from src.repositories.table_registry import TableRegistryRepository

    # The same identifier serves as id, display name and source table.
    table_name = "bq_view"
    registry = TableRegistryRepository(conn)
    registry.register(
        id=table_name,
        name=table_name,
        source_type="bigquery",
        bucket="ds",
        source_table=table_name,
        query_mode="remote",
        is_public=is_public,
    )


class TestSchemaEndpoint:
    """Tests for ``app.api.v2_schema.build_schema``.

    Tests that seed a BigQuery table stub ``_fetch_bq_schema`` and
    ``_fetch_bq_table_options`` with ``monkeypatch`` so no real BigQuery call
    is made. Each test opens the system DB through the ``reload_db`` fixture
    and closes the connection in a ``finally`` block.
    """

    def test_bq_table_returns_columns_and_dialect_hints(self, reload_db, monkeypatch):
        """A registered BigQuery table yields columns, flavor, hints and partitioning."""
        from app.api import v2_schema
        # Stub the BQ schema fetch to avoid hitting real BQ
        monkeypatch.setattr(
            v2_schema, "_fetch_bq_schema",
            lambda project, dataset, table: [
                {"name": "event_date", "type": "DATE", "nullable": False, "description": ""},
                {"name": "country_code", "type": "STRING", "nullable": True, "description": ""},
            ],
        )
        monkeypatch.setattr(v2_schema, "_fetch_bq_table_options", lambda *a: {"partition_by": "event_date", "clustered_by": []})

        conn = reload_db.get_system_db()
        try:
            _seed_bq_table(conn)
            user = {"role": "admin", "email": "a@x.com"}
            data = v2_schema.build_schema(conn, user, "bq_view", project_id="my-proj")
        finally:
            conn.close()
        assert data["table_id"] == "bq_view"
        assert data["sql_flavor"] == "bigquery"
        assert {c["name"] for c in data["columns"]} == {"event_date", "country_code"}
        assert "where_dialect_hints" in data
        assert data["partition_by"] == "event_date"

    def test_unknown_table_raises_404(self, reload_db):
        """Requesting a table id that was never registered raises ``NotFound``."""
        from app.api.v2_schema import build_schema, NotFound
        conn = reload_db.get_system_db()
        try:
            user = {"role": "admin", "email": "a@x.com"}
            with pytest.raises(NotFound):
                build_schema(conn, user, "missing", project_id="my-proj")
        finally:
            conn.close()

    def test_rbac_check_runs_before_cache(self, reload_db, monkeypatch):
        """Regression: cache lookup used to happen before the RBAC check, and the
        cache key had no user component — so an unauthorized user could read
        cached schema fetched by an authorized one. The fix moves RBAC ahead."""
        from app.api import v2_schema
        monkeypatch.setattr(
            v2_schema, "_fetch_bq_schema",
            lambda *a, **kw: [{"name": "x", "type": "STRING", "nullable": True, "description": ""}],
        )
        monkeypatch.setattr(v2_schema, "_fetch_bq_table_options", lambda *a: {})
        # Stub can_access_table to deny non-admins. The stub is role-aware so
        # the admin's cache-warming call below succeeds whether or not
        # build_schema consults can_access_table for admin users.
        monkeypatch.setattr(
            v2_schema, "can_access_table",
            lambda user, tid, conn: user.get("role") == "admin",
        )

        conn = reload_db.get_system_db()
        try:
            # Register the table NOT public so RBAC has to gate it.
            _seed_bq_table(conn, is_public=False)
            # Admin warms the cache.
            admin = {"role": "admin", "email": "admin@x.com"}
            v2_schema.build_schema(conn, admin, "bq_view", project_id="p")
            # Non-admin must hit RBAC denial — cache must NOT short-circuit.
            other = {"role": "viewer", "email": "viewer@x.com"}
            with pytest.raises(PermissionError):
                v2_schema.build_schema(conn, other, "bq_view", project_id="p")
        finally:
            conn.close()