"""GET /api/admin/discover-tables — BigQuery branch. Two-step shape: dataset list (no `dataset` query param) → table list (with `dataset=name`). The UI populates the dataset autocomplete first, then fetches tables only after the operator picks a dataset, avoiding the per-dataset `list_tables()` cost on projects with hundreds of datasets. """ import pytest from unittest.mock import MagicMock, patch from connectors.bigquery.access import BqAccess, BqProjects def _auth(token): return {"Authorization": f"Bearer {token}"} @pytest.fixture def bq_instance(monkeypatch): """Force `data_source.type='bigquery'` so the endpoint routes to the BQ branch.""" fake_cfg = { "data_source": { "type": "bigquery", "bigquery": {"project": "my-test-project", "location": "us"}, }, } monkeypatch.setattr( "app.instance_config.load_instance_config", lambda: fake_cfg, raising=False, ) from app.instance_config import reset_cache reset_cache() yield fake_cfg reset_cache() def _stub_bq_with_client(client_mock): """Build a BqAccess wired to return `client_mock` from .client(). The duckdb_session_factory is unused by the discover endpoint — supply a no-op.""" from contextlib import contextmanager @contextmanager def _noop(_p): yield None return BqAccess( BqProjects(billing="my-test-project", data="my-test-project"), client_factory=lambda _p: client_mock, duckdb_session_factory=_noop, ) def test_discover_returns_dataset_list(seeded_app, bq_instance, monkeypatch): """Without `dataset` param: list datasets in the configured project.""" client = MagicMock() ds_a = MagicMock() ds_a.dataset_id = "analytics" ds_a.project = "my-test-project" ds_b = MagicMock() ds_b.dataset_id = "raw" ds_b.project = "my-test-project" client.list_datasets.return_value = [ds_a, ds_b] monkeypatch.setattr( "connectors.bigquery.access.get_bq_access", lambda: _stub_bq_with_client(client), ) c = seeded_app["client"] token = seeded_app["admin_token"] r = c.get("/api/admin/discover-tables", headers=_auth(token)) assert r.status_code == 200, r.json() body = r.json() assert body["source"] == "bigquery" assert body["count"] == 2 # Sorted alphabetically by dataset_id. assert [d["dataset_id"] for d in body["datasets"]] == ["analytics", "raw"] assert body["datasets"][0]["full_id"] == "my-test-project.analytics" def test_discover_returns_table_list_for_dataset(seeded_app, bq_instance, monkeypatch): """With `?dataset=analytics`: list tables + views in that dataset.""" client = MagicMock() t_orders = MagicMock() t_orders.table_id = "orders" t_orders.table_type = "TABLE" t_orders.project = "my-test-project" t_orders.dataset_id = "analytics" t_view = MagicMock() t_view.table_id = "orders_active" t_view.table_type = "VIEW" t_view.project = "my-test-project" t_view.dataset_id = "analytics" client.list_tables.return_value = [t_view, t_orders] # unsorted monkeypatch.setattr( "connectors.bigquery.access.get_bq_access", lambda: _stub_bq_with_client(client), ) c = seeded_app["client"] token = seeded_app["admin_token"] r = c.get( "/api/admin/discover-tables?dataset=analytics", headers=_auth(token), ) assert r.status_code == 200, r.json() body = r.json() assert body["source"] == "bigquery" assert body["dataset"] == "analytics" assert body["count"] == 2 # Sorted by table_id. assert [t["table_id"] for t in body["tables"]] == ["orders", "orders_active"] by_id = {t["table_id"]: t for t in body["tables"]} assert by_id["orders"]["table_type"] == "TABLE" assert by_id["orders_active"]["table_type"] == "VIEW" # Verify dataset filter was passed through. client.list_tables.assert_called_once_with("analytics") def test_discover_keboola_branch_unchanged(seeded_app, monkeypatch): """Negative — when source_type is keboola, BQ logic isn't reached. Skipped when the Keboola SDK (`kbcstorage`) is not installed: CI runners don't ship it because the dev container only needs it for instances that actually configure source_type=keboola, and the route's lazy import would fail before the test stub gets a chance to fire. The branch-unchanged contract is tested separately by the Keboola integration suite when the package is present. """ pytest.importorskip("kbcstorage") fake_cfg = {"data_source": {"type": "keboola", "keboola": {}}} monkeypatch.setattr( "app.instance_config.load_instance_config", lambda: fake_cfg, raising=False, ) from app.instance_config import reset_cache reset_cache() # Stub the Keboola client so the test doesn't reach the network. fake_client = MagicMock() fake_client.discover_all_tables.return_value = [{"id": "in.c-foo.bar"}] monkeypatch.setattr( "connectors.keboola.client.KeboolaClient", lambda *a, **kw: fake_client, ) c = seeded_app["client"] token = seeded_app["admin_token"] try: r = c.get("/api/admin/discover-tables", headers=_auth(token)) assert r.status_code == 200, r.json() body = r.json() assert body["source"] == "keboola" assert body["count"] == 1 finally: reset_cache() def test_discover_bq_not_configured_returns_500(seeded_app, monkeypatch): """When data_source.bigquery.project is missing, BqAccess returns its not_configured sentinel — endpoint surfaces the structured error.""" fake_cfg = { "data_source": { "type": "bigquery", "bigquery": {}, # no project }, } monkeypatch.setattr( "app.instance_config.load_instance_config", lambda: fake_cfg, raising=False, ) from app.instance_config import reset_cache reset_cache() c = seeded_app["client"] token = seeded_app["admin_token"] try: r = c.get("/api/admin/discover-tables", headers=_auth(token)) # not_configured is mapped to 500 in BqAccessError.HTTP_STATUS. assert r.status_code == 500, r.json() detail = r.json().get("detail", {}) assert detail.get("kind") == "not_configured" finally: reset_cache() def test_admin_tables_html_wires_discover_buttons(seeded_app, bq_instance): """Structural — the BQ register modal in the rendered HTML now has the Discover (datasets) and List tables buttons + datalists wired to the endpoint.""" c = seeded_app["client"] token = seeded_app["admin_token"] r = c.get("/admin/tables", headers=_auth(token)) assert r.status_code == 200, r.text html = r.text assert "discoverBqDatasets" in html assert "discoverBqTables" in html assert 'id="bqDatasetList"' in html assert 'id="bqTableList"' in html assert "list=\"bqDatasetList\"" in html assert "list=\"bqTableList\"" in html